Software /
code /
prosody
Annotate
tools/erlparse.lua @ 2945:475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
author | Matthew Wild <mwild1@gmail.com> |
---|---|
date | Thu, 25 Mar 2010 19:32:35 +0000 |
parent | 2923:b7049746bd29 |
child | 2946:ad306c5ae689 |
rev | line source |
---|---|
1523
841d61be198f
Remove version number from copyright headers
Matthew Wild <mwild1@gmail.com>
parents:
896
diff
changeset
|
1 -- Prosody IM |
2923
b7049746bd29
Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents:
1783
diff
changeset
|
2 -- Copyright (C) 2008-2010 Matthew Wild |
b7049746bd29
Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents:
1783
diff
changeset
|
3 -- Copyright (C) 2008-2010 Waqas Hussain |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
4 -- |
758 | 5 -- This project is MIT/X11 licensed. Please see the |
6 -- COPYING file in the source package for more information. | |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
7 -- |
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
8 |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
9 local string_byte, string_char = string.byte, string.char; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
10 local t_concat, t_insert = table.concat, table.insert; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
11 local type, tonumber, tostring = type, tonumber, tostring; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
12 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
13 local file = nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
14 local last = nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
15 local function read(expected) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
16 local ch; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
17 if last then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
18 ch = last; last = nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
19 else ch = file:read(1); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
20 if expected and ch ~= expected then error("expected: "..expected.."; got: "..(ch or "nil")); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
21 return ch; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
22 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
23 local function pushback(ch) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
24 if last then error(); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
25 last = ch; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
26 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
27 local function peek() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
28 if not last then last = read(); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
29 return last; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
30 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
31 |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
32 local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09@_ -", 1, 10); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
33 local function isLowerAlpha(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
34 ch = string_byte(ch) or 0; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
35 return (ch >= _a and ch <= _z); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
36 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
37 local function isNumeric(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
38 ch = string_byte(ch) or 0; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
39 return (ch >= _0 and ch <= _9) or ch == _minus; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
40 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
41 local function isAtom(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
42 ch = string_byte(ch) or 0; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
43 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
44 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
45 local function isSpace(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
46 ch = string_byte(ch) or "x"; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
47 return ch <= _space; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
48 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
49 |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
50 local escapes = {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"}; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
51 local function readString() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
52 read("\""); -- skip quote |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
53 local slash = nil; |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
54 local str = {}; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
55 while true do |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
56 local ch = read(); |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
57 if slash then |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
58 slash = slash..ch; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
59 if not escapes[slash] then error("Unknown escape sequence: "..slash); end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
60 str[#str+1] = escapes[slash]; |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
61 slash = nil; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
62 elseif ch == "\"" then |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
63 break; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
64 elseif ch == "\\" then |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
65 slash = ch; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
66 else |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
67 str[#str+1] = ch; |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
68 end |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
69 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
70 return t_concat(str); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
71 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
72 local function readAtom1() |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
73 local var = { read() }; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
74 while isAtom(peek()) do |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
75 var[#var+1] = read(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
76 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
77 return t_concat(var); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
78 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
79 local function readAtom2() |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
80 local str = { read("'") }; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
81 local slash = nil; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
82 while true do |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
83 local ch = read(); |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
84 str[#str+1] = ch; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
85 if ch == "'" and not slash then break; end |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
86 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
87 return t_concat(str); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
88 end |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
89 local function readNumber() |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
90 local num = { read() }; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
91 while isNumeric(peek()) do |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
92 num[#num+1] = read(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
93 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
94 return tonumber(t_concat(num)); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
95 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
96 local readItem = nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
97 local function readTuple() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
98 local t = {}; |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
99 local s = {}; -- string representation |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
100 read(); -- read {, or [, or < |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
101 while true do |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
102 local item = readItem(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
103 if not item then break; end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
104 if type(item) ~= "number" or item > 255 then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
105 s = nil; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
106 elseif s then |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
107 s[#s+1] = string_char(item); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
108 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
109 t_insert(t, item); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
110 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
111 read(); -- read }, or ], or > |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
112 if s and #s > 0 then |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
113 return t_concat(s) |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
114 else |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
115 return t |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
116 end; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
117 end |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
118 local function readBinary() |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
119 read("<"); -- read < |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
120 local t = readTuple(); |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
121 read(">") -- read > |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
122 local ch = peek(); |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
123 if type(t) == "string" then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
124 -- binary is a list of integers |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
125 return t; |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
126 elseif type(t) == "table" then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
127 if t[1] then |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
128 -- binary contains string |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
129 return t[1]; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
130 else |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
131 -- binary is empty |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
132 return ""; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
133 end; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
134 else |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
135 error(); |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
136 end |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
137 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
138 readItem = function() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
139 local ch = peek(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
140 if ch == nil then return nil end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
141 if ch == "{" or ch == "[" then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
142 return readTuple(); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
143 elseif isLowerAlpha(ch) then |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
144 return readAtom1(); |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
145 elseif ch == "'" then |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
146 return readAtom2(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
147 elseif isNumeric(ch) then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
148 return readNumber(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
149 elseif ch == "\"" then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
150 return readString(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
151 elseif ch == "<" then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
152 return readBinary(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
153 elseif isSpace(ch) or ch == "," or ch == "|" then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
154 read(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
155 return readItem(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
156 else |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
157 --print("Unknown char: "..ch); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
158 return nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
159 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
160 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
161 local function readChunk() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
162 local x = readItem(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
163 if x then read("."); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
164 return x; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
165 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
166 local function readFile(filename) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
167 file = io.open(filename); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
168 if not file then error("File not found: "..filename); os.exit(0); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
169 return function() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
170 local x = readChunk(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
171 if not x and peek() then error("Invalid char: "..peek()); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
172 return x; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
173 end; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
174 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
175 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
176 module "erlparse" |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
177 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
178 function parseFile(file) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
179 return readFile(file); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
180 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
181 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
182 return _M; |