Software /
code /
prosody
Annotate
tools/erlparse.lua @ 3927:1b57e83266f0
util.xmppstream: Allow stream_ns = "" for parsing streams with no xmlns
author | Matthew Wild <mwild1@gmail.com> |
---|---|
date | Tue, 21 Dec 2010 14:44:13 +0000 |
parent | 2947:ff7f6668b34f |
child | 5079:2ab99e239d45 |
rev | line source |
---|---|
1523
841d61be198f
Remove version number from copyright headers
Matthew Wild <mwild1@gmail.com>
parents:
896
diff
changeset
|
1 -- Prosody IM |
2923
b7049746bd29
Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents:
1783
diff
changeset
|
2 -- Copyright (C) 2008-2010 Matthew Wild |
b7049746bd29
Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents:
1783
diff
changeset
|
3 -- Copyright (C) 2008-2010 Waqas Hussain |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
4 -- |
758 | 5 -- This project is MIT/X11 licensed. Please see the |
6 -- COPYING file in the source package for more information. | |
519
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
7 -- |
cccd610a0ef9
Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents:
489
diff
changeset
|
8 |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
9 local string_byte, string_char = string.byte, string.char; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
10 local t_concat, t_insert = table.concat, table.insert; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
11 local type, tonumber, tostring = type, tonumber, tostring; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
12 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
13 local file = nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
14 local last = nil; |
2946
ad306c5ae689
tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents:
2945
diff
changeset
|
15 local line = 1; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
16 local function read(expected) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
17 local ch; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
18 if last then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
19 ch = last; last = nil; |
2946
ad306c5ae689
tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents:
2945
diff
changeset
|
20 else |
ad306c5ae689
tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents:
2945
diff
changeset
|
21 ch = file:read(1); |
ad306c5ae689
tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents:
2945
diff
changeset
|
22 if ch == "\n" then line = line + 1; end |
ad306c5ae689
tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents:
2945
diff
changeset
|
23 end |
ad306c5ae689
tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents:
2945
diff
changeset
|
24 if expected and ch ~= expected then error("expected: "..expected.."; got: "..(ch or "nil").." on line "..line); end |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
25 return ch; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
26 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
27 local function pushback(ch) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
28 if last then error(); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
29 last = ch; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
30 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
31 local function peek() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
32 if not last then last = read(); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
33 return last; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
34 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
35 |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
36 local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09@_ -", 1, 10); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
37 local function isLowerAlpha(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
38 ch = string_byte(ch) or 0; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
39 return (ch >= _a and ch <= _z); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
40 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
41 local function isNumeric(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
42 ch = string_byte(ch) or 0; |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
43 return (ch >= _0 and ch <= _9) or ch == _minus; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
44 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
45 local function isAtom(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
46 ch = string_byte(ch) or 0; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
47 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
48 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
49 local function isSpace(ch) |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
50 ch = string_byte(ch) or "x"; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
51 return ch <= _space; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
52 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
53 |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
54 local escapes = {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"}; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
55 local function readString() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
56 read("\""); -- skip quote |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
57 local slash = nil; |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
58 local str = {}; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
59 while true do |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
60 local ch = read(); |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
61 if slash then |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
62 slash = slash..ch; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
63 if not escapes[slash] then error("Unknown escape sequence: "..slash); end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
64 str[#str+1] = escapes[slash]; |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
65 slash = nil; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
66 elseif ch == "\"" then |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
67 break; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
68 elseif ch == "\\" then |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
69 slash = ch; |
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
70 else |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
71 str[#str+1] = ch; |
1783
f79972ad8965
ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents:
1572
diff
changeset
|
72 end |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
73 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
74 return t_concat(str); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
75 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
76 local function readAtom1() |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
77 local var = { read() }; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
78 while isAtom(peek()) do |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
79 var[#var+1] = read(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
80 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
81 return t_concat(var); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
82 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
83 local function readAtom2() |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
84 local str = { read("'") }; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
85 local slash = nil; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
86 while true do |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
87 local ch = read(); |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
88 str[#str+1] = ch; |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
89 if ch == "'" and not slash then break; end |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
90 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
91 return t_concat(str); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
92 end |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
93 local function readNumber() |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
94 local num = { read() }; |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
95 while isNumeric(peek()) do |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
96 num[#num+1] = read(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
97 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
98 return tonumber(t_concat(num)); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
99 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
100 local readItem = nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
101 local function readTuple() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
102 local t = {}; |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
103 local s = {}; -- string representation |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
104 read(); -- read {, or [, or < |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
105 while true do |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
106 local item = readItem(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
107 if not item then break; end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
108 if type(item) ~= "number" or item > 255 then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
109 s = nil; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
110 elseif s then |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
111 s[#s+1] = string_char(item); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
112 end |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
113 t_insert(t, item); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
114 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
115 read(); -- read }, or ], or > |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
116 if s and #s > 0 then |
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
117 return t_concat(s) |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
118 else |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
119 return t |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
120 end; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
121 end |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
122 local function readBinary() |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
123 read("<"); -- read < |
2947
ff7f6668b34f
tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents:
2946
diff
changeset
|
124 -- Discard PIDs |
ff7f6668b34f
tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents:
2946
diff
changeset
|
125 if isNumeric(peek()) then |
ff7f6668b34f
tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents:
2946
diff
changeset
|
126 while peek() ~= ">" do read(); end |
ff7f6668b34f
tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents:
2946
diff
changeset
|
127 read(">"); |
ff7f6668b34f
tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents:
2946
diff
changeset
|
128 return {}; |
ff7f6668b34f
tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents:
2946
diff
changeset
|
129 end |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
130 local t = readTuple(); |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
131 read(">") -- read > |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
132 local ch = peek(); |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
133 if type(t) == "string" then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
134 -- binary is a list of integers |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
135 return t; |
2945
475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents:
2923
diff
changeset
|
136 elseif type(t) == "table" then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
137 if t[1] then |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
138 -- binary contains string |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
139 return t[1]; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
140 else |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
141 -- binary is empty |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
142 return ""; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
143 end; |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
144 else |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
145 error(); |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
146 end |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
147 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
148 readItem = function() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
149 local ch = peek(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
150 if ch == nil then return nil end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
151 if ch == "{" or ch == "[" then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
152 return readTuple(); |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
153 elseif isLowerAlpha(ch) then |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
154 return readAtom1(); |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
155 elseif ch == "'" then |
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
156 return readAtom2(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
157 elseif isNumeric(ch) then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
158 return readNumber(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
159 elseif ch == "\"" then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
160 return readString(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
161 elseif ch == "<" then |
1572
1b87dfb76caa
ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents:
1567
diff
changeset
|
162 return readBinary(); |
1567
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
163 elseif isSpace(ch) or ch == "," or ch == "|" then |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
164 read(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
165 return readItem(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
166 else |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
167 --print("Unknown char: "..ch); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
168 return nil; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
169 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
170 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
171 local function readChunk() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
172 local x = readItem(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
173 if x then read("."); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
174 return x; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
175 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
176 local function readFile(filename) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
177 file = io.open(filename); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
178 if not file then error("File not found: "..filename); os.exit(0); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
179 return function() |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
180 local x = readChunk(); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
181 if not x and peek() then error("Invalid char: "..peek()); end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
182 return x; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
183 end; |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
184 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
185 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
186 module "erlparse" |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
187 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
188 function parseFile(file) |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
189 return readFile(file); |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
190 end |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
191 |
e052a3bdb8b5
ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents:
1523
diff
changeset
|
192 return _M; |