Annotate

tools/erlparse.lua @ 9833:86fe021f16a6 0.11

spec: Add test case for #1322
author Kim Alvefur <zash@zash.se>
date Wed, 27 Feb 2019 10:27:17 +0100
parent 7819:ad709ee7d3d8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1523
841d61be198f Remove version number from copyright headers
Matthew Wild <mwild1@gmail.com>
parents: 896
diff changeset
1 -- Prosody IM
2923
b7049746bd29 Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents: 1783
diff changeset
2 -- Copyright (C) 2008-2010 Matthew Wild
b7049746bd29 Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents: 1783
diff changeset
3 -- Copyright (C) 2008-2010 Waqas Hussain
5776
bd0ff8ae98a8 Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents: 5080
diff changeset
4 --
758
b1885732e979 GPL->MIT!
Matthew Wild <mwild1@gmail.com>
parents: 615
diff changeset
5 -- This project is MIT/X11 licensed. Please see the
b1885732e979 GPL->MIT!
Matthew Wild <mwild1@gmail.com>
parents: 615
diff changeset
6 -- COPYING file in the source package for more information.
519
cccd610a0ef9 Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents: 489
diff changeset
7 --
cccd610a0ef9 Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents: 489
diff changeset
8
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
9 local string_byte, string_char = string.byte, string.char;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
10 local t_concat, t_insert = table.concat, table.insert;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
11 local type, tonumber, tostring = type, tonumber, tostring;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
12
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
13 local file = nil;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
14 local last = nil;
2946
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
15 local line = 1;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
16 local function read(expected)
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
17 local ch;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
18 if last then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
19 ch = last; last = nil;
2946
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
20 else
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
21 ch = file:read(1);
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
22 if ch == "\n" then line = line + 1; end
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
23 end
ad306c5ae689 tools/erlparse: Report the line number when showing a syntax error in the input file
Matthew Wild <mwild1@gmail.com>
parents: 2945
diff changeset
24 if expected and ch ~= expected then error("expected: "..expected.."; got: "..(ch or "nil").." on line "..line); end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
25 return ch;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
26 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
27 local function pushback(ch)
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
28 if last then error(); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
29 last = ch;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
30 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
31 local function peek()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
32 if not last then last = read(); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
33 return last;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
34 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
35
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
36 local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09@_ -", 1, 10);
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
37 local function isLowerAlpha(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
38 ch = string_byte(ch) or 0;
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
39 return (ch >= _a and ch <= _z);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
40 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
41 local function isNumeric(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
42 ch = string_byte(ch) or 0;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
43 return (ch >= _0 and ch <= _9) or ch == _minus;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
44 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
45 local function isAtom(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
46 ch = string_byte(ch) or 0;
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
47 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
48 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
49 local function isSpace(ch)
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
50 ch = string_byte(ch) or "x";
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
51 return ch <= _space;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
52 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
53
5079
2ab99e239d45 tools/erlparse: Fix erlang string escape sequences.
Waqas Hussain <waqas20@gmail.com>
parents: 2947
diff changeset
54 local escapes = {["\\b"]="\b", ["\\d"]="\127", ["\\e"]="\27", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]=" ", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"};
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
55 local function readString()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
56 read("\""); -- skip quote
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
57 local slash = nil;
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
58 local str = {};
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
59 while true do
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
60 local ch = read();
1783
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
61 if slash then
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
62 slash = slash..ch;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
63 if not escapes[slash] then error("Unknown escape sequence: "..slash); end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
64 str[#str+1] = escapes[slash];
1783
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
65 slash = nil;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
66 elseif ch == "\"" then
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
67 break;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
68 elseif ch == "\\" then
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
69 slash = ch;
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
70 else
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
71 str[#str+1] = ch;
1783
f79972ad8965 ejabberd2prosody: Fixed escape code processing when parsing strings.
Waqas Hussain <waqas20@gmail.com>
parents: 1572
diff changeset
72 end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
73 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
74 return t_concat(str);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
75 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
76 local function readAtom1()
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
77 local var = { read() };
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
78 while isAtom(peek()) do
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
79 var[#var+1] = read();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
80 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
81 return t_concat(var);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
82 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
83 local function readAtom2()
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
84 local str = { read("'") };
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
85 local slash = nil;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
86 while true do
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
87 local ch = read();
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
88 str[#str+1] = ch;
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
89 if ch == "'" and not slash then break; end
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
90 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
91 return t_concat(str);
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
92 end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
93 local function readNumber()
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
94 local num = { read() };
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
95 while isNumeric(peek()) do
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
96 num[#num+1] = read();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
97 end
5080
52767b1f057b tools/erlparse: Handle decimal point in numbers.
Waqas Hussain <waqas20@gmail.com>
parents: 5079
diff changeset
98 if peek() == "." then
52767b1f057b tools/erlparse: Handle decimal point in numbers.
Waqas Hussain <waqas20@gmail.com>
parents: 5079
diff changeset
99 num[#num+1] = read();
52767b1f057b tools/erlparse: Handle decimal point in numbers.
Waqas Hussain <waqas20@gmail.com>
parents: 5079
diff changeset
100 while isNumeric(peek()) do
52767b1f057b tools/erlparse: Handle decimal point in numbers.
Waqas Hussain <waqas20@gmail.com>
parents: 5079
diff changeset
101 num[#num+1] = read();
52767b1f057b tools/erlparse: Handle decimal point in numbers.
Waqas Hussain <waqas20@gmail.com>
parents: 5079
diff changeset
102 end
52767b1f057b tools/erlparse: Handle decimal point in numbers.
Waqas Hussain <waqas20@gmail.com>
parents: 5079
diff changeset
103 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
104 return tonumber(t_concat(num));
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
105 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
106 local readItem = nil;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
107 local function readTuple()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
108 local t = {};
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
109 local s = {}; -- string representation
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
110 read(); -- read {, or [, or <
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
111 while true do
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
112 local item = readItem();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
113 if not item then break; end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
114 if type(item) ~= "number" or item > 255 then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
115 s = nil;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
116 elseif s then
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
117 s[#s+1] = string_char(item);
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
118 end
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
119 t_insert(t, item);
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
120 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
121 read(); -- read }, or ], or >
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
122 if s and #s > 0 then
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
123 return t_concat(s)
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
124 else
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
125 return t
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
126 end;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
127 end
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
128 local function readBinary()
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
129 read("<"); -- read <
2947
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
130 -- Discard PIDs
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
131 if isNumeric(peek()) then
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
132 while peek() ~= ">" do read(); end
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
133 read(">");
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
134 return {};
ff7f6668b34f tools/erlparse: Discard erlang PIDs instead of choking on them as a syntax error
Matthew Wild <mwild1@gmail.com>
parents: 2946
diff changeset
135 end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
136 local t = readTuple();
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
137 read(">") -- read >
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
138 local ch = peek();
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
139 if type(t) == "string" then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
140 -- binary is a list of integers
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
141 return t;
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
142 elseif type(t) == "table" then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
143 if t[1] then
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
144 -- binary contains string
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
145 return t[1];
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
146 else
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
147 -- binary is empty
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
148 return "";
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
149 end;
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
150 else
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
151 error();
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
152 end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
153 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
154 readItem = function()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
155 local ch = peek();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
156 if ch == nil then return nil end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
157 if ch == "{" or ch == "[" then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
158 return readTuple();
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
159 elseif isLowerAlpha(ch) then
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
160 return readAtom1();
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
161 elseif ch == "'" then
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
162 return readAtom2();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
163 elseif isNumeric(ch) then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
164 return readNumber();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
165 elseif ch == "\"" then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
166 return readString();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
167 elseif ch == "<" then
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
168 return readBinary();
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
169 elseif isSpace(ch) or ch == "," or ch == "|" then
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
170 read();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
171 return readItem();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
172 else
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
173 --print("Unknown char: "..ch);
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
174 return nil;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
175 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
176 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
177 local function readChunk()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
178 local x = readItem();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
179 if x then read("."); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
180 return x;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
181 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
182 local function readFile(filename)
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
183 file = io.open(filename);
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
184 if not file then error("File not found: "..filename); os.exit(0); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
185 return function()
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
186 local x = readChunk();
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
187 if not x and peek() then error("Invalid char: "..peek()); end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
188 return x;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
189 end;
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
190 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
191
7819
ad709ee7d3d8 tools/erlparse: Remove use of deprecated module() function
Kim Alvefur <zash@zash.se>
parents: 5776
diff changeset
192 local _M = {};
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
193
7819
ad709ee7d3d8 tools/erlparse: Remove use of deprecated module() function
Kim Alvefur <zash@zash.se>
parents: 5776
diff changeset
194 function _M.parseFile(file)
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
195 return readFile(file);
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
196 end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
197
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
198 return _M;