Software /
code /
prosody
Comparison
tools/erlparse.lua @ 2945:475dee08b400
tools/erlparse: Optimisations aplenty for faster processing of large files
author | Matthew Wild <mwild1@gmail.com> |
---|---|
date | Thu, 25 Mar 2010 19:32:35 +0000 |
parent | 2923:b7049746bd29 |
child | 2946:ad306c5ae689 |
comparison
equal
deleted
inserted
replaced
2943:9236a7856688 | 2945:475dee08b400 |
---|---|
4 -- | 4 -- |
5 -- This project is MIT/X11 licensed. Please see the | 5 -- This project is MIT/X11 licensed. Please see the |
6 -- COPYING file in the source package for more information. | 6 -- COPYING file in the source package for more information. |
7 -- | 7 -- |
8 | 8 |
9 | 9 local string_byte, string_char = string.byte, string.char; |
| | 10 local t_concat, t_insert = table.concat, table.insert; |
| | 11 local type, tonumber, tostring = type, tonumber, tostring; |
10 | 12 |
11 local file = nil; | 13 local file = nil; |
12 local last = nil; | 14 local last = nil; |
13 local function read(expected) | 15 local function read(expected) |
14 local ch; | 16 local ch; |
25 local function peek() | 27 local function peek() |
26 if not last then last = read(); end | 28 if not last then last = read(); end |
27 return last; | 29 return last; |
28 end | 30 end |
29 | 31 |
30 local _A, _a, _Z, _z, _0, _9, __, _at, _space = string.byte("AaZz09@_ ", 1, 9); | 32 local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09@_ -", 1, 10); |
31 local function isLowerAlpha(ch) | 33 local function isLowerAlpha(ch) |
32 ch = string.byte(ch) or 0; | 34 ch = string_byte(ch) or 0; |
33 return (ch >= _a and ch <= _z); | 35 return (ch >= _a and ch <= _z); |
34 end | 36 end |
35 local function isNumeric(ch) | 37 local function isNumeric(ch) |
36 ch = string.byte(ch) or 0; | 38 ch = string_byte(ch) or 0; |
37 return (ch >= _0 and ch <= _9); | 39 return (ch >= _0 and ch <= _9) or ch == _minus; |
38 end | 40 end |
39 local function isAtom(ch) | 41 local function isAtom(ch) |
40 ch = string.byte(ch) or 0; | 42 ch = string_byte(ch) or 0; |
41 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at; | 43 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at; |
42 end | 44 end |
43 local function isSpace(ch) | 45 local function isSpace(ch) |
44 ch = string.byte(ch) or "x"; | 46 ch = string_byte(ch) or "x"; |
45 return ch <= _space; | 47 return ch <= _space; |
46 end | 48 end |
47 | 49 |
48 local escapes = {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"}; | 50 local escapes = {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"}; |
49 local function readString() | 51 local function readString() |
50 read("\""); -- skip quote | 52 read("\""); -- skip quote |
51 local slash = nil; | 53 local slash = nil; |
52 local str = ""; | 54 local str = {}; |
53 while true do | 55 while true do |
54 local ch = read(); | 56 local ch = read(); |
55 if slash then | 57 if slash then |
56 slash = slash..ch; | 58 slash = slash..ch; |
57 if not escapes[slash] then error("Unknown escape sequence: "..slash); end | 59 if not escapes[slash] then error("Unknown escape sequence: "..slash); end |
58 str = str..escapes[slash]; | 60 str[#str+1] = escapes[slash]; |
59 slash = nil; | 61 slash = nil; |
60 elseif ch == "\"" then | 62 elseif ch == "\"" then |
61 break; | 63 break; |
62 elseif ch == "\\" then | 64 elseif ch == "\\" then |
63 slash = ch; | 65 slash = ch; |
64 else | 66 else |
65 str = str..ch; | 67 str[#str+1] = ch; |
66 end | 68 end |
67 end | 69 end |
68 return str; | 70 return t_concat(str); |
69 end | 71 end |
70 local function readAtom1() | 72 local function readAtom1() |
71 local var = read(); | 73 local var = { read() }; |
72 while isAtom(peek()) do | 74 while isAtom(peek()) do |
73 var = var..read(); | 75 var[#var+1] = read(); |
74 end | 76 end |
75 return var; | 77 return t_concat(var); |
76 end | 78 end |
77 local function readAtom2() | 79 local function readAtom2() |
78 local str = read("'"); | 80 local str = { read("'") }; |
79 local slash = nil; | 81 local slash = nil; |
80 while true do | 82 while true do |
81 local ch = read(); | 83 local ch = read(); |
82 str = str..ch; | 84 str[#str+1] = ch; |
83 if ch == "'" and not slash then break; end | 85 if ch == "'" and not slash then break; end |
84 end | 86 end |
85 return str; | 87 return t_concat(str); |
86 end | 88 end |
87 local function readNumber() | 89 local function readNumber() |
88 local num = read(); | 90 local num = { read() }; |
89 while isNumeric(peek()) do | 91 while isNumeric(peek()) do |
90 num = num..read(); | 92 num[#num+1] = read(); |
91 end | 93 end |
92 return tonumber(num); | 94 return tonumber(t_concat(num)); |
93 end | 95 end |
94 local readItem = nil; | 96 local readItem = nil; |
95 local function readTuple() | 97 local function readTuple() |
96 local t = {}; | 98 local t = {}; |
97 local s = ""; -- string representation | 99 local s = {}; -- string representation |
98 read(); -- read {, or [, or < | 100 read(); -- read {, or [, or < |
99 while true do | 101 while true do |
100 local item = readItem(); | 102 local item = readItem(); |
101 if not item then break; end | 103 if not item then break; end |
102 if type(item) ~= type(0) or item > 255 then | 104 if type(item) ~= "number" or item > 255 then |
103 s = nil; | 105 s = nil; |
104 elseif s then | 106 elseif s then |
105 s = s..string.char(item); | 107 s[#s+1] = string_char(item); |
106 end | 108 end |
107 table.insert(t, item); | 109 t_insert(t, item); |
108 end | 110 end |
109 read(); -- read }, or ], or > | 111 read(); -- read }, or ], or > |
110 if s and s ~= "" then | 112 if s and #s > 0 then |
111 return s | 113 return t_concat(s) |
112 else | 114 else |
113 return t | 115 return t |
114 end; | 116 end; |
115 end | 117 end |
116 local function readBinary() | 118 local function readBinary() |
117 read("<"); -- read < | 119 read("<"); -- read < |
118 local t = readTuple(); | 120 local t = readTuple(); |
119 read(">") -- read > | 121 read(">") -- read > |
120 local ch = peek(); | 122 local ch = peek(); |
121 if type(t) == type("") then | 123 if type(t) == "string" then |
122 -- binary is a list of integers | 124 -- binary is a list of integers |
123 return t; | 125 return t; |
124 elseif type(t) == type({}) then | 126 elseif type(t) == "table" then |
125 if t[1] then | 127 if t[1] then |
126 -- binary contains string | 128 -- binary contains string |
127 return t[1]; | 129 return t[1]; |
128 else | 130 else |
129 -- binary is empty | 131 -- binary is empty |