Comparison

tools/erlparse.lua @ 2945:475dee08b400

tools/erlparse: Optimisations aplenty for faster processing of large files
author Matthew Wild <mwild1@gmail.com>
date Thu, 25 Mar 2010 19:32:35 +0000
parent 2923:b7049746bd29
child 2946:ad306c5ae689
comparison
Legend: equal | deleted | inserted | replaced
Left column: 2943:9236a7856688 | Right column: 2945:475dee08b400
4 -- 4 --
5 -- This project is MIT/X11 licensed. Please see the 5 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information. 6 -- COPYING file in the source package for more information.
7 -- 7 --
8 8
9 9 local string_byte, string_char = string.byte, string.char;
10 local t_concat, t_insert = table.concat, table.insert;
11 local type, tonumber, tostring = type, tonumber, tostring;
10 12
11 local file = nil; 13 local file = nil;
12 local last = nil; 14 local last = nil;
13 local function read(expected) 15 local function read(expected)
14 local ch; 16 local ch;
25 local function peek() 27 local function peek()
26 if not last then last = read(); end 28 if not last then last = read(); end
27 return last; 29 return last;
28 end 30 end
29 31
30 local _A, _a, _Z, _z, _0, _9, __, _at, _space = string.byte("AaZz09@_ ", 1, 9); 32 local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09@_ -", 1, 10);
31 local function isLowerAlpha(ch) 33 local function isLowerAlpha(ch)
32 ch = string.byte(ch) or 0; 34 ch = string_byte(ch) or 0;
33 return (ch >= _a and ch <= _z); 35 return (ch >= _a and ch <= _z);
34 end 36 end
35 local function isNumeric(ch) 37 local function isNumeric(ch)
36 ch = string.byte(ch) or 0; 38 ch = string_byte(ch) or 0;
37 return (ch >= _0 and ch <= _9); 39 return (ch >= _0 and ch <= _9) or ch == _minus;
38 end 40 end
39 local function isAtom(ch) 41 local function isAtom(ch)
40 ch = string.byte(ch) or 0; 42 ch = string_byte(ch) or 0;
41 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at; 43 return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at;
42 end 44 end
43 local function isSpace(ch) 45 local function isSpace(ch)
44 ch = string.byte(ch) or "x"; 46 ch = string_byte(ch) or "x";
45 return ch <= _space; 47 return ch <= _space;
46 end 48 end
47 49
48 local escapes = {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"}; 50 local escapes = {["\\b"]="\b", ["\\d"]="\d", ["\\e"]="\e", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]="\s", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"};
49 local function readString() 51 local function readString()
50 read("\""); -- skip quote 52 read("\""); -- skip quote
51 local slash = nil; 53 local slash = nil;
52 local str = ""; 54 local str = {};
53 while true do 55 while true do
54 local ch = read(); 56 local ch = read();
55 if slash then 57 if slash then
56 slash = slash..ch; 58 slash = slash..ch;
57 if not escapes[slash] then error("Unknown escape sequence: "..slash); end 59 if not escapes[slash] then error("Unknown escape sequence: "..slash); end
58 str = str..escapes[slash]; 60 str[#str+1] = escapes[slash];
59 slash = nil; 61 slash = nil;
60 elseif ch == "\"" then 62 elseif ch == "\"" then
61 break; 63 break;
62 elseif ch == "\\" then 64 elseif ch == "\\" then
63 slash = ch; 65 slash = ch;
64 else 66 else
65 str = str..ch; 67 str[#str+1] = ch;
66 end 68 end
67 end 69 end
68 return str; 70 return t_concat(str);
69 end 71 end
70 local function readAtom1() 72 local function readAtom1()
71 local var = read(); 73 local var = { read() };
72 while isAtom(peek()) do 74 while isAtom(peek()) do
73 var = var..read(); 75 var[#var+1] = read();
74 end 76 end
75 return var; 77 return t_concat(var);
76 end 78 end
77 local function readAtom2() 79 local function readAtom2()
78 local str = read("'"); 80 local str = { read("'") };
79 local slash = nil; 81 local slash = nil;
80 while true do 82 while true do
81 local ch = read(); 83 local ch = read();
82 str = str..ch; 84 str[#str+1] = ch;
83 if ch == "'" and not slash then break; end 85 if ch == "'" and not slash then break; end
84 end 86 end
85 return str; 87 return t_concat(str);
86 end 88 end
87 local function readNumber() 89 local function readNumber()
88 local num = read(); 90 local num = { read() };
89 while isNumeric(peek()) do 91 while isNumeric(peek()) do
90 num = num..read(); 92 num[#num+1] = read();
91 end 93 end
92 return tonumber(num); 94 return tonumber(t_concat(num));
93 end 95 end
94 local readItem = nil; 96 local readItem = nil;
95 local function readTuple() 97 local function readTuple()
96 local t = {}; 98 local t = {};
97 local s = ""; -- string representation 99 local s = {}; -- string representation
98 read(); -- read {, or [, or < 100 read(); -- read {, or [, or <
99 while true do 101 while true do
100 local item = readItem(); 102 local item = readItem();
101 if not item then break; end 103 if not item then break; end
102 if type(item) ~= type(0) or item > 255 then 104 if type(item) ~= "number" or item > 255 then
103 s = nil; 105 s = nil;
104 elseif s then 106 elseif s then
105 s = s..string.char(item); 107 s[#s+1] = string_char(item);
106 end 108 end
107 table.insert(t, item); 109 t_insert(t, item);
108 end 110 end
109 read(); -- read }, or ], or > 111 read(); -- read }, or ], or >
110 if s and s ~= "" then 112 if s and #s > 0 then
111 return s 113 return t_concat(s)
112 else 114 else
113 return t 115 return t
114 end; 116 end;
115 end 117 end
116 local function readBinary() 118 local function readBinary()
117 read("<"); -- read < 119 read("<"); -- read <
118 local t = readTuple(); 120 local t = readTuple();
119 read(">") -- read > 121 read(">") -- read >
120 local ch = peek(); 122 local ch = peek();
121 if type(t) == type("") then 123 if type(t) == "string" then
122 -- binary is a list of integers 124 -- binary is a list of integers
123 return t; 125 return t;
124 elseif type(t) == type({}) then 126 elseif type(t) == "table" then
125 if t[1] then 127 if t[1] then
126 -- binary contains string 128 -- binary contains string
127 return t[1]; 129 return t[1];
128 else 130 else
129 -- binary is empty 131 -- binary is empty