Comparison

util/json.lua @ 5565:6dd806829226

util.json: New, faster, stricter, more compliant JSON decoder. Now returns nil,err instead of throwing errors on invalid input.
author Waqas Hussain <waqas20@gmail.com>
date Tue, 07 May 2013 10:42:44 -0400
parent 5563:678867c552d1
child 5776:bd0ff8ae98a8
comparison
equal deleted inserted replaced
5564:1292643ac498 5565:6dd806829226
183 end 183 end
184 184
185 ----------------------------------- 185 -----------------------------------
186 186
187 187
-- Returns the position of the first byte at or after `index` that is not a
-- space, tab, CR or LF. When no such byte exists, `index` is returned unchanged.
local function _skip_whitespace(json, index)
	local first_non_ws = json:find("[^ \t\r\n]", index);
	if first_non_ws then
		return first_non_ws;
	end
	return index;
end
-- Post-processes a decoded object in place and returns it.
--  * obj.__array: each element is appended to obj's own array part.
--  * obj.__hash:  a flat list read as key, value, key, value, ...; every pair
--                 is stored as obj[key] = value (a trailing unpaired key is dropped).
-- Both helper fields are removed from obj before their contents are merged.
local function _fixobject(obj)
	local list = obj.__array;
	if list then
		obj.__array = nil;
		for _, item in ipairs(list) do
			t_insert(obj, item);
		end
	end
	local flat = obj.__hash;
	if flat then
		obj.__hash = nil;
		local pending; -- key waiting for its value
		for _, entry in ipairs(flat) do
			if pending == nil then
				pending = entry;
			else
				obj[pending] = entry;
				pending = nil;
			end
		end
	end
	return obj;
end
-- Forward declarations: both are assigned further down and are needed here
-- because objects contain strings (keys) and arbitrary nested values.
local _readvalue, _readstring;

-- Parses a JSON object.
-- `index` must point at the opening "{".
-- Returns the decoded table and the index just past the closing "}",
-- or nil and an error string.
-- An object with at least one member is passed through _fixobject; an empty
-- object is returned as a plain empty table.
local function _readobject(json, index)
	local o = {};
	index = _skip_whitespace(json, index + 1);
	-- "}" is only legal here, directly after "{"; accepting it after a comma
	-- would let trailing commas such as {"a":1,} through.
	if json:byte(index) == 0x7d then return o, index + 1; end -- "}"
	while true do
		local key, val;
		if json:byte(index) ~= 0x22 then return nil, "key expected"; end -- "\""
		key, index = _readstring(json, index);
		if key == nil then return nil, index; end -- index holds the error message
		index = _skip_whitespace(json, index);
		if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":"
		val, index = _readvalue(json, index + 1);
		if val == nil then return nil, index; end -- index holds the error message
		o[key] = val;
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x7d then return _fixobject(o), index + 1; end -- "}"
		if b ~= 0x2c then return nil, "object eof"; end -- ","
		-- step over the comma; the next token must be a key
		index = _skip_whitespace(json, index + 1);
	end
end
-- Parses a JSON array.
-- `index` must point at the opening "[".
-- Returns a table carrying the array_mt metatable and the index just past the
-- closing "]", or nil and an error string.
local function _readarray(json, index)
	local a = {};
	-- An empty array may contain whitespace between the brackets ("[ ]"), so
	-- skip it before looking for "]" instead of testing only the next byte.
	local first = _skip_whitespace(json, index + 1);
	if json:byte(first) == 0x5d then return setmetatable(a, array_mt), first + 1; end -- "]"
	while true do
		local val;
		-- index points at "[" or "," here; _readvalue skips leading whitespace itself
		val, index = _readvalue(json, index + 1);
		if val == nil then return nil, index; end -- index holds the error message
		t_insert(a, val);
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
		if b ~= 0x2c then return nil, "array eof"; end -- ","
	end
end
-- Flag shared by the unescape helpers and _readstring: set to true when an
-- invalid \u escape is encountered.
local _unescape_error;

-- Converts a UTF-16 surrogate pair written as "\uXXXX\uYYYY" (12 characters)
-- into the 4-byte UTF-8 encoding of the code point it represents.
local function _unescape_surrogate_func(x)
	local hi = tonumber(x:sub(3, 6), 16);
	local lo = tonumber(x:sub(9, 12), 16);
	-- (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000, with the constants folded
	local rest = hi * 0x400 + lo - 0x35FDC00;
	-- peel off three 6-bit groups, least significant first
	local cont = {};
	for i = 3, 1, -1 do
		local low6 = rest % 64;
		cont[i] = 0x80 + low6;
		rest = (rest - low6) / 64;
	end
	-- whatever is left forms the low bits of the lead byte
	return s_char(0xF0 + rest, cont[1], cont[2], cont[3]);
end
-- gsub callback for a "\u" escape followed by up to four characters.
-- With four hex digits after "\u" it returns codepoint_to_utf8() of that
-- number; otherwise it sets _unescape_error and returns nothing, which makes
-- gsub keep the matched text unchanged.
-- Surrogate code points (0xD800-0xDFFF) are not rejected here.
local function _unescape_func(x)
	local hex = x:match("%x%x%x%x", 3);
	if not hex then
		_unescape_error = true;
		return;
	end
	return codepoint_to_utf8(tonumber(hex, 16));
end
-- Parses a JSON string.
-- `index` must point at the opening quote.
-- Returns the decoded string and the index just past the closing quote, or
-- nil and an error string ("string eof" / "invalid escape").
-- The plain search for the closing quote relies on json.decode having already
-- rewritten every backslash escape (including \") into \uXXXX form.
function _readstring(json, index)
	index = index + 1; -- step over the opening quote
	local endindex = json:find("\"", index, true);
	if not endindex then
		return nil, "string eof";
	end
	local s = json:sub(index, endindex - 1);
	--if s:find("[%z-\31]") then return nil, "control char in string"; end
	-- FIXME handle control characters
	_unescape_error = nil;
	-- Surrogate pairs first: a high surrogate (D800-DBFF) immediately followed
	-- by a low surrogate (DC00-DFFF) encodes one code point beyond the BMP and
	-- must become a single 4-byte UTF-8 sequence, not two encoded halves.
	s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
	-- FIXME lone surrogates are still passed to _unescape_func as-is
	s = s:gsub("\\u.?.?.?.?", _unescape_func);
	if _unescape_error then return nil, "invalid escape"; end
	return s, endindex + 1;
end
-- Parses a JSON number starting at `index` (a digit or "-").
-- Returns the number and the index just past it, or nil and an error string.
-- The text must follow the JSON grammar:
--   -? ( 0 | [1-9] digits ) ( . digits+ )? ( [eE] [+-]? digits+ )?
-- so leading zeros ("01"), a bare "-", "1." and "1e" are rejected.
local function _readnumber(json, index)
	-- grab the whole run of number-ish characters, then validate it as a unit
	local m = json:match("^[0-9%.%-eE%+]+", index);
	if not m then return nil, "invalid number"; end
	local int, frac, exp = m:match("^%-?(%d+)(%.?%d*)([eE]?[%+%-]?%d*)$");
	if not int
	or (int ~= "0" and int:byte(1) == 0x30) -- leading zero
	or (frac ~= "" and not frac:find("^%.%d+$"))
	or (exp ~= "" and not exp:find("^[eE][%+%-]?%d+$")) then
		return nil, "invalid number";
	end
	local n = tonumber(m);
	if n == nil then return nil, "invalid number"; end
	return n, index + #m;
end
-- Parses the literal `null`. The caller has already seen the leading "n" at
-- `index`; only the following three bytes are checked here.
-- Returns the module's `null` value and the index just past the literal, or
-- nil and an error string.
local function _readnull(json, index)
	if json:sub(index + 1, index + 3) ~= "ull" then
		return nil, "null parse failed";
	end
	return null, index + 4;
end
-- Parses the literal `true`. The caller has already seen the leading "t" at
-- `index`; only the following three bytes are checked here.
-- Returns true and the index just past the literal, or nil and an error string.
local function _readtrue(json, index)
	if json:sub(index + 1, index + 3) ~= "rue" then
		return nil, "true parse failed";
	end
	return true, index + 4;
end
-- Parses the literal `false`. The caller has already seen the leading "f" at
-- `index`; only the following four bytes are checked here.
-- Returns false and the index just past the literal, or nil and an error string.
local function _readfalse(json, index)
	if json:sub(index + 1, index + 4) ~= "alse" then
		return nil, "false parse failed";
	end
	return false, index + 5;
end
do
	-- Maps the first byte of a value to the reader that handles it.
	-- Built once, after all readers above have been defined.
	local readers = {
		[0x7B] = _readobject; -- "{"
		[0x5B] = _readarray;  -- "["
		[0x22] = _readstring; -- "\""
		[0x2d] = _readnumber; -- "-"
		[0x6e] = _readnull;   -- "n"
		[0x74] = _readtrue;   -- "t"
		[0x66] = _readfalse;  -- "f"
	};
	for digit = 0x30, 0x39 do -- "0"-"9"
		readers[digit] = _readnumber;
	end

	-- Parses any JSON value starting at `index`, after skipping whitespace.
	-- Returns the value and the index just past it, or nil and an error
	-- string ("value expected" when no reader matches, including at end of input).
	function _readvalue(json, index)
		index = _skip_whitespace(json, index);
		local reader = readers[json:byte(index)];
		if reader then
			return reader(json, index);
		end
		return nil, "value expected";
	end
end
-- Lookup table used by json.decode's first pass: maps each two-character
-- backslash escape to its \uXXXX equivalent, so that later stages only ever
-- see \u escapes. "\u" maps to itself so existing \uXXXX escapes are kept.
local first_escape = {};
for escaped_char, hex in pairs({
	["\""] = "0022",
	["\\"] = "005c",
	["/"]  = "002f",
	["b"]  = "0008",
	["f"]  = "000C",
	["n"]  = "000A",
	["r"]  = "000D",
	["t"]  = "0009",
}) do
	first_escape["\\"..escaped_char] = "\\u"..hex;
end
first_escape["\\u"] = "\\u";
347
188 function json.decode(json) 348 function json.decode(json)
189 json = json.." "; -- appending a space ensures valid json wouldn't touch EOF 349 json = json:gsub("\\.", first_escape) -- get rid of all escapes except \uXXXX, making string parsing much simpler
190 local pos = 1; 350 --:gsub("[\r\n]", "\t"); -- \r\n\t are equivalent, we care about none of them, and none of them can be in strings
191 local current = {};
192 local stack = {};
193 local ch, peek;
194 local function next()
195 ch = json:sub(pos, pos);
196 if ch == "" then error("Unexpected EOF"); end
197 pos = pos+1;
198 peek = json:sub(pos, pos);
199 return ch;
200 end
201 351
202 local function skipwhitespace() 352 -- TODO do encoding verification
203 while ch and (ch == "\r" or ch == "\n" or ch == "\t" or ch == " ") do
204 next();
205 end
206 end
207 local function skiplinecomment()
208 repeat next(); until not(ch) or ch == "\r" or ch == "\n";
209 skipwhitespace();
210 end
211 local function skipstarcomment()
212 next(); next(); -- skip '/', '*'
213 while peek and ch ~= "*" and peek ~= "/" do next(); end
214 if not peek then error("eof in star comment") end
215 next(); next(); -- skip '*', '/'
216 skipwhitespace();
217 end
218 local function skipstuff()
219 while true do
220 skipwhitespace();
221 if ch == "/" and peek == "*" then
222 skipstarcomment();
223 elseif ch == "/" and peek == "/" then
224 skiplinecomment();
225 else
226 return;
227 end
228 end
229 end
230 353
231 local readvalue; 354 local val, index = _readvalue(json, 1);
232 local function readarray() 355 if val == nil then return val, index; end
233 local t = setmetatable({}, array_mt); 356 if json:find("[^ \t\r\n]", index) then return nil, "garbage at eof"; end
234 next(); -- skip '[' 357
235 skipstuff(); 358 return val;
236 if ch == "]" then next(); return t; end
237 t_insert(t, readvalue());
238 while true do
239 skipstuff();
240 if ch == "]" then next(); return t; end
241 if not ch then error("eof while reading array");
242 elseif ch == "," then next();
243 elseif ch then error("unexpected character in array, comma expected"); end
244 if not ch then error("eof while reading array"); end
245 t_insert(t, readvalue());
246 end
247 end
248
249 local function checkandskip(c)
250 local x = ch or "eof";
251 if x ~= c then error("unexpected "..x..", '"..c.."' expected"); end
252 next();
253 end
254 local function readliteral(lit, val)
255 for c in lit:gmatch(".") do
256 checkandskip(c);
257 end
258 return val;
259 end
260 local function readstring()
261 local s = {};
262 checkandskip("\"");
263 while ch do
264 while ch and ch ~= "\\" and ch ~= "\"" do
265 t_insert(s, ch); next();
266 end
267 if ch == "\\" then
268 next();
269 if unescapes[ch] then
270 t_insert(s, unescapes[ch]);
271 next();
272 elseif ch == "u" then
273 local seq = "";
274 for i=1,4 do
275 next();
276 if not ch then error("unexpected eof in string"); end
277 if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end
278 seq = seq..ch;
279 end
280 t_insert(s, codepoint_to_utf8(tonumber(seq, 16)));
281 next();
282 else error("invalid escape sequence in string"); end
283 end
284 if ch == "\"" then
285 next();
286 return t_concat(s);
287 end
288 end
289 error("eof while reading string");
290 end
291 local function readnumber()
292 local s = "";
293 if ch == "-" then
294 s = s..ch; next();
295 if not ch:match("[0-9]") then error("number format error"); end
296 end
297 if ch == "0" then
298 s = s..ch; next();
299 if ch:match("[0-9]") then error("number format error"); end
300 else
301 while ch and ch:match("[0-9]") do
302 s = s..ch; next();
303 end
304 end
305 if ch == "." then
306 s = s..ch; next();
307 if not ch:match("[0-9]") then error("number format error"); end
308 while ch and ch:match("[0-9]") do
309 s = s..ch; next();
310 end
311 if ch == "e" or ch == "E" then
312 s = s..ch; next();
313 if ch == "+" or ch == "-" then
314 s = s..ch; next();
315 if not ch:match("[0-9]") then error("number format error"); end
316 while ch and ch:match("[0-9]") do
317 s = s..ch; next();
318 end
319 end
320 end
321 end
322 return tonumber(s);
323 end
324 local function readmember(t)
325 skipstuff();
326 local k = readstring();
327 skipstuff();
328 checkandskip(":");
329 t[k] = readvalue();
330 end
331 local function fixobject(obj)
332 local __array = obj.__array;
333 if __array then
334 obj.__array = nil;
335 for i,v in ipairs(__array) do
336 t_insert(obj, v);
337 end
338 end
339 local __hash = obj.__hash;
340 if __hash then
341 obj.__hash = nil;
342 local k;
343 for i,v in ipairs(__hash) do
344 if k ~= nil then
345 obj[k] = v; k = nil;
346 else
347 k = v;
348 end
349 end
350 end
351 return obj;
352 end
353 local function readobject()
354 local t = {};
355 next(); -- skip '{'
356 skipstuff();
357 if ch == "}" then next(); return t; end
358 if not ch then error("eof while reading object"); end
359 readmember(t);
360 while true do
361 skipstuff();
362 if ch == "}" then next(); return fixobject(t); end
363 if not ch then error("eof while reading object");
364 elseif ch == "," then next();
365 elseif ch then error("unexpected character in object, comma expected"); end
366 if not ch then error("eof while reading object"); end
367 readmember(t);
368 end
369 end
370
371 function readvalue()
372 skipstuff();
373 while ch do
374 if ch == "{" then
375 return readobject();
376 elseif ch == "[" then
377 return readarray();
378 elseif ch == "\"" then
379 return readstring();
380 elseif ch:match("[%-0-9%.]") then
381 return readnumber();
382 elseif ch == "n" then
383 return readliteral("null", null);
384 elseif ch == "t" then
385 return readliteral("true", true);
386 elseif ch == "f" then
387 return readliteral("false", false);
388 else
389 error("invalid character at value start: "..ch);
390 end
391 end
392 error("eof while reading value");
393 end
394 next();
395 return readvalue();
396 end 359 end
397 360
398 function json.test(object) 361 function json.test(object)
399 local encoded = json.encode(object); 362 local encoded = json.encode(object);
400 local decoded = json.decode(encoded); 363 local decoded = json.decode(encoded);