Software / code / prosody
Comparison
util/json.lua @ 5565:6dd806829226
util.json: New, faster, stricter, more compliant JSON decoder. Now returns nil,err instead of throwing errors on invalid input.
author | Waqas Hussain <waqas20@gmail.com> |
---|---|
date | Tue, 07 May 2013 10:42:44 -0400 |
parent | 5563:678867c552d1 |
child | 5776:bd0ff8ae98a8 |
Comparison legend: equal | deleted | inserted | replaced
5564:1292643ac498 | 5565:6dd806829226 |
---|---|
183 end | 183 end |
184 | 184 |
185 ----------------------------------- | 185 ----------------------------------- |
186 | 186 |
187 | 187 |
188 local function _skip_whitespace(json, index) | |
189 return json:find("[^ \t\r\n]", index) or index; -- no need to check \r\n, we converted those to \t | |
190 end | |
191 local function _fixobject(obj) | |
192 local __array = obj.__array; | |
193 if __array then | |
194 obj.__array = nil; | |
195 for i,v in ipairs(__array) do | |
196 t_insert(obj, v); | |
197 end | |
198 end | |
199 local __hash = obj.__hash; | |
200 if __hash then | |
201 obj.__hash = nil; | |
202 local k; | |
203 for i,v in ipairs(__hash) do | |
204 if k ~= nil then | |
205 obj[k] = v; k = nil; | |
206 else | |
207 k = v; | |
208 end | |
209 end | |
210 end | |
211 return obj; | |
212 end | |
213 local _readvalue, _readstring; | |
214 local function _readobject(json, index) | |
215 local o = {}; | |
216 while true do | |
217 local key, val; | |
218 index = _skip_whitespace(json, index + 1); | |
219 if json:byte(index) ~= 0x22 then -- "\"" | |
220 if json:byte(index) == 0x7d then return o, index + 1; end -- "}" | |
221 return nil, "key expected"; | |
222 end | |
223 key, index = _readstring(json, index); | |
224 if key == nil then return nil, index; end | |
225 index = _skip_whitespace(json, index); | |
226 if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":" | |
227 val, index = _readvalue(json, index + 1); | |
228 if val == nil then return nil, index; end | |
229 o[key] = val; | |
230 index = _skip_whitespace(json, index); | |
231 local b = json:byte(index); | |
232 if b == 0x7d then return _fixobject(o), index + 1; end -- "}" | |
233 if b ~= 0x2c then return nil, "object eof"; end -- "," | |
234 end | |
235 end | |
236 local function _readarray(json, index) | |
237 local a = {}; | |
238 local oindex = index; | |
239 while true do | |
240 local val; | |
241 val, index = _readvalue(json, index + 1); | |
242 if val == nil then | |
243 if json:byte(oindex + 1) == 0x5d then return setmetatable(a, array_mt), oindex + 2; end -- "]" | |
244 return val, index; | |
245 end | |
246 t_insert(a, val); | |
247 index = _skip_whitespace(json, index); | |
248 local b = json:byte(index); | |
249 if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]" | |
250 if b ~= 0x2c then return nil, "array eof"; end -- "," | |
251 end | |
252 end | |
253 local _unescape_error; | |
254 local function _unescape_surrogate_func(x) | |
255 local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16); | |
256 local codepoint = lead * 0x400 + trail - 0x35FDC00; | |
257 local a = codepoint % 64; | |
258 codepoint = (codepoint - a) / 64; | |
259 local b = codepoint % 64; | |
260 codepoint = (codepoint - b) / 64; | |
261 local c = codepoint % 64; | |
262 codepoint = (codepoint - c) / 64; | |
263 return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a); | |
264 end | |
265 local function _unescape_func(x) | |
266 x = x:match("%x%x%x%x", 3); | |
267 if x then | |
268 --if x >= 0xD800 and x <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair | |
269 return codepoint_to_utf8(tonumber(x, 16)); | |
270 end | |
271 _unescape_error = true; | |
272 end | |
273 function _readstring(json, index) | |
274 index = index + 1; | |
275 local endindex = json:find("\"", index, true); | |
276 if endindex then | |
277 local s = json:sub(index, endindex - 1); | |
278 --if s:find("[%z-\31]") then return nil, "control char in string"; end | |
279 -- FIXME handle control characters | |
280 _unescape_error = nil; | |
281 --s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func); | |
282 -- FIXME handle escapes beyond BMP | |
283 s = s:gsub("\\u.?.?.?.?", _unescape_func); | |
284 if _unescape_error then return nil, "invalid escape"; end | |
285 return s, endindex + 1; | |
286 end | |
287 return nil, "string eof"; | |
288 end | |
289 local function _readnumber(json, index) | |
290 local m = json:match("[0-9%.%-eE%+]+", index); -- FIXME do strict checking | |
291 return tonumber(m), index + #m; | |
292 end | |
293 local function _readnull(json, index) | |
294 local a, b, c = json:byte(index + 1, index + 3); | |
295 if a == 0x75 and b == 0x6c and c == 0x6c then | |
296 return null, index + 4; | |
297 end | |
298 return nil, "null parse failed"; | |
299 end | |
300 local function _readtrue(json, index) | |
301 local a, b, c = json:byte(index + 1, index + 3); | |
302 if a == 0x72 and b == 0x75 and c == 0x65 then | |
303 return true, index + 4; | |
304 end | |
305 return nil, "true parse failed"; | |
306 end | |
307 local function _readfalse(json, index) | |
308 local a, b, c, d = json:byte(index + 1, index + 4); | |
309 if a == 0x61 and b == 0x6c and c == 0x73 and d == 0x65 then | |
310 return false, index + 5; | |
311 end | |
312 return nil, "false parse failed"; | |
313 end | |
314 function _readvalue(json, index) | |
315 index = _skip_whitespace(json, index); | |
316 local b = json:byte(index); | |
317 -- TODO try table lookup instead of if-else? | |
318 if b == 0x7B then -- "{" | |
319 return _readobject(json, index); | |
320 elseif b == 0x5B then -- "[" | |
321 return _readarray(json, index); | |
322 elseif b == 0x22 then -- "\"" | |
323 return _readstring(json, index); | |
324 elseif b ~= nil and b >= 0x30 and b <= 0x39 or b == 0x2d then -- "0"-"9" or "-" | |
325 return _readnumber(json, index); | |
326 elseif b == 0x6e then -- "n" | |
327 return _readnull(json, index); | |
328 elseif b == 0x74 then -- "t" | |
329 return _readtrue(json, index); | |
330 elseif b == 0x66 then -- "f" | |
331 return _readfalse(json, index); | |
332 else | |
333 return nil, "value expected"; | |
334 end | |
335 end | |
336 local first_escape = { | |
337 ["\\\""] = "\\u0022"; | |
338 ["\\\\"] = "\\u005c"; | |
339 ["\\/" ] = "\\u002f"; | |
340 ["\\b" ] = "\\u0008"; | |
341 ["\\f" ] = "\\u000C"; | |
342 ["\\n" ] = "\\u000A"; | |
343 ["\\r" ] = "\\u000D"; | |
344 ["\\t" ] = "\\u0009"; | |
345 ["\\u" ] = "\\u"; | |
346 }; | |
347 | |
188 function json.decode(json) | 348 function json.decode(json) |
189 json = json.." "; -- appending a space ensures valid json wouldn't touch EOF | 349 json = json:gsub("\\.", first_escape) -- get rid of all escapes except \uXXXX, making string parsing much simpler |
190 local pos = 1; | 350 --:gsub("[\r\n]", "\t"); -- \r\n\t are equivalent, we care about none of them, and none of them can be in strings |
191 local current = {}; | |
192 local stack = {}; | |
193 local ch, peek; | |
194 local function next() | |
195 ch = json:sub(pos, pos); | |
196 if ch == "" then error("Unexpected EOF"); end | |
197 pos = pos+1; | |
198 peek = json:sub(pos, pos); | |
199 return ch; | |
200 end | |
201 | 351 |
202 local function skipwhitespace() | 352 -- TODO do encoding verification |
203 while ch and (ch == "\r" or ch == "\n" or ch == "\t" or ch == " ") do | |
204 next(); | |
205 end | |
206 end | |
207 local function skiplinecomment() | |
208 repeat next(); until not(ch) or ch == "\r" or ch == "\n"; | |
209 skipwhitespace(); | |
210 end | |
211 local function skipstarcomment() | |
212 next(); next(); -- skip '/', '*' | |
213 while peek and ch ~= "*" and peek ~= "/" do next(); end | |
214 if not peek then error("eof in star comment") end | |
215 next(); next(); -- skip '*', '/' | |
216 skipwhitespace(); | |
217 end | |
218 local function skipstuff() | |
219 while true do | |
220 skipwhitespace(); | |
221 if ch == "/" and peek == "*" then | |
222 skipstarcomment(); | |
223 elseif ch == "/" and peek == "/" then | |
224 skiplinecomment(); | |
225 else | |
226 return; | |
227 end | |
228 end | |
229 end | |
230 | 353 |
231 local readvalue; | 354 local val, index = _readvalue(json, 1); |
232 local function readarray() | 355 if val == nil then return val, index; end |
233 local t = setmetatable({}, array_mt); | 356 if json:find("[^ \t\r\n]", index) then return nil, "garbage at eof"; end |
234 next(); -- skip '[' | 357 |
235 skipstuff(); | 358 return val; |
236 if ch == "]" then next(); return t; end | |
237 t_insert(t, readvalue()); | |
238 while true do | |
239 skipstuff(); | |
240 if ch == "]" then next(); return t; end | |
241 if not ch then error("eof while reading array"); | |
242 elseif ch == "," then next(); | |
243 elseif ch then error("unexpected character in array, comma expected"); end | |
244 if not ch then error("eof while reading array"); end | |
245 t_insert(t, readvalue()); | |
246 end | |
247 end | |
248 | |
249 local function checkandskip(c) | |
250 local x = ch or "eof"; | |
251 if x ~= c then error("unexpected "..x..", '"..c.."' expected"); end | |
252 next(); | |
253 end | |
254 local function readliteral(lit, val) | |
255 for c in lit:gmatch(".") do | |
256 checkandskip(c); | |
257 end | |
258 return val; | |
259 end | |
260 local function readstring() | |
261 local s = {}; | |
262 checkandskip("\""); | |
263 while ch do | |
264 while ch and ch ~= "\\" and ch ~= "\"" do | |
265 t_insert(s, ch); next(); | |
266 end | |
267 if ch == "\\" then | |
268 next(); | |
269 if unescapes[ch] then | |
270 t_insert(s, unescapes[ch]); | |
271 next(); | |
272 elseif ch == "u" then | |
273 local seq = ""; | |
274 for i=1,4 do | |
275 next(); | |
276 if not ch then error("unexpected eof in string"); end | |
277 if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end | |
278 seq = seq..ch; | |
279 end | |
280 t_insert(s, codepoint_to_utf8(tonumber(seq, 16))); | |
281 next(); | |
282 else error("invalid escape sequence in string"); end | |
283 end | |
284 if ch == "\"" then | |
285 next(); | |
286 return t_concat(s); | |
287 end | |
288 end | |
289 error("eof while reading string"); | |
290 end | |
291 local function readnumber() | |
292 local s = ""; | |
293 if ch == "-" then | |
294 s = s..ch; next(); | |
295 if not ch:match("[0-9]") then error("number format error"); end | |
296 end | |
297 if ch == "0" then | |
298 s = s..ch; next(); | |
299 if ch:match("[0-9]") then error("number format error"); end | |
300 else | |
301 while ch and ch:match("[0-9]") do | |
302 s = s..ch; next(); | |
303 end | |
304 end | |
305 if ch == "." then | |
306 s = s..ch; next(); | |
307 if not ch:match("[0-9]") then error("number format error"); end | |
308 while ch and ch:match("[0-9]") do | |
309 s = s..ch; next(); | |
310 end | |
311 if ch == "e" or ch == "E" then | |
312 s = s..ch; next(); | |
313 if ch == "+" or ch == "-" then | |
314 s = s..ch; next(); | |
315 if not ch:match("[0-9]") then error("number format error"); end | |
316 while ch and ch:match("[0-9]") do | |
317 s = s..ch; next(); | |
318 end | |
319 end | |
320 end | |
321 end | |
322 return tonumber(s); | |
323 end | |
324 local function readmember(t) | |
325 skipstuff(); | |
326 local k = readstring(); | |
327 skipstuff(); | |
328 checkandskip(":"); | |
329 t[k] = readvalue(); | |
330 end | |
331 local function fixobject(obj) | |
332 local __array = obj.__array; | |
333 if __array then | |
334 obj.__array = nil; | |
335 for i,v in ipairs(__array) do | |
336 t_insert(obj, v); | |
337 end | |
338 end | |
339 local __hash = obj.__hash; | |
340 if __hash then | |
341 obj.__hash = nil; | |
342 local k; | |
343 for i,v in ipairs(__hash) do | |
344 if k ~= nil then | |
345 obj[k] = v; k = nil; | |
346 else | |
347 k = v; | |
348 end | |
349 end | |
350 end | |
351 return obj; | |
352 end | |
353 local function readobject() | |
354 local t = {}; | |
355 next(); -- skip '{' | |
356 skipstuff(); | |
357 if ch == "}" then next(); return t; end | |
358 if not ch then error("eof while reading object"); end | |
359 readmember(t); | |
360 while true do | |
361 skipstuff(); | |
362 if ch == "}" then next(); return fixobject(t); end | |
363 if not ch then error("eof while reading object"); | |
364 elseif ch == "," then next(); | |
365 elseif ch then error("unexpected character in object, comma expected"); end | |
366 if not ch then error("eof while reading object"); end | |
367 readmember(t); | |
368 end | |
369 end | |
370 | |
371 function readvalue() | |
372 skipstuff(); | |
373 while ch do | |
374 if ch == "{" then | |
375 return readobject(); | |
376 elseif ch == "[" then | |
377 return readarray(); | |
378 elseif ch == "\"" then | |
379 return readstring(); | |
380 elseif ch:match("[%-0-9%.]") then | |
381 return readnumber(); | |
382 elseif ch == "n" then | |
383 return readliteral("null", null); | |
384 elseif ch == "t" then | |
385 return readliteral("true", true); | |
386 elseif ch == "f" then | |
387 return readliteral("false", false); | |
388 else | |
389 error("invalid character at value start: "..ch); | |
390 end | |
391 end | |
392 error("eof while reading value"); | |
393 end | |
394 next(); | |
395 return readvalue(); | |
396 end | 359 end |
397 | 360 |
398 function json.test(object) | 361 function json.test(object) |
399 local encoded = json.encode(object); | 362 local encoded = json.encode(object); |
400 local decoded = json.decode(encoded); | 363 local decoded = json.decode(encoded); |