Software / code / prosody
Comparison
util/json.lua @ 5565:6dd806829226
util.json: New, faster, stricter, more compliant JSON decoder. Now returns nil,err instead of throwing errors on invalid input.
| author | Waqas Hussain <waqas20@gmail.com> |
|---|---|
| date | Tue, 07 May 2013 10:42:44 -0400 |
| parent | 5563:678867c552d1 |
| child | 5776:bd0ff8ae98a8 |
Comparison legend (row types):
equal
deleted
inserted
replaced
| 5564:1292643ac498 | 5565:6dd806829226 |
|---|---|
| 183 end | 183 end |
| 184 | 184 |
| 185 ----------------------------------- | 185 ----------------------------------- |
| 186 | 186 |
| 187 | 187 |
| 188 local function _skip_whitespace(json, index) | |
| 189 return json:find("[^ \t\r\n]", index) or index; -- no need to check \r\n, we converted those to \t | |
| 190 end | |
| 191 local function _fixobject(obj) | |
| 192 local __array = obj.__array; | |
| 193 if __array then | |
| 194 obj.__array = nil; | |
| 195 for i,v in ipairs(__array) do | |
| 196 t_insert(obj, v); | |
| 197 end | |
| 198 end | |
| 199 local __hash = obj.__hash; | |
| 200 if __hash then | |
| 201 obj.__hash = nil; | |
| 202 local k; | |
| 203 for i,v in ipairs(__hash) do | |
| 204 if k ~= nil then | |
| 205 obj[k] = v; k = nil; | |
| 206 else | |
| 207 k = v; | |
| 208 end | |
| 209 end | |
| 210 end | |
| 211 return obj; | |
| 212 end | |
| 213 local _readvalue, _readstring; | |
| 214 local function _readobject(json, index) | |
| 215 local o = {}; | |
| 216 while true do | |
| 217 local key, val; | |
| 218 index = _skip_whitespace(json, index + 1); | |
| 219 if json:byte(index) ~= 0x22 then -- "\"" | |
| 220 if json:byte(index) == 0x7d then return o, index + 1; end -- "}" | |
| 221 return nil, "key expected"; | |
| 222 end | |
| 223 key, index = _readstring(json, index); | |
| 224 if key == nil then return nil, index; end | |
| 225 index = _skip_whitespace(json, index); | |
| 226 if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":" | |
| 227 val, index = _readvalue(json, index + 1); | |
| 228 if val == nil then return nil, index; end | |
| 229 o[key] = val; | |
| 230 index = _skip_whitespace(json, index); | |
| 231 local b = json:byte(index); | |
| 232 if b == 0x7d then return _fixobject(o), index + 1; end -- "}" | |
| 233 if b ~= 0x2c then return nil, "object eof"; end -- "," | |
| 234 end | |
| 235 end | |
| 236 local function _readarray(json, index) | |
| 237 local a = {}; | |
| 238 local oindex = index; | |
| 239 while true do | |
| 240 local val; | |
| 241 val, index = _readvalue(json, index + 1); | |
| 242 if val == nil then | |
| 243 if json:byte(oindex + 1) == 0x5d then return setmetatable(a, array_mt), oindex + 2; end -- "]" | |
| 244 return val, index; | |
| 245 end | |
| 246 t_insert(a, val); | |
| 247 index = _skip_whitespace(json, index); | |
| 248 local b = json:byte(index); | |
| 249 if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]" | |
| 250 if b ~= 0x2c then return nil, "array eof"; end -- "," | |
| 251 end | |
| 252 end | |
| 253 local _unescape_error; | |
| 254 local function _unescape_surrogate_func(x) | |
| 255 local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16); | |
| 256 local codepoint = lead * 0x400 + trail - 0x35FDC00; | |
| 257 local a = codepoint % 64; | |
| 258 codepoint = (codepoint - a) / 64; | |
| 259 local b = codepoint % 64; | |
| 260 codepoint = (codepoint - b) / 64; | |
| 261 local c = codepoint % 64; | |
| 262 codepoint = (codepoint - c) / 64; | |
| 263 return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a); | |
| 264 end | |
| 265 local function _unescape_func(x) | |
| 266 x = x:match("%x%x%x%x", 3); | |
| 267 if x then | |
| 268 --if x >= 0xD800 and x <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair | |
| 269 return codepoint_to_utf8(tonumber(x, 16)); | |
| 270 end | |
| 271 _unescape_error = true; | |
| 272 end | |
| 273 function _readstring(json, index) | |
| 274 index = index + 1; | |
| 275 local endindex = json:find("\"", index, true); | |
| 276 if endindex then | |
| 277 local s = json:sub(index, endindex - 1); | |
| 278 --if s:find("[%z-\31]") then return nil, "control char in string"; end | |
| 279 -- FIXME handle control characters | |
| 280 _unescape_error = nil; | |
| 281 --s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func); | |
| 282 -- FIXME handle escapes beyond BMP | |
| 283 s = s:gsub("\\u.?.?.?.?", _unescape_func); | |
| 284 if _unescape_error then return nil, "invalid escape"; end | |
| 285 return s, endindex + 1; | |
| 286 end | |
| 287 return nil, "string eof"; | |
| 288 end | |
| 289 local function _readnumber(json, index) | |
| 290 local m = json:match("[0-9%.%-eE%+]+", index); -- FIXME do strict checking | |
| 291 return tonumber(m), index + #m; | |
| 292 end | |
| 293 local function _readnull(json, index) | |
| 294 local a, b, c = json:byte(index + 1, index + 3); | |
| 295 if a == 0x75 and b == 0x6c and c == 0x6c then | |
| 296 return null, index + 4; | |
| 297 end | |
| 298 return nil, "null parse failed"; | |
| 299 end | |
| 300 local function _readtrue(json, index) | |
| 301 local a, b, c = json:byte(index + 1, index + 3); | |
| 302 if a == 0x72 and b == 0x75 and c == 0x65 then | |
| 303 return true, index + 4; | |
| 304 end | |
| 305 return nil, "true parse failed"; | |
| 306 end | |
| 307 local function _readfalse(json, index) | |
| 308 local a, b, c, d = json:byte(index + 1, index + 4); | |
| 309 if a == 0x61 and b == 0x6c and c == 0x73 and d == 0x65 then | |
| 310 return false, index + 5; | |
| 311 end | |
| 312 return nil, "false parse failed"; | |
| 313 end | |
| 314 function _readvalue(json, index) | |
| 315 index = _skip_whitespace(json, index); | |
| 316 local b = json:byte(index); | |
| 317 -- TODO try table lookup instead of if-else? | |
| 318 if b == 0x7B then -- "{" | |
| 319 return _readobject(json, index); | |
| 320 elseif b == 0x5B then -- "[" | |
| 321 return _readarray(json, index); | |
| 322 elseif b == 0x22 then -- "\"" | |
| 323 return _readstring(json, index); | |
| 324 elseif b ~= nil and b >= 0x30 and b <= 0x39 or b == 0x2d then -- "0"-"9" or "-" | |
| 325 return _readnumber(json, index); | |
| 326 elseif b == 0x6e then -- "n" | |
| 327 return _readnull(json, index); | |
| 328 elseif b == 0x74 then -- "t" | |
| 329 return _readtrue(json, index); | |
| 330 elseif b == 0x66 then -- "f" | |
| 331 return _readfalse(json, index); | |
| 332 else | |
| 333 return nil, "value expected"; | |
| 334 end | |
| 335 end | |
| 336 local first_escape = { | |
| 337 ["\\\""] = "\\u0022"; | |
| 338 ["\\\\"] = "\\u005c"; | |
| 339 ["\\/" ] = "\\u002f"; | |
| 340 ["\\b" ] = "\\u0008"; | |
| 341 ["\\f" ] = "\\u000C"; | |
| 342 ["\\n" ] = "\\u000A"; | |
| 343 ["\\r" ] = "\\u000D"; | |
| 344 ["\\t" ] = "\\u0009"; | |
| 345 ["\\u" ] = "\\u"; | |
| 346 }; | |
| 347 | |
| 188 function json.decode(json) | 348 function json.decode(json) |
| 189 json = json.." "; -- appending a space ensures valid json wouldn't touch EOF | 349 json = json:gsub("\\.", first_escape) -- get rid of all escapes except \uXXXX, making string parsing much simpler |
| 190 local pos = 1; | 350 --:gsub("[\r\n]", "\t"); -- \r\n\t are equivalent, we care about none of them, and none of them can be in strings |
| 191 local current = {}; | |
| 192 local stack = {}; | |
| 193 local ch, peek; | |
| 194 local function next() | |
| 195 ch = json:sub(pos, pos); | |
| 196 if ch == "" then error("Unexpected EOF"); end | |
| 197 pos = pos+1; | |
| 198 peek = json:sub(pos, pos); | |
| 199 return ch; | |
| 200 end | |
| 201 | 351 |
| 202 local function skipwhitespace() | 352 -- TODO do encoding verification |
| 203 while ch and (ch == "\r" or ch == "\n" or ch == "\t" or ch == " ") do | |
| 204 next(); | |
| 205 end | |
| 206 end | |
| 207 local function skiplinecomment() | |
| 208 repeat next(); until not(ch) or ch == "\r" or ch == "\n"; | |
| 209 skipwhitespace(); | |
| 210 end | |
| 211 local function skipstarcomment() | |
| 212 next(); next(); -- skip '/', '*' | |
| 213 while peek and ch ~= "*" and peek ~= "/" do next(); end | |
| 214 if not peek then error("eof in star comment") end | |
| 215 next(); next(); -- skip '*', '/' | |
| 216 skipwhitespace(); | |
| 217 end | |
| 218 local function skipstuff() | |
| 219 while true do | |
| 220 skipwhitespace(); | |
| 221 if ch == "/" and peek == "*" then | |
| 222 skipstarcomment(); | |
| 223 elseif ch == "/" and peek == "/" then | |
| 224 skiplinecomment(); | |
| 225 else | |
| 226 return; | |
| 227 end | |
| 228 end | |
| 229 end | |
| 230 | 353 |
| 231 local readvalue; | 354 local val, index = _readvalue(json, 1); |
| 232 local function readarray() | 355 if val == nil then return val, index; end |
| 233 local t = setmetatable({}, array_mt); | 356 if json:find("[^ \t\r\n]", index) then return nil, "garbage at eof"; end |
| 234 next(); -- skip '[' | 357 |
| 235 skipstuff(); | 358 return val; |
| 236 if ch == "]" then next(); return t; end | |
| 237 t_insert(t, readvalue()); | |
| 238 while true do | |
| 239 skipstuff(); | |
| 240 if ch == "]" then next(); return t; end | |
| 241 if not ch then error("eof while reading array"); | |
| 242 elseif ch == "," then next(); | |
| 243 elseif ch then error("unexpected character in array, comma expected"); end | |
| 244 if not ch then error("eof while reading array"); end | |
| 245 t_insert(t, readvalue()); | |
| 246 end | |
| 247 end | |
| 248 | |
| 249 local function checkandskip(c) | |
| 250 local x = ch or "eof"; | |
| 251 if x ~= c then error("unexpected "..x..", '"..c.."' expected"); end | |
| 252 next(); | |
| 253 end | |
| 254 local function readliteral(lit, val) | |
| 255 for c in lit:gmatch(".") do | |
| 256 checkandskip(c); | |
| 257 end | |
| 258 return val; | |
| 259 end | |
| 260 local function readstring() | |
| 261 local s = {}; | |
| 262 checkandskip("\""); | |
| 263 while ch do | |
| 264 while ch and ch ~= "\\" and ch ~= "\"" do | |
| 265 t_insert(s, ch); next(); | |
| 266 end | |
| 267 if ch == "\\" then | |
| 268 next(); | |
| 269 if unescapes[ch] then | |
| 270 t_insert(s, unescapes[ch]); | |
| 271 next(); | |
| 272 elseif ch == "u" then | |
| 273 local seq = ""; | |
| 274 for i=1,4 do | |
| 275 next(); | |
| 276 if not ch then error("unexpected eof in string"); end | |
| 277 if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end | |
| 278 seq = seq..ch; | |
| 279 end | |
| 280 t_insert(s, codepoint_to_utf8(tonumber(seq, 16))); | |
| 281 next(); | |
| 282 else error("invalid escape sequence in string"); end | |
| 283 end | |
| 284 if ch == "\"" then | |
| 285 next(); | |
| 286 return t_concat(s); | |
| 287 end | |
| 288 end | |
| 289 error("eof while reading string"); | |
| 290 end | |
| 291 local function readnumber() | |
| 292 local s = ""; | |
| 293 if ch == "-" then | |
| 294 s = s..ch; next(); | |
| 295 if not ch:match("[0-9]") then error("number format error"); end | |
| 296 end | |
| 297 if ch == "0" then | |
| 298 s = s..ch; next(); | |
| 299 if ch:match("[0-9]") then error("number format error"); end | |
| 300 else | |
| 301 while ch and ch:match("[0-9]") do | |
| 302 s = s..ch; next(); | |
| 303 end | |
| 304 end | |
| 305 if ch == "." then | |
| 306 s = s..ch; next(); | |
| 307 if not ch:match("[0-9]") then error("number format error"); end | |
| 308 while ch and ch:match("[0-9]") do | |
| 309 s = s..ch; next(); | |
| 310 end | |
| 311 if ch == "e" or ch == "E" then | |
| 312 s = s..ch; next(); | |
| 313 if ch == "+" or ch == "-" then | |
| 314 s = s..ch; next(); | |
| 315 if not ch:match("[0-9]") then error("number format error"); end | |
| 316 while ch and ch:match("[0-9]") do | |
| 317 s = s..ch; next(); | |
| 318 end | |
| 319 end | |
| 320 end | |
| 321 end | |
| 322 return tonumber(s); | |
| 323 end | |
| 324 local function readmember(t) | |
| 325 skipstuff(); | |
| 326 local k = readstring(); | |
| 327 skipstuff(); | |
| 328 checkandskip(":"); | |
| 329 t[k] = readvalue(); | |
| 330 end | |
| 331 local function fixobject(obj) | |
| 332 local __array = obj.__array; | |
| 333 if __array then | |
| 334 obj.__array = nil; | |
| 335 for i,v in ipairs(__array) do | |
| 336 t_insert(obj, v); | |
| 337 end | |
| 338 end | |
| 339 local __hash = obj.__hash; | |
| 340 if __hash then | |
| 341 obj.__hash = nil; | |
| 342 local k; | |
| 343 for i,v in ipairs(__hash) do | |
| 344 if k ~= nil then | |
| 345 obj[k] = v; k = nil; | |
| 346 else | |
| 347 k = v; | |
| 348 end | |
| 349 end | |
| 350 end | |
| 351 return obj; | |
| 352 end | |
| 353 local function readobject() | |
| 354 local t = {}; | |
| 355 next(); -- skip '{' | |
| 356 skipstuff(); | |
| 357 if ch == "}" then next(); return t; end | |
| 358 if not ch then error("eof while reading object"); end | |
| 359 readmember(t); | |
| 360 while true do | |
| 361 skipstuff(); | |
| 362 if ch == "}" then next(); return fixobject(t); end | |
| 363 if not ch then error("eof while reading object"); | |
| 364 elseif ch == "," then next(); | |
| 365 elseif ch then error("unexpected character in object, comma expected"); end | |
| 366 if not ch then error("eof while reading object"); end | |
| 367 readmember(t); | |
| 368 end | |
| 369 end | |
| 370 | |
| 371 function readvalue() | |
| 372 skipstuff(); | |
| 373 while ch do | |
| 374 if ch == "{" then | |
| 375 return readobject(); | |
| 376 elseif ch == "[" then | |
| 377 return readarray(); | |
| 378 elseif ch == "\"" then | |
| 379 return readstring(); | |
| 380 elseif ch:match("[%-0-9%.]") then | |
| 381 return readnumber(); | |
| 382 elseif ch == "n" then | |
| 383 return readliteral("null", null); | |
| 384 elseif ch == "t" then | |
| 385 return readliteral("true", true); | |
| 386 elseif ch == "f" then | |
| 387 return readliteral("false", false); | |
| 388 else | |
| 389 error("invalid character at value start: "..ch); | |
| 390 end | |
| 391 end | |
| 392 error("eof while reading value"); | |
| 393 end | |
| 394 next(); | |
| 395 return readvalue(); | |
| 396 end | 359 end |
| 397 | 360 |
| 398 function json.test(object) | 361 function json.test(object) |
| 399 local encoded = json.encode(object); | 362 local encoded = json.encode(object); |
| 400 local decoded = json.decode(encoded); | 363 local decoded = json.decode(encoded); |