Software /
code /
prosody
Comparison
util/json.lua @ 8697:c60fdf148118
util.json: Unescape surrogate pairs
author | Matthew Wild <mwild1@gmail.com> |
---|---|
date | Tue, 27 Mar 2018 13:44:40 +0100 |
parent | 8382:e5d00bf4a4d5 |
child | 9329:19bc3ec13f07 |
comparison
equal
deleted
inserted
replaced
8696:164da3186511 | 8697:c60fdf148118 |
---|---|
244 if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]" | 244 if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]" |
245 if b ~= 0x2c then return nil, "array eof"; end -- "," | 245 if b ~= 0x2c then return nil, "array eof"; end -- "," |
246 end | 246 end |
247 end | 247 end |
248 local _unescape_error; | 248 local _unescape_error; |
249 local function _unescape_surrogate_func(x) -- luacheck: ignore | 249 local function _unescape_surrogate_func(x) |
250 local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16); | 250 local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16); |
251 local codepoint = lead * 0x400 + trail - 0x35FDC00; | 251 local codepoint = lead * 0x400 + trail - 0x35FDC00; |
252 local a = codepoint % 64; | 252 local a = codepoint % 64; |
253 codepoint = (codepoint - a) / 64; | 253 codepoint = (codepoint - a) / 64; |
254 local b = codepoint % 64; | 254 local b = codepoint % 64; |
258 return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a); | 258 return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a); |
259 end | 259 end |
260 local function _unescape_func(x) | 260 local function _unescape_func(x) |
261 x = x:match("%x%x%x%x", 3); | 261 x = x:match("%x%x%x%x", 3); |
262 if x then | 262 if x then |
263 --if x >= 0xD800 and x <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair | 263 local codepoint = tonumber(x, 16) |
264 return codepoint_to_utf8(tonumber(x, 16)); | 264 if codepoint >= 0xD800 and codepoint <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair |
265 return codepoint_to_utf8(codepoint); | |
265 end | 266 end |
266 _unescape_error = true; | 267 _unescape_error = true; |
267 end | 268 end |
268 function _readstring(json, index) | 269 function _readstring(json, index) |
269 index = index + 1; | 270 index = index + 1; |
271 if endindex then | 272 if endindex then |
272 local s = json:sub(index, endindex - 1); | 273 local s = json:sub(index, endindex - 1); |
273 --if s:find("[%z-\31]") then return nil, "control char in string"; end | 274 --if s:find("[%z-\31]") then return nil, "control char in string"; end |
274 -- FIXME handle control characters | 275 -- FIXME handle control characters |
275 _unescape_error = nil; | 276 _unescape_error = nil; |
276 --s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func); | 277 s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func); |
277 -- FIXME handle escapes beyond BMP | 278 -- FIXME handle escapes beyond BMP |
278 s = s:gsub("\\u.?.?.?.?", _unescape_func); | 279 s = s:gsub("\\u.?.?.?.?", _unescape_func); |
279 if _unescape_error then return nil, "invalid escape"; end | 280 if _unescape_error then return nil, "invalid escape"; end |
280 return s, endindex + 1; | 281 return s, endindex + 1; |
281 end | 282 end |