Comparison

util/json.lua @ 8697:c60fdf148118

util.json: Unescape surrogate pairs
author Matthew Wild <mwild1@gmail.com>
date Tue, 27 Mar 2018 13:44:40 +0100
parent 8382:e5d00bf4a4d5
child 9329:19bc3ec13f07
comparison
Legend: equal | deleted | inserted | replaced
Left column: 8696:164da3186511 (before) | Right column: 8697:c60fdf148118 (after)
244 if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]" 244 if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
245 if b ~= 0x2c then return nil, "array eof"; end -- "," 245 if b ~= 0x2c then return nil, "array eof"; end -- ","
246 end 246 end
247 end 247 end
248 local _unescape_error; 248 local _unescape_error;
249 local function _unescape_surrogate_func(x) -- luacheck: ignore 249 local function _unescape_surrogate_func(x)
250 local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16); 250 local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16);
251 local codepoint = lead * 0x400 + trail - 0x35FDC00; 251 local codepoint = lead * 0x400 + trail - 0x35FDC00;
252 local a = codepoint % 64; 252 local a = codepoint % 64;
253 codepoint = (codepoint - a) / 64; 253 codepoint = (codepoint - a) / 64;
254 local b = codepoint % 64; 254 local b = codepoint % 64;
258 return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a); 258 return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a);
259 end 259 end
260 local function _unescape_func(x) 260 local function _unescape_func(x)
261 x = x:match("%x%x%x%x", 3); 261 x = x:match("%x%x%x%x", 3);
262 if x then 262 if x then
263 --if x >= 0xD800 and x <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair 263 local codepoint = tonumber(x, 16)
264 return codepoint_to_utf8(tonumber(x, 16)); 264 if codepoint >= 0xD800 and codepoint <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair
265 return codepoint_to_utf8(codepoint);
265 end 266 end
266 _unescape_error = true; 267 _unescape_error = true;
267 end 268 end
268 function _readstring(json, index) 269 function _readstring(json, index)
269 index = index + 1; 270 index = index + 1;
271 if endindex then 272 if endindex then
272 local s = json:sub(index, endindex - 1); 273 local s = json:sub(index, endindex - 1);
273 --if s:find("[%z-\31]") then return nil, "control char in string"; end 274 --if s:find("[%z-\31]") then return nil, "control char in string"; end
274 -- FIXME handle control characters 275 -- FIXME handle control characters
275 _unescape_error = nil; 276 _unescape_error = nil;
276 --s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func); 277 s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
277 -- FIXME handle escapes beyond BMP 278 -- FIXME handle escapes beyond BMP
278 s = s:gsub("\\u.?.?.?.?", _unescape_func); 279 s = s:gsub("\\u.?.?.?.?", _unescape_func);
279 if _unescape_error then return nil, "invalid escape"; end 280 if _unescape_error then return nil, "invalid escape"; end
280 return s, endindex + 1; 281 return s, endindex + 1;
281 end 282 end