Changeset

5436:a4ba5819bf50

util.json: Convert \uXXXX to UTF-8 when decoding
author Matthew Wild <mwild1@gmail.com>
date Sat, 06 Apr 2013 12:20:31 +0100 (2013-04-06)
parents 5435:f56e449a63e3
children 5437:1994a4483b1c 5438:5032b3b5b556
files util/json.lua
diffstat 1 files changed, 36 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/util/json.lua	Fri Apr 05 19:59:48 2013 +0100
+++ b/util/json.lua	Sat Apr 06 12:20:31 2013 +0100
@@ -1,3 +1,12 @@
+-- Prosody IM
+-- Copyright (C) 2008-2010 Matthew Wild
+-- Copyright (C) 2008-2010 Waqas Hussain
+--
+-- utf8char copyright (C) 2007 Rici Lake
+--
+-- This project is MIT/X11 licensed. Please see the
+-- COPYING file in the source package for more information.
+--
 
 local type = type;
 local t_insert, t_concat, t_remove, t_sort = table.insert, table.concat, table.remove, table.sort;
@@ -29,6 +38,32 @@
 	if not escapes[ch] then escapes[ch] = ("\\u%.4X"):format(i); end
 end
 
+local function utf8char(i) -- Encode the number i (a code point) as a UTF-8 byte string; falls off the end (returns nil) when no branch below matches. s_char is defined outside this view, presumably string.char.
+	if i >= 0 then -- negative input matches no branch, so nothing is returned
+		i = i - i%1 -- drop any fractional part
+		if i < 128 then -- below 0x80: emitted as a single byte
+			return s_char(i)
+		else
+			local c1 = i % 64 -- lowest 6 bits, payload of the last continuation byte
+			i = (i - c1) / 64 -- shift right by 6 bits using plain arithmetic
+			if i < 32 then -- original value was below 0x800: two-byte sequence
+				return s_char(0xC0+i, 0x80+c1)
+			else
+        			local c2 = i % 64 -- next 6 bits
+        			i = (i - c2) / 64 -- i now holds the original value divided by 4096
+        			if i < 16 and (i ~= 13 or c2 < 32) then -- below 0x10000, skipping 0xD800-0xDFFF (i == 13 with c2 >= 32), the surrogate range
+        				return s_char(0xE0+i, 0x80+c2, 0x80+c1) -- three-byte sequence
+        			elseif i >= 16 and i < 0x110 then -- 0x10000 up to, but not including, 0x110000
+        				local c3 = i % 64 -- next 6 bits
+        				i = (i - c3) / 64 -- remaining high bits for the lead byte
+        				return s_char(0xF0+i, 0x80+c3, 0x80+c2, 0x80+c1) -- four-byte sequence
+        			end -- surrogates and values >= 0x110000 fall through with no return value
+			end
+		end
+	end
+end -- NOTE(review): the \uXXXX decoding hunk concatenates this result directly; a nil result would presumably raise a concatenation error there - confirm intended handling of surrogate escapes
+
+
 local valid_types = {
 	number  = true,
 	string  = true,
@@ -249,7 +284,7 @@
 						if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end
 						seq = seq..ch;
 					end
-					s = s..s.char(tonumber(seq, 16)); -- FIXME do proper utf-8
+					s = s..utf8char(tonumber(seq, 16));
 					next();
 				else error("invalid escape sequence in string"); end
 			end