Comparison

util/serialization.lua @ 9008:ae3c52419ec1

util.serialization: Rewrite for performance and flexibility ... and because rewrites are fun!
author Kim Alvefur <zash@zash.se>
date Tue, 10 Jul 2018 22:04:26 +0200
parent 8555:4f0f5b49bb03
child 9060:69bc3144c2b7
comparison
equal deleted inserted replaced
9007:695904638cfa 9008:ae3c52419ec1
1 -- Prosody IM 1 -- Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild 2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain 3 -- Copyright (C) 2008-2010 Waqas Hussain
4 -- Copyright (C) 2018 Kim Alvefur
4 -- 5 --
5 -- This project is MIT/X11 licensed. Please see the 6 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information. 7 -- COPYING file in the source package for more information.
7 -- 8 --
8 9
9 local string_rep = string.rep; 10 local getmetatable = getmetatable;
10 local type = type; 11 local next, type = next, type;
11 local tostring = tostring; 12 local s_format = string.format;
12 local t_insert = table.insert; 13 local s_gsub = string.gsub;
14 local s_rep = string.rep;
15 local s_char = string.char;
16 local s_match = string.match;
13 local t_concat = table.concat; 17 local t_concat = table.concat;
14 local pairs = pairs;
15 local next = next;
16 18
17 local pcall = pcall; 19 local pcall = pcall;
18
19 local debug_traceback = debug.traceback;
20 local log = require "util.logger".init("serialization");
21 local envload = require"util.envload".envload; 20 local envload = require"util.envload".envload;
22 21
23 local _ENV = nil; 22 local pos_inf, neg_inf = math.huge, -math.huge;
24 -- luacheck: std none 23 local m_log = math.log;
25 24 local m_log10 = math.log10 or function (n)
26 local indent = function(i) 25 return m_log(n, 10);
27 return string_rep("\t", i); 26 end
28 end 27 local m_floor = math.floor;
29 local function basicSerialize (o) 28 -- luacheck: ignore 143/math
30 if type(o) == "number" or type(o) == "boolean" then 29 local m_type = math.type or function (n)
31 -- no need to check for NaN, as that's not a valid table index 30 return n % 1 == 0 and n <= 9007199254740992 and n >= -9007199254740992 and "integer" or "float";
32 if o == 1/0 then return "(1/0)"; 31 end;
33 elseif o == -1/0 then return "(-1/0)"; 32
34 else return tostring(o); end 33 local char_to_hex = {};
35 else -- assume it is a string -- FIXME make sure it's a string. throw an error otherwise. 34 for i = 0,255 do
36 return (("%q"):format(tostring(o)):gsub("\\\n", "\\n")); 35 char_to_hex[s_char(i)] = s_format("%02x", i);
37 end 36 end
38 end 37
39 local function _simplesave(o, ind, t, func) 38 local function to_hex(s)
40 if type(o) == "number" then 39 return (s_gsub(s, ".", char_to_hex));
41 if o ~= o then func(t, "(0/0)"); 40 end
42 elseif o == 1/0 then func(t, "(1/0)"); 41
43 elseif o == -1/0 then func(t, "(-1/0)"); 42 local function fatal_error(obj, why)
44 else func(t, tostring(o)); end 43 error("Can't serialize "..type(obj) .. (why and ": ".. why or ""));
45 elseif type(o) == "string" then 44 end
46 func(t, (("%q"):format(o):gsub("\\\n", "\\n"))); 45
47 elseif type(o) == "table" then 46 local function default_fallback(x, why)
48 if next(o) ~= nil then 47 return s_format("nil --[[%s: %s]]", type(x), why or "fail");
49 func(t, "{\n"); 48 end
50 for k,v in pairs(o) do 49
51 func(t, indent(ind)); 50 local string_escapes = {
52 func(t, "["); 51 ['\a'] = [[\a]]; ['\b'] = [[\b]];
53 func(t, basicSerialize(k)); 52 ['\f'] = [[\f]]; ['\n'] = [[\n]];
54 func(t, "] = "); 53 ['\r'] = [[\r]]; ['\t'] = [[\t]];
55 if ind == 0 then 54 ['\v'] = [[\v]]; ['\\'] = [[\\]];
56 _simplesave(v, 0, t, func); 55 ['\"'] = [[\"]]; ['\''] = [[\']];
56 }
57
58 for i = 0, 255 do
59 local c = s_char(i);
60 if not string_escapes[c] then
61 string_escapes[c] = s_format("\\%03d", i);
62 end
63 end
64
65 local default_keywords = {
66 ["do"] = true; ["and"] = true; ["else"] = true; ["break"] = true;
67 ["if"] = true; ["end"] = true; ["goto"] = true; ["false"] = true;
68 ["in"] = true; ["for"] = true; ["then"] = true; ["local"] = true;
69 ["or"] = true; ["nil"] = true; ["true"] = true; ["until"] = true;
70 ["elseif"] = true; ["function"] = true; ["not"] = true;
71 ["repeat"] = true; ["return"] = true; ["while"] = true;
72 };
73
74 local function new(opt)
75 if type(opt) ~= "table" then
76 opt = { preset = opt };
77 end
78
79 local types = {
80 table = true;
81 string = true;
82 number = true;
83 boolean = true;
84 ["nil"] = true;
85 };
86
87 -- presets
88 if opt.preset == "debug" then
89 opt.preset = "oneline";
90 opt.freeze = true;
91 opt.fatal = false;
92 opt.fallback = default_fallback;
93 end
94 if opt.preset == "oneline" then
95 opt.indentwith = opt.indentwith or "";
96 opt.itemstart = opt.itemstart or " ";
97 opt.itemlast = opt.itemlast or "";
98 opt.tend = opt.tend or " }";
99 elseif opt.preset == "compact" then
100 opt.indentwith = opt.indentwith or "";
101 opt.itemstart = opt.itemstart or "";
102 opt.itemlast = opt.itemlast or "";
103 opt.equals = opt.equals or "=";
104 end
105
106 local fallback = opt.fatal and fatal_error or opt.fallback or default_fallback;
107
108 local function ser(v)
109 return (types[type(v)] or fallback)(v);
110 end
111
112 local keywords = opt.keywords or default_keywords;
113
114 -- indented
115 local indentwith = opt.indentwith or "\t";
116 local itemstart = opt.itemstart or "\n";
117 local itemsep = opt.itemsep or ";";
118 local itemlast = opt.itemlast or ";\n";
119 local tstart = opt.tstart or "{";
120 local tend = opt.tend or "}";
121 local kstart = opt.kstart or "[";
122 local kend = opt.kend or "]";
123 local equals = opt.equals or " = ";
124 local unquoted = opt.unquoted == nil and "^[%a_][%w_]*$" or opt.unquoted;
125 local hex = opt.hex;
126 local freeze = opt.freeze;
127 local precision = opt.precision or 10;
128
129 -- serialize one table, recursively
130 -- t - table being serialized
131 -- o - array where tokens are added, concatenate to get final result
132 -- - also used to detect cycles
133 -- l - position in o of where to insert next token
134 -- d - depth, used for indentation
135 local function serialize_table(t, o, l, d)
136 if o[t] or d > 127 then
137 o[l], l = fallback(t, "recursion"), l + 1;
138 return l;
139 end
140
141 o[t] = true;
142 if freeze then
143 -- opportunity to do pre-serialization
144 local mt = getmetatable(t);
145 local fr = (freeze ~= true and freeze[mt]);
146 local mf = mt and mt.__freeze;
147 local tag;
148 if type(fr) == "string" then
149 tag = fr;
150 fr = mf;
151 elseif mt then
152 tag = mt.__type;
153 end
154 if fr then
155 t = fr(t);
156 if tag then
157 o[l], l = tag, l + 1;
158 end
159 end
160 end
161 o[l], l = tstart, l + 1;
162 local indent = s_rep(indentwith, d);
163 local numkey = 1;
164 local ktyp, vtyp;
165 for k,v in next,t do
166 o[l], l = itemstart, l + 1;
167 o[l], l = indent, l + 1;
168 ktyp, vtyp = type(k), type(v);
169 if k == numkey then
170 -- next index in array part
171 -- assuming that these are found in order
172 numkey = numkey + 1;
173 elseif unquoted and ktyp == "string" and
174 not keywords[k] and s_match(k, unquoted) then
175 -- unquoted keys
176 o[l], l = k, l + 1;
177 o[l], l = equals, l + 1;
178 else
179 -- quoted keys
180 o[l], l = kstart, l + 1;
181 if ktyp == "table" then
182 l = serialize_table(k, o, l, d+1);
57 else 183 else
58 _simplesave(v, ind+1, t, func); 184 o[l], l = ser(k), l + 1;
59 end 185 end
60 func(t, ";\n"); 186 -- =
61 end 187 o[l], o[l+1], l = kend, equals, l + 2;
62 func(t, indent(ind-1)); 188 end
63 func(t, "}"); 189
190 -- the value
191 if vtyp == "table" then
192 l = serialize_table(v, o, l, d+1);
193 else
194 o[l], l = ser(v), l + 1;
195 end
196 -- last item?
197 if next(t, k) ~= nil then
198 o[l], l = itemsep, l + 1;
199 else
200 o[l], l = itemlast, l + 1;
201 end
202 end
203 if next(t) ~= nil then
204 o[l], l = s_rep(indentwith, d-1), l + 1;
205 end
206 o[l], l = tend, l +1;
207 return l;
208 end
209
210 function types.table(t)
211 local o = {};
212 serialize_table(t, o, 1, 1);
213 return t_concat(o);
214 end
215
216 local function serialize_string(s)
217 return '"' .. s_gsub(s, "[%z\1-\31\"\'\\\127-\255]", string_escapes) .. '"';
218 end
219
220 if hex then
221 function types.string(s)
222 local esc = serialize_string(s);
223 if #esc > (#s*2+2+#hex) then
224 return hex .. '"' .. to_hex(s) .. '"';
225 end
226 return esc;
227 end
228 else
229 types.string = serialize_string;
230 end
231
232 function types.number(t)
233 if m_type(t) == "integer" then
234 return s_format("%d", t);
235 elseif t == pos_inf then
236 return "(1/0)";
237 elseif t == neg_inf then
238 return "(-1/0)";
239 elseif t ~= t then
240 return "(0/0)";
241 end
242 local log = m_floor(m_log10(t));
243 if log > precision then
244 return s_format("%.18e", t);
64 else 245 else
65 func(t, "{}"); 246 return s_format("%.18g", t);
66 end 247 end
67 elseif type(o) == "boolean" then 248 end
68 func(t, (o and "true" or "false")); 249
69 else 250 -- Are these faster than tostring?
70 log("error", "cannot serialize a %s: %s", type(o), debug_traceback()) 251 types["nil"] = function()
71 func(t, "nil"); 252 return "nil";
72 end 253 end
73 end 254
74 255 function types.boolean(t)
75 local function append(t, o) 256 return t and "true" or "false";
76 _simplesave(o, 1, t, t.write or t_insert); 257 end
77 return t; 258
78 end 259 return ser;
79
80 local function serialize(o)
81 return t_concat(append({}, o));
82 end 260 end
83 261
84 local function deserialize(str) 262 local function deserialize(str)
85 if type(str) ~= "string" then return nil; end 263 if type(str) ~= "string" then return nil; end
86 str = "return "..str; 264 str = "return "..str;
90 if not success then return nil, ret; end 268 if not success then return nil, ret; end
91 return ret; 269 return ret;
92 end 270 end
93 271
94 return { 272 return {
95 append = append; 273 new = new;
96 serialize = serialize; 274 serialize = function (x, opt)
275 return new(opt)(x);
276 end;
97 deserialize = deserialize; 277 deserialize = deserialize;
98 }; 278 };