Software /
code /
prosody
Comparison
util/serialization.lua @ 9008:ae3c52419ec1
util.serialization: Rewrite for performance and flexibility
... and because rewrites are fun!
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Tue, 10 Jul 2018 22:04:26 +0200 |
parent | 8555:4f0f5b49bb03 |
child | 9060:69bc3144c2b7 |
comparison
equal
deleted
inserted
replaced
9007:695904638cfa | 9008:ae3c52419ec1 |
---|---|
1 -- Prosody IM | 1 -- Prosody IM |
2 -- Copyright (C) 2008-2010 Matthew Wild | 2 -- Copyright (C) 2008-2010 Matthew Wild |
3 -- Copyright (C) 2008-2010 Waqas Hussain | 3 -- Copyright (C) 2008-2010 Waqas Hussain |
4 -- Copyright (C) 2018 Kim Alvefur | |
4 -- | 5 -- |
5 -- This project is MIT/X11 licensed. Please see the | 6 -- This project is MIT/X11 licensed. Please see the |
6 -- COPYING file in the source package for more information. | 7 -- COPYING file in the source package for more information. |
7 -- | 8 -- |
8 | 9 |
9 local string_rep = string.rep; | 10 local getmetatable = getmetatable; |
10 local type = type; | 11 local next, type = next, type; |
11 local tostring = tostring; | 12 local s_format = string.format; |
12 local t_insert = table.insert; | 13 local s_gsub = string.gsub; |
14 local s_rep = string.rep; | |
15 local s_char = string.char; | |
16 local s_match = string.match; | |
13 local t_concat = table.concat; | 17 local t_concat = table.concat; |
14 local pairs = pairs; | |
15 local next = next; | |
16 | 18 |
17 local pcall = pcall; | 19 local pcall = pcall; |
18 | |
19 local debug_traceback = debug.traceback; | |
20 local log = require "util.logger".init("serialization"); | |
21 local envload = require"util.envload".envload; | 20 local envload = require"util.envload".envload; |
22 | 21 |
23 local _ENV = nil; | 22 local pos_inf, neg_inf = math.huge, -math.huge; |
24 -- luacheck: std none | 23 local m_log = math.log; |
25 | 24 local m_log10 = math.log10 or function (n) |
26 local indent = function(i) | 25 return m_log(n, 10); |
27 return string_rep("\t", i); | 26 end |
28 end | 27 local m_floor = math.floor; |
29 local function basicSerialize (o) | 28 -- luacheck: ignore 143/math |
30 if type(o) == "number" or type(o) == "boolean" then | 29 local m_type = math.type or function (n) |
31 -- no need to check for NaN, as that's not a valid table index | 30 return n % 1 == 0 and n <= 9007199254740992 and n >= -9007199254740992 and "integer" or "float"; |
32 if o == 1/0 then return "(1/0)"; | 31 end; |
33 elseif o == -1/0 then return "(-1/0)"; | 32 |
34 else return tostring(o); end | 33 local char_to_hex = {}; |
35 else -- assume it is a string -- FIXME make sure it's a string. throw an error otherwise. | 34 for i = 0,255 do |
36 return (("%q"):format(tostring(o)):gsub("\\\n", "\\n")); | 35 char_to_hex[s_char(i)] = s_format("%02x", i); |
37 end | 36 end |
38 end | 37 |
39 local function _simplesave(o, ind, t, func) | 38 local function to_hex(s) |
40 if type(o) == "number" then | 39 return (s_gsub(s, ".", char_to_hex)); |
41 if o ~= o then func(t, "(0/0)"); | 40 end |
42 elseif o == 1/0 then func(t, "(1/0)"); | 41 |
43 elseif o == -1/0 then func(t, "(-1/0)"); | 42 local function fatal_error(obj, why) |
44 else func(t, tostring(o)); end | 43 error("Can't serialize "..type(obj) .. (why and ": ".. why or "")); |
45 elseif type(o) == "string" then | 44 end |
46 func(t, (("%q"):format(o):gsub("\\\n", "\\n"))); | 45 |
47 elseif type(o) == "table" then | 46 local function default_fallback(x, why) |
48 if next(o) ~= nil then | 47 return s_format("nil --[[%s: %s]]", type(x), why or "fail"); |
49 func(t, "{\n"); | 48 end |
50 for k,v in pairs(o) do | 49 |
51 func(t, indent(ind)); | 50 local string_escapes = { |
52 func(t, "["); | 51 ['\a'] = [[\a]]; ['\b'] = [[\b]]; |
53 func(t, basicSerialize(k)); | 52 ['\f'] = [[\f]]; ['\n'] = [[\n]]; |
54 func(t, "] = "); | 53 ['\r'] = [[\r]]; ['\t'] = [[\t]]; |
55 if ind == 0 then | 54 ['\v'] = [[\v]]; ['\\'] = [[\\]]; |
56 _simplesave(v, 0, t, func); | 55 ['\"'] = [[\"]]; ['\''] = [[\']]; |
56 } | |
57 | |
58 for i = 0, 255 do | |
59 local c = s_char(i); | |
60 if not string_escapes[c] then | |
61 string_escapes[c] = s_format("\\%03d", i); | |
62 end | |
63 end | |
64 | |
65 local default_keywords = { | |
66 ["do"] = true; ["and"] = true; ["else"] = true; ["break"] = true; | |
67 ["if"] = true; ["end"] = true; ["goto"] = true; ["false"] = true; | |
68 ["in"] = true; ["for"] = true; ["then"] = true; ["local"] = true; | |
69 ["or"] = true; ["nil"] = true; ["true"] = true; ["until"] = true; | |
70 ["elseif"] = true; ["function"] = true; ["not"] = true; | |
71 ["repeat"] = true; ["return"] = true; ["while"] = true; | |
72 }; | |
73 | |
74 local function new(opt) | |
75 if type(opt) ~= "table" then | |
76 opt = { preset = opt }; | |
77 end | |
78 | |
79 local types = { | |
80 table = true; | |
81 string = true; | |
82 number = true; | |
83 boolean = true; | |
84 ["nil"] = true; | |
85 }; | |
86 | |
87 -- presets | |
88 if opt.preset == "debug" then | |
89 opt.preset = "oneline"; | |
90 opt.freeze = true; | |
91 opt.fatal = false; | |
92 opt.fallback = default_fallback; | |
93 end | |
94 if opt.preset == "oneline" then | |
95 opt.indentwith = opt.indentwith or ""; | |
96 opt.itemstart = opt.itemstart or " "; | |
97 opt.itemlast = opt.itemlast or ""; | |
98 opt.tend = opt.tend or " }"; | |
99 elseif opt.preset == "compact" then | |
100 opt.indentwith = opt.indentwith or ""; | |
101 opt.itemstart = opt.itemstart or ""; | |
102 opt.itemlast = opt.itemlast or ""; | |
103 opt.equals = opt.equals or "="; | |
104 end | |
105 | |
106 local fallback = opt.fatal and fatal_error or opt.fallback or default_fallback; | |
107 | |
108 local function ser(v) | |
109 return (types[type(v)] or fallback)(v); | |
110 end | |
111 | |
112 local keywords = opt.keywords or default_keywords; | |
113 | |
114 -- indented | |
115 local indentwith = opt.indentwith or "\t"; | |
116 local itemstart = opt.itemstart or "\n"; | |
117 local itemsep = opt.itemsep or ";"; | |
118 local itemlast = opt.itemlast or ";\n"; | |
119 local tstart = opt.tstart or "{"; | |
120 local tend = opt.tend or "}"; | |
121 local kstart = opt.kstart or "["; | |
122 local kend = opt.kend or "]"; | |
123 local equals = opt.equals or " = "; | |
124 local unquoted = opt.unquoted == nil and "^[%a_][%w_]*$" or opt.unquoted; | |
125 local hex = opt.hex; | |
126 local freeze = opt.freeze; | |
127 local precision = opt.precision or 10; | |
128 | |
129 -- serialize one table, recursively | |
130 -- t - table being serialized | |
131 -- o - array where tokens are added, concatenate to get final result | |
132 -- - also used to detect cycles | |
133 -- l - position in o of where to insert next token | |
134 -- d - depth, used for indentation | |
135 local function serialize_table(t, o, l, d) | |
136 if o[t] or d > 127 then | |
137 o[l], l = fallback(t, "recursion"), l + 1; | |
138 return l; | |
139 end | |
140 | |
141 o[t] = true; | |
142 if freeze then | |
143 -- opportunity to do pre-serialization | |
144 local mt = getmetatable(t); | |
145 local fr = (freeze ~= true and freeze[mt]); | |
146 local mf = mt and mt.__freeze; | |
147 local tag; | |
148 if type(fr) == "string" then | |
149 tag = fr; | |
150 fr = mf; | |
151 elseif mt then | |
152 tag = mt.__type; | |
153 end | |
154 if fr then | |
155 t = fr(t); | |
156 if tag then | |
157 o[l], l = tag, l + 1; | |
158 end | |
159 end | |
160 end | |
161 o[l], l = tstart, l + 1; | |
162 local indent = s_rep(indentwith, d); | |
163 local numkey = 1; | |
164 local ktyp, vtyp; | |
165 for k,v in next,t do | |
166 o[l], l = itemstart, l + 1; | |
167 o[l], l = indent, l + 1; | |
168 ktyp, vtyp = type(k), type(v); | |
169 if k == numkey then | |
170 -- next index in array part | |
171 -- assuming that these are found in order | |
172 numkey = numkey + 1; | |
173 elseif unquoted and ktyp == "string" and | |
174 not keywords[k] and s_match(k, unquoted) then | |
175 -- unquoted keys | |
176 o[l], l = k, l + 1; | |
177 o[l], l = equals, l + 1; | |
178 else | |
179 -- quoted keys | |
180 o[l], l = kstart, l + 1; | |
181 if ktyp == "table" then | |
182 l = serialize_table(k, o, l, d+1); | |
57 else | 183 else |
58 _simplesave(v, ind+1, t, func); | 184 o[l], l = ser(k), l + 1; |
59 end | 185 end |
60 func(t, ";\n"); | 186 -- = |
61 end | 187 o[l], o[l+1], l = kend, equals, l + 2; |
62 func(t, indent(ind-1)); | 188 end |
63 func(t, "}"); | 189 |
190 -- the value | |
191 if vtyp == "table" then | |
192 l = serialize_table(v, o, l, d+1); | |
193 else | |
194 o[l], l = ser(v), l + 1; | |
195 end | |
196 -- last item? | |
197 if next(t, k) ~= nil then | |
198 o[l], l = itemsep, l + 1; | |
199 else | |
200 o[l], l = itemlast, l + 1; | |
201 end | |
202 end | |
203 if next(t) ~= nil then | |
204 o[l], l = s_rep(indentwith, d-1), l + 1; | |
205 end | |
206 o[l], l = tend, l +1; | |
207 return l; | |
208 end | |
209 | |
210 function types.table(t) | |
211 local o = {}; | |
212 serialize_table(t, o, 1, 1); | |
213 return t_concat(o); | |
214 end | |
215 | |
216 local function serialize_string(s) | |
217 return '"' .. s_gsub(s, "[%z\1-\31\"\'\\\127-\255]", string_escapes) .. '"'; | |
218 end | |
219 | |
220 if hex then | |
221 function types.string(s) | |
222 local esc = serialize_string(s); | |
223 if #esc > (#s*2+2+#hex) then | |
224 return hex .. '"' .. to_hex(s) .. '"'; | |
225 end | |
226 return esc; | |
227 end | |
228 else | |
229 types.string = serialize_string; | |
230 end | |
231 | |
232 function types.number(t) | |
233 if m_type(t) == "integer" then | |
234 return s_format("%d", t); | |
235 elseif t == pos_inf then | |
236 return "(1/0)"; | |
237 elseif t == neg_inf then | |
238 return "(-1/0)"; | |
239 elseif t ~= t then | |
240 return "(0/0)"; | |
241 end | |
242 local log = m_floor(m_log10(t)); | |
243 if log > precision then | |
244 return s_format("%.18e", t); | |
64 else | 245 else |
65 func(t, "{}"); | 246 return s_format("%.18g", t); |
66 end | 247 end |
67 elseif type(o) == "boolean" then | 248 end |
68 func(t, (o and "true" or "false")); | 249 |
69 else | 250 -- Are these faster than tostring? |
70 log("error", "cannot serialize a %s: %s", type(o), debug_traceback()) | 251 types["nil"] = function() |
71 func(t, "nil"); | 252 return "nil"; |
72 end | 253 end |
73 end | 254 |
74 | 255 function types.boolean(t) |
75 local function append(t, o) | 256 return t and "true" or "false"; |
76 _simplesave(o, 1, t, t.write or t_insert); | 257 end |
77 return t; | 258 |
78 end | 259 return ser; |
79 | |
80 local function serialize(o) | |
81 return t_concat(append({}, o)); | |
82 end | 260 end |
83 | 261 |
84 local function deserialize(str) | 262 local function deserialize(str) |
85 if type(str) ~= "string" then return nil; end | 263 if type(str) ~= "string" then return nil; end |
86 str = "return "..str; | 264 str = "return "..str; |
90 if not success then return nil, ret; end | 268 if not success then return nil, ret; end |
91 return ret; | 269 return ret; |
92 end | 270 end |
93 | 271 |
94 return { | 272 return { |
95 append = append; | 273 new = new; |
96 serialize = serialize; | 274 serialize = function (x, opt) |
275 return new(opt)(x); | |
276 end; | |
97 deserialize = deserialize; | 277 deserialize = deserialize; |
98 }; | 278 }; |