Software /
code /
prosody
File
util/xmppstream.lua @ 13186:affaf6d08d26
util.datamanager: Pad list writes to avoid crossing block boundaries
By padding items so that they do not cross block boundaries, it becomes
easier to delete whole blocks with fallocate() without cutting items
in half, improving efficiency of such operations.
List stores are used for message archives, where the most common
deletion operation would be of the oldest entries, at the top of the
file. With this, all blocks that contain items to be removed could be
deleted without needing to read, delete and write out the whole file.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Wed, 07 Jun 2023 00:39:30 +0200 |
parent | 12975:d10957394a3c |
line wrap: on
line source
-- Prosody IM
-- Copyright (C) 2008-2010 Matthew Wild
-- Copyright (C) 2008-2010 Waqas Hussain
--
-- This project is MIT/X11 licensed. Please see the
-- COPYING file in the source package for more information.
--

-- Incremental XMPP stream parser: wraps a LuaExpat parser with SAX handlers
-- that assemble stanza objects and hand them to caller-supplied callbacks.

local lxp = require "lxp";
local st = require "prosody.util.stanza";
local stanza_mt = st.stanza_mt;

-- Everything needed from the global environment must be captured as a local
-- here, because _ENV is set to nil further down.
local error = error;
local tostring = tostring;
local t_insert = table.insert;
local t_concat = table.concat;
local t_remove = table.remove;
local setmetatable = setmetatable;

-- COMPAT: w/LuaExpat 1.1.0
-- Feature probes: each flag records whether this LuaExpat accepts the
-- corresponding callback / exposes the corresponding method.
local lxp_supports_doctype = pcall(lxp.new, { StartDoctypeDecl = false });
local lxp_supports_xmldecl = pcall(lxp.new, { XmlDecl = false });
local lxp_supports_bytecount = not not lxp.new({}).getcurrentbytecount;

local default_stanza_size_limit = 1024*1024*1; -- 1MB

local _ENV = nil;
-- luacheck: std none

local new_parser = lxp.new;

-- Maps expanded (namespace .. separator .. local name) attribute names from
-- the XML namespace to their conventional "xml:" prefixed form.
local xml_namespace = {
	["http://www.w3.org/XML/1998/namespace\1lang"] = "xml:lang";
	["http://www.w3.org/XML/1998/namespace\1space"] = "xml:space";
	["http://www.w3.org/XML/1998/namespace\1base"] = "xml:base";
	["http://www.w3.org/XML/1998/namespace\1id"] = "xml:id";
};

local xmlns_streams = "http://etherx.jabber.org/streams";

-- Separator the parser places between namespace URI and local name.
local ns_separator = "\1";
-- Captures (namespace, name); when no separator is present the second
-- capture is empty and the first holds the bare name.
local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";

local function dummy_cb() end

-- Build the SAX handler table for one stream.
--
-- session:           table passed to every callback; its `notopen` field is
--                    read to decide whether a stream header is still expected.
-- stream_callbacks:  table providing `handlestanza` (called for every completed
--                    top-level element) and optionally `streamopened`,
--                    `streamclosed`, `error`, `stream_ns`, `stream_tag`,
--                    `error_tag` and `default_ns`.
-- cb_handleprogress: optional function called with a byte count whenever
--                    input has been fully processed; defaults to a no-op.
--
-- Returns the handler table and a table with `reset` and `set_session`.
local function new_sax_handlers(session, stream_callbacks, cb_handleprogress)
	local xml_handlers = {};

	local cb_streamopened = stream_callbacks.streamopened;
	local cb_streamclosed = stream_callbacks.streamclosed;
	-- Without a user-supplied error callback, stream errors are raised.
	local cb_error = stream_callbacks.error or
		function(_, e, stanza)
			error("XML stream error: "..tostring(e)..(stanza and ": "..tostring(stanza) or ""),2);
		end;
	local cb_handlestanza = stream_callbacks.handlestanza;
	cb_handleprogress = cb_handleprogress or dummy_cb;

	local stream_ns = stream_callbacks.stream_ns or xmlns_streams;
	local stream_tag = stream_callbacks.stream_tag or "stream";
	if stream_ns ~= "" then
		stream_tag = stream_ns..ns_separator..stream_tag;
	end
	local stream_error_tag = stream_ns..ns_separator..(stream_callbacks.error_tag or "error");

	local stream_default_ns = stream_callbacks.default_ns;

	-- Language applied to stanzas that carry no xml:lang of their own;
	-- replaced by the stream header's xml:lang when one is present.
	local stream_lang = "en";

	local stack = {}; -- ancestors of the element currently being built
	local chardata, stanza = {}; -- pending text pieces; element being built (nil between stanzas)
	local stanza_size = 0; -- byte count accumulated for the current stanza
	local non_streamns_depth = 0; -- nesting depth below an element outside the default namespace

	function xml_handlers:StartElement(tagname, attr)
		if stanza and #chardata > 0 then
			-- We have some character data in the buffer
			t_insert(stanza, t_concat(chardata));
			chardata = {};
		end
		local curr_ns,name = tagname:match(ns_pattern);
		if name == "" then
			-- No separator in tagname: the element has no namespace
			curr_ns, name = "", curr_ns;
		end

		-- Only record xmlns explicitly when it differs from the stream default,
		-- or when already nested inside such an element.
		if curr_ns ~= stream_default_ns or non_streamns_depth > 0 then
			attr.xmlns = curr_ns;
			non_streamns_depth = non_streamns_depth + 1;
		end

		-- Drop the positional list of attribute names and rename attributes
		-- from the XML namespace to their "xml:" form.
		for i=1,#attr do
			local k = attr[i];
			attr[i] = nil;
			local xmlk = xml_namespace[k];
			if xmlk then
				attr[xmlk] = attr[k];
				attr[k] = nil;
			end
		end

		if not stanza then --if we are not currently inside a stanza
			if lxp_supports_bytecount then
				stanza_size = self:getcurrentbytecount();
			end
			if session.notopen then
				if tagname == stream_tag then
					non_streamns_depth = 0;
					stream_lang = attr["xml:lang"] or stream_lang;
					if cb_streamopened then
						if lxp_supports_bytecount then
							-- The stream header is done with; report its bytes
							cb_handleprogress(stanza_size);
							stanza_size = 0;
						end
						cb_streamopened(session, attr);
					end
				else
					-- Garbage before stream?
					cb_error(session, "no-stream", tagname);
				end
				return;
			end
			if curr_ns == "jabber:client" and name ~= "iq" and name ~= "presence" and name ~= "message" then
				cb_error(session, "invalid-top-level-element");
			end

			stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
		else -- we are inside a stanza, so add a tag
			if lxp_supports_bytecount then
				stanza_size = stanza_size + self:getcurrentbytecount();
			end
			t_insert(stack, stanza);
			local oldstanza = stanza;
			stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
			t_insert(oldstanza, stanza);
			t_insert(oldstanza.tags, stanza);
		end
	end

	-- CDATA markers only contribute to byte accounting: inside a stanza they
	-- count towards its size, outside they are reported as progress directly.
	function xml_handlers:StartCdataSection()
		if lxp_supports_bytecount then
			if stanza then
				stanza_size = stanza_size + self:getcurrentbytecount();
			else
				cb_handleprogress(self:getcurrentbytecount());
			end
		end
	end

	function xml_handlers:EndCdataSection()
		if lxp_supports_bytecount then
			if stanza then
				stanza_size = stanza_size + self:getcurrentbytecount();
			else
				cb_handleprogress(self:getcurrentbytecount());
			end
		end
	end

	-- Text inside a stanza is buffered until the next element boundary;
	-- text between stanzas is discarded (but still reported as progress).
	function xml_handlers:CharacterData(data)
		if stanza then
			if lxp_supports_bytecount then
				stanza_size = stanza_size + self:getcurrentbytecount();
			end
			t_insert(chardata, data);
		elseif lxp_supports_bytecount then
			cb_handleprogress(self:getcurrentbytecount());
		end
	end

	function xml_handlers:EndElement(tagname)
		if lxp_supports_bytecount then
			stanza_size = stanza_size + self:getcurrentbytecount()
		end
		if non_streamns_depth > 0 then
			non_streamns_depth = non_streamns_depth - 1;
		end
		if stanza then
			if #chardata > 0 then
				-- We have some character data in the buffer
				t_insert(stanza, t_concat(chardata));
				chardata = {};
			end
			-- Complete stanza
			if #stack == 0 then
				if lxp_supports_bytecount then
					cb_handleprogress(stanza_size);
				end
				stanza_size = 0;
				if stanza.attr["xml:lang"] == nil then
					stanza.attr["xml:lang"] = stream_lang;
				end
				if tagname ~= stream_error_tag then
					cb_handlestanza(session, stanza);
				else
					cb_error(session, "stream-error", stanza);
				end
				stanza = nil;
			else
				-- Finished a child element; resume building its parent
				stanza = t_remove(stack);
			end
		else
			-- An end tag outside any stanza is treated as the stream closing
			if lxp_supports_bytecount then
				cb_handleprogress(stanza_size);
			end
			if cb_streamclosed then
				cb_streamclosed(session);
			end
		end
	end

	-- Shared handler for XML constructs that are not allowed in the stream:
	-- reports a parse error and aborts the parser.
	local function restricted_handler(parser)
		cb_error(session, "parse-error", "restricted-xml", "Restricted XML, see RFC 6120 section 11.1.");
		if not parser.stop or not parser:stop() then
			error("Failed to abort parsing");
		end
	end

	if lxp_supports_xmldecl then
		function xml_handlers:XmlDecl(version, encoding, standalone)
			if lxp_supports_bytecount then
				cb_handleprogress(self:getcurrentbytecount());
			end
			-- Only XML 1.0 in UTF-8 is accepted, and standalone must not be "no"
			if (encoding and encoding:lower() ~= "utf-8")
			or (standalone == "no")
			or (version and version ~= "1.0") then
				return restricted_handler(self);
			end
		end
	end
	if lxp_supports_doctype then
		xml_handlers.StartDoctypeDecl = restricted_handler;
	end
	xml_handlers.Comment = restricted_handler;
	xml_handlers.ProcessingInstruction = restricted_handler;

	-- Discard any partially built stanza and its byte count.
	local function reset()
		stanza, chardata, stanza_size = nil, {}, 0;
		stack = {};
	end

	-- Replace the session object passed to callbacks from now on.
	local function set_session(stream, new_session) -- luacheck: ignore 212/stream
		session = new_session;
	end

	return xml_handlers, { reset = reset, set_session = set_session };
end

-- Create a stream parser for `session`.
--
-- session:           table handed to the callbacks; gains an `open_stream` method.
-- stream_callbacks:  see new_sax_handlers().
-- stanza_size_limit: optional maximum number of not-yet-processed bytes;
--                    defaults to default_stanza_size_limit when byte counting
--                    is available, and raises an error if given when it is not.
--
-- Returns a table with `reset`, `feed`, `set_session` and
-- `set_stanza_size_limit`.
local function new(session, stream_callbacks, stanza_size_limit)
	-- Used to track parser progress (e.g. to enforce size limits)
	local n_outstanding_bytes = 0;
	local handle_progress;
	if lxp_supports_bytecount then
		function handle_progress(n_parsed_bytes)
			n_outstanding_bytes = n_outstanding_bytes - n_parsed_bytes;
		end
		stanza_size_limit = stanza_size_limit or default_stanza_size_limit;
	elseif stanza_size_limit then
		error("Stanza size limits are not supported on this version of LuaExpat")
	end

	local handlers, meta = new_sax_handlers(session, stream_callbacks, handle_progress);
	local parser = new_parser(handlers, ns_separator, false);
	local parse = parser.parse;

	-- Send the XML declaration and opening stream tag via session.sends2s
	-- (preferred) or session.send. Always returns true.
	function session.open_stream(session, from, to) -- luacheck: ignore 432/session
		local send = session.sends2s or session.send;

		local attr = {
			["xmlns:stream"] = "http://etherx.jabber.org/streams",
			["xml:lang"] = "en",
			xmlns = stream_callbacks.default_ns,
			-- version is only advertised when session.version is set and positive
			version = session.version and (session.version > 0 and "1.0" or nil),
			id = session.streamid or "",
			from = from or session.host, to = to,
		};
		if session.stream_attrs then
			-- Hook allowing the session to adjust the attributes before sending
			session:stream_attrs(from, to, attr)
		end
		send("<?xml version='1.0'?>"..st.stanza("stream:stream", attr):top_tag());
		return true;
	end

	return {
		-- Start over with a fresh parser and cleared parse state.
		reset = function ()
			parser = new_parser(handlers, ns_separator, false);
			parse = parser.parse;
			n_outstanding_bytes = 0;
			meta.reset();
		end,
		-- Feed a chunk of data to the parser. Returns the parser's own results,
		-- or nil, "stanza-too-large" once more than stanza_size_limit bytes are
		-- pending without having been reported as processed.
		feed = function (self, data) -- luacheck: ignore 212/self
			if lxp_supports_bytecount then
				n_outstanding_bytes = n_outstanding_bytes + #data;
			end
			local _parser = parser;
			local ok, err = parse(_parser, data);
			if lxp_supports_bytecount and n_outstanding_bytes > stanza_size_limit then
				return nil, "stanza-too-large";
			end
			if parser ~= _parser then
				-- reset() ran during this call and replaced the parser;
				-- finish and close the superseded one.
				_parser:parse();
				_parser:close();
			end
			return ok, err;
		end,
		set_session = meta.set_session;
		-- Change the size limit enforced by feed(). A missing value selects the
		-- default, as in new(): feed() compares against this value, and
		-- comparing a number with nil would raise an error.
		set_stanza_size_limit = function (_, new_stanza_size_limit)
			stanza_size_limit = new_stanza_size_limit or default_stanza_size_limit;
		end;
	};
end

return {
	ns_separator = ns_separator;
	ns_pattern = ns_pattern;
	new_sax_handlers = new_sax_handlers;
	new = new;
};