File fallbacks/lxp.lua @ 13135:3fd24e1945b0 (prosody)
mod_storage_internal: Lazy-load archive items while iterating
Very large list files previously ran into limits of the Lua parser, or simply
caused Prosody to freeze while parsing.

Using the new index we can parse individual items one at a time. This
probably won't reduce overall CPU usage (if anything, the opposite), but it
will reduce the number of items in memory at once and allow items to be
collected after iteration has moved past them.
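To make that concrete, here is a minimal sketch of index-based lazy iteration in Lua. It is not Prosody's actual mod_storage_internal code: the index of byte offsets and lengths, the `parse_item` decoder and the `lazy_items` helper are hypothetical stand-ins for the real storage internals.

```lua
-- Minimal sketch (hypothetical names, not Prosody's real code): iterate over a
-- large list file one item at a time, using a prebuilt index of byte ranges.
-- `index` is assumed to be { { offset = ..., length = ... }, ... } and
-- `parse_item` to decode one serialized item.
local function lazy_items(filename, index, parse_item)
	local f = assert(io.open(filename, "rb"));
	local i = 0;
	return function()
		i = i + 1;
		local entry = index[i];
		if not entry then f:close(); return nil; end
		f:seek("set", entry.offset);
		local raw = f:read(entry.length);
		-- Only one decoded item is alive per step; earlier items become
		-- garbage once the caller has moved past them.
		return i, parse_item(raw);
	end
end

-- for i, item in lazy_items("archive.list", index, parse_item) do ... end
```

Memory use is then bounded by the largest single item rather than by the whole archive, at the cost of some extra seek-and-parse work per item.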
author: Kim Alvefur <zash@zash.se>
date: Wed, 12 May 2021 01:25:44 +0200
parent: 5776:bd0ff8ae98a8
-- Pure-Lua fallback for the LuaExpat "lxp" module: a minimal coroutine-based
-- streaming XML parser exposing the same handler-driven interface.

local coroutine = coroutine;
local tonumber = tonumber;
local string = string;
local setmetatable, getmetatable = setmetatable, getmetatable;
local pairs = pairs;

-- A dead coroutine, swapped in for the parser after EOF or an error.
local deadroutine = coroutine.create(function() end);
coroutine.resume(deadroutine);

module("lxp")

-- Named entities, plus numeric character references ("#nnn" / "#xhh")
-- handled by the metatable.
local entity_map = setmetatable({
	["amp"] = "&";
	["gt"] = ">";
	["lt"] = "<";
	["apos"] = "'";
	["quot"] = "\"";
}, {__index = function(_, s)
		if s:sub(1,1) == "#" then
			if s:sub(2,2) == "x" then
				return string.char(tonumber(s:sub(3), 16));
			else
				return string.char(tonumber(s:sub(2)));
			end
		end
	end
});

local function xml_unescape(str)
	return (str:gsub("&(.-);", entity_map));
end

-- Splits the inside of a tag into its name and a table of unescaped attributes.
local function parse_tag(s)
	local name,sattr=(s):gmatch("([^%s]+)(.*)")();
	local attr = {};
	for a,b in (sattr):gmatch("([^=%s]+)=['\"]([^'\"]*)['\"]") do
		attr[a] = xml_unescape(b);
	end
	return name, attr;
end

-- The parser body runs inside a coroutine; read_until/read_before/peek
-- yield whenever more input is needed, and resume() feeds them the next chunk.
local function parser(data, handlers, ns_separator)
	local function read_until(str)
		local pos = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos = data:find(str, nil, true);
		end
		local r = data:sub(1, pos);
		data = data:sub(pos+1);
		return r;
	end
	local function read_before(str)
		local pos = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos = data:find(str, nil, true);
		end
		local r = data:sub(1, pos-1);
		data = data:sub(pos);
		return r;
	end
	local function peek()
		while #data == 0 do
			data = coroutine.yield();
		end
		return data:sub(1,1);
	end

	-- Namespace scopes form a chain of metatables; push()/pop() enter and
	-- leave an element's scope.
	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
	ns.__index = ns;
	local function apply_ns(name, dodefault)
		local prefix,n = name:match("^([^:]*):(.*)$");
		if prefix and ns[prefix] then
			return ns[prefix]..ns_separator..n;
		end
		if dodefault and ns[""] then
			return ns[""]..ns_separator..name;
		end
		return name;
	end
	local function push(tag, attr)
		ns = setmetatable({}, ns);
		for k,v in pairs(attr) do
			local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
			if xmlns then
				ns[xmlns] = v;
				attr[k] = nil;
			end
		end
		local newattr, n = {}, 0;
		for k,v in pairs(attr) do
			n = n+1;
			k = apply_ns(k);
			newattr[n] = k;
			newattr[k] = v;
		end
		tag = apply_ns(tag, true);
		ns[0] = tag;
		ns.__index = ns;
		return tag, newattr;
	end
	local function pop()
		local tag = ns[0];
		ns = getmetatable(ns);
		return tag;
	end

	while true do
		if peek() == "<" then
			local elem = read_until(">"):sub(2,-2);
			if elem:sub(1,1) == "!" or elem:sub(1,1) == "?" then
				-- neglect comments and processing-instructions
			elseif elem:sub(1,1) == "/" then -- end tag
				elem = elem:sub(2);
				local name = pop();
				handlers:EndElement(name); -- TODO check for start-end tag name match
			elseif elem:sub(-1,-1) == "/" then -- empty tag
				elem = elem:sub(1,-2);
				local name,attr = parse_tag(elem);
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
				name = pop();
				handlers:EndElement(name);
			else -- start tag
				local name,attr = parse_tag(elem);
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
			end
		else
			local text = read_before("<");
			handlers:CharacterData(xml_unescape(text));
		end
	end
end

-- lxp-compatible constructor: returns an object whose parse() method feeds
-- chunks to the parser coroutine; calling parse() with no data signals EOF.
function new(handlers, ns_separator)
	local co = coroutine.create(parser);
	return {
		parse = function(self, data)
			if not data then
				co = deadroutine;
				return true; -- eof
			end
			local success, result = coroutine.resume(co, data, handlers, ns_separator);
			if result then
				co = deadroutine;
				return nil, result; -- error
			end
			return true; -- success
		end;
	};
end

return _M;
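For reference, a small usage sketch of the handler-driven interface above. The handler bodies, the chunk boundaries and the "|" namespace separator are illustrative choices, and the sketch assumes this file is reachable via require as the "lxp" module (it targets Lua 5.1, since it uses module()).

```lua
-- Illustrative driver for the fallback parser above (not taken from Prosody's
-- calling code). Assumes this file is loadable as the "lxp" module.
local lxp = require "lxp";

local handlers = {};
function handlers:StartElement(name, attr)
	print("start", name);
end
function handlers:EndElement(name)
	print("end", name);
end
function handlers:CharacterData(text)
	print("text", text);
end

-- "|" as namespace separator is an arbitrary, readable choice.
local parser = lxp.new(handlers, "|");

-- Input may arrive in arbitrary chunks; the parser coroutine simply yields
-- whenever it runs out of buffered data.
assert(parser:parse("<message xmlns='jabber:client'><bo"));
assert(parser:parse("dy>Hello</body></message>"));
parser:parse(); -- no argument signals end of input
```

With these handlers the two chunks above produce start/text/end events for jabber:client|message and jabber:client|body, regardless of where the chunk boundary falls.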