Software /
code /
prosody
File
fallbacks/lxp.lua @ 13134:638f627e707f
util.datamanager: Add O(1) list indexing with on-disk index
Index file contains offsets and lengths of each item() which allows
seeking directly to each item and reading it without parsing the entire
file.
Also allows tricks like binary search, assuming items have some defined
order.
We take advantage of the 1-based indexing in tables to store a magic
header in the 0 position, so that table index 1 ends up at file index 1.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Tue, 11 May 2021 02:09:56 +0200 |
parent | 5776:bd0ff8ae98a8 |
line wrap: on
line source
local coroutine = coroutine; local tonumber = tonumber; local string = string; local setmetatable, getmetatable = setmetatable, getmetatable; local pairs = pairs; local deadroutine = coroutine.create(function() end); coroutine.resume(deadroutine); module("lxp") local entity_map = setmetatable({ ["amp"] = "&"; ["gt"] = ">"; ["lt"] = "<"; ["apos"] = "'"; ["quot"] = "\""; }, {__index = function(_, s) if s:sub(1,1) == "#" then if s:sub(2,2) == "x" then return string.char(tonumber(s:sub(3), 16)); else return string.char(tonumber(s:sub(2))); end end end }); local function xml_unescape(str) return (str:gsub("&(.-);", entity_map)); end local function parse_tag(s) local name,sattr=(s):gmatch("([^%s]+)(.*)")(); local attr = {}; for a,b in (sattr):gmatch("([^=%s]+)=['\"]([^'\"]*)['\"]") do attr[a] = xml_unescape(b); end return name, attr; end local function parser(data, handlers, ns_separator) local function read_until(str) local pos = data:find(str, nil, true); while not pos do data = data..coroutine.yield(); pos = data:find(str, nil, true); end local r = data:sub(1, pos); data = data:sub(pos+1); return r; end local function read_before(str) local pos = data:find(str, nil, true); while not pos do data = data..coroutine.yield(); pos = data:find(str, nil, true); end local r = data:sub(1, pos-1); data = data:sub(pos); return r; end local function peek() while #data == 0 do data = coroutine.yield(); end return data:sub(1,1); end local ns = { xml = "http://www.w3.org/XML/1998/namespace" }; ns.__index = ns; local function apply_ns(name, dodefault) local prefix,n = name:match("^([^:]*):(.*)$"); if prefix and ns[prefix] then return ns[prefix]..ns_separator..n; end if dodefault and ns[""] then return ns[""]..ns_separator..name; end return name; end local function push(tag, attr) ns = setmetatable({}, ns); for k,v in pairs(attr) do local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$"); if xmlns then ns[xmlns] = v; attr[k] = nil; end end local newattr, n = {}, 0; for k,v in pairs(attr) do n = n+1; k = apply_ns(k); newattr[n] = k; newattr[k] = v; end tag = apply_ns(tag, true); ns[0] = tag; ns.__index = ns; return tag, newattr; end local function pop() local tag = ns[0]; ns = getmetatable(ns); return tag; end while true do if peek() == "<" then local elem = read_until(">"):sub(2,-2); if elem:sub(1,1) == "!" or elem:sub(1,1) == "?" then -- neglect comments and processing-instructions elseif elem:sub(1,1) == "/" then -- end tag elem = elem:sub(2); local name = pop(); handlers:EndElement(name); -- TODO check for start-end tag name match elseif elem:sub(-1,-1) == "/" then -- empty tag elem = elem:sub(1,-2); local name,attr = parse_tag(elem); name,attr = push(name,attr); handlers:StartElement(name,attr); name = pop(); handlers:EndElement(name); else -- start tag local name,attr = parse_tag(elem); name,attr = push(name,attr); handlers:StartElement(name,attr); end else local text = read_before("<"); handlers:CharacterData(xml_unescape(text)); end end end function new(handlers, ns_separator) local co = coroutine.create(parser); return { parse = function(self, data) if not data then co = deadroutine; return true; -- eof end local success, result = coroutine.resume(co, data, handlers, ns_separator); if result then co = deadroutine; return nil, result; -- error end return true; -- success end; }; end return _M;