File

util/xml.lua @ 12182:5e21cf21d398 0.11

Added tag 0.11.12 for changeset 783056b4e448
author Matthew Wild <mwild1@gmail.com>
date Thu, 13 Jan 2022 12:18:49 +0000
parent 12181:783056b4e448
child 12201:e5e0ab93d7f4
line wrap: on
line source


local st = require "util.stanza";
local lxp = require "lxp";
local t_insert = table.insert;
local t_remove = table.remove;
local error = error;

local _ENV = nil;
-- luacheck: std none

--- Parse a complete XML document held in a string into a util.stanza tree.
--
-- Doctype declarations and processing instructions always abort the parse;
-- comments abort it too unless `options.allow_comments` is truthy.
--
-- @param xml     the document text, handed to the lxp parser in one call
-- @param options optional table; only `allow_comments` is consulted here
-- @return on success: the first child tag of the internal "root" stanza
-- @return on failure: the falsy result of parser:parse(), followed by a
--         message of the form "<error> (line N, col M)"
local parse_xml = (function()
	-- Namespace URIs whose attributes are rewritten to "prefix:name" form.
	local ns_prefixes = {
		["http://www.w3.org/XML/1998/namespace"] = "xml";
	};
	-- Separator handed to lxp.new(); names reported by the parser are split
	-- on it into (namespace, local name) using ns_pattern below.
	local ns_separator = "\1";
	local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";
	return function(xml, options)
		--luacheck: ignore 212/self
		local handler = {};
		-- Scratch parent; the parsed document element ends up as its first tag.
		local stanza = st.stanza("root");
		-- Parallel stacks of the prefixed namespace declarations currently in scope.
		local namespaces = {};
		local prefixes = {};
		function handler:StartNamespaceDecl(prefix, url)
			if prefix ~= nil then
				t_insert(namespaces, url);
				t_insert(prefixes, prefix);
			end
		end
		function handler:EndNamespaceDecl(prefix)
			if prefix ~= nil then
				-- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl
				t_remove(namespaces);
				t_remove(prefixes);
			end
		end
		function handler:StartElement(tagname, attr)
			local curr_ns,name = tagname:match(ns_pattern);
			if name == "" then
				-- No separator present: the whole match is the local name.
				curr_ns, name = "", curr_ns;
			end
			if curr_ns ~= "" then
				attr.xmlns = curr_ns;
			end
			-- The array part of attr lists the attribute names; drop it and
			-- rename attributes in a known namespace to "prefix:name".
			for i=1,#attr do
				local k = attr[i];
				attr[i] = nil;
				local ns, nm = k:match(ns_pattern);
				if nm ~= "" then
					ns = ns_prefixes[ns];
					if ns then
						attr[ns..":"..nm] = attr[k];
						attr[k] = nil;
					end
				end
			end
			-- Snapshot of the prefix -> namespace URI mappings in scope for this tag.
			local n = {}
			for i=1,#namespaces do
				n[prefixes[i]] = namespaces[i];
			end
			stanza:tag(name, attr, n);
		end
		function handler:CharacterData(data)
			stanza:text(data);
		end
		function handler:EndElement()
			stanza:up();
		end
		local parser;
		-- Stop the parser, or raise if this lxp build cannot stop / refuses to.
		local function abort_parsing()
			if not parser.stop or not parser:stop() then
				error("Failed to abort parsing");
			end
		end
		-- SECURITY: These two handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs.
		handler.StartDoctypeDecl = abort_parsing;
		handler.ProcessingInstruction = abort_parsing;
		if not options or not options.allow_comments then
			-- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data
			handler.Comment = abort_parsing;
		end
		parser = lxp.new(handler, ns_separator);
		local ok, err, line, col = parser:parse(xml);
		-- A second, argument-less call tells the parser the document is complete.
		if ok then ok, err, line, col = parser:parse(); end
		-- NOTE(review): the explicit close was already disabled in the original; reason not visible here.
		--parser:close();
		if ok then
			return stanza.tags[1];
		else
			-- %d keeps the position integral even when lxp hands back floats
			-- (Lua 5.3+ would otherwise render them as "1.0" when concatenated).
			return ok, ("%s (line %d, col %d)"):format(err, line, col);
		end
	end;
end)();

-- Module interface: exposes the XML parser as `parse(xml, options)`.
return { parse = parse_xml };