Changeset

1628:c3ce7cbd123d

ejabberdsql2prosody: Added an XML parser
author Waqas Hussain <waqas20@gmail.com>
date Sun, 02 Aug 2009 14:35:03 +0500
parents 1623:099364ec1ab2
children 1629:f6af348cf497
files tools/ejabberdsql2prosody.lua
diffstat 1 files changed, 52 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/tools/ejabberdsql2prosody.lua	Fri Jul 31 17:19:05 2009 +0100
+++ b/tools/ejabberdsql2prosody.lua	Sun Aug 02 14:35:03 2009 +0500
@@ -156,6 +156,58 @@
 ------
 end
 
+-- XML parser
+local parse_xml = (function()
+	local entity_map = setmetatable({
+		["amp"] = "&";
+		["gt"] = ">";
+		["lt"] = "<";
+		["apos"] = "'";
+		["quot"] = "\"";
+	}, {__index = function(_, s)
+			if s:sub(1,1) == "#" then
+				if s:sub(2,2) == "x" then
+					return string.char(tonumber(s:sub(3), 16));
+				else
+					return string.char(tonumber(s:sub(2)));
+				end
+			end
+		end
+	});
+	local function xml_unescape(str)
+		return (str:gsub("&(.-);", entity_map));
+	end
+	local function parse_tag(s)
+		local name,sattr=(s):gmatch("([^%s]+)(.*)")();
+		local attr = {};
+		for a,b in (sattr):gmatch("([^=%s]+)=['\"]([^'\"]*)['\"]") do attr[a] = xml_unescape(b); end
+		return name, attr;
+	end
+	return function(xml)
+		local stanza = st.stanza("root");
+		local regexp = "<([^>]*)>([^<]*)";
+		for elem, text in xml:gmatch(regexp) do
+			if elem:sub(1,1) == "!" or elem:sub(1,1) == "?" then -- neglect comments and processing-instructions
+			elseif elem:sub(1,1) == "/" then -- end tag
+				elem = elem:sub(2);
+				stanza:up(); -- TODO check for start-end tag name match
+			elseif elem:sub(-1,-1) == "/" then -- empty tag
+				elem = elem:sub(1,-2);
+				local name,attr = parse_tag(elem);
+				stanza:tag(name, attr):up();
+			else -- start tag
+				local name,attr = parse_tag(elem);
+				stanza:tag(name, attr);
+			end
+			if #text ~= 0 then -- text
+				stanza:text(xml_unescape(text));
+			end
+		end
+		return stanza.tags[1];
+	end
+end)();
+-- end of XML parser
+
 local arg, host = ...;
 local help = "/? -? ? /h -h /help -help --help";
 if not(arg and host) or help:find(arg, 1, true) then