# HG changeset patch # User Waqas Hussain # Date 1249205703 -18000 # Node ID c3ce7cbd123d9b6d69c399c178af0263f6b71eb7 # Parent 099364ec1ab2ed5b4e3ef9233443f08cc1052403 ejabberdsql2prosody: Added an XML parser diff -r 099364ec1ab2 -r c3ce7cbd123d tools/ejabberdsql2prosody.lua --- a/tools/ejabberdsql2prosody.lua Fri Jul 31 17:19:05 2009 +0100 +++ b/tools/ejabberdsql2prosody.lua Sun Aug 02 14:35:03 2009 +0500 @@ -156,6 +156,58 @@ ------ end +-- XML parser +local parse_xml = (function() + local entity_map = setmetatable({ + ["amp"] = "&"; + ["gt"] = ">"; + ["lt"] = "<"; + ["apos"] = "'"; + ["quot"] = "\""; + }, {__index = function(_, s) + if s:sub(1,1) == "#" then + if s:sub(2,2) == "x" then + return string.char(tonumber(s:sub(3), 16)); + else + return string.char(tonumber(s:sub(2))); + end + end + end + }); + local function xml_unescape(str) + return (str:gsub("&(.-);", entity_map)); + end + local function parse_tag(s) + local name,sattr=(s):gmatch("([^%s]+)(.*)")(); + local attr = {}; + for a,b in (sattr):gmatch("([^=%s]+)=['\"]([^'\"]*)['\"]") do attr[a] = xml_unescape(b); end + return name, attr; + end + return function(xml) + local stanza = st.stanza("root"); + local regexp = "<([^>]*)>([^<]*)"; + for elem, text in xml:gmatch(regexp) do + if elem:sub(1,1) == "!" or elem:sub(1,1) == "?" then -- neglect comments and processing-instructions + elseif elem:sub(1,1) == "/" then -- end tag + elem = elem:sub(2); + stanza:up(); -- TODO check for start-end tag name match + elseif elem:sub(-1,-1) == "/" then -- empty tag + elem = elem:sub(1,-2); + local name,attr = parse_tag(elem); + stanza:tag(name, attr):up(); + else -- start tag + local name,attr = parse_tag(elem); + stanza:tag(name, attr); + end + if #text ~= 0 then -- text + stanza:text(xml_unescape(text)); + end + end + return stanza.tags[1]; + end +end)(); +-- end of XML parser + local arg, host = ...; local help = "/? -? ? /h -h /help -help --help"; if not(arg and host) or help:find(arg, 1, true) then