Annotate

plugins/storage/xmlparse.lib.lua @ 3788:588904a9fd8b

mod_iq: Optimized a bit more (fewer table accesses).
author Waqas Hussain <waqas20@gmail.com>
date Sun, 28 Nov 2010 02:42:02 +0500
parent 2678:c5882e2e12b5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2678
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
1
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
2 local st = require "util.stanza";
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
3
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
4 -- XML parser
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
-- Minimal, lenient XML parser that builds a util.stanza tree from a string.
--
-- parse_xml(xml) -> first top-level element of `xml` as a stanza, or nil when
-- the input contains no element.
--
-- Supported: start/end/empty-element tags, attributes quoted with ' or ",
-- the five predefined entities, decimal and hexadecimal character references
-- (emitted as UTF-8), comments, processing instructions and CDATA sections.
-- Known limitations: end-tag names are not checked against their start tag,
-- a literal ">" inside an attribute value terminates the tag early, and
-- DOCTYPE internal subsets are not understood.
local parse_xml = (function()
	local s_char, floor = string.char, math.floor;

	-- Encode a Unicode code point as a UTF-8 byte string.
	-- Returns nil for values that are not encodable (negative, surrogates,
	-- or beyond U+10FFFF) so that the caller leaves the reference untouched.
	local function utf8_encode(cp)
		if cp < 0 or cp > 0x10FFFF or (cp >= 0xD800 and cp <= 0xDFFF) then
			return nil;
		elseif cp < 0x80 then
			return s_char(cp);
		elseif cp < 0x800 then
			return s_char(0xC0 + floor(cp / 0x40), 0x80 + cp % 0x40);
		elseif cp < 0x10000 then
			return s_char(0xE0 + floor(cp / 0x1000), 0x80 + floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40);
		end
		return s_char(0xF0 + floor(cp / 0x40000), 0x80 + floor(cp / 0x1000) % 0x40,
			0x80 + floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40);
	end

	-- Entity name -> replacement text. Named entities are stored directly;
	-- numeric character references ("#65", "#x41") are resolved on demand by
	-- __index. Anything unrecognised yields nil, which makes gsub keep the
	-- original "&...;" text unchanged.
	local entity_map = setmetatable({
		["amp"] = "&";
		["gt"] = ">";
		["lt"] = "<";
		["apos"] = "'";
		["quot"] = "\"";
	}, {__index = function(_, s)
		-- Validate the digits with patterns first: tonumber() alone would
		-- accept things like "0x41" or "1e2" and return nil for garbage,
		-- which string.char() would then raise on.
		local digits, base = s:match("^#x(%x+)$"), 16;
		if not digits then
			digits, base = s:match("^#(%d+)$"), 10;
		end
		local cp = digits and tonumber(digits, base);
		if cp then
			return utf8_encode(cp);
		end
		return nil;
	end});

	-- Replace entity and character references in `str` with their text.
	-- The name may not contain "&" or ";", so a stray ampersand cannot
	-- swallow a following, well-formed reference.
	local function xml_unescape(str)
		return (str:gsub("&([^&;]+);", entity_map));
	end

	-- Split the inside of a tag ("name a='1' b=\"2\"") into its name and an
	-- attribute table. Returns nil when no name is present.
	local function parse_tag(s)
		local name, sattr = s:match("^%s*([^%s]+)(.*)$");
		if not name then
			return nil;
		end
		local attr = {};
		-- %2 forces the closing quote to equal the opening one, so a value may
		-- contain the other quote character (e.g. title="it's").
		for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*([\"'])(.-)%2") do
			attr[key] = xml_unescape(value);
		end
		return name, attr;
	end

	return function(xml)
		-- Everything is collected under a synthetic root; the caller gets the
		-- first real element back.
		local stanza = st.stanza("root");
		local pos = xml:find("<", 1, true);
		while pos do
			local after; -- index of the first character following this markup
			if xml:sub(pos, pos + 3) == "<!--" then -- comment, may contain ">"
				local _, e = xml:find("-->", pos + 4, true);
				after = e and e + 1;
			elseif xml:sub(pos, pos + 8) == "<![CDATA[" then -- literal text, no unescaping
				local s, e = xml:find("]]>", pos + 9, true);
				if s then
					if s > pos + 9 then
						stanza:text(xml:sub(pos + 9, s - 1));
					end
					after = e + 1;
				end
			elseif xml:sub(pos, pos + 1) == "<?" then -- processing instruction
				local _, e = xml:find("?>", pos + 2, true);
				after = e and e + 1;
			else
				local e = xml:find(">", pos + 1, true);
				if e then
					local elem = xml:sub(pos + 1, e - 1);
					local first = elem:sub(1, 1);
					if first == "!" then
						-- other declarations (e.g. DOCTYPE) are ignored
					elseif first == "/" then -- end tag
						stanza:up(); -- TODO check for start-end tag name match
					else
						local is_empty = elem:sub(-1, -1) == "/";
						if is_empty then
							elem = elem:sub(1, -2);
						end
						local name, attr = parse_tag(elem);
						if name then -- silently skip nameless tags such as "<>"
							stanza:tag(name, attr);
							if is_empty then
								stanza:up();
							end
						end
					end
					after = e + 1;
				end
			end
			if not after then
				-- Unterminated markup: stay lenient and keep what was parsed.
				break;
			end
			-- Character data up to the next markup (or end of input).
			local next_lt = xml:find("<", after, true);
			local text = xml:sub(after, next_lt and next_lt - 1 or -1);
			if #text ~= 0 then
				stanza:text(xml_unescape(text));
			end
			pos = next_lt;
		end
		return stanza.tags[1];
	end
end)();
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
54 -- end of XML parser
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
55
c5882e2e12b5 mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
56 return parse_xml;