Software /
code /
prosody
Annotate
plugins/storage/xmlparse.lib.lua @ 2678:c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
author | Waqas Hussain <waqas20@gmail.com> |
---|---|
date | Fri, 19 Feb 2010 22:32:28 +0500 |
rev | line source |
---|---|
2678
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
1 |
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
2 local st = require "util.stanza"; |
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
3 |
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
4 -- XML parser |
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
-- parse_xml(xml): minimal, pattern-based XML parser.
--
-- Takes a string containing serialized XML and builds a stanza tree with
-- st.stanza()/:tag()/:text()/:up(). Everything is parsed under a synthetic
-- "root" stanza and the first child tag of that root is returned (nil when
-- the input contains no element).
--
-- Known limitations (unchanged from the original implementation):
--   * a literal ">" inside an attribute value or a comment ends the tag early;
--   * CDATA sections and DOCTYPE internal subsets are not understood;
--   * start/end tag names are not checked against each other;
--   * namespaces prefixes are not resolved, names are passed through verbatim.
local parse_xml = (function()
	-- Encode a Unicode code point as UTF-8.
	-- Returns nil when cp is not an integer in 0..0x10FFFF or is a surrogate,
	-- so that the caller can leave the offending reference untouched.
	local function utf8_char(cp)
		if not cp or cp % 1 ~= 0 or cp < 0 or cp > 0x10FFFF then return nil; end
		if cp >= 0xD800 and cp <= 0xDFFF then return nil; end
		local floor = math.floor;
		if cp < 0x80 then
			return string.char(cp);
		elseif cp < 0x800 then
			return string.char(
				0xC0 + floor(cp / 0x40),
				0x80 + cp % 0x40);
		elseif cp < 0x10000 then
			return string.char(
				0xE0 + floor(cp / 0x1000),
				0x80 + floor(cp / 0x40) % 0x40,
				0x80 + cp % 0x40);
		else
			return string.char(
				0xF0 + floor(cp / 0x40000),
				0x80 + floor(cp / 0x1000) % 0x40,
				0x80 + floor(cp / 0x40) % 0x40,
				0x80 + cp % 0x40);
		end
	end

	-- Maps the text between "&" and ";" to its replacement.
	-- The five predefined XML entities are stored directly; numeric character
	-- references are resolved on demand by __index. Anything else yields nil,
	-- which makes string.gsub keep the original "&...;" text unchanged.
	local entity_map = setmetatable({
		["amp"] = "&";
		["gt"] = ">";
		["lt"] = "<";
		["apos"] = "'";
		["quot"] = "\"";
	}, {__index = function(_, s)
			-- Validate the digits with patterns first: tonumber() alone would
			-- return nil for garbage (crashing string.char) and, without an
			-- explicit base, would also accept forms like "1e2" or "0x10".
			local hex = s:match("^#x(%x+)$");
			if hex then
				return utf8_char(tonumber(hex, 16));
			end
			local dec = s:match("^#(%d+)$");
			if dec then
				return utf8_char(tonumber(dec, 10));
			end
			return nil;
		end
	});

	-- Replace entity and character references in str with the text they stand for.
	local function xml_unescape(str)
		-- "[^&;%s]+" instead of ".-" so that a stray "&" cannot swallow a
		-- following valid reference (e.g. "a & b &amp; c").
		return (str:gsub("&([^&;%s]+);", entity_map));
	end

	-- Split the inside of a start/empty tag ("name a='1' b=\"2\"") into the
	-- element name and a table of unescaped attributes.
	-- Returns nil when s contains no name at all (e.g. "<>" or "< >").
	local function parse_tag(s)
		local name, sattr = s:match("(%S+)(.*)");
		if not name then return nil; end
		local attr = {};
		-- Capture the opening quote and require the same character (%2) to
		-- close the value, so a="it's" and a='say "hi"' are read correctly.
		for a, _, b in sattr:gmatch("([^=%s]+)%s*=%s*(['\"])(.-)%2") do
			attr[a] = xml_unescape(b);
		end
		return name, attr;
	end

	return function(xml)
		local stanza = st.stanza("root");
		-- Each match is one "<...>" markup item plus the character data that
		-- follows it up to the next "<".
		local regexp = "<([^>]*)>([^<]*)";
		for elem, text in xml:gmatch(regexp) do
			local first = elem:sub(1,1);
			if first == "!" or first == "?" then -- neglect comments and processing-instructions
			elseif first == "/" then -- end tag
				stanza:up(); -- TODO check for start-end tag name match
			elseif elem:sub(-1,-1) == "/" then -- empty tag
				local name, attr = parse_tag(elem:sub(1,-2));
				if name then
					stanza:tag(name, attr):up();
				end
			else -- start tag
				local name, attr = parse_tag(elem);
				if name then
					stanza:tag(name, attr);
				end
			end
			if #text ~= 0 then -- text
				stanza:text(xml_unescape(text));
			end
		end
		return stanza.tags[1];
	end
end)();
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
54 -- end of XML parser |
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
55 |
c5882e2e12b5
mod_storage, plus a bit of SQL and XML.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
56 return parse_xml; |