Comparison

fallbacks/lxp.lua @ 3680:408a19977125

fallbacks/lxp.lua: Pure Lua pseudo-XML parser. Implements the same API as LuaExpat.
author Waqas Hussain <waqas20@gmail.com>
date Thu, 02 Dec 2010 17:11:51 +0500
child 3681:3dbdcc79bd66
comparison
equal deleted inserted replaced
3679:afdce92d07be 3680:408a19977125
1
2 local coroutine = coroutine;
3 local tonumber = tonumber;
4 local string = string;
5 local setmetatable, getmetatable = setmetatable, getmetatable;
6 local pairs = pairs;
7
-- A coroutine that has already run to completion. new() swaps a parser's
-- coroutine for this one after EOF or an error, so that any later resume
-- fails with Lua's "cannot resume dead coroutine" message.
local function noop() end
local deadroutine = coroutine.create(noop);
coroutine.resume(deadroutine); -- runs noop to its end, leaving the status "dead"
10
11 module("lxp")
12
-- Encode a Unicode code point (0 .. 0x10FFFF) as a UTF-8 byte string.
-- Uses only arithmetic and string.char, because after module("lxp") the
-- math library is not reachable from this file.
local function codepoint_to_utf8(code)
	if code < 0x80 then
		return string.char(code);
	end
	local lead, continuation_bytes;
	if code < 0x800 then
		lead, continuation_bytes = 0xC0, 1;
	elseif code < 0x10000 then
		lead, continuation_bytes = 0xE0, 2;
	else
		lead, continuation_bytes = 0xF0, 3;
	end
	local tail = "";
	for _ = 1, continuation_bytes do
		local low = code % 64; -- lowest six bits go into a 10xxxxxx byte
		tail = string.char(0x80 + low)..tail;
		code = (code - low) / 64;
	end
	return string.char(lead + code)..tail;
end

-- Lookup table used as the replacement argument of gsub when unescaping.
-- Keys are the text between "&" and ";". The five predefined XML entities are
-- stored directly; numeric character references ("#65", "#x41") are resolved
-- on demand by __index and returned as UTF-8.
-- Anything else (unknown names, malformed digits, surrogates, values above
-- 0x10FFFF) yields nil, which makes gsub leave the original text untouched
-- instead of raising an error from string.char.
local entity_map = setmetatable({
	["amp"] = "&";
	["gt"] = ">";
	["lt"] = "<";
	["apos"] = "'";
	["quot"] = "\"";
}, {__index = function(_, s)
	local digits, base = s:match("^#x(%x+)$"), 16;
	if not digits then
		digits, base = s:match("^#(%d+)$"), 10;
	end
	-- The length cap keeps absurdly long digit strings from overflowing or
	-- wrapping inside tonumber; no valid code point needs more than 10 digits.
	if not digits or #digits > 10 then
		return nil;
	end
	local code = tonumber(digits, base);
	if code >= 0x110000 or (code >= 0xD800 and code < 0xE000) then
		return nil; -- outside Unicode, or a surrogate that has no UTF-8 form
	end
	return codepoint_to_utf8(code);
end
});
--- Replace entity and character references in `str` with the text they denote.
-- Every "&...;" sequence is looked up in entity_map by the text between the
-- "&" and the ";"; when the lookup yields nil the sequence is kept as written.
-- @param str string to unescape
-- @return the unescaped string (gsub's substitution count is discarded)
local function xml_unescape(str)
	local unescaped = str:gsub("&(.-);", entity_map);
	return unescaped;
end
--- Split the text found between "<" and ">" into a tag name and its attributes.
-- The name is the first run of non-whitespace characters; everything after it
-- is scanned for key="value" or key='value' pairs. A value ends at the same
-- kind of quote that opened it, so it may contain the other kind, and
-- whitespace is allowed around "=". Values are passed through xml_unescape.
-- @param s tag text without the surrounding angle brackets
-- @return name the raw (possibly prefixed) tag name
-- @return attr table mapping raw attribute names to unescaped values
-- Raises a Lua error when `s` contains no name at all (empty or only whitespace).
local function parse_tag(s)
	local name, sattr = s:match("(%S+)(.*)");
	local attr = {};
	-- (['"]) captures the opening quote and %2 demands the identical closing one
	for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*(['\"])(.-)%2") do
		attr[key] = xml_unescape(value);
	end
	return name, attr;
end
38
--- Coroutine body of the incremental pseudo-XML parser.
-- Consumes `data` and every further chunk handed over through
-- coroutine.resume, and reports what it finds by calling
-- handlers:StartElement(name, attr), handlers:EndElement(name) and
-- handlers:CharacterData(text). It never returns.
--
-- Protocol with the driver (the parse method built by new()): yielding no
-- value means "more input needed"; yielding a string reports a fatal parse
-- error, because the driver treats any yielded or raised value as an error.
--
-- @param data first chunk of input
-- @param handlers table providing the three callbacks above
-- @param ns_separator string placed between a namespace URI and the local
--   name in expanded names. When nil, namespace processing is switched off:
--   names are passed through as written and xmlns attributes are kept.
--
-- Limitations: start and end tag names are not compared; a ">" inside an
-- attribute value or inside a DOCTYPE internal subset still ends the markup
-- early; comments, processing instructions and other "<!...>" declarations
-- are discarded.
local function parser(data, handlers, ns_separator)
	-- Report a fatal error to the driver. Keeps yielding the message in case
	-- the coroutine is resumed again, so parsing can never continue past it.
	local function fail(message)
		while true do coroutine.yield(message); end
	end

	-- Block (by yielding for more input) until `str` occurs in the buffer and
	-- return the position where it starts.
	local function wait_for(str)
		local pos = data:find(str, 1, true);
		while not pos do
			-- Only the new chunk needs scanning, plus the few trailing bytes
			-- in which a match could straddle the chunk boundary.
			local from = #data - #str + 2;
			if from < 1 then from = 1; end
			data = data..coroutine.yield();
			pos = data:find(str, from, true);
		end
		return pos;
	end
	-- Remove and return the buffer up to and including the position where
	-- `str` starts (callers pass a single character).
	local function read_until(str)
		local pos = wait_for(str);
		local r = data:sub(1, pos);
		data = data:sub(pos+1);
		return r;
	end
	-- Remove and return everything in front of `str`, leaving `str` buffered.
	local function read_before(str)
		local pos = wait_for(str);
		local r = data:sub(1, pos-1);
		data = data:sub(pos);
		return r;
	end
	-- Return the next character without consuming it, waiting for input if
	-- the buffer is empty.
	local function peek()
		while #data == 0 do data = coroutine.yield(); end
		return data:sub(1,1);
	end
	-- `head` is the text between "<" and the first ">". Comments, CDATA
	-- sections and processing instructions may legally contain ">", so keep
	-- consuming until the text really ends with `closer`. `min_len` (opener
	-- length plus closer length) stops the opener from being mistaken for the
	-- closer, as in "<!-->".
	local function read_through(head, min_len, closer)
		while #head < min_len or head:sub(-#closer) ~= closer do
			head = head..">"..read_until(">"):sub(1, -2);
		end
		return head;
	end

	-- Namespace scopes form a chain: every open element owns a table whose
	-- metatable (with __index pointing at itself) is the enclosing scope, so
	-- prefix lookups fall through to outer declarations. Slot [0] holds the
	-- element's expanded name. The outermost table has no metatable.
	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
	ns.__index = ns;
	-- Expand a prefixed name to URI..ns_separator..localname. Unprefixed names
	-- get the default namespace only when `dodefault` is set (element names).
	local function apply_ns(name, dodefault)
		if not ns_separator then
			return name; -- namespace processing disabled
		end
		local prefix,n = name:match("^([^:]*):(.*)$");
		if prefix and ns[prefix] then
			return ns[prefix]..ns_separator..n;
		end
		if dodefault and ns[""] then
			return ns[""]..ns_separator..name;
		end
		return name;
	end
	-- Open a scope for a start tag. Returns the expanded tag name and a new
	-- attribute table holding the expanded names in its array part and
	-- name -> value pairs in its hash part.
	local function push(tag, attr)
		ns = setmetatable({}, ns);
		if ns_separator then
			-- Move namespace declarations out of the attributes and into the
			-- scope (clearing existing keys while traversing is permitted).
			for k,v in pairs(attr) do
				local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
				if xmlns then
					ns[xmlns] = v;
					attr[k] = nil;
				end
			end
		end
		local newattr, n = {}, 0;
		for k,v in pairs(attr) do
			n = n+1;
			k = apply_ns(k);
			newattr[n] = k;
			newattr[k] = v;
		end
		tag = apply_ns(tag, true);
		ns[0] = tag;
		ns.__index = ns;
		return tag, newattr;
	end
	-- Close the innermost scope and return the expanded name of its element.
	local function pop()
		local tag = ns[0];
		ns = getmetatable(ns);
		return tag;
	end

	while true do
		if peek() == "<" then
			local elem = read_until(">"):sub(2,-2);
			local first = elem:sub(1,1);
			if first == "?" then -- processing instruction, ends with "?>"
				read_through(elem, 2, "?");
			elseif elem:sub(1,3) == "!--" then -- comment, ends with "-->"
				read_through(elem, 5, "--");
			elseif elem:sub(1,8) == "![CDATA[" then -- CDATA section, ends with "]]>"
				local text = read_through(elem, 10, "]]"):sub(9, -3);
				if text ~= "" then
					handlers:CharacterData(text); -- literal text, never unescaped
				end
			elseif first == "!" then -- neglect other declarations
			elseif first == "/" then -- end tag
				if not getmetatable(ns) then
					-- No element is open: popping would destroy the root scope
					-- and report EndElement(nil).
					fail("unexpected end tag </"..elem:sub(2)..">");
				end
				local name = pop();
				handlers:EndElement(name); -- TODO check for start-end tag name match
			elseif elem:sub(-1,-1) == "/" then -- empty tag
				local name,attr = parse_tag(elem:sub(1,-2));
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
				name = pop();
				handlers:EndElement(name);
			else -- start tag
				local name,attr = parse_tag(elem);
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
			end
		else
			local text = read_before("<");
			handlers:CharacterData(xml_unescape(text));
		end
	end
end
130
--- Create a parser object exposing the LuaExpat-style parse method.
-- @param handlers callback table handed to the parser coroutine
-- @param ns_separator separator handed to the parser coroutine for expanded names
-- @return table with one method, parse(self, data):
--   * data is nil or false: marks end of input and returns true;
--   * otherwise the chunk is fed to the parser coroutine. It returns true when
--     the coroutine simply asks for more input, or nil plus a message when the
--     coroutine raised an error or yielded a value.
--   After EOF or an error the coroutine is replaced by deadroutine, so later
--   calls with data fail with Lua's dead-coroutine message.
function new(handlers, ns_separator)
	local co = coroutine.create(parser);
	local instance = {};

	function instance.parse(_, data)
		if data then
			-- the first resume starts parser(data, handlers, ns_separator);
			-- later ones deliver the chunk as the result of its pending yield
			local _, problem = coroutine.resume(co, data, handlers, ns_separator);
			if not problem then
				return true; -- success
			end
			co = deadroutine;
			return nil, problem; -- error
		end
		co = deadroutine;
		return true; -- eof
	end

	return instance;
end
148
149 return _M;