Annotate

util/xml.lua @ 12939:bb6a98a7b0b4

doap: Sort by XEP number To keep them sorted. Not pedantic at all!
author Kim Alvefur <zash@zash.se>
date Thu, 16 Mar 2023 13:20:19 +0100
parent 12270:c78639ee6ccb
child 12975:d10957394a3c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
1
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
2 local st = require "util.stanza";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
3 local lxp = require "lxp";
7239
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
4 local t_insert = table.insert;
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
5 local t_remove = table.remove;
12181
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
6 local error = error;
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
7
6777
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6669
diff changeset
8 local _ENV = nil;
8555
4f0f5b49bb03 vairious: Add annotation when an empty environment is set [luacheck]
Kim Alvefur <zash@zash.se>
parents: 7239
diff changeset
9 -- luacheck: std none
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
10
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
11 local parse_xml = (function()
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
12 local ns_prefixes = {
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
13 ["http://www.w3.org/XML/1998/namespace"] = "xml";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
14 };
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
15 local ns_separator = "\1";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
16 local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";
12181
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
17 return function(xml, options)
6663
d3023dd07cb6 portmanager, s2smanager, sessionmanager, stanza_router, storagemanager, usermanager, util.xml: Add luacheck annotations
Matthew Wild <mwild1@gmail.com>
parents: 5776
diff changeset
18 --luacheck: ignore 212/self
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
19 local handler = {};
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
20 local stanza = st.stanza("root");
7239
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
21 local namespaces = {};
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
22 local prefixes = {};
6978
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
23 function handler:StartNamespaceDecl(prefix, url)
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
24 if prefix ~= nil then
7239
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
25 t_insert(namespaces, url);
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
26 t_insert(prefixes, prefix);
6978
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
27 end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
28 end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
29 function handler:EndNamespaceDecl(prefix)
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
30 if prefix ~= nil then
7239
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
31 -- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
32 t_remove(namespaces);
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
33 t_remove(prefixes);
6978
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
34 end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
35 end
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
36 function handler:StartElement(tagname, attr)
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
37 local curr_ns,name = tagname:match(ns_pattern);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
38 if name == "" then
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
39 curr_ns, name = "", curr_ns;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
40 end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
41 if curr_ns ~= "" then
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
42 attr.xmlns = curr_ns;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
43 end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
44 for i=1,#attr do
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
45 local k = attr[i];
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
46 attr[i] = nil;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
47 local ns, nm = k:match(ns_pattern);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
48 if nm ~= "" then
5776
bd0ff8ae98a8 Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents: 5223
diff changeset
49 ns = ns_prefixes[ns];
bd0ff8ae98a8 Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents: 5223
diff changeset
50 if ns then
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
51 attr[ns..":"..nm] = attr[k];
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
52 attr[k] = nil;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
53 end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
54 end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
55 end
6978
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
56 local n = {}
7239
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
57 for i=1,#namespaces do
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6978
diff changeset
58 n[prefixes[i]] = namespaces[i];
6978
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
59 end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6777
diff changeset
60 stanza:tag(name, attr, n);
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
61 end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
62 function handler:CharacterData(data)
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
63 stanza:text(data);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
64 end
6669
7da8b6bc0966 util.xml: Remove unused parameter (thanks, luacheck)
Matthew Wild <mwild1@gmail.com>
parents: 6663
diff changeset
65 function handler:EndElement()
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
66 stanza:up();
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
67 end
12181
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
68 -- SECURITY: These two handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs.
12202
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12201
diff changeset
69 local function restricted_handler(parser)
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12201
diff changeset
70 if not parser.stop or not parser:stop() then
12181
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
71 error("Failed to abort parsing");
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
72 end
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
73 end
12202
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12201
diff changeset
74 handler.StartDoctypeDecl = restricted_handler;
12181
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
75 if not options or not options.allow_comments then
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
76 -- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data
12202
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12201
diff changeset
77 handler.Comment = restricted_handler;
12181
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8555
diff changeset
78 end
12270
c78639ee6ccb util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents: 12203
diff changeset
79 if not options or not options.allow_processing_instructions then
c78639ee6ccb util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents: 12203
diff changeset
80 -- Processing instructions should generally be safe to just ignore
c78639ee6ccb util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents: 12203
diff changeset
81 handler.ProcessingInstruction = restricted_handler;
c78639ee6ccb util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents: 12203
diff changeset
82 end
12201
e5e0ab93d7f4 util.xml: Break reference to help the GC (fix #1711)
Kim Alvefur <zash@zash.se>
parents: 12181
diff changeset
83 local parser = lxp.new(handler, ns_separator);
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
84 local ok, err, line, col = parser:parse(xml);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
85 if ok then ok, err, line, col = parser:parse(); end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
86 --parser:close();
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
87 if ok then
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
88 return stanza.tags[1];
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
89 else
11127
1d9cd1abc660 util.xml: Fix float formatting of line and columns in error (on Lua 5.3+)
Kim Alvefur <zash@zash.se>
parents: 8555
diff changeset
90 return ok, ("%s (line %d, col %d))"):format(err, line, col);
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
91 end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
92 end;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
93 end)();
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
94
6777
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6669
diff changeset
95 return {
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6669
diff changeset
96 parse = parse_xml;
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6669
diff changeset
97 };