Software /
code /
prosody
Annotate
util/xml.lua @ 12939:bb6a98a7b0b4
doap: Sort by XEP number
To keep them sorted.
Not pedantic at all!
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Thu, 16 Mar 2023 13:20:19 +0100 |
parent | 12270:c78639ee6ccb |
child | 12975:d10957394a3c |
rev | line source |
---|---|
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
1 |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
2 local st = require "util.stanza"; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
3 local lxp = require "lxp"; |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
4 local t_insert = table.insert; |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
5 local t_remove = table.remove; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
6 local error = error; |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
7 |
6777
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
8 local _ENV = nil; |
8555
4f0f5b49bb03
vairious: Add annotation when an empty environment is set [luacheck]
Kim Alvefur <zash@zash.se>
parents:
7239
diff
changeset
|
9 -- luacheck: std none |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
10 |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
11 local parse_xml = (function() |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
12 local ns_prefixes = { |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
13 ["http://www.w3.org/XML/1998/namespace"] = "xml"; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
14 }; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
15 local ns_separator = "\1"; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
16 local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$"; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
17 return function(xml, options) |
6663
d3023dd07cb6
portmanager, s2smanager, sessionmanager, stanza_router, storagemanager, usermanager, util.xml: Add luacheck annotations
Matthew Wild <mwild1@gmail.com>
parents:
5776
diff
changeset
|
18 --luacheck: ignore 212/self |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
19 local handler = {}; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
20 local stanza = st.stanza("root"); |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
21 local namespaces = {}; |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
22 local prefixes = {}; |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
23 function handler:StartNamespaceDecl(prefix, url) |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
24 if prefix ~= nil then |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
25 t_insert(namespaces, url); |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
26 t_insert(prefixes, prefix); |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
27 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
28 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
29 function handler:EndNamespaceDecl(prefix) |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
30 if prefix ~= nil then |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
31 -- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
32 t_remove(namespaces); |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
33 t_remove(prefixes); |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
34 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
35 end |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
36 function handler:StartElement(tagname, attr) |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
37 local curr_ns,name = tagname:match(ns_pattern); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
38 if name == "" then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
39 curr_ns, name = "", curr_ns; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
40 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
41 if curr_ns ~= "" then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
42 attr.xmlns = curr_ns; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
43 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
44 for i=1,#attr do |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
45 local k = attr[i]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
46 attr[i] = nil; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
47 local ns, nm = k:match(ns_pattern); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
48 if nm ~= "" then |
5776
bd0ff8ae98a8
Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents:
5223
diff
changeset
|
49 ns = ns_prefixes[ns]; |
bd0ff8ae98a8
Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents:
5223
diff
changeset
|
50 if ns then |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
51 attr[ns..":"..nm] = attr[k]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
52 attr[k] = nil; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
53 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
54 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
55 end |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
56 local n = {} |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
57 for i=1,#namespaces do |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
58 n[prefixes[i]] = namespaces[i]; |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
59 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
60 stanza:tag(name, attr, n); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
61 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
62 function handler:CharacterData(data) |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
63 stanza:text(data); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
64 end |
6669
7da8b6bc0966
util.xml: Remove unused parameter (thanks, luacheck)
Matthew Wild <mwild1@gmail.com>
parents:
6663
diff
changeset
|
65 function handler:EndElement() |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
66 stanza:up(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
67 end |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
68 -- SECURITY: These two handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs. |
12202
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
69 local function restricted_handler(parser) |
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
70 if not parser.stop or not parser:stop() then |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
71 error("Failed to abort parsing"); |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
72 end |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
73 end |
12202
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
74 handler.StartDoctypeDecl = restricted_handler; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
75 if not options or not options.allow_comments then |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
76 -- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data |
12202
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
77 handler.Comment = restricted_handler; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
78 end |
12270
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
79 if not options or not options.allow_processing_instructions then |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
80 -- Processing instructions should generally be safe to just ignore |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
81 handler.ProcessingInstruction = restricted_handler; |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
82 end |
12201
e5e0ab93d7f4
util.xml: Break reference to help the GC (fix #1711)
Kim Alvefur <zash@zash.se>
parents:
12181
diff
changeset
|
83 local parser = lxp.new(handler, ns_separator); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
84 local ok, err, line, col = parser:parse(xml); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
85 if ok then ok, err, line, col = parser:parse(); end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
86 --parser:close(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
87 if ok then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
88 return stanza.tags[1]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
89 else |
11127
1d9cd1abc660
util.xml: Fix float formatting of line and columns in error (on Lua 5.3+)
Kim Alvefur <zash@zash.se>
parents:
8555
diff
changeset
|
90 return ok, ("%s (line %d, col %d))"):format(err, line, col); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
91 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
92 end; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
93 end)(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
94 |
6777
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
95 return { |
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
96 parse = parse_xml; |
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
97 }; |