Software /
code /
prosody
Annotate
util/xml.lua @ 13039:9ddb344b9fab
util.human.io: Allow defining per column ellipsis function
As an alternative to doing it in the mapper function. Could be useful in
cases where one may want to put the ellipsis in the middle or beginning
instead of the start.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Fri, 07 Apr 2023 13:00:58 +0200 (21 months ago) |
parent | 12975:d10957394a3c |
rev | line source |
---|---|
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
1 |
12975
d10957394a3c
util: Prefix module imports with prosody namespace
Kim Alvefur <zash@zash.se>
parents:
12270
diff
changeset
|
2 local st = require "prosody.util.stanza"; |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
3 local lxp = require "lxp"; |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
4 local t_insert = table.insert; |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
5 local t_remove = table.remove; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
6 local error = error; |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
7 |
6777
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
8 local _ENV = nil; |
8555
4f0f5b49bb03
vairious: Add annotation when an empty environment is set [luacheck]
Kim Alvefur <zash@zash.se>
parents:
7239
diff
changeset
|
9 -- luacheck: std none |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
10 |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
11 local parse_xml = (function() |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
12 local ns_prefixes = { |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
13 ["http://www.w3.org/XML/1998/namespace"] = "xml"; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
14 }; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
15 local ns_separator = "\1"; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
16 local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$"; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
17 return function(xml, options) |
6663
d3023dd07cb6
portmanager, s2smanager, sessionmanager, stanza_router, storagemanager, usermanager, util.xml: Add luacheck annotations
Matthew Wild <mwild1@gmail.com>
parents:
5776
diff
changeset
|
18 --luacheck: ignore 212/self |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
19 local handler = {}; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
20 local stanza = st.stanza("root"); |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
21 local namespaces = {}; |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
22 local prefixes = {}; |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
23 function handler:StartNamespaceDecl(prefix, url) |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
24 if prefix ~= nil then |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
25 t_insert(namespaces, url); |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
26 t_insert(prefixes, prefix); |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
27 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
28 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
29 function handler:EndNamespaceDecl(prefix) |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
30 if prefix ~= nil then |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
31 -- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
32 t_remove(namespaces); |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
33 t_remove(prefixes); |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
34 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
35 end |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
36 function handler:StartElement(tagname, attr) |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
37 local curr_ns,name = tagname:match(ns_pattern); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
38 if name == "" then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
39 curr_ns, name = "", curr_ns; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
40 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
41 if curr_ns ~= "" then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
42 attr.xmlns = curr_ns; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
43 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
44 for i=1,#attr do |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
45 local k = attr[i]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
46 attr[i] = nil; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
47 local ns, nm = k:match(ns_pattern); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
48 if nm ~= "" then |
5776
bd0ff8ae98a8
Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents:
5223
diff
changeset
|
49 ns = ns_prefixes[ns]; |
bd0ff8ae98a8
Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents:
5223
diff
changeset
|
50 if ns then |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
51 attr[ns..":"..nm] = attr[k]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
52 attr[k] = nil; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
53 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
54 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
55 end |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
56 local n = {} |
7239
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
57 for i=1,#namespaces do |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6978
diff
changeset
|
58 n[prefixes[i]] = namespaces[i]; |
6978
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
59 end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6777
diff
changeset
|
60 stanza:tag(name, attr, n); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
61 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
62 function handler:CharacterData(data) |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
63 stanza:text(data); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
64 end |
6669
7da8b6bc0966
util.xml: Remove unused parameter (thanks, luacheck)
Matthew Wild <mwild1@gmail.com>
parents:
6663
diff
changeset
|
65 function handler:EndElement() |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
66 stanza:up(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
67 end |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
68 -- SECURITY: These two handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs. |
12202
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
69 local function restricted_handler(parser) |
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
70 if not parser.stop or not parser:stop() then |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
71 error("Failed to abort parsing"); |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
72 end |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
73 end |
12202
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
74 handler.StartDoctypeDecl = restricted_handler; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
75 if not options or not options.allow_comments then |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
76 -- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data |
12202
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12201
diff
changeset
|
77 handler.Comment = restricted_handler; |
12181
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8555
diff
changeset
|
78 end |
12270
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
79 if not options or not options.allow_processing_instructions then |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
80 -- Processing instructions should generally be safe to just ignore |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
81 handler.ProcessingInstruction = restricted_handler; |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12203
diff
changeset
|
82 end |
12201
e5e0ab93d7f4
util.xml: Break reference to help the GC (fix #1711)
Kim Alvefur <zash@zash.se>
parents:
12181
diff
changeset
|
83 local parser = lxp.new(handler, ns_separator); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
84 local ok, err, line, col = parser:parse(xml); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
85 if ok then ok, err, line, col = parser:parse(); end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
86 --parser:close(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
87 if ok then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
88 return stanza.tags[1]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
89 else |
11127
1d9cd1abc660
util.xml: Fix float formatting of line and columns in error (on Lua 5.3+)
Kim Alvefur <zash@zash.se>
parents:
8555
diff
changeset
|
90 return ok, ("%s (line %d, col %d))"):format(err, line, col); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
91 end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
92 end; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
93 end)(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
94 |
6777
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
95 return { |
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
96 parse = parse_xml; |
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6669
diff
changeset
|
97 }; |