Software /
code /
prosody-modules
Comparison
mod_xhtmlim/mod_xhtmlim.lua @ 2865:f6ed4421167d
mod_xhtmlim: Attempts to sanitize XHTML-IM messages
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Tue, 30 Jan 2018 18:49:09 +0100 |
child | 2866:276f7af8afd1 |
comparison
equal
deleted
inserted
replaced
2864:6f3859233515 | 2865:f6ed4421167d |
---|---|
1 -- XEP-0071: XHTML-IM sanitizing | |
2 | |
3 local assert = assert; | |
4 | |
5 local st = require "util.stanza"; | |
6 local url = require "socket.url"; | |
7 | |
-- When the "strip_xhtml_style" option is enabled, the `style` attribute is
-- left out of every element's whitelist below.
local no_styles = module:get_option_boolean("strip_xhtml_style", false);

-- Tables from XEP-0071
-- One element per line, written as <name/>, followed by the attribute names
-- permitted on it (separated by commas and semicolons).
local xeptables = [[
<body/> class, id, title; style
<head/> profile
<html/> version
<title/>
<abbr/> class, id, title; style
<acronym/> class, id, title; style
<address/> class, id, title; style
<blockquote/> class, id, title; style; cite
<br/> class, id, title; style
<cite/> class, id, title; style
<code/> class, id, title; style
<dfn/> class, id, title; style
<div/> class, id, title; style
<em/> class, id, title; style
<h1/> class, id, title; style
<h2/> class, id, title; style
<h3/> class, id, title; style
<h4/> class, id, title; style
<h5/> class, id, title; style
<h6/> class, id, title; style
<kbd/> class, id, title; style
<p/> class, id, title; style
<pre/> class, id, title; style
<q/> class, id, title; style; cite
<samp/> class, id, title; style
<span/> class, id, title; style
<strong/> class, id, title; style
<var/> class, id, title; style
<a/> class, id, title; style; accesskey, charset, href, hreflang, rel, rev, tabindex, type
<dl/> class, id, title; style
<dt/> class, id, title; style
<dd/> class, id, title; style
<ol/> class, id, title; style
<ul/> class, id, title; style
<li/> class, id, title; style
<img/> class, id, title; style; alt, height, longdesc, src, width
]];

-- map of whitelisted tag names to set of allowed attributes
local tags = {}; -- { string : { string : boolean } }

for element, attr_list in xeptables:gmatch("<(%w+)/>([^\n]*)") do
	-- `xmlns` and `xml:lang` are accepted on every whitelisted element.
	local permitted = { xmlns = true, ["xml:lang"] = true };
	-- Every alphanumeric word on the rest of the line is an attribute name.
	for name in attr_list:gmatch("%w+") do
		permitted[name] = true;
	end
	if no_styles then
		permitted.style = nil;
	end
	tags[element] = permitted;
end
62 | |
63 -- module:log("debug", "tags = %s;", require "util.serialization".serialize(tags)); | |
64 | |
-- TODO Decide if disallowed tags should be bounced or silently discarded.
-- XEP says "ignore" and replace tag with text content, but that would
-- need a different transform which can't use `maptags`.
local bounce_invalid = module:get_option_boolean("bounce_invalid_xhtml", false);
if not bounce_invalid then
	-- Bouncing is off: make `assert` a pass-through so a failed whitelist
	-- lookup yields its falsy value instead of raising an error.
	assert = function (value)
		return value;
	end
end
71 | |
-- URL schemes permitted in `href` and `src` attributes. Any other scheme,
-- and any value without a parseable scheme, is replaced by a placeholder URL.
local safe_url_schemes = {
	http = true;
	https = true;
	mailto = true;
	xmpp = true;
	cid = true;
};

-- Sanitizes a single element of an XHTML-IM payload in place and recurses
-- into its child tags through `maptags`.
--
-- Returns the (modified) tag when it may stay, or nil when it should be
-- discarded: either it is not in the XHTML namespace, or its name is not in
-- the `tags` whitelist. If `assert` has not been replaced by the pass-through
-- version (i.e. "bounce_invalid_xhtml" is enabled), a non-whitelisted element
-- raises an error carrying the element name instead of returning nil.
local function sanitize_xhtml(tag)
	-- module:log("debug", "sanitize_xhtml(<{%s}%s>)", tag.attr.xmlns, tag.name);
	if tag.attr.xmlns ~= "http://www.w3.org/1999/xhtml" then
		-- Not xhtml, probably best to discard it
		return nil;
	end

	local allowed = assert(tags[tag.name], tag.name);
	if not allowed then
		-- Only reachable when `assert` is the pass-through version:
		-- the element is not whitelisted and gets dropped.
		return nil;
	end

	-- Clearing or overwriting existing fields while traversing with pairs()
	-- is permitted; no new keys are added here.
	for attr, value in pairs(tag.attr) do
		if not allowed[attr] then
			-- module:log("debug", "Removing disallowed attribute %q from <%s>", attr, tag.name);
			tag.attr[attr] = nil;
		elseif attr == "src" or attr == "href" then
			local urlattr = url.parse(value);
			local scheme = urlattr and urlattr.scheme;
			-- Whitelist check: anything that is not an explicitly allowed
			-- scheme (e.g. "javascript") must not reach the client.
			if not (scheme and safe_url_schemes[scheme]) then
				tag.attr[attr] = "https://url.was.invalid/";
			end
		end
	end

	-- Check child tags
	tag:maptags(sanitize_xhtml);
	-- This tag is clean!
	return tag;
end
101 | |
102 -- Check for xhtml-im, sanitize if exists | |
-- Event handler for message stanzas: when the stanza carries an XHTML-IM
-- <html/> child, a sanitized clone replaces it on the event.
--
-- Returns true when sanitizing raised an error (after sending a
-- "not-acceptable" error reply to the origin, unless the stanza is itself of
-- type "error"); otherwise returns nothing.
local function message_handler(event)
	local xhtml_im_ns = "http://jabber.org/protocol/xhtml-im";
	local incoming = event.stanza;

	if not incoming:get_child("html", xhtml_im_ns) then
		-- No XHTML-IM payload, nothing to do.
		return;
	end

	-- Work on a clone so the stanza on the event is only swapped on success.
	local copy = st.clone(incoming);
	local sanitized = pcall(function ()
		copy:get_child("html", xhtml_im_ns):maptags(sanitize_xhtml);
	end);

	if sanitized then
		event.stanza = copy;
		return;
	end

	-- Sanitizing raised an error: reply with an error, but never in
	-- response to an error stanza.
	if copy.attr.type ~= "error" then
		local reply = st.error_reply(copy, "modify", "not-acceptable", "Stanza contained illegal XHTML-IM tag");
		event.origin.send(reply);
	end
	return true;
end
119 | |
-- Events on which message_handler is registered, in registration order.
local hooked_events = {
	-- Stanzas received from clients
	"pre-message/bare",
	"pre-message/full",
	"pre-message/host",
	-- Stanzas about to be delivered to clients
	"message/bare",
	"message/full",
};

-- Every hook is registered with the same handler and priority.
for _, event_name in ipairs(hooked_events) do
	module:hook(event_name, message_handler, 71);
end