Annotate

mod_ogp/mod_ogp.lua @ 4483:c4f11a4b5ac7

mod_ogp: Add the ability to whitelist domains
author JC Brand <jc@opkode.com>
date Tue, 02 Mar 2021 13:36:10 +0100
parent 4482:21698b960bd6
child 4504:0136c98f574c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
1 local mod_muc = module:depends("muc")
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
2 local http = require "net.http"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
3 local st = require "util.stanza"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
-- Lua pattern used to pick http:// and https:// URLs out of a message body.
local url_pattern = "https?://%S+"
-- Lua pattern capturing everything between "scheme://" and the next "/".
local domain_pattern = "^%w+://([^/]+)"

-- XML namespaces used for the fastening wrapper and the <meta/> children.
local xmlns_fasten = "urn:xmpp:fasten:0"
local xmlns_xhtml = "http://www.w3.org/1999/xhtml"

-- Set read from the "ogp_domain_whitelist" option; an empty table is the default.
local whitelist = module:get_option_set("ogp_domain_whitelist", {})
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
9
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
10
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
--- Decide whether OGP data may be fetched for a URL.
--
-- An empty whitelist allows every URL. Otherwise the part of the URL captured
-- by `domain_pattern` must be a member of the whitelist, either exactly as
-- written or in lower case (host names are case-insensitive, so
-- "Example.com" should match a whitelisted "example.com").
--
-- @param url string, the URL to check
-- @return true when the URL may be fetched, false otherwise
local function is_whitelisted(url)
	if whitelist:empty() then
		return true
	end

	local domain = url:match(domain_pattern)
	if not domain then
		-- No host could be extracted (e.g. "https:///path"); fail closed
		-- instead of handing nil to the set lookup.
		return false
	end

	-- Exact match first so existing configurations keep working unchanged.
	if whitelist:contains(domain) or whitelist:contains(domain:lower()) then
		return true
	end
	return false
end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
21
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
22
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
--- Request `url` and, if the response carries Open Graph <meta/> tags,
-- broadcast them to the room as a fastening on the original message.
--
-- Nothing is requested when `url` is nil or rejected by is_whitelisted().
-- The response is handled in the callback passed to http.request; only
-- responses with status code 200 are parsed.
--
-- @param room       room object; its `jid` field is used for addressing
-- @param url        URL to fetch
-- @param origin_id  id placed on the <apply-to/> element of the fastening
local function fetch_ogp_data(room, url, origin_id)
	if not url or not is_whitelisted(url) then
		return;
	end

	http.request(
		url,
		nil,
		function(response_body, response_code, _)
			if response_code ~= 200 then
				return
			end

			local room_jid = room.jid
			local fastening = st.message({
				to = room_jid,
				from = room_jid or module.host,
				type = 'groupchat'
			}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id})
			local found_metadata = false

			local meta_pattern = [[<meta (.-)/?>]]
			for match in response_body:gmatch(meta_pattern) do
				-- Attribute order and quoting vary between sites, so each
				-- attribute is tried against several patterns in turn.
				local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
					or match:match([[property=["']?(og:.-)["']$]])

				local content = match:match([[content=%s*["'](.-)["']%s]])
					or match:match([[content=["']?(.-)["']$]])
					or match:match([[content=(.-) property]])
					or match:match([[content=(.-)$]])

				if property and content then
					-- The values come from a remote page: pass them as
					-- arguments, never as the format string itself.
					module:log("info", "%s\t%s", property, content)
					fastening:tag(
						"meta",
						{
							xmlns = xmlns_xhtml,
							property = property,
							content = content
						}
					):up()
					found_metadata = true
				end
			end

			if found_metadata then
				-- NOTE(review): assumes get_room_from_jid returns nil when the
				-- room is gone by the time the response arrives; confirm.
				local current_room = mod_muc.get_room_from_jid(room_jid)
				if current_room then
					current_room:broadcast_message(fastening)
				else
					module:log("debug", "Room %s not found, dropping OGP fastening", tostring(room_jid))
				end
			end
			module:log("info", "%s", tostring(fastening))
		end
	)
end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
82
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
--- Handler for groupchat messages: requests OGP data for every URL found
-- in the message body.
--
-- Stanzas with no <body/> text or no origin-id (urn:xmpp:sid:0) are ignored.
--
-- @param event  hook event; `event.room` and `event.stanza` are read
local function ogp_handler(event)
	local room = event.room
	local message = st.clone(event.stanza)

	-- The origin-id is only looked up when there is body text to scan.
	local text = message:get_child_text("body")
	local origin_id = text and message:find("{urn:xmpp:sid:0}origin-id@id")
	if not origin_id then
		return
	end

	for link in text:gmatch(url_pattern) do
		fetch_ogp_data(room, link, origin_id)
	end
end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
96
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
-- Scan every groupchat message sent by an occupant for URLs.
module:hook("muc-occupant-groupchat", ogp_handler)

-- Report a message as historic when it carries an <apply-to/> fastening
-- that contains at least one XHTML <meta/> child; otherwise return nothing.
module:hook("muc-message-is-historic", function (event)
	local apply_to = event.stanza:get_child('apply-to', xmlns_fasten)
	if not apply_to then
		return
	end
	if apply_to:get_child('meta', xmlns_xhtml) then
		return true
	end
end);