Annotate

mod_ogp/mod_ogp.lua @ 4746:88f5e12c8351

mod_rest: Imply type=set for archive queries This is what makes GET /archive/{to} do the query instead of retrieve the form only.
author Kim Alvefur <zash@zash.se>
date Thu, 04 Nov 2021 20:17:07 +0100
parent 4598:09f0911c735d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
1 local mod_muc = module:depends("muc")
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
2 local http = require "net.http"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
3 local st = require "util.stanza"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
4 local url_pattern = [[https?://%S+]]
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
5 local domain_pattern = '^%w+://([^/]+)'
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
6 local xmlns_fasten = "urn:xmpp:fasten:0"
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
7 local xmlns_xhtml = "http://www.w3.org/1999/xhtml"
4598
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
8 local allowlist = module:get_option_set("ogp_domain_allowlist", module:get_option_set("ogp_domain_whitelist", {}))
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
9 local denylist = module:get_option_set("ogp_domain_denylist", {})
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
10
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
11
4598
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
12 local function is_allowed(domain)
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
13 if allowlist:empty() then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
14 return true
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
15 end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
16 if allowlist:contains(domain) then
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
17 return true
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
18 end
4598
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
19 return false
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
20 end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
21
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
22 local function is_denied(domain)
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
23 if denylist:empty() then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
24 return false
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
25 end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
26 if denylist:contains(domain) then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
27 return true
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
28 end
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
29 return false
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
30 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
31
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
32
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
33 local function fetch_ogp_data(room, url, origin_id)
4598
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
34 if not url then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
35 return;
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
36 end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
37
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
38 local domain = url:match(domain_pattern);
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4504
diff changeset
39 if is_denied(domain) or not is_allowed(domain) then
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
40 return;
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
41 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
42
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
43 http.request(
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
44 url,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
45 nil,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
46 function(response_body, response_code, _)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
47 if response_code ~= 200 then
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
48 module:log("debug", "Call to %s returned code %s and body %s", url, response_code, response_body)
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
49 return
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
50 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
51
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
52 local to = room.jid
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
53 local from = room and room.jid or module.host
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
54 local fastening = st.message({to = to, from = from, type = 'groupchat'}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id})
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
55 local found_metadata = false
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
56 local message_body = ""
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
57
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
58 local meta_pattern = [[<meta (.-)/?>]]
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
59 for match in response_body:gmatch(meta_pattern) do
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
60 local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
61 if not property then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
62 property = match:match([[property=["']?(og:.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
63 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
64
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
65 local content = match:match([[content=%s*["'](.-)["']%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
66 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
67 content = match:match([[content=["']?(.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
68 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
69 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
70 content = match:match([[content=(.-) property]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
71 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
72 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
73 content = match:match([[content=(.-)$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
74 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
75
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
76 if property and content then
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
77 module:log("debug", property .. "\t" .. content)
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
78 fastening:tag(
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
79 "meta",
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
80 {
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
81 xmlns = xmlns_xhtml,
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
82 property = property,
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
83 content = content
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
84 }
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
85 ):up()
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
86 found_metadata = true
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
87 message_body = message_body .. property .. "\t" .. content .. "\n"
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
88 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
89 end
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
90
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
91 if found_metadata then
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
92 mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
93 end
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
94 module:log("debug", tostring(fastening))
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
95 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
96 )
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
97 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
98
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
99 local function ogp_handler(event)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
100 local room, stanza = event.room, st.clone(event.stanza)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
101 local body = stanza:get_child_text("body")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
102
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
103 if not body then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
104
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
105 local origin_id = stanza:find("{urn:xmpp:sid:0}origin-id@id")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
106 if not origin_id then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
107
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
108 for url in body:gmatch(url_pattern) do
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
109 fetch_ogp_data(room, url, origin_id);
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
110 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
111 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
112
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
113 module:hook("muc-occupant-groupchat", ogp_handler)
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
114
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
115
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
116 module:hook("muc-message-is-historic", function (event)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
117 local fastening = event.stanza:get_child('apply-to', xmlns_fasten)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
118 if fastening and fastening:get_child('meta', xmlns_xhtml) then
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
119 return true
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
120 end
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
121 end);