Annotate

mod_ogp/mod_ogp.lua @ 4542:fb4a50bf60f1

mod_prometheus: Invoke stats collection if in 'manual' mode Since 10d13e0554f9 a special value for statistics_interval "manual" exists, where a module is expected to invoke processing in connection to collection of stats. This makes internal collection and exporting to Prometheus happens at the same time with no chance of timers getting out of sync.
author Kim Alvefur <zash@zash.se>
date Tue, 13 Apr 2021 23:53:53 +0200
parent 4504:0136c98f574c
child 4598:09f0911c735d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
1 local mod_muc = module:depends("muc")
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
2 local http = require "net.http"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
3 local st = require "util.stanza"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
4 local url_pattern = [[https?://%S+]]
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
5 local domain_pattern = '^%w+://([^/]+)'
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
6 local xmlns_fasten = "urn:xmpp:fasten:0"
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
7 local xmlns_xhtml = "http://www.w3.org/1999/xhtml"
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
8 local whitelist = module:get_option_set("ogp_domain_whitelist", {})
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
9
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
10
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
11 local function is_whitelisted(url)
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
12 if whitelist:empty() then
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
13 return true
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
14 end
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
15 local domain = url:match(domain_pattern)
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
16 if whitelist:contains(domain) then
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
17 return true;
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
18 end
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
19 return false
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
20 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
21
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
22
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
23 local function fetch_ogp_data(room, url, origin_id)
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
24 if not url or not is_whitelisted(url) then
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
25 return;
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
26 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
27
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
28 http.request(
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
29 url,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
30 nil,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
31 function(response_body, response_code, _)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
32 if response_code ~= 200 then
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
33 module:log("debug", "Call to %s returned code %s and body %s", url, response_code, response_body)
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
34 return
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
35 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
36
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
37 local to = room.jid
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
38 local from = room and room.jid or module.host
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
39 local fastening = st.message({to = to, from = from, type = 'groupchat'}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id})
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
40 local found_metadata = false
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
41 local message_body = ""
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
42
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
43 local meta_pattern = [[<meta (.-)/?>]]
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
44 for match in response_body:gmatch(meta_pattern) do
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
45 local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
46 if not property then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
47 property = match:match([[property=["']?(og:.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
48 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
49
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
50 local content = match:match([[content=%s*["'](.-)["']%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
51 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
52 content = match:match([[content=["']?(.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
53 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
54 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
55 content = match:match([[content=(.-) property]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
56 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
57 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
58 content = match:match([[content=(.-)$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
59 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
60
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
61 if property and content then
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
62 module:log("debug", property .. "\t" .. content)
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
63 fastening:tag(
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
64 "meta",
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
65 {
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
66 xmlns = xmlns_xhtml,
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
67 property = property,
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
68 content = content
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
69 }
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
70 ):up()
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
71 found_metadata = true
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
72 message_body = message_body .. property .. "\t" .. content .. "\n"
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
73 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
74 end
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
75
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
76 if found_metadata then
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
77 mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
78 end
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
79 module:log("debug", tostring(fastening))
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
80 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
81 )
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
82 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
83
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
84 local function ogp_handler(event)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
85 local room, stanza = event.room, st.clone(event.stanza)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
86 local body = stanza:get_child_text("body")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
87
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
88 if not body then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
89
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
90 local origin_id = stanza:find("{urn:xmpp:sid:0}origin-id@id")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
91 if not origin_id then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
92
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
93 for url in body:gmatch(url_pattern) do
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
94 fetch_ogp_data(room, url, origin_id);
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
95 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
96 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
97
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
98 module:hook("muc-occupant-groupchat", ogp_handler)
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
99
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
100
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
101 module:hook("muc-message-is-historic", function (event)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
102 local fastening = event.stanza:get_child('apply-to', xmlns_fasten)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
103 if fastening and fastening:get_child('meta', xmlns_xhtml) then
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
104 return true
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
105 end
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
106 end);