Software /
code /
prosody-modules
Comparison
mod_pubsub_summary/mod_pubsub_summary.lua @ 4426:3fe2c264aac4
mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
This module is meant for use with mod_pubsub_feeds and tries to improve
on mod_pubsub's built-in Atom summary generator.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Thu, 04 Feb 2021 01:12:41 +0100 |
child | 4435:a620bf249e63 |
comparison
equal
deleted
inserted
replaced
4425:b3e0295e14a3 | 4426:3fe2c264aac4 |
---|---|
-- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
--

-- Named character references that get decoded once all tags have been removed.
-- References not listed here are left in the text unchanged.
local entities = {
	apos = "'";
	quot = '"';
	lt = "<";
	gt = ">";
	amp = "&";
	-- utf8 is only available on Lua 5.3+, fall back to a plain space elsewhere
	nbsp = utf8 and utf8.char(0xa0) or " ";
};

-- Reduce an HTML fragment to readable plain text using pattern substitutions.
-- The order matters: structural tags are rewritten first, then every
-- remaining tag is dropped, and only afterwards are entities decoded so that
-- an escaped "&lt;" is not mistaken for the start of a tag.
local function html_to_text(content)
	-- Paragraphs become blocks separated by a blank line
	content = content:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
	-- List items become "* item" bullets
	content = content:gsub("<li>(.-)</li>\n", "* %1\n");
	-- Links become "text <url>"
	content = content:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "%2 <%1>");
	-- Bold and italic markup all become *emphasis*
	content = content:gsub("<b>(.-)</b>", "*%1*");
	content = content:gsub("<strong>(.-)</strong>", "*%1*");
	content = content:gsub("<em>(.-)</em>", "*%1*");
	content = content:gsub("<i>(.-)</i>", "*%1*");
	-- Images are replaced by their source URL
	content = content:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
	content = content:gsub("<br[^>]*>", "\n");
	-- Drop whatever tags are left
	content = content:gsub("<[^>]+>", "");
	-- Trim surrounding whitespace and collapse runs of blank lines
	content = content:gsub("^%s*", ""):gsub("%s*$", "");
	content = content:gsub("\n\n\n+", "\n\n");
	content = content:gsub("&(%w+);", entities);
	return content;
end

-- Compose a textual representation of Atom payloads
--
-- Reads event.payload (an Atom entry) and returns a string made up of the
-- title, the content (or summary) and the link, or nil if the entry has
-- none of those.
module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
	local payload = event.payload;
	local title = payload:get_child_text("title");
	-- Prefer <content>, fall back to <summary>
	local content_tag = payload:get_child("content") or payload:get_child("summary");
	-- An entry may carry neither element, in which case there is no content
	-- and the summary is built from the title and link alone.
	local content = content_tag and content_tag:get_text();
	if content and content_tag.attr.type == "html" then
		content = html_to_text(content);
	end
	local link = payload:get_child("link");
	local summary;
	if title and content then
		summary = title .. "\n\n" .. content;
	elseif title or content then
		summary = content or title;
	end
	-- Append the link unless the content already consists of just that URL
	if link and link.attr.href and link.attr.href ~= content then
		summary = (summary and summary .. "\n" or "") .. link.attr.href;
	end
	return summary;
end, 1);