Software / code / prosody-modules
Comparison
mod_anti_spam/mod_anti_spam.lua @ 6132:ffec70ddbffc
mod_flags: trunk version backported to 0.12
| author | Matthew Wild <mwild1@gmail.com> |
|---|---|
| date | Sat, 04 Jan 2025 17:50:35 +0000 |
| parent | 6130:5a0e47ad7d6b |
| child | 6134:00b55c7ef393 |
Comparison legend: equal, deleted, inserted, replaced
| 6131:f80db102fdb1 | 6132:ffec70ddbffc |
|---|---|
| | 1 local cache = require "util.cache"; |
| 1 local ip = require "util.ip"; | 2 local ip = require "util.ip"; |
| 2 local jid_bare = require "util.jid".bare; | 3 local jid_bare = require "util.jid".bare; |
| | 4 local jid_host = require "util.jid".host; |
| 3 local jid_split = require "util.jid".split; | 5 local jid_split = require "util.jid".split; |
| 4 local set = require "util.set"; | 6 local set = require "util.set"; |
| 5 local sha256 = require "util.hashes".sha256; | 7 local sha256 = require "util.hashes".sha256; |
| 6 local st = require"util.stanza"; | 8 local st = require"util.stanza"; |
| 7 local is_contact_subscribed = require "core.rostermanager".is_contact_subscribed; | 9 local is_contact_subscribed = require "core.rostermanager".is_contact_subscribed; |
| 9 | 11 |
| 10 local user_exists = require "core.usermanager".user_exists; | 12 local user_exists = require "core.usermanager".user_exists; |
| 11 | 13 |
| 12 local new_rtbl_subscription = module:require("rtbl").new_rtbl_subscription; | 14 local new_rtbl_subscription = module:require("rtbl").new_rtbl_subscription; |
| 13 local trie = module:require("trie"); | 15 local trie = module:require("trie"); |
| 14 | 16 local pset = module:require("pset"); |
| 15 local spam_source_domains = set.new(); | 17 |
| 16 local spam_source_ips = trie.new(); | 18 -- { [service_jid] = set, ... } |
| 17 local spam_source_jids = set.new(); | 19 local spam_source_domains_by_service = {}; |
| | 20 local spam_source_ips_by_service = {}; |
| | 21 local spam_source_jids_by_service = {}; |
| | 22 |
| | 23 local service_probabilities = { |
| | 24 -- if_present = probability the address is a spammer if they are on the list |
| | 25 -- if_absent (optional): probability the address is a spammer if they are not on the list |
| | 26 -- [service_jid] = { if_present = 0.9, if_absent = 0.5 }; |
| | 27 }; |
| | 28 |
| | 29 |
| | 30 -- These "probabilistic sets" combine the multiple lists according to their weights |
| | 31 local p_spam_source_domains = pset.new(spam_source_domains_by_service, service_probabilities); |
| | 32 local p_spam_source_ips = pset.new(spam_source_ips_by_service, service_probabilities); |
| | 33 local p_spam_source_jids = pset.new(spam_source_jids_by_service, service_probabilities); |
| | 34 |
| | 35 local domain_local_report_threshold = module:get_option_number("anti_spam_local_report_threshold", 2); |
| 18 | 36 |
| 19 local count_spam_blocked = module:metric("counter", "anti_spam_blocked", "stanzas", "Stanzas blocked as spam", {"reason"}); | 37 local count_spam_blocked = module:metric("counter", "anti_spam_blocked", "stanzas", "Stanzas blocked as spam", {"reason"}); |
| 20 | 38 |
| 21 local hosts = prosody.hosts; | 39 local hosts = prosody.hosts; |
| 22 | 40 |
| 65 return true; -- Stranger danger | 83 return true; -- Stranger danger |
| 66 end | 84 end |
| 67 end | 85 end |
| 68 | 86 |
| 69 function is_spammy_server(session) | 87 function is_spammy_server(session) |
| 70 if spam_source_domains:contains(session.from_host) then | 88 if p_spam_source_domains:contains(session.from_host) then |
| 71 return true; | 89 return true; |
| 72 end | 90 end |
| 73 local raw_ip = session.ip; | 91 local raw_ip = session.ip; |
| 74 local parsed_ip = raw_ip and ip.new_ip(session.ip); | 92 local parsed_ip = raw_ip and ip.new_ip(session.ip); |
| 75 -- Not every session has an ip - for example, stanzas sent from a | 93 -- Not every session has an ip - for example, stanzas sent from a |
| 76 -- local host session | 94 -- local host session |
| 77 if parsed_ip and spam_source_ips:contains_ip(parsed_ip) then | 95 if parsed_ip and p_spam_source_ips:contains_ip(parsed_ip) then |
| 78 return true; | 96 return true; |
| 79 end | 97 end |
| 80 end | 98 end |
| 81 | 99 |
| 82 function is_spammy_sender(sender_jid) | 100 function is_spammy_sender(sender_jid) |
| 83 return spam_source_jids:contains(sha256(sender_jid, true)); | 101 return p_spam_source_jids:contains(sha256(sender_jid, true)); |
| 84 end | 102 end |
| 85 | 103 |
| 86 local spammy_strings = module:get_option_array("anti_spam_block_strings"); | 104 local spammy_strings = module:get_option_array("anti_spam_block_strings"); |
| 87 local spammy_patterns = module:get_option_array("anti_spam_block_patterns"); | 105 local spammy_patterns = module:get_option_array("anti_spam_block_patterns"); |
| 88 | 106 |
| 113 -- Set up RTBLs | 131 -- Set up RTBLs |
| 114 | 132 |
| 115 local anti_spam_services = module:get_option_array("anti_spam_services", {}); | 133 local anti_spam_services = module:get_option_array("anti_spam_services", {}); |
| 116 | 134 |
| 117 for _, rtbl_service_jid in ipairs(anti_spam_services) do | 135 for _, rtbl_service_jid in ipairs(anti_spam_services) do |
| | 136 service_probabilities[rtbl_service_jid] = { if_present = 0.95 }; |
| | 137 |
| | 138 local spam_source_domains = set.new(); |
| | 139 local spam_source_ips = trie.new(); |
| | 140 local spam_source_jids = set.new(); |
| | 141 |
| | 142 spam_source_domains_by_service[rtbl_service_jid] = spam_source_domains; |
| | 143 spam_source_ips_by_service[rtbl_service_jid] = spam_source_ips; |
| | 144 spam_source_jids_by_service[rtbl_service_jid] = spam_source_jids; |
| | 145 |
| 118 new_rtbl_subscription(rtbl_service_jid, "spam_source_domains", { | 146 new_rtbl_subscription(rtbl_service_jid, "spam_source_domains", { |
| 119 added = function (item) | 147 added = function (item) |
| 120 spam_source_domains:add(item); | 148 spam_source_domains:add(item); |
| 121 end; | 149 end; |
| 122 removed = function (item) | 150 removed = function (item) |
| 147 spam_source_jids:remove(item); | 175 spam_source_jids:remove(item); |
| 148 end; | 176 end; |
| 149 }); | 177 }); |
| 150 end | 178 end |
| 151 | 179 |
| | 180 -- And local reports... |
| | 181 |
| | 182 do |
| | 183 local spam_source_domains = set.new(); |
| | 184 local spam_source_ips = set.new(); |
| | 185 |
| | 186 local domain_counts = cache.new(100); |
| | 187 |
| | 188 service_probabilities[module.host] = { if_present = 0.6, if_absent = 0.4 }; |
| | 189 |
| | 190 module:hook("mod_spam_reporting/spam-report", function (event) |
| | 191 -- TODO: check for >= prosody:member |
| | 192 local reported_jid = event.jid; |
| | 193 local reported_domain = jid_host(reported_jid); |
| | 194 local report_count = (domain_counts:get(reported_domain) or 0) + 1; |
| | 195 domain_counts:set(reported_domain, report_count); |
| | 196 |
| | 197 if report_count >= domain_local_report_threshold then |
| | 198 spam_source_domains:add(reported_domain); |
| | 199 end |
| | 200 end); |
| | 201 |
| | 202 module:add_item("shell-command", { |
| | 203 section = "antispam"; |
| | 204 section_desc = "Anti-spam management commands"; |
| | 205 name = "filter_domain"; |
| | 206 desc = "Restrict interactions from a remote domain to a virtual host"; |
| | 207 args = { |
| | 208 { name = "host", type = "string" }; |
| | 209 { name = "remote_domain", type = "string" }; |
| | 210 }; |
| | 211 host_selector = "host"; |
| | 212 handler = function(self, host, remote_domain) --luacheck: ignore 212/self 212/host |
| | 213 spam_source_domains:add(remote_domain); |
| | 214 return true, "Remote domain now restricted: "..remote_domain; |
| | 215 end; |
| | 216 }); |
| | 217 |
| | 218 module:add_item("shell-command", { |
| | 219 section = "antispam"; |
| | 220 section_desc = "Anti-spam management commands"; |
| | 221 name = "filter_ip"; |
| | 222 desc = "Restrict interactions from a remote IP/CIDR to a virtual host"; |
| | 223 args = { |
| | 224 { name = "host", type = "string" }; |
| | 225 { name = "remote_ip", type = "string" }; |
| | 226 }; |
| | 227 host_selector = "host"; |
| | 228 handler = function(self, host, remote_ip) --luacheck: ignore 212/self 212/host |
| | 229 local subnet_ip, subnet_bits = ip.parse_cidr(remote_ip); |
| | 230 if not subnet_ip then |
| | 231 return false, subnet_bits; -- false, err |
| | 232 end |
| | 233 |
| | 234 spam_source_ips:add_subnet(subnet_ip, subnet_bits); |
| | 235 |
| | 236 return true, "Remote IP now restricted: "..remote_ip; |
| | 237 end; |
| | 238 }); |
| | 239 |
| | 240 end |
| | 241 |
| 152 module:hook("message/bare", function (event) | 242 module:hook("message/bare", function (event) |
| 153 local to_user, to_host = jid_split(event.stanza.attr.to); | 243 local to_user, to_host = jid_split(event.stanza.attr.to); |
| 154 | 244 |
| 155 if not hosts[to_host] then | 245 if not hosts[to_host] then |
| 156 module:log("warn", "Skipping filtering of message to unknown host <%s>", to_host); | 246 module:log("warn", "Skipping filtering of message to unknown host <%s>", to_host); |
| 198 | 288 |
| 199 module:log("debug", "Not from known spam source JID"); | 289 module:log("debug", "Not from known spam source JID"); |
| 200 | 290 |
| 201 module:log("debug", "Allowing subscription request through"); | 291 module:log("debug", "Allowing subscription request through"); |
| 202 end, 500); | 292 end, 500); |
| | 293 |