Diff

mod_anti_spam/mod_anti_spam.lua @ 6132:ffec70ddbffc

mod_flags: trunk version backported to 0.12
author Matthew Wild <mwild1@gmail.com>
date Sat, 04 Jan 2025 17:50:35 +0000
parent 6130:5a0e47ad7d6b
child 6134:00b55c7ef393
line wrap: on
line diff
--- a/mod_anti_spam/mod_anti_spam.lua	Wed Jan 01 14:15:20 2025 +0000
+++ b/mod_anti_spam/mod_anti_spam.lua	Sat Jan 04 17:50:35 2025 +0000
@@ -1,5 +1,7 @@
+local cache = require "util.cache";
 local ip = require "util.ip";
 local jid_bare = require "util.jid".bare;
+local jid_host = require "util.jid".host;
 local jid_split = require "util.jid".split;
 local set = require "util.set";
 local sha256 = require "util.hashes".sha256;
@@ -11,10 +13,26 @@
 
 local new_rtbl_subscription = module:require("rtbl").new_rtbl_subscription;
 local trie = module:require("trie");
+local pset = module:require("pset");
 
-local spam_source_domains = set.new();
-local spam_source_ips = trie.new();
-local spam_source_jids = set.new();
+-- { [service_jid] = set, ... }
+local spam_source_domains_by_service = {};
+local spam_source_ips_by_service = {};
+local spam_source_jids_by_service = {};
+
+local service_probabilities = {
+	-- if_present = probability the address is a spammer if they are on the list
+	-- if_absent (optional): probability the address is a spammer if they are not on the list
+	-- [service_jid] = { if_present = 0.9, if_absent = 0.5 };
+};
+
+
+-- These "probabilistic sets" combine the multiple lists according to their weights
+local p_spam_source_domains = pset.new(spam_source_domains_by_service, service_probabilities);
+local p_spam_source_ips = pset.new(spam_source_ips_by_service, service_probabilities);
+local p_spam_source_jids = pset.new(spam_source_jids_by_service, service_probabilities);
+
+local domain_local_report_threshold = module:get_option_number("anti_spam_local_report_threshold", 2);
 
 local count_spam_blocked = module:metric("counter", "anti_spam_blocked", "stanzas", "Stanzas blocked as spam", {"reason"});
 
@@ -67,20 +85,20 @@
 end
 
 function is_spammy_server(session)
-	if spam_source_domains:contains(session.from_host) then
+	if p_spam_source_domains:contains(session.from_host) then
 		return true;
 	end
 	local raw_ip = session.ip;
 	local parsed_ip = raw_ip and ip.new_ip(session.ip);
 	-- Not every session has an ip - for example, stanzas sent from a
 	-- local host session
-	if parsed_ip and spam_source_ips:contains_ip(parsed_ip) then
+	if parsed_ip and p_spam_source_ips:contains_ip(parsed_ip) then
 		return true;
 	end
 end
 
 function is_spammy_sender(sender_jid)
-	return spam_source_jids:contains(sha256(sender_jid, true));
+	return p_spam_source_jids:contains(sha256(sender_jid, true));
 end
 
 local spammy_strings = module:get_option_array("anti_spam_block_strings");
@@ -115,6 +133,16 @@
 local anti_spam_services = module:get_option_array("anti_spam_services", {});
 
 for _, rtbl_service_jid in ipairs(anti_spam_services) do
+	service_probabilities[rtbl_service_jid] = { if_present = 0.95 };
+
+	local spam_source_domains = set.new();
+	local spam_source_ips = trie.new();
+	local spam_source_jids = set.new();
+
+	spam_source_domains_by_service[rtbl_service_jid] = spam_source_domains;
+	spam_source_ips_by_service[rtbl_service_jid] = spam_source_ips;
+	spam_source_jids_by_service[rtbl_service_jid] = spam_source_jids;
+
 	new_rtbl_subscription(rtbl_service_jid, "spam_source_domains", {
 		added = function (item)
 			spam_source_domains:add(item);
@@ -149,6 +177,68 @@
 	});
 end
 
+-- And local reports...
+
+do
+	-- Locally-sourced entries (spam reports + shell commands), registered under
+	-- this host so the psets built from the *_by_service tables include them.
+	local spam_source_domains = set.new();
+	local spam_source_ips = trie.new(); -- trie like the RTBL IP lists; filter_ip calls add_subnet()
+	spam_source_domains_by_service[module.host] = spam_source_domains;
+	spam_source_ips_by_service[module.host] = spam_source_ips;
+
+	local domain_counts = cache.new(100); -- local report tallies, keyed by domain
+	service_probabilities[module.host] = { if_present = 0.6, if_absent = 0.4 };
+
+	module:hook("mod_spam_reporting/spam-report", function (event)
+		-- TODO: check for >= prosody:member
+		local reported_jid = event.jid;
+		local reported_domain = jid_host(reported_jid);
+		local report_count = (domain_counts:get(reported_domain) or 0) + 1;
+		domain_counts:set(reported_domain, report_count);
+		-- Only list the domain once enough local reports have accumulated
+		if report_count >= domain_local_report_threshold then
+			spam_source_domains:add(reported_domain);
+		end
+	end);
+
+	module:add_item("shell-command", {
+		section = "antispam";
+		section_desc = "Anti-spam management commands";
+		name = "filter_domain";
+		desc = "Restrict interactions from a remote domain to a virtual host";
+		args = {
+			{ name = "host", type = "string" };
+			{ name = "remote_domain", type = "string" };
+		};
+		host_selector = "host";
+		handler = function(self, host, remote_domain) --luacheck: ignore 212/self 212/host
+			spam_source_domains:add(remote_domain);
+			return true, "Remote domain now restricted: "..remote_domain;
+		end;
+	});
+
+	module:add_item("shell-command", {
+		section = "antispam";
+		section_desc = "Anti-spam management commands";
+		name = "filter_ip";
+		desc = "Restrict interactions from a remote IP/CIDR to a virtual host";
+		args = {
+			{ name = "host", type = "string" };
+			{ name = "remote_ip", type = "string" };
+		};
+		host_selector = "host";
+		handler = function(self, host, remote_ip) --luacheck: ignore 212/self 212/host
+			local subnet_ip, subnet_bits = ip.parse_cidr(remote_ip);
+			if not subnet_ip then
+				return false, subnet_bits; -- false, err
+			end
+			spam_source_ips:add_subnet(subnet_ip, subnet_bits);
+			return true, "Remote IP now restricted: "..remote_ip;
+		end;
+	});
+end
+
 module:hook("message/bare", function (event)
 	local to_user, to_host = jid_split(event.stanza.attr.to);
 
@@ -200,3 +290,4 @@
 
 	module:log("debug", "Allowing subscription request through");
 end, 500);
+