Diff

net/dns.lua @ 10957:8902cecbdd39

net.dns: Add jitter to spread queries and reduce failures due to congestion
author Matthew Wild <mwild1@gmail.com>
date Thu, 25 Jun 2020 15:29:49 +0100
parent 10956:03a09fa02e8e
child 10958:25680ece29c2
line wrap: on
line diff
--- a/net/dns.lua	Thu Jun 25 15:28:23 2020 +0100
+++ b/net/dns.lua	Thu Jun 25 15:29:49 2020 +0100
@@ -72,6 +72,8 @@
 local get, set = ztact.get, ztact.set;
 
 local default_timeout = 15;
+local default_jitter = 1;
+local default_retry_jitter = 2;
 
 -------------------------------------------------- module dns
 local _ENV = nil;
@@ -668,6 +670,8 @@
 
 resolver.delays = { 1, 3 };
 
+resolver.jitter = have_timer and default_jitter or nil;
+resolver.retry_jitter = have_timer and default_retry_jitter or nil;
 
 function resolver:addnameserver(address)    -- - - - - - - - - - addnameserver
 	self.server = self.server or {};
@@ -855,7 +859,7 @@
 		packet = header..question,
 		server = self.best_server,
 		delay  = 1,
-		retry  = socket.gettime() + self.delays[1]
+		retry  = socket.gettime() + self.delays[1];
 		qclass = qclass;
 		qtype  = qtype;
 		qname  = qname;
@@ -869,7 +873,13 @@
 	if not conn then
 		return nil, err;
 	end
-	conn:send (o.packet)
+	if self.jitter then
+		timer.add_task(math.random()*self.jitter, function ()
+			conn:send(o.packet);
+		end);
+	else
+		conn:send(o.packet);
+	end
 
 	-- remember which coroutine wants the answer
 	if co then
@@ -920,8 +930,16 @@
 					sock, err = self:getsocket(o.server);
 					if sock then
 						retried = true;
+						if self.retry_jitter then
+							local delay = self.delays[((o.retries-1)%#self.delays)+1] + (math.random()*self.retry_jitter);
+							log("debug", "retry %d in %0.2fs", o.retries, delay);
+							timer.add_task(delay, function ()
+								sock:send(o.packet);
+							end);
+						else
 							log("debug", "retry %d (immediate)", o.retries);
 							sock:send(o.packet);
+						end
 					end
 				end	
 				if not retried then