Changeset

12032:3db09eb4c43b

util.format: Ensure sanitation of strings passed to wrong format Ie. log("debug", "%d", "\1\2\3") should not result in garbage. Also optimizing for the common case of ASCII string passed to %s and early returns everywhere. Returning nil from a gsub callback keeps the original substring.
author Kim Alvefur <zash@zash.se>
date Sat, 11 Dec 2021 13:30:34 +0100
parents 12031:87bc26f23d9b
children 12033:161f8268c4b3
files spec/util_format_spec.lua util/format.lua
diffstat 2 files changed, 38 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/spec/util_format_spec.lua	Fri Dec 10 22:48:45 2021 +0100
+++ b/spec/util_format_spec.lua	Sat Dec 11 13:30:34 2021 +0100
@@ -18,6 +18,7 @@
 
 		it("escapes ascii control stuff", function ()
 			assert.equal("␁", format("%s", "\1"));
+			assert.equal("[␁]", format("%d", "\1"));
 		end);
 
 		it("escapes invalid UTF-8", function ()
--- a/util/format.lua	Fri Dec 10 22:48:45 2021 +0100
+++ b/util/format.lua	Sat Dec 11 13:30:34 2021 +0100
@@ -49,36 +49,52 @@
 	-- process each format specifier
 	local i = 0;
 	formatstring = formatstring:gsub("%%[^cdiouxXaAeEfgGqs%%]*[cdiouxXaAeEfgGqs%%]", function(spec)
-		if spec ~= "%%" then
-			i = i + 1;
-			local arg = args[i];
+		if spec == "%%" then return end
+		i = i + 1;
+		local arg = args[i];
+
+		if arg == nil then
+			args[i] = "nil";
+			return "(%s)";
+		end
 
-			local option = spec:sub(-1);
-			if arg == nil then
-				args[i] = "nil";
-				spec = "(%s)";
-			elseif option == "q" then
-				args[i] = dump(arg);
-				spec = "%s";
-			elseif option == "s" then
+		local option = spec:sub(-1);
+		local t = type(arg);
+
+		if option == "s" and t == "string" and not arg:find("[%z\1-\31\128-\255]") then
+			-- No UTF-8 or control characters, assumed to be the common case.
+			return
+		end
+
+		if option ~= "s" and option ~= "q" then
+			-- all other options expect numbers
+			if t ~= "number" then
+				-- arg isn't number as expected?
 				arg = tostring(arg);
-				if arg:find("[\128-\255]") and not valid_utf8(arg) then
-					args[i] = dump(arg);
-				else
-					args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
-				end
-			elseif type(arg) ~= "number" then -- arg isn't number as expected?
-				args[i] = tostring(arg);
-				spec = "[%s]";
 				option = "s";
 				spec = "[%s]";
 				t = "string";
 			elseif expects_integer[option] and num_type(arg) ~= "integer" then
 				args[i] = tostring(arg);
-				spec = "[%s]";
+				return "[%s]";
+			else
+				return -- acceptable number
 			end
 		end
-		return spec;
+
+		if t == "string" then
+			if not valid_utf8(arg) then
+				option = "q";
+			else
+				args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+				return spec;
+			end
+		end
+
+		if option == "q" then
+			args[i] = dump(arg);
+			return "%s";
+		end
 	end);
 
 	-- process extra args