Changeset

12031:87bc26f23d9b

util.format: Escape invalid UTF-8 by passing through serialization. Should prevent invalid UTF-8 from making it into the logs, which can cause trouble with terminals or log viewers or other tools, such as when grep determines that log files are binary.
author Kim Alvefur <zash@zash.se>
date Fri, 10 Dec 2021 22:48:45 +0100
parents 12030:9f8206e99b89
children 12032:3db09eb4c43b
files spec/util_format_spec.lua util/format.lua
diffstat 2 files changed, 14 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/spec/util_format_spec.lua	Fri Dec 10 22:25:34 2021 +0100
+++ b/spec/util_format_spec.lua	Fri Dec 10 22:48:45 2021 +0100
@@ -20,5 +20,9 @@
 			assert.equal("␁", format("%s", "\1"));
 		end);
 
+		it("escapes invalid UTF-8", function ()
+			assert.equal("\"Hello w\\195rld\"", format("%s", "Hello w\195rld"));
+		end);
+
 	end);
 end);
--- a/util/format.lua	Fri Dec 10 22:25:34 2021 +0100
+++ b/util/format.lua	Fri Dec 10 22:48:45 2021 +0100
@@ -5,6 +5,7 @@
 local tostring = tostring;
 local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack
 local pack = require "util.table".pack; -- TODO table.pack in 5.2+
+local valid_utf8 = require "util.encodings".utf8.valid;
 local type = type;
 local dump = require "util.serialization".new("debug");
 local num_type = math.type or function (n)
@@ -60,10 +61,18 @@
 				args[i] = dump(arg);
 				spec = "%s";
 			elseif option == "s" then
-				args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+				arg = tostring(arg);
+				if arg:find("[\128-\255]") and not valid_utf8(arg) then
+					args[i] = dump(arg);
+				else
+					args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+				end
 			elseif type(arg) ~= "number" then -- arg isn't number as expected?
 				args[i] = tostring(arg);
 				spec = "[%s]";
+				option = "s";
+				spec = "[%s]";
+				t = "string";
 			elseif expects_integer[option] and num_type(arg) ~= "integer" then
 				args[i] = tostring(arg);
 				spec = "[%s]";