Diff

util/format.lua @ 12031:87bc26f23d9b

util.format: Escape invalid UTF-8 by passing through serialization. Should prevent invalid UTF-8 from making it into the logs, which can cause trouble with terminals, log viewers, or other tools, such as when grep determines that log files are binary.
author Kim Alvefur <zash@zash.se>
date Fri, 10 Dec 2021 22:48:45 +0100
parent 11648:96d3cbeb9275
child 12032:3db09eb4c43b
line wrap: on
line diff
--- a/util/format.lua	Fri Dec 10 22:25:34 2021 +0100
+++ b/util/format.lua	Fri Dec 10 22:48:45 2021 +0100
@@ -5,6 +5,7 @@
 local tostring = tostring;
 local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack
 local pack = require "util.table".pack; -- TODO table.pack in 5.2+
+local valid_utf8 = require "util.encodings".utf8.valid;
 local type = type;
 local dump = require "util.serialization".new("debug");
 local num_type = math.type or function (n)
@@ -60,10 +61,18 @@
 				args[i] = dump(arg);
 				spec = "%s";
 			elseif option == "s" then
-				args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+				arg = tostring(arg);
+				if arg:find("[\128-\255]") and not valid_utf8(arg) then
+					args[i] = dump(arg);
+				else
+					args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+				end
 			elseif type(arg) ~= "number" then -- arg isn't number as expected?
 				args[i] = tostring(arg);
 				spec = "[%s]";
+				option = "s";
+				spec = "[%s]";
+				t = "string";
 			elseif expects_integer[option] and num_type(arg) ~= "integer" then
 				args[i] = tostring(arg);
 				spec = "[%s]";