Software /
code /
prosody
File
util/format.lua @ 13552:f0b2c026e542
net.server_epoll: Improve efficiency of sending much buffered data
Problem: The string slice operations when a lot of data gets buffered
ends up being expensive and memory-consuming. We have util.dbuffer for
precisely this kind of thing.
I want to keep the behavior of writebuffer being upgraded from nil to a
string to full buffer since the last step involves three table
allocations, where the previous buffer method only used one. Avoiding
those allocations for simple writes like white space keep alive feels
like it would keep memory churn down.
This work was started in 2020
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Sat, 09 Nov 2024 00:37:15 +0100 |
parent | 12984:f08125a8be34 |
line wrap: on
line source
-- -- A string.format wrapper that gracefully handles invalid arguments since -- certain format string and argument combinations may cause errors or other -- issues like log spoofing -- -- Provides some protection from e.g. CAPEC-135, CWE-117, CWE-134, CWE-93 local tostring = tostring; local unpack = table.unpack; local pack = table.pack; local valid_utf8 = require "prosody.util.encodings".utf8.valid; local type = type; local dump = require"prosody.util.serialization".new({ preset = "compact"; fallback = function(v, why) return "_[[" .. (why or tostring(v)) .. "]] "; end; freeze = true; fatal = false; maxdepth = 5; }); local num_type = math.type; -- In Lua 5.3+ these formats throw an error if given a float local expects_integer = { c = true, d = true, i = true, o = true, u = true, X = true, x = true, }; -- In Lua 5.2 these throw an error given a negative number local expects_positive = { o = true; u = true; x = true; X = true }; -- Printable Unicode replacements for control characters local control_symbols = { -- 0x00 .. 0x1F --> U+2400 .. U+241F, 0x7F --> U+2421 ["\000"] = "\226\144\128", ["\001"] = "\226\144\129", ["\002"] = "\226\144\130", ["\003"] = "\226\144\131", ["\004"] = "\226\144\132", ["\005"] = "\226\144\133", ["\006"] = "\226\144\134", ["\007"] = "\226\144\135", ["\008"] = "\226\144\136", ["\009"] = "\226\144\137", ["\010"] = "\226\144\138", ["\011"] = "\226\144\139", ["\012"] = "\226\144\140", ["\013"] = "\226\144\141", ["\014"] = "\226\144\142", ["\015"] = "\226\144\143", ["\016"] = "\226\144\144", ["\017"] = "\226\144\145", ["\018"] = "\226\144\146", ["\019"] = "\226\144\147", ["\020"] = "\226\144\148", ["\021"] = "\226\144\149", ["\022"] = "\226\144\150", ["\023"] = "\226\144\151", ["\024"] = "\226\144\152", ["\025"] = "\226\144\153", ["\026"] = "\226\144\154", ["\027"] = "\226\144\155", ["\028"] = "\226\144\156", ["\029"] = "\226\144\157", ["\030"] = "\226\144\158", ["\031"] = "\226\144\159", ["\127"] = "\226\144\161", }; local supports_p = pcall(string.format, "%p", ""); -- >= Lua 5.4 local function format(formatstring, ...) local args = pack(...); local args_length = args.n; -- format specifier spec: -- 1. Start: '%%' -- 2. Flags: '[%-%+ #0]' -- 3. Width: '%d?%d?' -- 4. Precision: '%.?%d?%d?' -- 5. Option: '[cdiouxXaAeEfgGqs%%]' -- -- The options c, d, E, e, f, g, G, i, o, u, X, and x all expect a number as argument, whereas q and s expect a string. -- This function does not accept string values containing embedded zeros, except as arguments to the q option. -- a and A are only in Lua 5.2+ -- Lua 5.4 adds a p format that produces a pointer -- process each format specifier local i = 0; formatstring = formatstring:gsub("%%[^cdiouxXaAeEfgGpqs%%]*[cdiouxXaAeEfgGpqs%%]", function(spec) if spec == "%%" then return end i = i + 1; local arg = args[i]; if arg == nil then args[i] = "nil"; return "(%s)"; end local option = spec:sub(-1); local t = type(arg); if option == "s" and t == "string" and not arg:find("[%z\1-\31\128-\255]") then -- No UTF-8 or control characters, assumed to be the common case. return elseif t == "number" then if option == "g" or (option == "d" and num_type(arg) == "integer") then return end elseif option == "s" and t ~= "string" then arg = tostring(arg); t = "string"; end if option ~= "s" and option ~= "q" and option ~= "p" then -- all other options expect numbers if t ~= "number" then -- arg isn't number as expected? arg = tostring(arg); option = "s"; spec = "[%s]"; t = "string"; elseif expects_integer[option] and num_type(arg) ~= "integer" then args[i] = tostring(arg); return "[%s]"; elseif expects_positive[option] and arg < 0 then args[i] = tostring(arg); return "[%s]"; else return -- acceptable number end end if option == "p" and not supports_p then arg = tostring(arg); option = "s"; spec = "[%s]"; t = "string"; end if t == "string" and option ~= "p" then if not valid_utf8(arg) then option = "q"; elseif option ~= "q" then -- gets fully escaped in the next block -- Prevent funny things with ASCII control characters and ANSI escape codes (CWE-117) -- Also ensure embedded newlines can't look like another log line (CWE-93) args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t"); return spec; end end if option == "q" then args[i] = dump(arg); return "%s"; end if option == "p" and (t == "boolean" or t == "number") then args[i] = tostring(arg); return "[%s]"; end end); -- process extra args while i < args_length do i = i + 1; local arg = args[i]; if arg == nil then args[i] = "(nil)"; else args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t"); end formatstring = formatstring .. " [%s]" end return formatstring:format(unpack(args)); end return { format = format; };