# HG changeset patch
# User Kim Alvefur
# Date 1605106841 -3600
# Node ID 9d1e21c23784d716c01365ea9bf7b9bb3af16ca0
# Parent  4ae1d485a9c6c894ff58ad0bdf4fad0d497df7e6
util.stanza: Reject ASCII control characters (fixes #1606)

diff -r 4ae1d485a9c6 -r 9d1e21c23784 spec/util_stanza_spec.lua
--- a/spec/util_stanza_spec.lua	Fri Nov 06 13:49:40 2020 +0100
+++ b/spec/util_stanza_spec.lua	Wed Nov 11 16:00:41 2020 +0100
@@ -200,6 +200,7 @@
 				["number"] = 1234, ["table"] = {};
 				["utf8"] = string.char(0xF4, 0x90, 0x80, 0x80);
 				["nil"] = "nil"; ["boolean"] = true;
+				["control characters"] = "\0\1\2\3";
 			};
 
 			for value_type, value in pairs(invalid_names) do
diff -r 4ae1d485a9c6 -r 9d1e21c23784 util/stanza.lua
--- a/util/stanza.lua	Fri Nov 06 13:49:40 2020 +0100
+++ b/util/stanza.lua	Wed Nov 11 16:00:41 2020 +0100
@@ -45,6 +45,14 @@
 local stanza_mt = { __name = "stanza" };
 stanza_mt.__index = stanza_mt;
 
+-- Returns true when str contains only bytes permitted by the patterns below:
+-- TAB (\9), LF (\10), CR (\13), printable ASCII (\32-~) and bytes \128-\247.
+-- When attr is truthy, \1 is additionally permitted.
+-- Lua's \ddd escapes are decimal, so space is \32 (not \20, which is DC4).
+local function valid_xml_cdata(str, attr)
+	return not s_find(str, attr and "[^\1\9\10\13\32-~\128-\247]" or "[^\9\10\13\32-~\128-\247]");
+end
+
 local function check_name(name, name_type)
 	if type(name) ~= "string" then
 		error("invalid "..name_type.." name: expected string, got "..type(name));
@@ -52,6 +60,8 @@
 		error("invalid "..name_type.." name: empty string");
 	elseif s_find(name, "[<>& '\"]") then
 		error("invalid "..name_type.." name: contains invalid characters");
+	elseif not valid_xml_cdata(name, name_type == "attribute") then
+		error("invalid "..name_type.." name: contains control characters");
 	elseif not valid_utf8(name) then
 		error("invalid "..name_type.." name: contains invalid utf8");
 	end
@@ -60,7 +70,9 @@
 local function check_text(text, text_type)
 	if type(text) ~= "string" then
 		error("invalid "..text_type.." value: expected string, got "..type(text));
-	elseif not valid_utf8(text) then
+	elseif not valid_xml_cdata(text) then
+		error("invalid "..text_type.." value: contains control characters");
+	elseif not valid_utf8(text, false) then
 		error("invalid "..text_type.." value: contains invalid utf8");
 	end
 end