Comparison

util/stanza.lua @ 12797:be09ac8300a7 0.11 0.11.14

util.stanza: Allow U+7F. Allowed by XML despite arguably being a control character. Drops the part of the range meant to rule out octets invalid in UTF-8 (\247 starts a 4-byte sequence), since UTF-8 correctness is validated by util.encodings.utf8.valid().
author Kim Alvefur <zash@zash.se>
date Tue, 22 Nov 2022 23:56:01 +0100
parent 11261:be38ae8fdfa5
child 12799:3784a8ce0596
comparison
equal deleted inserted replaced
12210:458c5f8d5d3e 12797:be09ac8300a7
43 -- luacheck: std none 43 -- luacheck: std none
44 44
45 local stanza_mt = { __name = "stanza" }; 45 local stanza_mt = { __name = "stanza" };
46 stanza_mt.__index = stanza_mt; 46 stanza_mt.__index = stanza_mt;
47 47
48 -- Basic check for valid XML character data.
49 -- Disallow control characters.
50 -- Tab U+09 and newline U+0A are allowed.
51 -- For attributes, allow the \1 separator between namespace and name.
48 local function valid_xml_cdata(str, attr) 52 local function valid_xml_cdata(str, attr)
49 return not s_find(str, attr and "[^\1\9\10\13\20-~\128-\247]" or "[^\9\10\13\20-~\128-\247]"); 53 return not s_find(str, attr and "[^\1\9\10\13\20-\255]" or "[^\9\10\13\20-\255]");
50 end 54 end
51 55
52 local function check_name(name, name_type) 56 local function check_name(name, name_type)
53 if type(name) ~= "string" then 57 if type(name) ~= "string" then
54 error("invalid "..name_type.." name: expected string, got "..type(name)); 58 error("invalid "..name_type.." name: expected string, got "..type(name));