Diff

util/human/io.lua @ 11896:93e9f7ae2f9b

util.human.io: Fix cutting of UTF-8 into pieces. Down the rabbit hole we go...
author Kim Alvefur <zash@zash.se>
date Fri, 12 Nov 2021 14:21:15 +0100
parent 11895:d278a4c6da7f
child 11897:e84ea5b58b29
line wrap: on
line diff
--- a/util/human/io.lua	Fri Nov 12 12:19:01 2021 +0100
+++ b/util/human/io.lua	Fri Nov 12 14:21:15 2021 +0100
@@ -1,5 +1,9 @@
 local array = require "util.array";
-local utf8 = rawget(_G,"utf8") or require"util.encodings".utf8;
+local utf8 = rawget(_G, "utf8") or require"util.encodings".utf8;
+local len = utf8.len or function(s)
+	local _, count = s:gsub("[%z\001-\127\194-\253][\128-\191]*", "");
+	return count;
+end;
 
 local function getchar(n)
 	local stty_ret = os.execute("stty raw -echo 2>/dev/null");
@@ -96,11 +100,21 @@
 	return string.rep(" ", width-#s)..s;
 end
 
+local pat = "[%z\001-\127\194-\253][\128-\191]*";
+local function utf8_cut(s, pos)
+	return s:match("^"..pat:rep(pos)) or s;
+end
+
+if utf8.len and utf8.offset then
+	function utf8_cut(s, pos)
+		return s:sub(1, utf8.offset(s, pos+1)-1);
+	end
+end
+
 local function ellipsis(s, width)
-	if #s <= width then return s; end
-	s = s:sub(1, width - 1)
-	while not utf8.len(s) do s = s:sub(1, -2); end
-	return s .. "…";
+	if len(s) <= width then return s; end
+	if width == 1 then return "…"; end
+	return utf8_cut(s, width - 1) .. "…";
 end
 
 local function new_table(col_specs, max_width)
@@ -148,13 +162,13 @@
 			else
 				v = tostring(v);
 			end
-			if #v < width then
+			if len(v) < width then
 				if column.align == "right" then
 					v = padleft(v, width);
 				else
 					v = padright(v, width);
 				end
-			elseif #v > width then
+			elseif len(v) > width then
 				v = ellipsis(v, width);
 			end
 			table.insert(output, v);