Changeset

9976:0e2f663d0714

util.encodings: Add binding to confusables skeleton function in ICU
author Kim Alvefur <zash@zash.se>
date Wed, 24 Apr 2019 22:40:38 +0200
parents 9975:ca01c449357f
children 9977:90ac1b6c3f28
files util-src/encodings.c
diffstat 1 files changed, 45 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/util-src/encodings.c	Wed Apr 24 18:06:48 2019 +0200
+++ b/util-src/encodings.c	Wed Apr 24 22:40:38 2019 +0200
@@ -268,6 +268,7 @@
 #include <unicode/usprep.h>
 #include <unicode/ustring.h>
 #include <unicode/utrace.h>
+#include <unicode/uspoof.h>
 
 static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) {
 	size_t input_len;
@@ -321,6 +322,7 @@
 UStringPrepProfile *icu_nodeprep;
 UStringPrepProfile *icu_resourceprep;
 UStringPrepProfile *icu_saslprep;
+USpoofChecker *icu_spoofcheck;
 
 /* initialize global ICU stringprep profiles */
 void init_icu() {
@@ -330,6 +332,8 @@
 	icu_nodeprep = usprep_openByType(USPREP_RFC3920_NODEPREP, &err);
 	icu_resourceprep = usprep_openByType(USPREP_RFC3920_RESOURCEPREP, &err);
 	icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err);
+	icu_spoofcheck = uspoof_open(&err);
+	uspoof_setChecks(icu_spoofcheck, USPOOF_CONFUSABLE, &err);
 
 	if(U_FAILURE(err)) {
 		fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err));
@@ -477,6 +481,40 @@
 	}
 }
 
+static int Lskeleton(lua_State *L) {
+	size_t len;
+	int32_t ulen, dest_len, output_len;
+	const char *s = luaL_checklstring(L, 1, &len);
+	UErrorCode err = U_ZERO_ERROR;
+	UChar ustr[1024];
+	UChar dest[1024];
+	char output[1024];
+
+	u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
+
+	if(U_FAILURE(err)) {
+		lua_pushnil(L);
+		return 1;
+	}
+
+	dest_len = uspoof_getSkeleton(icu_spoofcheck, 0, ustr, ulen, dest, 1024, &err);
+
+	if(U_FAILURE(err)) {
+		lua_pushnil(L);
+		return 1;
+	}
+
+	u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
+
+	if(U_SUCCESS(err)) {
+		lua_pushlstring(L, output, output_len);
+		return 1;
+	}
+
+	lua_pushnil(L);
+	return 1;
+}
+
 #else /* USE_STRINGPREP_ICU */
 /****************** libidn ********************/
 
@@ -558,6 +596,13 @@
 	luaL_setfuncs(L, Reg_utf8, 0);
 	lua_setfield(L, -2, "utf8");
 
+#ifdef USE_STRINGPREP_ICU
+	lua_newtable(L);
+	lua_pushcfunction(L, Lskeleton);
+	lua_setfield(L, -2, "skeleton");
+	lua_setfield(L, -2, "confusable");
+#endif
+
 	lua_pushliteral(L, "-3.14");
 	lua_setfield(L, -2, "version");
 	return 1;