Comparison

util-src/encodings.c @ 9976:0e2f663d0714

util.encodings: Add binding to confusables skeleton function in ICU
author Kim Alvefur <zash@zash.se>
date Wed, 24 Apr 2019 22:40:38 +0200
parent 9973:640a2b8e7806
child 9979:b06f6ff878ee
comparison
equal deleted inserted replaced
9975:ca01c449357f 9976:0e2f663d0714
266 #ifdef USE_STRINGPREP_ICU 266 #ifdef USE_STRINGPREP_ICU
267 267
268 #include <unicode/usprep.h> 268 #include <unicode/usprep.h>
269 #include <unicode/ustring.h> 269 #include <unicode/ustring.h>
270 #include <unicode/utrace.h> 270 #include <unicode/utrace.h>
271 #include <unicode/uspoof.h>
271 272
272 static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) { 273 static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) {
273 size_t input_len; 274 size_t input_len;
274 int32_t unprepped_len, prepped_len, output_len; 275 int32_t unprepped_len, prepped_len, output_len;
275 const char *input; 276 const char *input;
319 320
/* Module-wide ICU handles; all of them are assigned in init_icu(). */
UStringPrepProfile *icu_nameprep;     /* opened with USPREP_RFC3491_NAMEPREP */
UStringPrepProfile *icu_nodeprep;     /* opened with USPREP_RFC3920_NODEPREP */
UStringPrepProfile *icu_resourceprep; /* opened with USPREP_RFC3920_RESOURCEPREP */
UStringPrepProfile *icu_saslprep;     /* opened with USPREP_RFC4013_SASLPREP */
USpoofChecker *icu_spoofcheck;        /* spoof checker set to USPOOF_CONFUSABLE; used by Lskeleton() */
324 326
325 /* initialize global ICU stringprep profiles */ 327 /* initialize global ICU stringprep profiles */
326 void init_icu() { 328 void init_icu() {
327 UErrorCode err = U_ZERO_ERROR; 329 UErrorCode err = U_ZERO_ERROR;
328 utrace_setLevel(UTRACE_VERBOSE); 330 utrace_setLevel(UTRACE_VERBOSE);
329 icu_nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, &err); 331 icu_nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, &err);
330 icu_nodeprep = usprep_openByType(USPREP_RFC3920_NODEPREP, &err); 332 icu_nodeprep = usprep_openByType(USPREP_RFC3920_NODEPREP, &err);
331 icu_resourceprep = usprep_openByType(USPREP_RFC3920_RESOURCEPREP, &err); 333 icu_resourceprep = usprep_openByType(USPREP_RFC3920_RESOURCEPREP, &err);
332 icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err); 334 icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err);
335 icu_spoofcheck = uspoof_open(&err);
336 uspoof_setChecks(icu_spoofcheck, USPOOF_CONFUSABLE, &err);
333 337
334 if(U_FAILURE(err)) { 338 if(U_FAILURE(err)) {
335 fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err)); 339 fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err));
336 } 340 }
337 } 341 }
475 479
476 return 1; 480 return 1;
477 } 481 }
478 } 482 }
479 483
484 static int Lskeleton(lua_State *L) {
485 size_t len;
486 int32_t ulen, dest_len, output_len;
487 const char *s = luaL_checklstring(L, 1, &len);
488 UErrorCode err = U_ZERO_ERROR;
489 UChar ustr[1024];
490 UChar dest[1024];
491 char output[1024];
492
493 u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
494
495 if(U_FAILURE(err)) {
496 lua_pushnil(L);
497 return 1;
498 }
499
500 dest_len = uspoof_getSkeleton(icu_spoofcheck, 0, ustr, ulen, dest, 1024, &err);
501
502 if(U_FAILURE(err)) {
503 lua_pushnil(L);
504 return 1;
505 }
506
507 u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
508
509 if(U_SUCCESS(err)) {
510 lua_pushlstring(L, output, output_len);
511 return 1;
512 }
513
514 lua_pushnil(L);
515 return 1;
516 }
517
480 #else /* USE_STRINGPREP_ICU */ 518 #else /* USE_STRINGPREP_ICU */
481 /****************** libidn ********************/ 519 /****************** libidn ********************/
482 520
483 #include <idna.h> 521 #include <idna.h>
484 #include <idn-free.h> 522 #include <idn-free.h>
556 594
557 lua_newtable(L); 595 lua_newtable(L);
558 luaL_setfuncs(L, Reg_utf8, 0); 596 luaL_setfuncs(L, Reg_utf8, 0);
559 lua_setfield(L, -2, "utf8"); 597 lua_setfield(L, -2, "utf8");
560 598
599 #ifdef USE_STRINGPREP_ICU
600 lua_newtable(L);
601 lua_pushcfunction(L, Lskeleton);
602 lua_setfield(L, -2, "skeleton");
603 lua_setfield(L, -2, "confusable");
604 #endif
605
561 lua_pushliteral(L, "-3.14"); 606 lua_pushliteral(L, "-3.14");
562 lua_setfield(L, -2, "version"); 607 lua_setfield(L, -2, "version");
563 return 1; 608 return 1;
564 } 609 }