Software /
code /
prosody
Comparison
util-src/encodings.c @ 6604:478308ee29dd
Merge 0.9->0.10 again
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Fri, 27 Mar 2015 00:27:29 +0100 |
parent | 6413:a552f4170aed |
parent | 6592:141afe8a167b |
child | 6605:03a43bf3ecd2 |
comparison
equal
deleted
inserted
replaced
6587:54306208f30b | 6604:478308ee29dd |
---|---|
1 /* Prosody IM | 1 /* Prosody IM |
2 -- Copyright (C) 2008-2010 Matthew Wild | 2 -- Copyright (C) 2008-2010 Matthew Wild |
3 -- Copyright (C) 2008-2010 Waqas Hussain | 3 -- Copyright (C) 2008-2010 Waqas Hussain |
4 -- Copyright (C) 1994-2015 Lua.org, PUC-Rio. | |
4 -- | 5 -- |
5 -- This project is MIT/X11 licensed. Please see the | 6 -- This project is MIT/X11 licensed. Please see the |
6 -- COPYING file in the source package for more information. | 7 -- COPYING file in the source package for more information. |
7 -- | 8 -- |
8 */ | 9 */ |
118 { "encode", Lbase64_encode }, | 119 { "encode", Lbase64_encode }, |
119 { "decode", Lbase64_decode }, | 120 { "decode", Lbase64_decode }, |
120 { NULL, NULL } | 121 { NULL, NULL } |
121 }; | 122 }; |
122 | 123 |
124 /******************* UTF-8 ********************/ | |
125 | |
126 /* | |
127 * Adapted from Lua 5.3 | |
128 * Needed because libidn does not validate that input is valid UTF-8 | |
129 */ | |
130 | |
#define MAXUNICODE 0x10FFFF

/*
 * Decode one UTF-8 sequence starting at o.
 *
 * o must point into a NUL-terminated buffer: a truncated sequence is
 * detected because the terminator is not a continuation byte.
 *
 * On success returns a pointer to the byte following the sequence and,
 * if val is not NULL, stores the decoded code point in *val.
 * Returns NULL if the byte sequence is invalid: a stray continuation
 * byte, a missing continuation byte, a sequence longer than four bytes,
 * an overlong encoding, a surrogate (U+D800..U+DFFF) or a value above
 * MAXUNICODE.
 */
static const char *utf8_decode (const char *o, int *val) {
	/*
	 * limits[n] is the largest value that does not need n continuation
	 * bytes; a result <= limits[n] is an overlong encoding. limits[0]
	 * rejects a lone continuation byte used as a lead byte.
	 */
	static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFF};
	const unsigned char *s = (const unsigned char *)o;
	unsigned int c = s[0];
	unsigned int res = 0; /* final result */
	if (c < 0x80) { /* ascii? */
		res = c;
	} else {
		int count = 0; /* to count number of continuation bytes */
		while (c & 0x40) { /* still have continuation bytes? */
			unsigned int cc;
			/*
			 * A fourth continuation byte means a 5+ byte sequence,
			 * which is never valid. Bail out before reading further
			 * so that the shift by (count * 5) below can never reach
			 * the width of unsigned int.
			 */
			if (count == 3)
				return NULL; /* invalid byte sequence */
			cc = s[++count]; /* read next byte */
			if ((cc & 0xC0) != 0x80) /* not a continuation byte? */
				return NULL; /* invalid byte sequence */
			res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
			c <<= 1; /* to test next bit */
		}
		res |= ((c & 0x7F) << (count * 5)); /* add first byte */
		if (res > MAXUNICODE || res <= limits[count] || (0xd800 <= res && res <= 0xdfff))
			return NULL; /* invalid byte sequence */
		s += count; /* skip continuation bytes read */
	}
	if (val)
		*val = (int)res;
	return (const char *)s + 1; /* +1 to include first byte */
}
160 | |
161 /* | |
162 * Check that a string is valid UTF-8 | |
163 * Returns NULL if not | |
164 */ | |
165 const char* check_utf8 (lua_State *L, int idx, size_t *l) { | |
166 size_t pos, len; | |
167 const char *s = luaL_checklstring(L, 1, &len); | |
168 pos = 0; | |
169 while (pos <= len) { | |
170 const char *s1 = utf8_decode(s + pos, NULL); | |
171 if (s1 == NULL) { /* conversion error? */ | |
172 return NULL; | |
173 } | |
174 pos = s1 - s; | |
175 } | |
176 if(l != NULL) { | |
177 *l = len; | |
178 } | |
179 return s; | |
180 } | |
181 | |
182 static int Lutf8_valid(lua_State *L) { | |
183 lua_pushboolean(L, check_utf8(L, 1, NULL) != NULL); | |
184 return 1; | |
185 } | |
186 | |
187 static int Lutf8_length(lua_State *L) { | |
188 size_t len; | |
189 if(!check_utf8(L, 1, &len)) { | |
190 lua_pushnil(L); | |
191 lua_pushliteral(L, "invalid utf8"); | |
192 return 2; | |
193 } | |
194 lua_pushinteger(L, len); | |
195 return 1; | |
196 } | |
197 | |
198 static const luaL_Reg Reg_utf8[] = | |
199 { | |
200 { "valid", Lutf8_valid }, | |
201 { "length", Lutf8_length }, | |
202 { NULL, NULL } | |
203 }; | |
204 | |
205 | |
123 /***************** STRINGPREP *****************/ | 206 /***************** STRINGPREP *****************/ |
124 #ifdef USE_STRINGPREP_ICU | 207 #ifdef USE_STRINGPREP_ICU |
125 | 208 |
126 #include <unicode/usprep.h> | 209 #include <unicode/usprep.h> |
127 #include <unicode/ustring.h> | 210 #include <unicode/ustring.h> |
214 int ret; | 297 int ret; |
215 if(!lua_isstring(L, 1)) { | 298 if(!lua_isstring(L, 1)) { |
216 lua_pushnil(L); | 299 lua_pushnil(L); |
217 return 1; | 300 return 1; |
218 } | 301 } |
219 s = lua_tolstring(L, 1, &len); | 302 s = check_utf8(L, 1, &len); |
220 if (len >= 1024) { | 303 if (s == NULL || len >= 1024 || len != strlen(s)) { |
221 lua_pushnil(L); | 304 lua_pushnil(L); |
222 return 1; /* TODO return error message */ | 305 return 1; /* TODO return error message */ |
223 } | 306 } |
224 strcpy(string, s); | 307 strcpy(string, s); |
225 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile); | 308 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile); |
322 #include <idn-free.h> | 405 #include <idn-free.h> |
323 | 406 |
324 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */ | 407 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */ |
325 { | 408 { |
326 size_t len; | 409 size_t len; |
327 const char *s = luaL_checklstring(L, 1, &len); | 410 const char *s = check_utf8(L, 1, &len); |
411 if (s == NULL || len != strlen(s)) { | |
412 lua_pushnil(L); | |
413 return 1; /* TODO return error message */ | |
414 } | |
328 char* output = NULL; | 415 char* output = NULL; |
329 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES); | 416 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES); |
330 if (ret == IDNA_SUCCESS) { | 417 if (ret == IDNA_SUCCESS) { |
331 lua_pushstring(L, output); | 418 lua_pushstring(L, output); |
332 idn_free(output); | 419 idn_free(output); |
382 | 469 |
383 lua_newtable(L); | 470 lua_newtable(L); |
384 luaL_register(L, NULL, Reg_idna); | 471 luaL_register(L, NULL, Reg_idna); |
385 lua_setfield(L, -2, "idna"); | 472 lua_setfield(L, -2, "idna"); |
386 | 473 |
474 lua_newtable(L); | |
475 luaL_register(L, NULL, Reg_utf8); | |
476 lua_setfield(L, -2, "utf8"); | |
477 | |
387 lua_pushliteral(L, "-3.14"); | 478 lua_pushliteral(L, "-3.14"); |
388 lua_setfield(L, -2, "version"); | 479 lua_setfield(L, -2, "version"); |
389 return 1; | 480 return 1; |
390 } | 481 } |