Comparison

util-src/encodings.c @ 6604:478308ee29dd

Merge 0.9->0.10 again
author Kim Alvefur <zash@zash.se>
date Fri, 27 Mar 2015 00:27:29 +0100
parent 6413:a552f4170aed
parent 6592:141afe8a167b
child 6605:03a43bf3ecd2
comparison
equal deleted inserted replaced
6587:54306208f30b 6604:478308ee29dd
1 /* Prosody IM 1 /* Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild 2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain 3 -- Copyright (C) 2008-2010 Waqas Hussain
4 -- Copyright (C) 1994-2015 Lua.org, PUC-Rio.
4 -- 5 --
5 -- This project is MIT/X11 licensed. Please see the 6 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information. 7 -- COPYING file in the source package for more information.
7 -- 8 --
8 */ 9 */
118 { "encode", Lbase64_encode }, 119 { "encode", Lbase64_encode },
119 { "decode", Lbase64_decode }, 120 { "decode", Lbase64_decode },
120 { NULL, NULL } 121 { NULL, NULL }
121 }; 122 };
122 123
124 /******************* UTF-8 ********************/
125
126 /*
127 * Adapted from Lua 5.3
128 * Needed because libidn does not validate that input is valid UTF-8
129 */
130
#define MAXUNICODE 0x10FFFF

/*
 * Decode one UTF-8 sequence starting at `o`.
 *
 * On success, stores the decoded code point in `*val` (when `val` is not
 * NULL) and returns a pointer to the byte following the sequence.
 * Returns NULL if the byte sequence is invalid: a stray or missing
 * continuation byte, an overlong encoding, a sequence longer than four
 * bytes, a UTF-16 surrogate (U+D800..U+DFFF) or a value above MAXUNICODE.
 *
 * The input must be terminated by a byte that is not a continuation byte
 * (such as NUL), because continuation bytes are read without a length check.
 */
static const char *utf8_decode (const char *o, int *val) {
	/*
	 * A sequence with `n` continuation bytes must decode to a value greater
	 * than limits[n]; this rejects overlong encodings, and limits[0] rejects
	 * any non-ASCII byte that stands alone.
	 */
	static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFF};
	const unsigned char *s = (const unsigned char *)o;
	unsigned int c = s[0];
	unsigned int res = 0; /* final result */
	if (c < 0x80) { /* ascii? */
		res = c;
	} else {
		int count = 0; /* to count number of continuation bytes */
		while (c & 0x40) { /* still have continuation bytes? */
			unsigned int cc;
			/*
			 * No valid sequence has more than 3 continuation bytes. Stopping
			 * here keeps `count * 5` below the width of unsigned int (no
			 * undefined shift) and avoids reading bytes that can never be
			 * part of a valid sequence.
			 */
			if (count == 3) {
				return NULL; /* invalid byte sequence */
			}
			cc = s[++count]; /* read next byte */
			if ((cc & 0xC0) != 0x80) { /* not a continuation byte? */
				return NULL; /* invalid byte sequence */
			}
			res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
			c <<= 1; /* to test next bit */
		}
		res |= ((c & 0x7F) << (count * 5)); /* add first byte */
		if (res > MAXUNICODE || res <= limits[count] || (0xD800 <= res && res <= 0xDFFF)) {
			return NULL; /* invalid byte sequence */
		}
		s += count; /* skip continuation bytes read */
	}
	if (val) {
		*val = (int)res;
	}
	return (const char *)s + 1; /* +1 to include first byte */
}
160
161 /*
162 * Check that a string is valid UTF-8
163 * Returns NULL if not
164 */
165 const char* check_utf8 (lua_State *L, int idx, size_t *l) {
166 size_t pos, len;
167 const char *s = luaL_checklstring(L, 1, &len);
168 pos = 0;
169 while (pos <= len) {
170 const char *s1 = utf8_decode(s + pos, NULL);
171 if (s1 == NULL) { /* conversion error? */
172 return NULL;
173 }
174 pos = s1 - s;
175 }
176 if(l != NULL) {
177 *l = len;
178 }
179 return s;
180 }
181
182 static int Lutf8_valid(lua_State *L) {
183 lua_pushboolean(L, check_utf8(L, 1, NULL) != NULL);
184 return 1;
185 }
186
187 static int Lutf8_length(lua_State *L) {
188 size_t len;
189 if(!check_utf8(L, 1, &len)) {
190 lua_pushnil(L);
191 lua_pushliteral(L, "invalid utf8");
192 return 2;
193 }
194 lua_pushinteger(L, len);
195 return 1;
196 }
197
198 static const luaL_Reg Reg_utf8[] =
199 {
200 { "valid", Lutf8_valid },
201 { "length", Lutf8_length },
202 { NULL, NULL }
203 };
204
205
123 /***************** STRINGPREP *****************/ 206 /***************** STRINGPREP *****************/
124 #ifdef USE_STRINGPREP_ICU 207 #ifdef USE_STRINGPREP_ICU
125 208
126 #include <unicode/usprep.h> 209 #include <unicode/usprep.h>
127 #include <unicode/ustring.h> 210 #include <unicode/ustring.h>
214 int ret; 297 int ret;
215 if(!lua_isstring(L, 1)) { 298 if(!lua_isstring(L, 1)) {
216 lua_pushnil(L); 299 lua_pushnil(L);
217 return 1; 300 return 1;
218 } 301 }
219 s = lua_tolstring(L, 1, &len); 302 s = check_utf8(L, 1, &len);
220 if (len >= 1024) { 303 if (s == NULL || len >= 1024 || len != strlen(s)) {
221 lua_pushnil(L); 304 lua_pushnil(L);
222 return 1; /* TODO return error message */ 305 return 1; /* TODO return error message */
223 } 306 }
224 strcpy(string, s); 307 strcpy(string, s);
225 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile); 308 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile);
322 #include <idn-free.h> 405 #include <idn-free.h>
323 406
324 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */ 407 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */
325 { 408 {
326 size_t len; 409 size_t len;
327 const char *s = luaL_checklstring(L, 1, &len); 410 const char *s = check_utf8(L, 1, &len);
411 if (s == NULL || len != strlen(s)) {
412 lua_pushnil(L);
413 return 1; /* TODO return error message */
414 }
328 char* output = NULL; 415 char* output = NULL;
329 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES); 416 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES);
330 if (ret == IDNA_SUCCESS) { 417 if (ret == IDNA_SUCCESS) {
331 lua_pushstring(L, output); 418 lua_pushstring(L, output);
332 idn_free(output); 419 idn_free(output);
382 469
383 lua_newtable(L); 470 lua_newtable(L);
384 luaL_register(L, NULL, Reg_idna); 471 luaL_register(L, NULL, Reg_idna);
385 lua_setfield(L, -2, "idna"); 472 lua_setfield(L, -2, "idna");
386 473
474 lua_newtable(L);
475 luaL_register(L, NULL, Reg_utf8);
476 lua_setfield(L, -2, "utf8");
477
387 lua_pushliteral(L, "-3.14"); 478 lua_pushliteral(L, "-3.14");
388 lua_setfield(L, -2, "version"); 479 lua_setfield(L, -2, "version");
389 return 1; 480 return 1;
390 } 481 }