Comparison

util-src/encodings.c @ 6591:fe3018a2f187

util.encodings: Perform validation of UTF-8 strings before passing to libidn (Based on code from the utf8 library in Lua 5.3)
author Kim Alvefur <zash@zash.se>
date Mon, 23 Mar 2015 14:26:34 +0100
parent 4302:bbb0bf0a09f5
child 6592:141afe8a167b
comparison
equal deleted inserted replaced
6579:5a82ee60e07e 6591:fe3018a2f187
1 /* Prosody IM 1 /* Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild 2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain 3 -- Copyright (C) 2008-2010 Waqas Hussain
4 -- Copyright (C) 1994-2015 Lua.org, PUC-Rio.
4 -- 5 --
5 -- This project is MIT/X11 licensed. Please see the 6 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information. 7 -- COPYING file in the source package for more information.
7 -- 8 --
8 */ 9 */
114 { "encode", Lbase64_encode }, 115 { "encode", Lbase64_encode },
115 { "decode", Lbase64_decode }, 116 { "decode", Lbase64_decode },
116 { NULL, NULL } 117 { NULL, NULL }
117 }; 118 };
118 119
120 /******************* UTF-8 ********************/
121
122 /*
123 * Adapted from Lua 5.3
124 * Needed because libidn does not validate that input is valid UTF-8
125 */
126
#define MAXUNICODE 0x10FFFF

/*
 * Decode one UTF-8 sequence starting at `o`.
 *
 * o:   pointer to the first byte of the sequence. Every byte that is read
 *      past the first must be a continuation byte, so decoding stops at the
 *      first non-continuation byte (e.g. a terminating NUL).
 * val: if not NULL, receives the decoded code point on success.
 *
 * Returns a pointer to the byte following the decoded sequence, or NULL if
 * the sequence is invalid: a stray continuation byte, a missing continuation
 * byte, more than 3 continuation bytes, an overlong encoding, a value above
 * MAXUNICODE, or a UTF-16 surrogate (U+D800..U+DFFF).
 */
static const char *utf8_decode (const char *o, int *val) {
	/*
	 * limits[n] is the largest value that must NOT be encoded with n
	 * continuation bytes (anything <= limits[n] is an overlong form).
	 * limits[0] rejects every lead byte in 0x80..0xBF, since with no
	 * continuation bytes the result is at most 0x7F.
	 */
	static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFF};
	const unsigned char *s = (const unsigned char *)o;
	unsigned int c = s[0];
	unsigned int res = 0; /* final result */

	if (c < 0x80) { /* ascii? */
		res = c;
	} else {
		int count = 0; /* number of continuation bytes */

		while (c & 0x40) { /* still have continuation bytes? */
			unsigned int cc = s[++count]; /* read next byte */
			if ((cc & 0xC0) != 0x80) { /* not a continuation byte? */
				return NULL; /* invalid byte sequence */
			}
			res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
			c <<= 1; /* to test next bit */
		}

		/*
		 * Reject over-long sequences BEFORE shifting: a lead byte of 0xFF
		 * followed by seven continuation bytes gives count == 7, and a
		 * shift by 35 bits is undefined for a 32-bit unsigned int.
		 */
		if (count > 3) {
			return NULL; /* invalid byte sequence */
		}

		res |= ((c & 0x7F) << (count * 5)); /* add first byte */

		if (res > MAXUNICODE || res <= limits[count] || (0xD800 <= res && res <= 0xDFFF)) {
			return NULL; /* out of range, overlong, or surrogate */
		}
		s += count; /* skip continuation bytes read */
	}

	if (val) {
		*val = (int)res;
	}
	return (const char *)s + 1; /* +1 to include first byte */
}
156
157 /*
158 * Check that a string is valid UTF-8
159 * Returns NULL if not
160 */
161 const char* check_utf8 (lua_State *L, int idx, size_t *l) {
162 size_t pos, len;
163 const char *s = luaL_checklstring(L, 1, &len);
164 pos = 0;
165 while (pos <= len) {
166 const char *s1 = utf8_decode(s + pos, NULL);
167 if (s1 == NULL) { /* conversion error? */
168 return NULL;
169 }
170 pos = s1 - s;
171 }
172 if(l != NULL) {
173 *l = len;
174 }
175 return s;
176 }
177
178
119 /***************** STRINGPREP *****************/ 179 /***************** STRINGPREP *****************/
120 #ifdef USE_STRINGPREP_ICU 180 #ifdef USE_STRINGPREP_ICU
121 181
122 #include <unicode/usprep.h> 182 #include <unicode/usprep.h>
123 #include <unicode/ustring.h> 183 #include <unicode/ustring.h>
210 int ret; 270 int ret;
211 if(!lua_isstring(L, 1)) { 271 if(!lua_isstring(L, 1)) {
212 lua_pushnil(L); 272 lua_pushnil(L);
213 return 1; 273 return 1;
214 } 274 }
215 s = lua_tolstring(L, 1, &len); 275 s = check_utf8(L, 1, &len);
216 if (len >= 1024) { 276 if (s == NULL || len >= 1024 || len != strlen(s)) {
217 lua_pushnil(L); 277 lua_pushnil(L);
218 return 1; /* TODO return error message */ 278 return 1; /* TODO return error message */
219 } 279 }
220 strcpy(string, s); 280 strcpy(string, s);
221 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile); 281 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile);
318 #include <idn-free.h> 378 #include <idn-free.h>
319 379
320 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */ 380 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */
321 { 381 {
322 size_t len; 382 size_t len;
323 const char *s = luaL_checklstring(L, 1, &len); 383 const char *s = check_utf8(L, 1, &len);
384 if (s == NULL || len != strlen(s)) {
385 lua_pushnil(L);
386 return 1; /* TODO return error message */
387 }
324 char* output = NULL; 388 char* output = NULL;
325 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES); 389 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES);
326 if (ret == IDNA_SUCCESS) { 390 if (ret == IDNA_SUCCESS) {
327 lua_pushstring(L, output); 391 lua_pushstring(L, output);
328 idn_free(output); 392 idn_free(output);