Software /
code /
prosody
Comparison
util-src/encodings.c @ 6591:fe3018a2f187
util.encodings: Perform validation of UTF-8 strings before passing to libidn (Based on code from the utf8 library in Lua 5.3)
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Mon, 23 Mar 2015 14:26:34 +0100 |
parent | 4302:bbb0bf0a09f5 |
child | 6592:141afe8a167b |
comparison
equal
deleted
inserted
replaced
6579:5a82ee60e07e | 6591:fe3018a2f187 |
---|---|
1 /* Prosody IM | 1 /* Prosody IM |
2 -- Copyright (C) 2008-2010 Matthew Wild | 2 -- Copyright (C) 2008-2010 Matthew Wild |
3 -- Copyright (C) 2008-2010 Waqas Hussain | 3 -- Copyright (C) 2008-2010 Waqas Hussain |
4 -- Copyright (C) 1994-2015 Lua.org, PUC-Rio. | |
4 -- | 5 -- |
5 -- This project is MIT/X11 licensed. Please see the | 6 -- This project is MIT/X11 licensed. Please see the |
6 -- COPYING file in the source package for more information. | 7 -- COPYING file in the source package for more information. |
7 -- | 8 -- |
8 */ | 9 */ |
114 { "encode", Lbase64_encode }, | 115 { "encode", Lbase64_encode }, |
115 { "decode", Lbase64_decode }, | 116 { "decode", Lbase64_decode }, |
116 { NULL, NULL } | 117 { NULL, NULL } |
117 }; | 118 }; |
118 | 119 |
120 /******************* UTF-8 ********************/ | |
121 | |
122 /* | |
123 * Adapted from Lua 5.3 | |
124 * Needed because libidn does not validate that input is valid UTF-8 | |
125 */ | |
126 | |
#define MAXUNICODE 0x10FFFF

/*
 * Decode one UTF-8 sequence starting at 'o'.
 *
 * On success the decoded code point is stored in *val (when val is not
 * NULL) and a pointer to the byte following the sequence is returned.
 * Returns NULL when the bytes are not valid UTF-8: a stray or missing
 * continuation byte, a sequence needing more than three continuation
 * bytes, an overlong encoding, a UTF-16 surrogate (U+D800..U+DFFF) or a
 * value above MAXUNICODE.
 *
 * The input must be NUL-terminated. Continuation bytes are read without a
 * length check; a NUL is never a continuation byte, so decoding stops at
 * the terminator.
 */
static const char *utf8_decode (const char *o, int *val) {
	/*
	 * limits[n] is the largest value that must NOT be encoded with n
	 * continuation bytes. limits[0] is 0xFF so that a non-ASCII lead byte
	 * with no continuation bytes (0x80..0xBF) is always rejected.
	 */
	static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFF};
	const unsigned char *s = (const unsigned char *)o;
	unsigned int c = s[0];
	unsigned int res = 0; /* final result */

	if (c < 0x80) { /* ascii? */
		res = c;
	} else {
		int count = 0; /* number of continuation bytes consumed */

		while (c & 0x40) { /* lead byte announces another continuation byte? */
			unsigned int cc;

			/*
			 * Reject over-long sequences before touching more input. This
			 * keeps 'count' a valid index into limits[] and keeps the
			 * shift below (count * 5) under the width of unsigned int.
			 */
			if (++count > 3) {
				return NULL;
			}
			cc = s[count]; /* read next byte */
			if ((cc & 0xC0) != 0x80) { /* not a continuation byte? */
				return NULL;
			}
			res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
			c <<= 1; /* to test next bit */
		}
		/* Add the payload bits of the first byte above the continuation bits. */
		res |= ((c & 0x7F) << (count * 5));
		if (res > MAXUNICODE || res <= limits[count] || (0xD800 <= res && res <= 0xDFFF)) {
			return NULL; /* out of range, overlong, or surrogate */
		}
		s += count; /* skip continuation bytes read */
	}
	if (val) {
		*val = (int)res;
	}
	return (const char *)s + 1; /* +1 to include first byte */
}
156 | |
157 /* | |
158 * Check that a string is valid UTF-8 | |
159 * Returns NULL if not | |
160 */ | |
161 const char* check_utf8 (lua_State *L, int idx, size_t *l) { | |
162 size_t pos, len; | |
163 const char *s = luaL_checklstring(L, 1, &len); | |
164 pos = 0; | |
165 while (pos <= len) { | |
166 const char *s1 = utf8_decode(s + pos, NULL); | |
167 if (s1 == NULL) { /* conversion error? */ | |
168 return NULL; | |
169 } | |
170 pos = s1 - s; | |
171 } | |
172 if(l != NULL) { | |
173 *l = len; | |
174 } | |
175 return s; | |
176 } | |
177 | |
178 | |
119 /***************** STRINGPREP *****************/ | 179 /***************** STRINGPREP *****************/ |
120 #ifdef USE_STRINGPREP_ICU | 180 #ifdef USE_STRINGPREP_ICU |
121 | 181 |
122 #include <unicode/usprep.h> | 182 #include <unicode/usprep.h> |
123 #include <unicode/ustring.h> | 183 #include <unicode/ustring.h> |
210 int ret; | 270 int ret; |
211 if(!lua_isstring(L, 1)) { | 271 if(!lua_isstring(L, 1)) { |
212 lua_pushnil(L); | 272 lua_pushnil(L); |
213 return 1; | 273 return 1; |
214 } | 274 } |
215 s = lua_tolstring(L, 1, &len); | 275 s = check_utf8(L, 1, &len); |
216 if (len >= 1024) { | 276 if (s == NULL || len >= 1024 || len != strlen(s)) { |
217 lua_pushnil(L); | 277 lua_pushnil(L); |
218 return 1; /* TODO return error message */ | 278 return 1; /* TODO return error message */ |
219 } | 279 } |
220 strcpy(string, s); | 280 strcpy(string, s); |
221 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile); | 281 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile); |
318 #include <idn-free.h> | 378 #include <idn-free.h> |
319 | 379 |
320 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */ | 380 static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */ |
321 { | 381 { |
322 size_t len; | 382 size_t len; |
323 const char *s = luaL_checklstring(L, 1, &len); | 383 const char *s = check_utf8(L, 1, &len); |
384 if (s == NULL || len != strlen(s)) { | |
385 lua_pushnil(L); | |
386 return 1; /* TODO return error message */ | |
387 } | |
324 char* output = NULL; | 388 char* output = NULL; |
325 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES); | 389 int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES); |
326 if (ret == IDNA_SUCCESS) { | 390 if (ret == IDNA_SUCCESS) { |
327 lua_pushstring(L, output); | 391 lua_pushstring(L, output); |
328 idn_free(output); | 392 idn_free(output); |