浏览代码

fixed url_decode and url_encode with out of bmp char

Nicolas Cannasse 7 年之前
父节点
当前提交
bf76098b25
共有 1 个文件被更改,包括 38 次插入25 次删除
  1. 38 25
      src/std/string.c

+ 38 - 25
src/std/string.c

@@ -245,40 +245,39 @@ HL_PRIM char *hl_to_utf8( const uchar *bytes ) {
 	return (char*)hl_utf16_to_utf8((vbyte*)bytes, 0, &size);
 }
 
+static void hl_buffer_hex( hl_buffer *b, int c ) {
+	static const uchar *hex = USTR("0123456789ABCDEF");
+	hl_buffer_char(b,'%');
+	hl_buffer_char(b,hex[c>>4]);
+	hl_buffer_char(b,hex[c&0xF]);
+}
+
 HL_PRIM vbyte *hl_url_encode( vbyte *str, int *len ) {
 	hl_buffer *b = hl_alloc_buffer();
 	uchar *cstr = (uchar*)str;
+	unsigned int sur;
 	while( true ) {
-		static const uchar *hex = USTR("0123456789ABCDEF");
 		unsigned int c = (unsigned)*cstr++;
 		if( c == 0 ) break;
 		if( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c == '.' )
 			hl_buffer_char(b,(uchar)c);
 		else {
-			hl_buffer_char(b,'%');
 			if( c < 0x80 ) {
-				hl_buffer_char(b,hex[c>>4]);
-				hl_buffer_char(b,hex[c&0xF]);
+				hl_buffer_hex(b,c);
 			} else if( c < 0x800 ) {
-				unsigned int c1 = 0xC0|(c>>6);
-				unsigned int c2 = 0x80|(c&63);
-				hl_buffer_char(b,hex[c1>>4]);
-				hl_buffer_char(b,hex[c1&0xF]);
-				hl_buffer_char(b,'%');
-				hl_buffer_char(b,hex[c2>>4]);
-				hl_buffer_char(b,hex[c2&0xF]);
-			} else {
-				unsigned int c1 = 0xE0|(c>>12);
-				unsigned int c2 = 0x80|((c>>6)&63);
-				unsigned int c3 = 0x80|(c&63);
-				hl_buffer_char(b,hex[c1>>4]);
-				hl_buffer_char(b,hex[c1&0xF]);
-				hl_buffer_char(b,'%');
-				hl_buffer_char(b,hex[c2>>4]);
-				hl_buffer_char(b,hex[c2&0xF]);
-				hl_buffer_char(b,'%');
-				hl_buffer_char(b,hex[c3>>4]);
-				hl_buffer_char(b,hex[c3&0xF]);
+				hl_buffer_hex(b, 0xC0|(c>>6));
+				hl_buffer_hex(b, 0x80|(c&63));
+			} else if( c >= 0xD800 && c <= 0xDBFF && (sur = (unsigned)*cstr) && sur >= 0xDC00 && sur < 0xDFFF ) {
+				cstr++;
+				c = ((((int)c - 0xD800) << 10) | ((int)sur - 0xDC00)) + 0x10000;
+				hl_buffer_hex(b, 0xF0|(c>>18));
+				hl_buffer_hex(b, 0x80|((c >> 12) & 63));
+				hl_buffer_hex(b, 0x80|((c >> 6) & 63));
+				hl_buffer_hex(b, 0x80|(c & 63));
+			} else{
+				hl_buffer_hex(b, 0xE0|(c>>12));
+				hl_buffer_hex(b, 0x80|((c>>6)&63));
+				hl_buffer_hex(b, 0x80|(c&63));
 			}
 		}
 	}
@@ -340,8 +339,22 @@ HL_PRIM vbyte *hl_url_decode( vbyte *str, int *len ) {
 				p3 = decode_hex(&cstr);
 				if( p3 < 0 ) break;
 				c = ((p1 & 0x1F) << 12) | ((p2 & 0x7F) << 6) | (p3 & 0x7F);
-			} else
-				hl_error("TODO");
+			} else {			
+				int k;
+				uchar p2, p3, p4;
+				if( *cstr++ != '%' ) break;
+				p2 = decode_hex(&cstr);
+				if( p2 < 0 ) break;
+				if( *cstr++ != '%' ) break;
+				p3 = decode_hex(&cstr);
+				if( p3 < 0 ) break;
+				if( *cstr++ != '%' ) break;
+				p4 = decode_hex(&cstr);
+				if( p4 < 0 ) break;
+				k = ((p1 & 0x0F) << 18) | ((p2 & 0x7F) << 12) | ((p3 & 0x7F) << 6) | (p4 & 0x7F);
+				hl_buffer_char(b,(uchar)((k >> 10) + 0xD7C0));
+				c = (uchar)((k & 0x3FF) | 0xDC00);
+			}
 		}
 		hl_buffer_char(b,c);
 	}