ソースを参照

Unicode fixes (#246)

Aurel 6 年前
コミット
260b628bad
2 ファイル変更、15 行追加、5 行削除
  1. 4 4
      src/std/string.c
  2. 11 1
      src/std/ucs2.c

+ 4 - 4
src/std/string.c

@@ -126,10 +126,10 @@ HL_PRIM int hl_from_utf8( uchar *out, int outLen, const char *str ) {
 		} else {
 			c2 = (unsigned)*str++;
 			c3 = (unsigned)*str++;
-			c = ((c & 0x0F) << 18) | ((c2 & 0x7F) << 12) | ((c3 & 0x7F) << 6) | ((*str++) & 0x7F);
+			c = (((c & 0x0F) << 18) | ((c2 & 0x7F) << 12) | ((c3 & 0x7F) << 6) | ((*str++) & 0x7F)) - 0x10000;
 			// surrogate pair
 			if( p++ == outLen ) break;
-			*out++ = (uchar)((c >> 10) + 0xD7C0);
+			*out++ = (uchar)((c >> 10) + 0xD800);
 			*out++ = (uchar)((c & 0x3FF) | 0xDC00);
 			continue;
 		}
@@ -359,8 +359,8 @@ HL_PRIM vbyte *hl_url_decode( vbyte *str, int *len ) {
 				if( *cstr++ != '%' ) break;
 				p4 = decode_hex(&cstr);
 				if( p4 < 0 ) break;
-				k = ((p1 & 0x0F) << 18) | ((p2 & 0x7F) << 12) | ((p3 & 0x7F) << 6) | (p4 & 0x7F);
-				hl_buffer_char(b,(uchar)((k >> 10) + 0xD7C0));
+				k = (((p1 & 0x0F) << 18) | ((p2 & 0x7F) << 12) | ((p3 & 0x7F) << 6) | (p4 & 0x7F)) - 0x10000;
+				hl_buffer_char(b,(uchar)((k >> 10) + 0xD800));
 				c = (uchar)((k & 0x3FF) | 0xDC00);
 			}
 		}

+ 11 - 1
src/std/ucs2.c

@@ -50,7 +50,10 @@ int ustrlen_utf8( const uchar *str ) {
 			size++;
 		else if( c < 0x800 )
 			size += 2;
-		else
+		else if( c >= 0xD800 && c <= 0xDFFF ) {
+			str++;
+			size += 4;
+		} else
 			size += 3;
 	}
 	return size;
@@ -133,6 +136,13 @@ int utostr( char *out, int out_size, const uchar *str ) {
 			if( out + 2 > end ) break;
 			*out++ = (char)(0xC0|(c>>6));
 			*out++ = 0x80|(c&63);
+		} else if( c >= 0xD800 && c <= 0xDFFF ) { // surrogate pair
+			if( out + 4 > end ) break;
+			unsigned int full = (((c - 0xD800) << 10) | ((*str++) - 0xDC00)) + 0x10000;
+			*out++ = (char)(0xF0|(full>>18));
+			*out++ = 0x80|((full>>12)&63);
+			*out++ = 0x80|((full>>6)&63);
+			*out++ = 0x80|(full&63);
 		} else {
 			if( out + 3 > end ) break;
 			*out++ = (char)(0xE0|(c>>12));