Browse Source

Add some UTF-8 functionality to SQString

mingodad 4 years ago
parent
commit
2972d252b2

+ 1 - 0
SquiLu/include/sqapi.h

@@ -90,6 +90,7 @@ SQUIRREL_API_FUNC(SQRESULT, tobool, (HSQUIRRELVM v, SQInteger idx))
 SQUIRREL_API_FUNC(SQRESULT, tointeger, (HSQUIRRELVM v, SQInteger idx))
 SQUIRREL_API_FUNC(SQRESULT, tointeger, (HSQUIRRELVM v, SQInteger idx))
 SQUIRREL_API_FUNC(SQRESULT, tofloat, (HSQUIRRELVM v, SQInteger idx))
 SQUIRREL_API_FUNC(SQRESULT, tofloat, (HSQUIRRELVM v, SQInteger idx))
 SQUIRREL_API_FUNC(SQRESULT, getstring, (HSQUIRRELVM v,SQInteger idx,const SQChar **c))
 SQUIRREL_API_FUNC(SQRESULT, getstring, (HSQUIRRELVM v,SQInteger idx,const SQChar **c))
+SQUIRREL_API_FUNC(SQRESULT, str_as_utf8, (HSQUIRRELVM v,SQInteger idx))
 SQUIRREL_API_FUNC(SQRESULT, getstr_and_size, (HSQUIRRELVM v,SQInteger idx,const SQChar **c, SQInteger *size))
 SQUIRREL_API_FUNC(SQRESULT, getstr_and_size, (HSQUIRRELVM v,SQInteger idx,const SQChar **c, SQInteger *size))
 SQUIRREL_API_FUNC(SQRESULT, getinteger, (HSQUIRRELVM v,SQInteger idx,SQInteger *i))
 SQUIRREL_API_FUNC(SQRESULT, getinteger, (HSQUIRRELVM v,SQInteger idx,SQInteger *i))
 SQUIRREL_API_FUNC(SQRESULT, getinteger_ptr, (HSQUIRRELVM v,SQInteger idx,SQInteger **i))
 SQUIRREL_API_FUNC(SQRESULT, getinteger_ptr, (HSQUIRRELVM v,SQInteger idx,SQInteger **i))

+ 4 - 0
SquiLu/include/squirrel.h

@@ -110,6 +110,7 @@ struct SQOuter;
 #define _RT_FLOAT			0x00000004
 #define _RT_FLOAT			0x00000004
 #define _RT_BOOL			0x00000008
 #define _RT_BOOL			0x00000008
 #define _RT_STRING			0x00000010
 #define _RT_STRING			0x00000010
+#define _RT_STRING_UTF8		0x00100010
 #define _RT_TABLE			0x00000020
 #define _RT_TABLE			0x00000020
 #define _RT_ARRAY			0x00000040
 #define _RT_ARRAY			0x00000040
 #define _RT_USERDATA		0x00000080
 #define _RT_USERDATA		0x00000080
@@ -130,6 +131,7 @@ typedef enum tagSQObjectType{
 	OT_FLOAT =			(_RT_FLOAT|SQOBJECT_NUMERIC|SQOBJECT_CANBEFALSE),
 	OT_FLOAT =			(_RT_FLOAT|SQOBJECT_NUMERIC|SQOBJECT_CANBEFALSE),
 	OT_BOOL =			(_RT_BOOL|SQOBJECT_CANBEFALSE),
 	OT_BOOL =			(_RT_BOOL|SQOBJECT_CANBEFALSE),
 	OT_STRING =			(_RT_STRING|SQOBJECT_REF_COUNTED),
 	OT_STRING =			(_RT_STRING|SQOBJECT_REF_COUNTED),
+	OT_STRING_UTF8 =	(_RT_STRING_UTF8|SQOBJECT_REF_COUNTED),
 	OT_TABLE =			(_RT_TABLE|SQOBJECT_REF_COUNTED|SQOBJECT_DELEGABLE),
 	OT_TABLE =			(_RT_TABLE|SQOBJECT_REF_COUNTED|SQOBJECT_DELEGABLE),
 	OT_ARRAY =			(_RT_ARRAY|SQOBJECT_REF_COUNTED),
 	OT_ARRAY =			(_RT_ARRAY|SQOBJECT_REF_COUNTED),
 	OT_USERDATA =		(_RT_USERDATA|SQOBJECT_REF_COUNTED|SQOBJECT_DELEGABLE),
 	OT_USERDATA =		(_RT_USERDATA|SQOBJECT_REF_COUNTED|SQOBJECT_DELEGABLE),
@@ -164,6 +166,7 @@ typedef union tagSQObjectValue
 	struct SQGenerator *pGenerator;
 	struct SQGenerator *pGenerator;
 	struct SQNativeClosure *pNativeClosure;
 	struct SQNativeClosure *pNativeClosure;
 	struct SQString *pString;
 	struct SQString *pString;
+	struct SQStringUtf8 *pStrUtf8;
 	struct SQUserData *pUserData;
 	struct SQUserData *pUserData;
 	SQInteger nInteger;
 	SQInteger nInteger;
 	SQFloat fFloat;
 	SQFloat fFloat;
@@ -251,6 +254,7 @@ typedef struct {
 #define sq_isgenerator(o) ((o)._type==OT_GENERATOR)
 #define sq_isgenerator(o) ((o)._type==OT_GENERATOR)
 #define sq_isnativeclosure(o) ((o)._type==OT_NATIVECLOSURE)
 #define sq_isnativeclosure(o) ((o)._type==OT_NATIVECLOSURE)
 #define sq_isstring(o) ((o)._type==OT_STRING)
 #define sq_isstring(o) ((o)._type==OT_STRING)
+#define sq_isstringutf8(o) ((o)._type==OT_STRING_UTF8)
 #define sq_isinteger(o) ((o)._type==OT_INTEGER)
 #define sq_isinteger(o) ((o)._type==OT_INTEGER)
 #define sq_isfloat(o) ((o)._type==OT_FLOAT)
 #define sq_isfloat(o) ((o)._type==OT_FLOAT)
 #define sq_isuserpointer(o) ((o)._type==OT_USERPOINTER)
 #define sq_isuserpointer(o) ((o)._type==OT_USERPOINTER)

+ 1 - 0
SquiLu/sqstdlib/sqstdaux.cpp

@@ -58,6 +58,7 @@ void sqstd_printcallstack(HSQUIRRELVM v)
 					pf(v,_SC("[%s] USERPOINTER\n"),name);
 					pf(v,_SC("[%s] USERPOINTER\n"),name);
 					break;
 					break;
 				case OT_STRING:
 				case OT_STRING:
+				case OT_STRING_UTF8:
 					sq_getstring(v,-1,&s);
 					sq_getstring(v,-1,&s);
 					if(sq_getsize(v, -1) > max_str_size)
 					if(sq_getsize(v, -1) > max_str_size)
 					{
 					{

+ 34 - 0
SquiLu/sqstdlib/sqstdstream.cpp

@@ -345,6 +345,40 @@ SQInteger _stream_writen(HSQUIRRELVM v)
 		self->Write(&d, sizeof(double));
 		self->Write(&d, sizeof(double));
 			  }
 			  }
 		break;
 		break;
+	case 'u': { //utf8
+		SQInteger ch;
+		char c;
+		sq_getinteger(v, 2, &ch);
+		if (ch < 0x80) {
+		    c = (char)ch;
+		    self->Write(&c, sizeof(char));
+		}
+		else if (ch < 0x800) {
+		    c = (char)((ch >> 6) | 0xC0);
+		    self->Write(&c, sizeof(char));
+		    c = (char)((ch & 0x3F) | 0x80);
+		    self->Write(&c, sizeof(char));
+		}
+		else if (ch < 0x10000) {
+		    c = (char)((ch >> 12) | 0xE0);
+		    self->Write(&c, sizeof(char));
+		    c = (char)(((ch >> 6) & 0x3F) | 0x80);
+		    self->Write(&c, sizeof(char));
+		    c = (char)((ch & 0x3F) | 0x80);
+		    self->Write(&c, sizeof(char));
+		}
+		else if (ch < 0x110000) {
+		    c = (char)((ch >> 18) | 0xF0);
+		    self->Write(&c, sizeof(char));
+		    c = (char)(((ch >> 12) & 0x3F) | 0x80);
+		    self->Write(&c, sizeof(char));
+		    c = (char)(((ch >> 6) & 0x3F) | 0x80);
+		    self->Write(&c, sizeof(char));
+		    c = (char)((ch & 0x3F) | 0x80);
+		    self->Write(&c, sizeof(char));
+		}
+		}
+		break;
 	default:
 	default:
 		return sq_throwerror(v, _SC("invalid format"));
 		return sq_throwerror(v, _SC("invalid format"));
 	}
 	}

+ 15 - 3
SquiLu/squirrel/sqapi.cpp

@@ -20,6 +20,7 @@ static void sq_raise_type_error(HSQUIRRELVM v, SQObjectType type,SQObjectPtr &o)
     v->Raise_Error(_SC("wrong argument type, expected '%s' got '%.50s'"),IdType2Name(type),_stringval(oval));
     v->Raise_Error(_SC("wrong argument type, expected '%s' got '%.50s'"),IdType2Name(type),_stringval(oval));
 }
 }
 #define _CHECK_OBJ_TYPE(v,otype,o) if(sq_type(o) != otype) {sq_raise_type_error(v, otype, o); return SQ_ERROR;}
 #define _CHECK_OBJ_TYPE(v,otype,o) if(sq_type(o) != otype) {sq_raise_type_error(v, otype, o); return SQ_ERROR;}
+#define _CHECK_OBJ_TYPE_STRING(v,o) if(!((sq_type(o) == OT_STRING) || (sq_type(o) == OT_STRING_UTF8))) {sq_raise_type_error(v, OT_STRING, o); return SQ_ERROR;}
 
 
 static bool sq_aux_gettypedarg(HSQUIRRELVM v,SQInteger idx,SQObjectType type,SQObjectPtr **o)
 static bool sq_aux_gettypedarg(HSQUIRRELVM v,SQInteger idx,SQObjectType type,SQObjectPtr **o)
 {
 {
@@ -240,7 +241,7 @@ SQUnsignedInteger sq_getvmrefcount(HSQUIRRELVM SQ_UNUSED_ARG(v), const HSQOBJECT
 
 
 const SQChar *sq_objtostring(const HSQOBJECT *o)
 const SQChar *sq_objtostring(const HSQOBJECT *o)
 {
 {
-	if(sq_type(*o) == OT_STRING) {
+	if(sq_type(*o) & _RT_STRING) {
 		return _stringval(*o);
 		return _stringval(*o);
 	}
 	}
 	return NULL;
 	return NULL;
@@ -1010,10 +1011,19 @@ SQRESULT sq_getbool(HSQUIRRELVM v,SQInteger idx,SQBool *b)
 	return SQ_ERROR;
 	return SQ_ERROR;
 }
 }
 
 
+/*
+** Marks the string at stack position idx as UTF-8 and pushes it.
+**
+** The object is fetched by reference, so the stack slot at idx itself is
+** retagged to OT_STRING_UTF8 before the (now retagged) value is pushed on
+** top of the stack.
+**
+** Returns SQ_OK on success. If the value at idx is neither OT_STRING nor
+** OT_STRING_UTF8, a type error is raised and SQ_ERROR is returned (both
+** done by _CHECK_OBJ_TYPE_STRING).
+*/
+SQRESULT sq_str_as_utf8(HSQUIRRELVM v,SQInteger idx)
+{
+	SQObjectPtr &str = stack_get(v,idx);
+	_CHECK_OBJ_TYPE_STRING(v, str);
+	sq_type(str) = OT_STRING_UTF8;	/* retag in place; the string data is untouched */
+	v->Push(str);
+	return SQ_OK;
+}
+
 SQRESULT sq_getstring(HSQUIRRELVM v,SQInteger idx,const SQChar **c)
 SQRESULT sq_getstring(HSQUIRRELVM v,SQInteger idx,const SQChar **c)
 {
 {
 	SQObjectPtr &o = stack_get(v,idx);
 	SQObjectPtr &o = stack_get(v,idx);
-	_CHECK_OBJ_TYPE(v, OT_STRING, o);
+	_CHECK_OBJ_TYPE_STRING(v, o);
 	*c = _stringval(o);
 	*c = _stringval(o);
 	return SQ_OK;
 	return SQ_OK;
 }
 }
@@ -1021,7 +1031,7 @@ SQRESULT sq_getstring(HSQUIRRELVM v,SQInteger idx,const SQChar **c)
 SQRESULT sq_getstr_and_size(HSQUIRRELVM v,SQInteger idx,const SQChar **c, SQInteger *size)
 SQRESULT sq_getstr_and_size(HSQUIRRELVM v,SQInteger idx,const SQChar **c, SQInteger *size)
 {
 {
 	SQObjectPtr &o = stack_get(v,idx);
 	SQObjectPtr &o = stack_get(v,idx);
-	_CHECK_OBJ_TYPE(v, OT_STRING, o);
+	_CHECK_OBJ_TYPE_STRING(v, o);
 	*c = _stringval(o);
 	*c = _stringval(o);
 	*size = _string(o)->_len;
 	*size = _string(o)->_len;
 	return SQ_OK;
 	return SQ_OK;
@@ -1051,6 +1061,7 @@ SQInteger sq_getsize(HSQUIRRELVM v, SQInteger idx)
 	SQObjectPtr &o = stack_get(v, idx);
 	SQObjectPtr &o = stack_get(v, idx);
 	SQObjectType type = sq_type(o);
 	SQObjectType type = sq_type(o);
 	switch(type) {
 	switch(type) {
+ 	case OT_STRING_UTF8:
 	case OT_STRING:		return _string(o)->_len;
 	case OT_STRING:		return _string(o)->_len;
 	case OT_TABLE:		return _table(o)->CountUsed();
 	case OT_TABLE:		return _table(o)->CountUsed();
 	case OT_ARRAY:		return _array(o)->Size();
 	case OT_ARRAY:		return _array(o)->Size();
@@ -2022,6 +2033,7 @@ SQRESULT sq_getdefaultdelegate(HSQUIRRELVM v,SQObjectType t)
 	switch(t) {
 	switch(t) {
 	case OT_TABLE: v->Push(ss->_table_default_delegate); break;
 	case OT_TABLE: v->Push(ss->_table_default_delegate); break;
 	case OT_ARRAY: v->Push(ss->_array_default_delegate); break;
 	case OT_ARRAY: v->Push(ss->_array_default_delegate); break;
+	case OT_STRING_UTF8:
 	case OT_STRING: v->Push(ss->_string_default_delegate); break;
 	case OT_STRING: v->Push(ss->_string_default_delegate); break;
 	case OT_INTEGER: case OT_FLOAT: v->Push(ss->_number_default_delegate); break;
 	case OT_INTEGER: case OT_FLOAT: v->Push(ss->_number_default_delegate); break;
 	case OT_GENERATOR: v->Push(ss->_generator_default_delegate); break;
 	case OT_GENERATOR: v->Push(ss->_generator_default_delegate); break;

+ 12 - 4
SquiLu/squirrel/sqbaselib.cpp

@@ -754,6 +754,7 @@ static SQRESULT default_delegate_tofloat(HSQUIRRELVM v)
 {
 {
 	SQObjectPtr &o=stack_get(v,1);
 	SQObjectPtr &o=stack_get(v,1);
 	switch(sq_type(o)){
 	switch(sq_type(o)){
+	case OT_STRING_UTF8:
 	case OT_STRING:{
 	case OT_STRING:{
 		SQObjectPtr res;
 		SQObjectPtr res;
 		if(str2num(_stringval(o),res)){
 		if(str2num(_stringval(o),res)){
@@ -778,6 +779,7 @@ static SQRESULT default_delegate_tointeger(HSQUIRRELVM v)
 {
 {
 	SQObjectPtr &o=stack_get(v,1);
 	SQObjectPtr &o=stack_get(v,1);
 	switch(sq_type(o)){
 	switch(sq_type(o)){
+	case OT_STRING_UTF8:
 	case OT_STRING:{
 	case OT_STRING:{
 		SQObjectPtr res;
 		SQObjectPtr res;
 		SQInteger base;
 		SQInteger base;
@@ -1386,6 +1388,7 @@ static SQRESULT array_concat0 (HSQUIRRELVM v, int allowAll) {
       SQObjectPtr str;
       SQObjectPtr str;
       arr->_get2(i, o);
       arr->_get2(i, o);
       switch(sq_type(o)){
       switch(sq_type(o)){
+          case OT_STRING_UTF8:
           case OT_STRING:
           case OT_STRING:
               break;
               break;
           case OT_INTEGER:
           case OT_INTEGER:
@@ -1405,7 +1408,7 @@ static SQRESULT array_concat0 (HSQUIRRELVM v, int allowAll) {
 
 
       const SQChar *value;
       const SQChar *value;
       SQInteger value_size;
       SQInteger value_size;
-      if(sq_type(o) == OT_STRING) {
+      if(sq_type(o) & _RT_STRING) {
 		value = _stringval(o);
 		value = _stringval(o);
 		value_size = _string(o)->_len;
 		value_size = _string(o)->_len;
       }
       }
@@ -1686,7 +1689,7 @@ static SQRESULT string_gsub(HSQUIRRELVM v)
     SQ_GET_STRING(v, 2, pattern);
     SQ_GET_STRING(v, 2, pattern);
     SQ_OPT_INTEGER(v, 4, max_sub, 0);
     SQ_OPT_INTEGER(v, 4, max_sub, 0);
     SQObjectType rtype = sq_gettype(v, 3);
     SQObjectType rtype = sq_gettype(v, 3);
-    if(rtype == OT_STRING){
+    if(rtype & _RT_STRING){
         SQ_GET_STRING(v, 3, replacement);
         SQ_GET_STRING(v, 3, replacement);
         lua_char_buffer_st *buf = lua_str_gsub (src, src_size, pattern, pattern_size,
         lua_char_buffer_st *buf = lua_str_gsub (src, src_size, pattern, pattern_size,
                               replacement, replacement_size, max_sub, &error_ptr, 0, 0);
                               replacement, replacement_size, max_sub, &error_ptr, 0, 0);
@@ -2177,11 +2180,15 @@ static int utf8Len(const unsigned char *s, size_t length)
 static SQRESULT string_utf8Len(HSQUIRRELVM v) {
 static SQRESULT string_utf8Len(HSQUIRRELVM v) {
     SQ_FUNC_VARS_NO_TOP(v);
     SQ_FUNC_VARS_NO_TOP(v);
     SQ_GET_STRING(v, 1, src);
     SQ_GET_STRING(v, 1, src);
-
     sq_pushinteger(v, utf8Len((const unsigned char*)src, src_size));
     sq_pushinteger(v, utf8Len((const unsigned char*)src, src_size));
     return 1;
     return 1;
 }
 }
 
 
+/*
+** Native implementation of the string method registered as "asutf8".
+** Delegates to sq_str_as_utf8() on stack slot 1 and returns 1 once that
+** call has pushed the retagged string, or SQ_ERROR if the call failed.
+*/
+static SQRESULT string_asutf8(HSQUIRRELVM v) {
+    return (sq_str_as_utf8(v, 1) == SQ_OK) ? 1 : SQ_ERROR;
+}
+
 static bool isValidUtf8(const unsigned char *s, size_t length)
 static bool isValidUtf8(const unsigned char *s, size_t length)
 {
 {
     return utf8Len(s, length) >= 0;
     return utf8Len(s, length) >= 0;
@@ -2320,7 +2327,7 @@ static SQRESULT string_split_csv(HSQUIRRELVM v) {
 static SQRESULT string_split(HSQUIRRELVM v) {
 static SQRESULT string_split(HSQUIRRELVM v) {
     SQ_FUNC_VARS_NO_TOP(v);
     SQ_FUNC_VARS_NO_TOP(v);
     SQObjectType rtype = sq_gettype(v, 2);
     SQObjectType rtype = sq_gettype(v, 2);
-    if(rtype == OT_STRING)
+    if(rtype & _RT_STRING)
     {
     {
         const SQChar *str,*seps;
         const SQChar *str,*seps;
         SQChar *stemp,*tok;
         SQChar *stemp,*tok;
@@ -2688,6 +2695,7 @@ SQRegFunction SQSharedState::_string_default_delegate_funcz[]={
 	{_SC("edit_distance"),string_edit_distance,-2, _SC("ssi"), false},
 	{_SC("edit_distance"),string_edit_distance,-2, _SC("ssi"), false},
 	{_SC("mod_97_10"),string_mod_97_10,1, _SC("s"), false},
 	{_SC("mod_97_10"),string_mod_97_10,1, _SC("s"), false},
 	{_SC("iso88959_to_utf8"),string_iso88959_to_utf8,1, _SC("s"), false},
 	{_SC("iso88959_to_utf8"),string_iso88959_to_utf8,1, _SC("s"), false},
+	{_SC("asutf8"),string_asutf8,1, _SC("s"), false},
 	{_SC("isvalidutf8"),string_isvalidutf8,1, _SC("s"), false},
 	{_SC("isvalidutf8"),string_isvalidutf8,1, _SC("s"), false},
 	{_SC("utf8Len"),string_utf8Len,1, _SC("s"), false},
 	{_SC("utf8Len"),string_utf8Len,1, _SC("s"), false},
 	{_SC("longestcommonsubstr"),string_longestcommonsubstr,2, _SC("ss"), false},
 	{_SC("longestcommonsubstr"),string_longestcommonsubstr,2, _SC("ss"), false},

+ 1 - 0
SquiLu/squirrel/sqdebug.cpp

@@ -74,6 +74,7 @@ void SQVM::Raise_Error(const SQObjectPtr &desc)
 SQString *SQVM::PrintObjVal(const SQObjectPtr &o)
 SQString *SQVM::PrintObjVal(const SQObjectPtr &o)
 {
 {
     switch(sq_type(o)) {
     switch(sq_type(o)) {
+    case OT_STRING_UTF8:
     case OT_STRING: return _string(o);
     case OT_STRING: return _string(o);
     case OT_INTEGER:
     case OT_INTEGER:
         scsprintf(_sp(sq_rsl(NUMBER_MAX_CHAR+1)),sq_rsl(NUMBER_MAX_CHAR), _PRINT_INT_FMT, _integer(o));
         scsprintf(_sp(sq_rsl(NUMBER_MAX_CHAR+1)),sq_rsl(NUMBER_MAX_CHAR), _PRINT_INT_FMT, _integer(o));

+ 1 - 0
SquiLu/squirrel/sqfuncstate.cpp

@@ -27,6 +27,7 @@ SQInstructionDesc g_InstrDesc[]={
 void SQDumpLiteral(SQObjectPtr &o)
 void SQDumpLiteral(SQObjectPtr &o)
 {
 {
 	switch(sq_type(o)){
 	switch(sq_type(o)){
+		case OT_STRING_UTF8:
 		case OT_STRING: {
 		case OT_STRING: {
 		    int i, len, buf_idx = 0;
 		    int i, len, buf_idx = 0;
 		    #define BUF_SIZE 64
 		    #define BUF_SIZE 64

+ 75 - 2
SquiLu/squirrel/sqobject.cpp

@@ -20,6 +20,7 @@ const SQChar *IdType2Name(SQObjectType type)
 	case _RT_INTEGER:return _SC("integer");
 	case _RT_INTEGER:return _SC("integer");
 	case _RT_FLOAT:return _SC("float");
 	case _RT_FLOAT:return _SC("float");
 	case _RT_BOOL:return _SC("bool");
 	case _RT_BOOL:return _SC("bool");
+	case _RT_STRING_UTF8:return _SC("strutf8");
 	case _RT_STRING:return _SC("string");
 	case _RT_STRING:return _SC("string");
 	case _RT_TABLE:return _SC("table");
 	case _RT_TABLE:return _SC("table");
 	case _RT_ARRAY:return _SC("array");
 	case _RT_ARRAY:return _SC("array");
@@ -79,7 +80,7 @@ void SQString::Release()
 SQInteger SQString::Next(const SQObjectPtr &refpos, SQObjectPtr &outkey, SQObjectPtr &outval)
 SQInteger SQString::Next(const SQObjectPtr &refpos, SQObjectPtr &outkey, SQObjectPtr &outval)
 {
 {
 	SQInteger idx = (SQInteger)SQTranslateIndex(refpos);
 	SQInteger idx = (SQInteger)SQTranslateIndex(refpos);
-	while(idx < _len){
+	if(idx < _len){
 		outkey = (SQInteger)idx;
 		outkey = (SQInteger)idx;
 		outval = (SQInteger)((SQUnsignedInteger)_val[idx]);
 		outval = (SQInteger)((SQUnsignedInteger)_val[idx]);
 		//return idx for the next iteration
 		//return idx for the next iteration
@@ -89,6 +90,75 @@ SQInteger SQString::Next(const SQObjectPtr &refpos, SQObjectPtr &outkey, SQObjec
 	return -1;
 	return -1;
 }
 }
 
 
+/*
+** Decode one UTF-8 sequence, returning the number of bytes it occupies,
+** or -1 if the byte sequence is invalid.  The array 'limits' stores the
+** minimum value for each sequence length, to check for overlong
+** representations. Its first entry forces an error for non-ascii bytes
+** with no continuation bytes (count == 0).
+*/
+
+#define MAXUNICODE	0x10FFFFu
+
+#define MAXUTF		0x7FFFFFFFu
+
+/*
+** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits.
+*/
+#if (UINT_MAX >> 30) >= 1
+typedef unsigned int utfint;
+#else
+typedef unsigned long utfint;
+#endif
+
+/*
+** Decodes the single UTF-8 sequence that starts at s[0].
+**   s      - bytes to decode; no length is passed, so decoding stops (with
+**            an error) at the first byte that is not a continuation byte,
+**            e.g. a terminating NUL.
+**   val    - if not NULL, receives the decoded code point on success.
+**   strict - when non-zero, code points above MAXUNICODE and the surrogate
+**            range 0xD800-0xDFFF are rejected as well.
+** Returns the number of bytes consumed (lead byte included), or -1 when
+** the sequence is invalid.
+*/
+static int utf8_decode (const char *s, utfint *val, int strict) {
+  static const utfint limits[] =
+        {~(utfint)0, 0x80, 0x800, 0x10000u, 0x200000u, 0x4000000u};
+  unsigned int c = (unsigned char)s[0];
+  int count = 0; /* number of continuation bytes read so far */
+  utfint res = 0;  /* decoded code point */
+  if (c < 0x80) { /* ascii? */
+    res = c;
+    count = 1;
+  } else {
+    for (; c & 0x40; c <<= 1) {  /* while it needs continuation bytes... */
+      unsigned int cc = (unsigned char)s[++count];  /* read next byte */
+      if ((cc & 0xC0) != 0x80)  /* not a continuation byte? */
+        return -1;  /* invalid byte sequence */
+      res = (res << 6) | (cc & 0x3F);  /* add lower 6 bits from cont. byte */
+    }
+    /* Reject over-long sequences before shifting: a 0xFF lead byte followed
+    ** by seven continuation bytes yields count == 7, and a shift by
+    ** count * 5 == 35 bits is undefined when utfint is 32 bits wide. */
+    if (count > 5)
+      return -1;  /* invalid byte sequence */
+    res |= ((utfint)(c & 0x7F) << (count * 5));  /* add first byte */
+    if (res > MAXUTF || res < limits[count])
+      return -1;  /* out of range or overlong representation */
+    ++count;  /* account for the lead byte */
+  }
+  if (strict) {
+    /* check for invalid code points; too large or surrogates */
+    if (res > MAXUNICODE || (0xD800u <= res && res <= 0xDFFFu))
+      return -1;
+  }
+  if (val) *val = res;
+  return count;  /* total bytes in the sequence, lead byte included */
+}
+
+/*
+** Iteration step over the string, one UTF-8 sequence at a time.
+**   refpos - iterator state, converted to a byte offset by SQTranslateIndex.
+**   outkey - set to the byte offset of the current sequence (written as
+**            soon as the offset is inside the string, even if decoding
+**            then fails).
+**   outval - set to the decoded code point when decoding succeeds.
+** Returns the byte offset of the next sequence, or -1 when the offset is
+** past the end or the bytes at the offset do not form a valid sequence
+** (strict decoding).
+*/
+SQInteger SQStringUtf8::Next(const SQObjectPtr &refpos, SQObjectPtr &outkey, SQObjectPtr &outval)
+{
+	const SQInteger pos = (SQInteger)SQTranslateIndex(refpos);
+	if(pos >= _len) return -1;	//nothing to iterate anymore
+
+	outkey = (SQInteger)pos;
+	utfint codepoint;
+	const int nbytes = utf8_decode((const char *)(_val + pos), &codepoint, 1);
+	if(nbytes <= 0) return -1;	//invalid sequence ends the iteration
+
+	outval = (SQInteger)codepoint;
+	//return the offset for the next iteration
+	return pos + nbytes;
+}
+
 SQUnsignedInteger SQTranslateIndex(const SQObjectPtr &idx)
 SQUnsignedInteger SQTranslateIndex(const SQObjectPtr &idx)
 {
 {
 	switch(sq_type(idx)){
 	switch(sq_type(idx)){
@@ -268,7 +338,7 @@ const SQChar* SQFunctionProto::GetLocal(SQVM *vm,SQUnsignedInteger stackbase,SQU
 
 
 SQInteger SQFunctionProto::GetLine(SQInstruction *curr)
 SQInteger SQFunctionProto::GetLine(SQInstruction *curr)
 {
 {
-	SQInteger op = (SQInteger)(curr-_instructions);
+    SQInteger op = (SQInteger)(curr-_instructions);
     SQInteger line=_lineinfos[0]._line;
     SQInteger line=_lineinfos[0]._line;
     SQInteger low = 0;
     SQInteger low = 0;
     SQInteger high = _nlineinfos - 1;
     SQInteger high = _nlineinfos - 1;
@@ -362,6 +432,7 @@ static bool WriteObjectAsCode(HSQUIRRELVM v,SQUserPointer up,SQWRITEFUNC write,S
 	SQChar buf[32];
 	SQChar buf[32];
 	SQInteger sz;
 	SQInteger sz;
 	switch(sq_type(o)){
 	switch(sq_type(o)){
+	case OT_STRING_UTF8:
 	case OT_STRING:{
 	case OT_STRING:{
             SQInteger str_size = _string(o)->_len;
             SQInteger str_size = _string(o)->_len;
             if(str_size){
             if(str_size){
@@ -410,6 +481,7 @@ static bool WriteObject(HSQUIRRELVM v,SQUserPointer up,SQWRITEFUNC write,SQObjec
 	SQUnsignedInteger32 _type = (SQUnsignedInteger32)sq_type(o);
 	SQUnsignedInteger32 _type = (SQUnsignedInteger32)sq_type(o);
 	_CHECK_IO(SafeWrite(v,write,up,&_type,sizeof(_type)));
 	_CHECK_IO(SafeWrite(v,write,up,&_type,sizeof(_type)));
 	switch(sq_type(o)){
 	switch(sq_type(o)){
+	case OT_STRING_UTF8:
 	case OT_STRING:
 	case OT_STRING:
 		_CHECK_IO(SafeWrite(v,write,up,&_string(o)->_len,sizeof(SQInteger)));
 		_CHECK_IO(SafeWrite(v,write,up,&_string(o)->_len,sizeof(SQInteger)));
 		_CHECK_IO(SafeWrite(v,write,up,_stringval(o),rsl(_string(o)->_len)));
 		_CHECK_IO(SafeWrite(v,write,up,_stringval(o),rsl(_string(o)->_len)));
@@ -434,6 +506,7 @@ static bool ReadObject(HSQUIRRELVM v,SQUserPointer up,SQREADFUNC read,SQObjectPt
 	_CHECK_IO(SafeRead(v,read,up,&_type,sizeof(_type)));
 	_CHECK_IO(SafeRead(v,read,up,&_type,sizeof(_type)));
 	SQObjectType t = (SQObjectType)_type;
 	SQObjectType t = (SQObjectType)_type;
 	switch(t){
 	switch(t){
+	case OT_STRING_UTF8:
 	case OT_STRING:{
 	case OT_STRING:{
 		SQInteger len;
 		SQInteger len;
 		_CHECK_IO(SafeRead(v,read,up,&len,sizeof(SQInteger)));
 		_CHECK_IO(SafeRead(v,read,up,&len,sizeof(SQInteger)));

+ 2 - 0
SquiLu/squirrel/sqobject.h

@@ -137,6 +137,7 @@ struct SQObjectPtr;
 #define _integer(obj) ((obj)._unVal.nInteger)
 #define _integer(obj) ((obj)._unVal.nInteger)
 #define _float(obj) ((obj)._unVal.fFloat)
 #define _float(obj) ((obj)._unVal.fFloat)
 #define _string(obj) ((obj)._unVal.pString)
 #define _string(obj) ((obj)._unVal.pString)
+#define _stringutf8(obj) ((obj)._unVal.pStrUtf8)
 #define _table(obj) ((obj)._unVal.pTable)
 #define _table(obj) ((obj)._unVal.pTable)
 #define _array(obj) ((obj)._unVal.pArrayBase)
 #define _array(obj) ((obj)._unVal.pArrayBase)
 #define _closure(obj) ((obj)._unVal.pClosure)
 #define _closure(obj) ((obj)._unVal.pClosure)
@@ -250,6 +251,7 @@ struct SQObjectPtr : public SQObject
 	_REF_TYPE_DECL(OT_OUTER,SQOuter,Outer)
 	_REF_TYPE_DECL(OT_OUTER,SQOuter,Outer)
 	_REF_TYPE_DECL(OT_GENERATOR,SQGenerator,Generator)
 	_REF_TYPE_DECL(OT_GENERATOR,SQGenerator,Generator)
 	_REF_TYPE_DECL(OT_STRING,SQString,String)
 	_REF_TYPE_DECL(OT_STRING,SQString,String)
+	_REF_TYPE_DECL(OT_STRING_UTF8,SQStringUtf8,StrUtf8)
 	_REF_TYPE_DECL(OT_USERDATA,SQUserData,UserData)
 	_REF_TYPE_DECL(OT_USERDATA,SQUserData,UserData)
 	_REF_TYPE_DECL(OT_WEAKREF,SQWeakRef,WeakRef)
 	_REF_TYPE_DECL(OT_WEAKREF,SQWeakRef,WeakRef)
 	_REF_TYPE_DECL(OT_THREAD,SQVM,Thread)
 	_REF_TYPE_DECL(OT_THREAD,SQVM,Thread)

+ 3 - 0
SquiLu/squirrel/sqstring.h

@@ -32,6 +32,9 @@ public:
 	//<FIXME> Padding not accounted
 	//<FIXME> Padding not accounted
 };
 };
 
 
+/* View of an SQString used for OT_STRING_UTF8 values: it adds no members and only declares its own Next(), which steps through the string by UTF-8 sequence. */
+struct SQStringUtf8 : SQString {
+    SQInteger Next(const SQObjectPtr &refpos, SQObjectPtr &outkey, SQObjectPtr &outval);
+};
 
 
 
 
 #endif //_SQSTRING_H_
 #endif //_SQSTRING_H_

+ 2 - 1
SquiLu/squirrel/sqtable.h

@@ -15,6 +15,7 @@
 inline SQHash HashObj(const SQObject &key)
 inline SQHash HashObj(const SQObject &key)
 {
 {
 	switch(sq_type(key)) {
 	switch(sq_type(key)) {
+		case OT_STRING_UTF8:
 		case OT_STRING:		return _string(key)->_hash;
 		case OT_STRING:		return _string(key)->_hash;
 		case OT_FLOAT:		return (SQHash)((SQInteger)_float(key));
 		case OT_FLOAT:		return (SQHash)((SQInteger)_float(key));
 		case OT_BOOL: case OT_INTEGER:	return (SQHash)((SQInteger)_integer(key));
 		case OT_BOOL: case OT_INTEGER:	return (SQHash)((SQInteger)_integer(key));
@@ -81,7 +82,7 @@ public:
 		_HashNode *n = &_nodes[SQTABLE_HASH_NUMNODES(hash)];
 		_HashNode *n = &_nodes[SQTABLE_HASH_NUMNODES(hash)];
 		_HashNode *res = NULL;
 		_HashNode *res = NULL;
 		do{
 		do{
-			if(sq_type(n->key) == OT_STRING && (scstrcmp(_stringval(n->key),key) == 0)){
+			if(sq_type(n->key) & _RT_STRING && (scstrcmp(_stringval(n->key),key) == 0)){
 				res = n;
 				res = n;
 				break;
 				break;
 			}
 			}

+ 9 - 0
SquiLu/squirrel/sqvm.cpp

@@ -290,6 +290,7 @@ bool SQVM::ObjCmp(const SQObjectPtr &o1,const SQObjectPtr &o2,SQInteger &result)
 		if(_rawval(o1) == _rawval(o2))_RET_SUCCEED(0);
 		if(_rawval(o1) == _rawval(o2))_RET_SUCCEED(0);
 		SQObjectPtr res;
 		SQObjectPtr res;
 		switch(t1){
 		switch(t1){
+	        case OT_STRING_UTF8:
 		case OT_STRING:
 		case OT_STRING:
 			_RET_SUCCEED(sq_l_strcmp(o1,o2));
 			_RET_SUCCEED(sq_l_strcmp(o1,o2));
 		case OT_INTEGER:
 		case OT_INTEGER:
@@ -364,6 +365,7 @@ bool SQVM::ToString(const SQObjectPtr &o,SQObjectPtr &res)
 {
 {
 	switch(sq_type(o)) {
 	switch(sq_type(o)) {
 	case OT_STRING:
 	case OT_STRING:
+	case OT_STRING_UTF8:
 		res = o;
 		res = o;
 		return true;
 		return true;
 	case OT_FLOAT:
 	case OT_FLOAT:
@@ -612,6 +614,9 @@ bool SQVM::FOREACH_OP(SQObjectPtr &o1,SQObjectPtr &o2,SQObjectPtr
 	case OT_STRING:
 	case OT_STRING:
 		if((nrefidx = _string(o1)->Next(o4, o2, o3)) == -1)_FINISH(exitpos);
 		if((nrefidx = _string(o1)->Next(o4, o2, o3)) == -1)_FINISH(exitpos);
 		o4 = (SQInteger)nrefidx; _FINISH(1);
 		o4 = (SQInteger)nrefidx; _FINISH(1);
+	case OT_STRING_UTF8:
+		if((nrefidx = _stringutf8(o1)->Next(o4, o2, o3)) == -1)_FINISH(exitpos);
+		o4 = (SQInteger)nrefidx; _FINISH(1);
 	case OT_CLASS:
 	case OT_CLASS:
 		if((nrefidx = _class(o1)->Next(o4, o2, o3)) == -1)_FINISH(exitpos);
 		if((nrefidx = _class(o1)->Next(o4, o2, o3)) == -1)_FINISH(exitpos);
 		o4 = (SQInteger)nrefidx; _FINISH(1);
 		o4 = (SQInteger)nrefidx; _FINISH(1);
@@ -1546,6 +1551,7 @@ bool SQVM::Get(const SQObjectPtr &self,const SQObjectPtr &key,SQObjectPtr &dest,
 	case OT_CLASS:
 	case OT_CLASS:
 		if(_class(self)->Get(key,dest)) return true;
 		if(_class(self)->Get(key,dest)) return true;
 		break;
 		break;
+	case OT_STRING_UTF8:
 	case OT_STRING:
 	case OT_STRING:
 		if(sq_isnumeric(key)){
 		if(sq_isnumeric(key)){
 			SQInteger n = tointeger(key);
 			SQInteger n = tointeger(key);
@@ -1588,6 +1594,7 @@ bool SQVM::InvokeDefaultDelegate(const SQObjectPtr &self,const SQObjectPtr &key,
 		case OT_CLASS: ddel = _class_ddel; break;
 		case OT_CLASS: ddel = _class_ddel; break;
 		case OT_TABLE: ddel = _table_ddel; break;
 		case OT_TABLE: ddel = _table_ddel; break;
 		case OT_ARRAY: ddel = _array_ddel; break;
 		case OT_ARRAY: ddel = _array_ddel; break;
+		case OT_STRING_UTF8:
 		case OT_STRING: ddel = _string_ddel; break;
 		case OT_STRING: ddel = _string_ddel; break;
 		case OT_INSTANCE: ddel = _instance_ddel; break;
 		case OT_INSTANCE: ddel = _instance_ddel; break;
 		case OT_INTEGER:case OT_FLOAT:case OT_BOOL: ddel = _number_ddel; break;
 		case OT_INTEGER:case OT_FLOAT:case OT_BOOL: ddel = _number_ddel; break;
@@ -1739,6 +1746,7 @@ cloned_mt:
 	case OT_INTEGER:
 	case OT_INTEGER:
 	case OT_FLOAT:
 	case OT_FLOAT:
 	case OT_STRING:
 	case OT_STRING:
+	case OT_STRING_UTF8:
 		target = self;
 		target = self;
 		return true;
 		return true;
 	default:
 	default:
@@ -2136,6 +2144,7 @@ void SQVM::dumpstack(SQInteger stackbase,bool dumpall)
 		case OT_FLOAT:			scprintf(_SC("FLOAT %.3f"),_float(obj));break;
 		case OT_FLOAT:			scprintf(_SC("FLOAT %.3f"),_float(obj));break;
 		case OT_INTEGER:		scprintf(_SC("INTEGER " _PRINT_INT_FMT),_integer(obj));break;
 		case OT_INTEGER:		scprintf(_SC("INTEGER " _PRINT_INT_FMT),_integer(obj));break;
 		case OT_BOOL:			scprintf(_SC("BOOL %s"),_integer(obj)?"true":"false");break;
 		case OT_BOOL:			scprintf(_SC("BOOL %s"),_integer(obj)?"true":"false");break;
+		case OT_STRING_UTF8:
 		case OT_STRING:			scprintf(_SC("STRING %s"),_stringval(obj));break;
 		case OT_STRING:			scprintf(_SC("STRING %s"),_stringval(obj));break;
 		case OT_NULL:			scprintf(_SC("NULL"));	break;
 		case OT_NULL:			scprintf(_SC("NULL"));	break;
 		case OT_TABLE:			scprintf(_SC("TABLE %p[%p]"),_table(obj),_table(obj)->_delegate);break;
 		case OT_TABLE:			scprintf(_SC("TABLE %p[%p]"),_table(obj),_table(obj)->_delegate);break;