| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- /*
- AngelCode Scripting Library
- Copyright (c) 2003-2017 Andreas Jonsson
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any
- damages arising from the use of this software.
- Permission is granted to anyone to use this software for any
- purpose, including commercial applications, and to alter it and
- redistribute it freely, subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you
- must not claim that you wrote the original software. If you use
- this software in a product, an acknowledgment in the product
- documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and
- must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source
- distribution.
- The original version of this library can be located at:
- http://www.angelcode.com/angelscript/
- Andreas Jonsson
- [email protected]
- */
- #include "as_config.h"
- #include <string.h> // some compilers declare memcpy() here
- #include <math.h> // pow()
- #if !defined(AS_NO_MEMORY_H)
- #include <memory.h>
- #endif
- #include "as_string.h"
- #include "as_string_util.h"
- BEGIN_AS_NAMESPACE
// Compares two length-delimited byte strings.
//
// Returns 0 if the strings are equal. If the strings differ within their
// common prefix, the result of memcmp() on that prefix is returned. If one
// string is a proper prefix of the other, the function returns 1 when str2
// is the longer string and -1 when str1 is the longer string.
//
// A null str2 is treated the same way as an empty string.
int asCompareStrings(const char *str1, size_t len1, const char *str2, size_t len2)
{
	// An empty first string only equals an empty (or null) second string
	if( len1 == 0 )
		return (str2 == 0 || len2 == 0) ? 0 : 1;

	// The first string has content here, so a null second string is the shorter one
	if( str2 == 0 )
		return -1;

	// Compare the part that both strings have in common
	const size_t common = (len2 < len1) ? len2 : len1;
	const int diff = memcmp(str1, str2, common);
	if( diff != 0 )
		return diff;

	// The common part is identical so the lengths decide the outcome
	if( len2 < len1 ) return -1; // The other string is smaller than this
	if( len1 < len2 ) return 1;  // The other string is larger than this
	return 0;
}
// Parses an unsigned decimal floating point literal from the start of string.
//
// The accepted form is: digits, optionally followed by '.' and more digits,
// optionally followed by 'e' or 'E', an optional '+' or '-', and exponent digits.
// Scanning stops at the first character that doesn't fit this form.
//
// string     - the null terminated text to scan
// numScanned - optional; receives the number of characters that were consumed
//
// Returns the parsed value. An exponent too large for a double gives infinity
// (or zero when negative), except that a zero mantissa always gives zero.
double asStringScanDouble(const char *string, size_t *numScanned)
{
	// I decided to do my own implementation of strtod() because this function
	// doesn't seem to be present on all systems. iOS 5 for example doesn't appear
	// to include the function in the standard lib.

	// Another reason is that the standard implementation of strtod() is dependent
	// on the locale on some systems, i.e. it may use comma instead of dot for
	// the decimal indicator. This can be avoided by forcing the locale to "C" with
	// setlocale(), but this is another thing that is highly platform dependent.

	// Any exponent of this magnitude already saturates a double to infinity or
	// zero, so further digits are consumed without being accumulated. This keeps
	// the int from overflowing on absurdly long exponents.
	const int exponentLimit = 100000;

	double value            = 0;
	double fraction         = 0.1;
	int    exponent         = 0;
	bool   negativeExponent = false;
	size_t c                = 0;

	// The tokenizer separates the sign from the number in
	// two tokens so we'll never have a sign to parse here

	// Parse the integer value
	while( string[c] >= '0' && string[c] <= '9' )
	{
		value = value*10 + double(string[c] - '0');
		c++;
	}

	if( string[c] == '.' )
	{
		c++;

		// Parse the fraction
		while( string[c] >= '0' && string[c] <= '9' )
		{
			value += fraction * double(string[c] - '0');
			c++;
			fraction *= 0.1;
		}
	}

	if( string[c] == 'e' || string[c] == 'E' )
	{
		c++;

		// Parse the sign of the exponent
		if( string[c] == '-' )
		{
			negativeExponent = true;
			c++;
		}
		else if( string[c] == '+' )
			c++;

		// Parse the exponent value
		while( string[c] >= '0' && string[c] <= '9' )
		{
			if( exponent < exponentLimit )
				exponent = exponent*10 + int(string[c] - '0');
			c++;
		}
	}

	// A zero mantissa stays zero whatever the exponent is. Skipping the
	// scaling avoids 0 * infinity, which would turn the result into NaN.
	if( exponent && value != 0 )
	{
		if( negativeExponent )
			exponent = -exponent;
		value *= pow(10.0, exponent);
	}

	if( numScanned )
		*numScanned = c;

	return value;
}
// Translates a digit character to its numeric value in the given radix.
// '0'-'9' map to 0-9 and the letters 'A'-'Z' / 'a'-'z' map to 10-35.
// Returns -1 if the character is not a digit or its value is not below the radix.
static int asCharToNbr(char ch, int radix)
{
	int digit;

	if( ch >= '0' && ch <= '9' )
		digit = ch - '0';
	else if( ch >= 'A' && ch <= 'Z' )
		digit = ch - 'A' + 10;
	else if( ch >= 'a' && ch <= 'z' )
		digit = ch - 'a' + 10;
	else
		return -1;

	// The digit is only acceptable if it exists in the radix
	if( digit < radix )
		return digit;

	return -1;
}
- // If base is 0 the string should be prefixed by 0x, 0d, 0o, or 0b to allow the function to automatically determine the radix
- asQWORD asStringScanUInt64(const char *string, int base, size_t *numScanned, bool *overflow)
- {
- asASSERT(base == 10 || base == 16 || base == 0);
- if (overflow)
- *overflow = false;
- const char *end = string;
- static const asQWORD QWORD_MAX = (~asQWORD(0));
- asQWORD res = 0;
- if( base == 10 )
- {
- while( *end >= '0' && *end <= '9' )
- {
- if( overflow && ((res > QWORD_MAX / 10) || ((asUINT(*end - '0') > (QWORD_MAX - (QWORD_MAX / 10) * 10)) && res == QWORD_MAX / 10)) )
- *overflow = true;
- res *= 10;
- res += *end++ - '0';
- }
- }
- else
- {
- if( base == 0 && string[0] == '0')
- {
- // Determine the radix from the prefix
- switch( string[1] )
- {
- case 'b': case 'B': base = 2; break;
- case 'o': case 'O': base = 8; break;
- case 'd': case 'D': base = 10; break;
- case 'x': case 'X': base = 16; break;
- }
- end += 2;
- }
- asASSERT( base );
- if( base )
- {
- for (int nbr; (nbr = asCharToNbr(*end, base)) >= 0; end++)
- {
- if (overflow && ((res > QWORD_MAX / base) || ((asUINT(nbr) > (QWORD_MAX - (QWORD_MAX / base) * base)) && res == QWORD_MAX / base)) )
- *overflow = true;
- res = res * base + nbr;
- }
- }
- }
- if( numScanned )
- *numScanned = end - string;
- return res;
- }
//
// Encodes a unicode code point as UTF-8 into outEncodedBuffer and returns the
// number of bytes written, which is between 1 and 4. The buffer is not null
// terminated by this function.
//
// If the value is not a valid unicode code point, i.e. it is in the range
// 0xD800 to 0xDFFF or above 0x10FFFF, nothing is written and -1 is returned.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF8(unsigned int value, char *outEncodedBuffer)
{
	unsigned char *out = (unsigned char*)outEncodedBuffer;

	// Values up to 0x7F are stored as they are in a single byte
	if( value <= 0x7F )
	{
		out[0] = static_cast<unsigned char>(value);
		return 1;
	}

	// Determine the length of the sequence and the marker bits of the first byte
	int          length;
	unsigned int leadMarker;
	if( value <= 0x7FF )
	{
		length     = 2;
		leadMarker = 0xC0;
	}
	else if( value <= 0xFFFF )
	{
		// Note: Values 0xD800 to 0xDFFF are not valid unicode characters
		if( value >= 0xD800 && value <= 0xDFFF )
			return -1;

		length     = 3;
		leadMarker = 0xE0;
	}
	else if( value <= 0x10FFFF )
	{
		length     = 4;
		leadMarker = 0xF0;
	}
	else
		return -1;

	// Write the continuation bytes from the last to the second,
	// each one holding the next 6 bits of the value
	for( int pos = length - 1; pos > 0; pos-- )
	{
		out[pos] = static_cast<unsigned char>(0x80 | (value & 0x3F));
		value >>= 6;
	}

	// The bits that remain go into the first byte together with the marker
	out[0] = static_cast<unsigned char>(leadMarker | value);

	return length;
}
//
// The function will decode an UTF8 character and return the unicode code point.
// outLength will receive the number of bytes that were decoded. outLength is
// optional and may be null.
//
// If the bytes don't form a well-formed UTF-8 sequence the function returns -1
// and leaves outLength untouched. This is the case for an invalid first byte,
// a missing continuation byte, an overlong encoding, an encoded surrogate
// (0xD800 to 0xDFFF), and a value above 0x10FFFF.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringDecodeUTF8(const char *encodedBuffer, unsigned int *outLength)
{
	const unsigned char *buf = (const unsigned char*)encodedBuffer;

	unsigned char byte = buf[0];
	if( (byte & 0x80) == 0 )
	{
		// This is the only byte
		if( outLength ) *outLength = 1;
		return byte;
	}

	int value    = 0;
	int length   = 0;
	int minValue = 0; // The smallest value that needs a sequence of this length
	if( (byte & 0xE0) == 0xC0 )
	{
		// There is one more byte
		value    = int(byte & 0x1F);
		length   = 2;
		minValue = 0x80;

		// The value at this moment must not be less than 2, because
		// that should have been encoded with one byte only. Reject it
		// already here so the next byte is never read in this case.
		if( value < 2 )
			return -1;
	}
	else if( (byte & 0xF0) == 0xE0 )
	{
		// There are two more bytes
		value    = int(byte & 0x0F);
		length   = 3;
		minValue = 0x800;
	}
	else if( (byte & 0xF8) == 0xF0 )
	{
		// There are three more bytes
		value    = int(byte & 0x07);
		length   = 4;
		minValue = 0x10000;
	}
	else
	{
		// A continuation byte or an unsupported byte cannot start a sequence
		return -1;
	}

	// Each continuation byte must have the form 10xxxxxx and adds 6 bits.
	// Stopping at the first mismatch also stops at a null terminator.
	for( int n = 1; n < length; n++ )
	{
		byte = buf[n];
		if( (byte & 0xC0) != 0x80 )
			return -1;

		value = (value << 6) + int(byte & 0x3F);
	}

	// Overlong encodings, i.e. values that fit in a shorter sequence, are not valid
	if( value < minValue )
		return -1;

	// Values 0xD800 to 0xDFFF and values above 0x10FFFF are not valid unicode characters
	if( (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF )
		return -1;

	if( outLength ) *outLength = (unsigned)length;
	return value;
}
// Stores one 16bit UTF-16 code unit in out[0..1] using the byte order selected by AS_BIG_ENDIAN
static void asStoreUTF16Unit(unsigned int unit, char *out)
{
#ifndef AS_BIG_ENDIAN
	out[0] = static_cast<char>(unit & 0xFF);
	out[1] = static_cast<char>((unit >> 8) & 0xFF);
#else
	out[1] = static_cast<char>(unit & 0xFF);
	out[0] = static_cast<char>((unit >> 8) & 0xFF);
#endif
}

//
// The function will encode the unicode code point into the outEncodedBuffer, and then
// return the length of the encoded value in bytes: 2 for values below 0x10000, and 4
// for values that are encoded as a surrogate pair. The bytes of each 16bit unit are
// written in little endian order unless AS_BIG_ENDIAN is defined.
//
// If the input value is not a valid unicode code point, i.e. it is in the range
// 0xD800 to 0xDFFF or above 0x10FFFF, then the function will write nothing and return -1.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF16(unsigned int value, char *outEncodedBuffer)
{
	// Values 0xD800 to 0xDFFF are reserved for the surrogates and cannot be
	// encoded as characters, and values above 0x10FFFF don't fit in a
	// surrogate pair. Encoding them would give a wrong or ambiguous result.
	if( (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF )
		return -1;

	if( value < 0x10000 )
	{
		// The value is stored directly as a single 16bit unit
		asStoreUTF16Unit(value, outEncodedBuffer);
		return 2;
	}

	// Split the 20 bits that remain after subtracting 0x10000 in two
	// halves; the high half goes in the first surrogate, the low in the second
	value -= 0x10000;
	unsigned int surrogate1 = ((value >> 10) & 0x3FF) + 0xD800;
	unsigned int surrogate2 = (value & 0x3FF) + 0xDC00;

	asStoreUTF16Unit(surrogate1, outEncodedBuffer);
	asStoreUTF16Unit(surrogate2, outEncodedBuffer + 2);
	return 4;
}
- END_AS_NAMESPACE
|