|
@@ -1043,7 +1043,7 @@ void DrawTextEx(Font font, const char *text, Vector2 position, float fontSize, f
|
|
{
|
|
{
|
|
// Get next codepoint from byte string and glyph index in font
|
|
// Get next codepoint from byte string and glyph index in font
|
|
int codepointByteCount = 0;
|
|
int codepointByteCount = 0;
|
|
- int codepoint = GetCodepoint(&text[i], &codepointByteCount);
|
|
|
|
|
|
+ int codepoint = GetCodepointNext(&text[i], &codepointByteCount);
|
|
int index = GetGlyphIndex(font, codepoint);
|
|
int index = GetGlyphIndex(font, codepoint);
|
|
|
|
|
|
// NOTE: Normally we exit the decoding sequence as soon as a bad byte is found (and return 0x3f)
|
|
// NOTE: Normally we exit the decoding sequence as soon as a bad byte is found (and return 0x3f)
|
|
@@ -1185,7 +1185,7 @@ Vector2 MeasureTextEx(Font font, const char *text, float fontSize, float spacing
|
|
byteCounter++;
|
|
byteCounter++;
|
|
|
|
|
|
int next = 0;
|
|
int next = 0;
|
|
- letter = GetCodepoint(&text[i], &next);
|
|
|
|
|
|
+ letter = GetCodepointNext(&text[i], &next);
|
|
index = GetGlyphIndex(font, letter);
|
|
index = GetGlyphIndex(font, letter);
|
|
|
|
|
|
// NOTE: normally we exit the decoding sequence as soon as a bad byte is found (and return 0x3f)
|
|
// NOTE: normally we exit the decoding sequence as soon as a bad byte is found (and return 0x3f)
|
|
@@ -1627,7 +1627,7 @@ const char *TextToPascal(const char *text)
|
|
// Encode text codepoint into UTF-8 text
|
|
// Encode text codepoint into UTF-8 text
|
|
// REQUIRES: memcpy()
|
|
// REQUIRES: memcpy()
|
|
// WARNING: Allocated memory must be manually freed
|
|
// WARNING: Allocated memory must be manually freed
|
|
-char *TextCodepointsToUTF8(const int *codepoints, int length)
|
|
|
|
|
|
+char *LoadUTF8(const int *codepoints, int length)
|
|
{
|
|
{
|
|
// We allocate enough memory to fit all possible codepoints
|
|
// We allocate enough memory to fit all possible codepoints
|
|
// NOTE: 5 bytes for every codepoint should be enough
|
|
// NOTE: 5 bytes for every codepoint should be enough
|
|
@@ -1650,43 +1650,10 @@ char *TextCodepointsToUTF8(const int *codepoints, int length)
|
|
return text;
|
|
return text;
|
|
}
|
|
}
|
|
|
|
|
|
-// Encode codepoint into utf8 text (char array length returned as parameter)
|
|
|
|
-// NOTE: It uses a static array to store UTF-8 bytes
|
|
|
|
-RLAPI const char *CodepointToUTF8(int codepoint, int *byteSize)
|
|
|
|
|
|
+// Unload UTF-8 text encoded from codepoints array
|
|
|
|
+void UnloadUTF8(char *text)
|
|
{
|
|
{
|
|
- static char utf8[6] = { 0 };
|
|
|
|
- int size = 0; // Byte size of codepoint
|
|
|
|
-
|
|
|
|
- if (codepoint <= 0x7f)
|
|
|
|
- {
|
|
|
|
- utf8[0] = (char)codepoint;
|
|
|
|
- size = 1;
|
|
|
|
- }
|
|
|
|
- else if (codepoint <= 0x7ff)
|
|
|
|
- {
|
|
|
|
- utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
|
|
|
|
- utf8[1] = (char)((codepoint & 0x3f) | 0x80);
|
|
|
|
- size = 2;
|
|
|
|
- }
|
|
|
|
- else if (codepoint <= 0xffff)
|
|
|
|
- {
|
|
|
|
- utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
|
|
|
|
- utf8[1] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
|
|
|
|
- utf8[2] = (char)((codepoint & 0x3f) | 0x80);
|
|
|
|
- size = 3;
|
|
|
|
- }
|
|
|
|
- else if (codepoint <= 0x10ffff)
|
|
|
|
- {
|
|
|
|
- utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
|
|
|
|
- utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
|
|
|
|
- utf8[2] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
|
|
|
|
- utf8[3] = (char)((codepoint & 0x3f) | 0x80);
|
|
|
|
- size = 4;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- *byteSize = size;
|
|
|
|
-
|
|
|
|
- return utf8;
|
|
|
|
|
|
+ RL_FREE(text);
|
|
}
|
|
}
|
|
|
|
|
|
// Load all codepoints from a UTF-8 text string, codepoints count returned by parameter
|
|
// Load all codepoints from a UTF-8 text string, codepoints count returned by parameter
|
|
@@ -1694,7 +1661,7 @@ int *LoadCodepoints(const char *text, int *count)
|
|
{
|
|
{
|
|
int textLength = TextLength(text);
|
|
int textLength = TextLength(text);
|
|
|
|
|
|
- int bytesProcessed = 0;
|
|
|
|
|
|
+ int codepointSize = 0;
|
|
int codepointCount = 0;
|
|
int codepointCount = 0;
|
|
|
|
|
|
// Allocate a big enough buffer to store as many codepoints as text bytes
|
|
// Allocate a big enough buffer to store as many codepoints as text bytes
|
|
@@ -1702,8 +1669,8 @@ int *LoadCodepoints(const char *text, int *count)
|
|
|
|
|
|
for (int i = 0; i < textLength; codepointCount++)
|
|
for (int i = 0; i < textLength; codepointCount++)
|
|
{
|
|
{
|
|
- codepoints[codepointCount] = GetCodepoint(text + i, &bytesProcessed);
|
|
|
|
- i += bytesProcessed;
|
|
|
|
|
|
+ codepoints[codepointCount] = GetCodepointNext(text + i, &codepointSize);
|
|
|
|
+ i += codepointSize;
|
|
}
|
|
}
|
|
|
|
|
|
// Re-allocate buffer to the actual number of codepoints loaded
|
|
// Re-allocate buffer to the actual number of codepoints loaded
|
|
@@ -1731,7 +1698,7 @@ int GetCodepointCount(const char *text)
|
|
while (*ptr != '\0')
|
|
while (*ptr != '\0')
|
|
{
|
|
{
|
|
int next = 0;
|
|
int next = 0;
|
|
- int letter = GetCodepoint(ptr, &next);
|
|
|
|
|
|
+ int letter = GetCodepointNext(ptr, &next);
|
|
|
|
|
|
if (letter == 0x3f) ptr += 1;
|
|
if (letter == 0x3f) ptr += 1;
|
|
else ptr += next;
|
|
else ptr += next;
|
|
@@ -1741,6 +1708,45 @@ int GetCodepointCount(const char *text)
|
|
|
|
|
|
return length;
|
|
return length;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
// Encode codepoint into UTF-8 text (encoded byte count returned through utf8Size)
// NOTE: It uses a static array to store UTF-8 bytes, so the returned pointer is only
// valid until the next call and the function is not reentrant
// NOTE: The returned text is always NUL-terminated; codepoints that are negative or
// above U+10FFFF cannot be encoded and produce an empty string with a size of 0
const char *CodepointToUTF8(int codepoint, int *utf8Size)
{
    static char utf8[6] = { 0 };
    int size = 0;               // Byte size of codepoint

    if (codepoint < 0)
    {
        // Not a valid codepoint, nothing to encode
        size = 0;
    }
    else if (codepoint <= 0x7f)
    {
        // 1 byte: 0xxxxxxx
        utf8[0] = (char)codepoint;
        size = 1;
    }
    else if (codepoint <= 0x7ff)
    {
        // 2 bytes: 110xxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
        size = 2;
    }
    else if (codepoint <= 0xffff)
    {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
        utf8[1] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
        size = 3;
    }
    else if (codepoint <= 0x10ffff)
    {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
        utf8[2] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
        size = 4;
    }

    // Terminate right after the encoded bytes: the static buffer may still hold
    // the tail of a longer sequence written by a previous call
    utf8[size] = '\0';

    *utf8Size = size;

    return utf8;
}
|
|
#endif // SUPPORT_TEXT_MANIPULATION
|
|
#endif // SUPPORT_TEXT_MANIPULATION
|
|
|
|
|
|
// Get next codepoint in a UTF-8 encoded text, scanning until '\0' is found
|
|
// Get next codepoint in a UTF-8 encoded text, scanning until '\0' is found
|
|
@@ -1748,7 +1754,7 @@ int GetCodepointCount(const char *text)
|
|
// Total number of bytes processed are returned as a parameter
|
|
// Total number of bytes processed are returned as a parameter
|
|
// NOTE: The standard says U+FFFD should be returned in case of errors
|
|
// NOTE: The standard says U+FFFD should be returned in case of errors
|
|
// but that character is not supported by the default font in raylib
|
|
// but that character is not supported by the default font in raylib
|
|
-int GetCodepoint(const char *text, int *bytesProcessed)
|
|
|
|
|
|
+int GetCodepoint(const char *text, int *codepointSize)
|
|
{
|
|
{
|
|
/*
|
|
/*
|
|
UTF-8 specs from https://www.ietf.org/rfc/rfc3629.txt
|
|
UTF-8 specs from https://www.ietf.org/rfc/rfc3629.txt
|
|
@@ -1763,14 +1769,14 @@ int GetCodepoint(const char *text, int *bytesProcessed)
|
|
*/
|
|
*/
|
|
// NOTE: on decode errors we return as soon as possible
|
|
// NOTE: on decode errors we return as soon as possible
|
|
|
|
|
|
- int code = 0x3f; // Codepoint (defaults to '?')
|
|
|
|
|
|
+ int codepoint = 0x3f; // Codepoint (defaults to '?')
|
|
int octet = (unsigned char)(text[0]); // The first UTF8 octet
|
|
int octet = (unsigned char)(text[0]); // The first UTF8 octet
|
|
- *bytesProcessed = 1;
|
|
|
|
|
|
+ *codepointSize = 1;
|
|
|
|
|
|
if (octet <= 0x7f)
|
|
if (octet <= 0x7f)
|
|
{
|
|
{
|
|
// Only one octet (ASCII range x00-7F)
|
|
// Only one octet (ASCII range x00-7F)
|
|
- code = text[0];
|
|
|
|
|
|
+ codepoint = text[0];
|
|
}
|
|
}
|
|
else if ((octet & 0xe0) == 0xc0)
|
|
else if ((octet & 0xe0) == 0xc0)
|
|
{
|
|
{
|
|
@@ -1779,12 +1785,12 @@ int GetCodepoint(const char *text, int *bytesProcessed)
|
|
// [0]xC2-DF [1]UTF8-tail(x80-BF)
|
|
// [0]xC2-DF [1]UTF8-tail(x80-BF)
|
|
unsigned char octet1 = text[1];
|
|
unsigned char octet1 = text[1];
|
|
|
|
|
|
- if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
|
|
|
|
|
|
+ if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *codepointSize = 2; return codepoint; } // Unexpected sequence
|
|
|
|
|
|
if ((octet >= 0xc2) && (octet <= 0xdf))
|
|
if ((octet >= 0xc2) && (octet <= 0xdf))
|
|
{
|
|
{
|
|
- code = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
|
|
|
|
- *bytesProcessed = 2;
|
|
|
|
|
|
+ codepoint = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
|
|
|
|
+ *codepointSize = 2;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if ((octet & 0xf0) == 0xe0)
|
|
else if ((octet & 0xf0) == 0xe0)
|
|
@@ -1793,11 +1799,11 @@ int GetCodepoint(const char *text, int *bytesProcessed)
|
|
unsigned char octet1 = text[1];
|
|
unsigned char octet1 = text[1];
|
|
unsigned char octet2 = '\0';
|
|
unsigned char octet2 = '\0';
|
|
|
|
|
|
- if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
|
|
|
|
|
|
+ if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *codepointSize = 2; return codepoint; } // Unexpected sequence
|
|
|
|
|
|
octet2 = text[2];
|
|
octet2 = text[2];
|
|
|
|
|
|
- if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence
|
|
|
|
|
|
+ if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *codepointSize = 3; return codepoint; } // Unexpected sequence
|
|
|
|
|
|
// [0]xE0 [1]xA0-BF [2]UTF8-tail(x80-BF)
|
|
// [0]xE0 [1]xA0-BF [2]UTF8-tail(x80-BF)
|
|
// [0]xE1-EC [1]UTF8-tail [2]UTF8-tail(x80-BF)
|
|
// [0]xE1-EC [1]UTF8-tail [2]UTF8-tail(x80-BF)
|
|
@@ -1805,50 +1811,105 @@ int GetCodepoint(const char *text, int *bytesProcessed)
|
|
// [0]xEE-EF [1]UTF8-tail [2]UTF8-tail(x80-BF)
|
|
// [0]xEE-EF [1]UTF8-tail [2]UTF8-tail(x80-BF)
|
|
|
|
|
|
if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
|
|
if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
|
|
- ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *bytesProcessed = 2; return code; }
|
|
|
|
|
|
+ ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *codepointSize = 2; return codepoint; }
|
|
|
|
|
|
if ((octet >= 0xe0) && (octet <= 0xef))
|
|
if ((octet >= 0xe0) && (octet <= 0xef))
|
|
{
|
|
{
|
|
- code = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
|
|
|
|
- *bytesProcessed = 3;
|
|
|
|
|
|
+ codepoint = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
|
|
|
|
+ *codepointSize = 3;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if ((octet & 0xf8) == 0xf0)
|
|
else if ((octet & 0xf8) == 0xf0)
|
|
{
|
|
{
|
|
// Four octets
|
|
// Four octets
|
|
- if (octet > 0xf4) return code;
|
|
|
|
|
|
+ if (octet > 0xf4) return codepoint;
|
|
|
|
|
|
unsigned char octet1 = text[1];
|
|
unsigned char octet1 = text[1];
|
|
unsigned char octet2 = '\0';
|
|
unsigned char octet2 = '\0';
|
|
unsigned char octet3 = '\0';
|
|
unsigned char octet3 = '\0';
|
|
|
|
|
|
- if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
|
|
|
|
|
|
+ if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *codepointSize = 2; return codepoint; } // Unexpected sequence
|
|
|
|
|
|
octet2 = text[2];
|
|
octet2 = text[2];
|
|
|
|
|
|
- if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence
|
|
|
|
|
|
+ if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *codepointSize = 3; return codepoint; } // Unexpected sequence
|
|
|
|
|
|
octet3 = text[3];
|
|
octet3 = text[3];
|
|
|
|
|
|
- if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *bytesProcessed = 4; return code; } // Unexpected sequence
|
|
|
|
|
|
+ if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *codepointSize = 4; return codepoint; } // Unexpected sequence
|
|
|
|
|
|
// [0]xF0 [1]x90-BF [2]UTF8-tail [3]UTF8-tail
|
|
// [0]xF0 [1]x90-BF [2]UTF8-tail [3]UTF8-tail
|
|
// [0]xF1-F3 [1]UTF8-tail [2]UTF8-tail [3]UTF8-tail
|
|
// [0]xF1-F3 [1]UTF8-tail [2]UTF8-tail [3]UTF8-tail
|
|
// [0]xF4 [1]x80-8F [2]UTF8-tail [3]UTF8-tail
|
|
// [0]xF4 [1]x80-8F [2]UTF8-tail [3]UTF8-tail
|
|
|
|
|
|
if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
|
|
if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
|
|
- ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *bytesProcessed = 2; return code; } // Unexpected sequence
|
|
|
|
|
|
+ ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *codepointSize = 2; return codepoint; } // Unexpected sequence
|
|
|
|
|
|
if (octet >= 0xf0)
|
|
if (octet >= 0xf0)
|
|
{
|
|
{
|
|
- code = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
|
|
|
|
- *bytesProcessed = 4;
|
|
|
|
|
|
+ codepoint = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
|
|
|
|
+ *codepointSize = 4;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- if (code > 0x10ffff) code = 0x3f; // Codepoints after U+10ffff are invalid
|
|
|
|
|
|
+ if (codepoint > 0x10ffff) codepoint = 0x3f; // Codepoints after U+10ffff are invalid
|
|
|
|
+
|
|
|
|
+ return codepoint;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
// Get next codepoint in a UTF-8 byte sequence and the number of bytes it spans
// NOTE: Only the sequence structure is checked (a lead byte followed by the expected number
// of 10xxxxxx continuation bytes); overlong forms, surrogates and values above U+10FFFF
// are not rejected here
// NOTE: On a malformed or truncated sequence 0x3f ('?') is returned with a size of 1, so a
// caller advancing by codepointSize resumes on the following byte and never steps over '\0'
int GetCodepointNext(const char *text, int *codepointSize)
{
    // Bytes are read as unsigned so values above 0x7f do not sign-extend
    const unsigned char *ptr = (const unsigned char *)text;
    int codepoint = 0x3f;       // Codepoint (defaults to '?')
    *codepointSize = 1;

    if (ptr[0] <= 0x7f)
    {
        // 1 byte UTF-8 codepoint (ASCII range, '\0' terminator included)
        codepoint = ptr[0];
    }
    else if ((ptr[0] & 0xe0) == 0xc0)
    {
        // 2 byte UTF-8 codepoint
        if ((ptr[1] & 0xc0) == 0x80)
        {
            codepoint = ((ptr[0] & 0x1f) << 6) | (ptr[1] & 0x3f);
            *codepointSize = 2;
        }
    }
    else if ((ptr[0] & 0xf0) == 0xe0)
    {
        // 3 byte UTF-8 codepoint
        // The checks short-circuit: '\0' is not a continuation byte, so nothing
        // beyond an early terminator is ever read
        if (((ptr[1] & 0xc0) == 0x80) && ((ptr[2] & 0xc0) == 0x80))
        {
            codepoint = ((ptr[0] & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f);
            *codepointSize = 3;
        }
    }
    else if ((ptr[0] & 0xf8) == 0xf0)
    {
        // 4 byte UTF-8 codepoint
        if (((ptr[1] & 0xc0) == 0x80) && ((ptr[2] & 0xc0) == 0x80) && ((ptr[3] & 0xc0) == 0x80))
        {
            codepoint = ((ptr[0] & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f);
            *codepointSize = 4;
        }
    }
    // Any other first byte (stray continuation 0x80-0xbf, or 0xf8-0xff) keeps the defaults

    return codepoint;
}
|
|
|
|
+
|
|
|
|
// Get previous codepoint in a UTF-8 byte sequence and bytes processed
// NOTE: text must point at least one byte past the start of the buffer being scanned,
// this function has no way to detect where the buffer begins
// NOTE: codepointSize is left at 0 when the decoded codepoint is 0
int GetCodepointPrevious(const char *text, int *codepointSize)
{
    const char *ptr = text;
    int codepoint = 0x3f;       // Codepoint (defaults to '?')
    int cpSize = 0;
    *codepointSize = 0;

    // Move back to the first byte of the previous codepoint, skipping 10xxxxxx continuation bytes
    // A well-formed sequence has at most 3 of them, so never step back more than 4 bytes;
    // this keeps malformed input from dragging the scan arbitrarily far backwards
    for (int back = 0; back < 4; back++)
    {
        ptr--;
        if ((0xc0 & ptr[0]) != 0x80) break;
    }

    codepoint = GetCodepointNext(ptr, &cpSize);

    if (codepoint != 0) *codepointSize = cpSize;

    return codepoint;
}
|
|
|
|
|
|
//----------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------
|