|
@@ -35,43 +35,393 @@
|
|
|
#define UTF8_REJECT 12
|
|
|
|
|
|
static const uint8_t utf8d[] = {
|
|
|
- // The first part of the table maps bytes to character classes that
|
|
|
- // to reduce the size of the transition table and create bitmasks.
|
|
|
- 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
|
- 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
|
- 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
|
- 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
|
- 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
|
|
- 7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
|
- 8, 8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
|
- 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
|
|
|
+ // The first part of the table maps bytes to character classes that
|
|
|
+ // help reduce the size of the transition table and create bitmasks.
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 9,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 7,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 2,
|
|
|
+ 10,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 4,
|
|
|
+ 3,
|
|
|
+ 3,
|
|
|
+ 11,
|
|
|
+ 6,
|
|
|
+ 6,
|
|
|
+ 6,
|
|
|
+ 5,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
+ 8,
|
|
|
|
|
|
- // The second part is a transition table that maps a combination
|
|
|
- // of a state of the automation and a character class to a state.
|
|
|
- 0, 12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
|
|
- 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
|
|
- 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
|
|
- 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
|
|
- 12,36,12,12,12,12,12,12,12,12,12,12,
|
|
|
+ // The second part is a transition table that maps a combination
|
|
|
+ // of a state of the automaton and a character class to a state.
|
|
|
+ 0,
|
|
|
+ 12,
|
|
|
+ 24,
|
|
|
+ 36,
|
|
|
+ 60,
|
|
|
+ 96,
|
|
|
+ 84,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 48,
|
|
|
+ 72,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 0,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 0,
|
|
|
+ 12,
|
|
|
+ 0,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 24,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 24,
|
|
|
+ 12,
|
|
|
+ 24,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 24,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 24,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 24,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 36,
|
|
|
+ 12,
|
|
|
+ 36,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 36,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 36,
|
|
|
+ 12,
|
|
|
+ 36,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 36,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
+ 12,
|
|
|
};
|
|
|
|
|
|
-static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte)
|
|
|
+static inline uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte)
|
|
|
{
|
|
|
uint32_t type = utf8d[byte];
|
|
|
|
|
|
- *codep = (*state != UTF8_ACCEPT) ?
|
|
|
- (byte & 0x3fu) | (*codep << 6) :
|
|
|
- (0xff >> type) & (byte);
|
|
|
+ *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
|
|
|
+ : (0xff >> type) & (byte);
|
|
|
|
|
|
*state = utf8d[256 + *state + type];
|
|
|
return *state;
|
|
|
}
|
|
|
|
|
|
-static inline int IsUTF8(uint8_t* s, size_t len)
|
|
|
+static inline int IsUTF8(uint8_t *s, size_t len)
|
|
|
{
|
|
|
uint32_t codepoint, state = 0;
|
|
|
|
|
|
- while (len--)
|
|
|
+ while(len--)
|
|
|
decode(&state, &codepoint, *s++);
|
|
|
|
|
|
return state == UTF8_ACCEPT;
|