|
|
@@ -0,0 +1,45 @@
|
|
|
+/*
|
|
|
+ * Copyright (c) 2012-2016 Daniele Bartolini and individual contributors.
|
|
|
+ * License: https://github.com/taylor001/crown/blob/master/LICENSE
|
|
|
+ */
|
|
|
+
|
|
|
+#include "utf8.h"
|
|
|
+
|
|
|
+namespace crown
|
|
|
+{
|
|
|
+namespace utf8
|
|
|
+{
|
|
|
+ // Copyright (c) 2008-2009 Bjoern Hoehrmann <[email protected]>
|
|
|
+ // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
|
|
+ static const uint8_t utf8d[] =
|
|
|
+ {
|
|
|
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
|
|
|
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
|
|
|
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
|
|
|
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
|
|
|
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
|
|
|
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
|
|
|
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
|
|
|
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
|
|
|
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
|
|
|
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
|
|
|
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
|
|
|
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
|
|
|
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
|
|
|
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
|
|
|
+ };
|
|
|
+
|
|
|
+ u32 decode(u32* state, u32* codep, u32 byte)
|
|
|
+ {
|
|
|
+ u32 type = utf8d[byte];
|
|
|
+
|
|
|
+ *codep = (*state != UTF8_ACCEPT) ?
|
|
|
+ (byte & 0x3fu) | (*codep << 6) :
|
|
|
+ (0xff >> type) & (byte);
|
|
|
+
|
|
|
+ *state = utf8d[256 + *state*16 + type];
|
|
|
+ return *state;
|
|
|
+ }
|
|
|
+} // namespace utf8
|
|
|
+
|
|
|
+} // namespace crown
|