Daniele Bartolini 10 лет назад
Родитель
Сommit
723422f7df
2 измененных файлов с 66 добавлено и 0 удалено
  1. 45 0
      src/core/utf8.cpp
  2. 21 0
      src/core/utf8.h

+ 45 - 0
src/core/utf8.cpp

@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012-2016 Daniele Bartolini and individual contributors.
+ * License: https://github.com/taylor001/crown/blob/master/LICENSE
+ */
+
+#include "utf8.h"
+
+namespace crown
+{
+namespace utf8
+{
+	// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+	// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+	// Combined lookup table driving the UTF-8 decoder state machine.
+	//
+	// Layout (400 entries):
+	//  - [0, 256):   maps an input byte to its character class; decode()
+	//                indexes this part directly with the byte value.
+	//  - [256, 400): state transition table, 16 entries per state for the
+	//                9 states s0..s8; decode() reads the next state from
+	//                index 256 + state*16 + class.
+	//
+	// The trailing comment on each row names the byte range (hex) or the
+	// states covered by that row.
+	static const uint8_t utf8d[] =
+	{
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+		8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+		0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+		0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+		0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+		1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+		1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+		1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+	};
+
+	/// Feeds one input byte to the UTF-8 decoder state machine.
+	///
+	/// `state` carries the decoder state between calls; it must be
+	/// UTF8_ACCEPT before the first byte of a sequence and otherwise a value
+	/// previously produced by this function. `codep` accumulates the code
+	/// point being decoded and is only meaningful once UTF8_ACCEPT is
+	/// returned. Only the low 8 bits of `byte` are used.
+	///
+	/// Returns the new state (also stored in `*state`): UTF8_ACCEPT when a
+	/// code point is complete, UTF8_REJECT when the input is malformed, and
+	/// any other value when more bytes are required. Once rejected, the state
+	/// stays UTF8_REJECT until the caller resets it to UTF8_ACCEPT.
+	u32 decode(u32* state, u32* codep, u32 byte)
+	{
+		// Keep the class lookup inside the first 256 table entries even when
+		// the caller passes a value wider than a byte (for instance a negative
+		// char widened to u32); without the mask that would read past the table.
+		byte &= 0xffu;
+
+		const u32 type = utf8d[byte];
+
+		if (*state != UTF8_ACCEPT)
+		{
+			// Continuation byte: append its 6 payload bits.
+			*codep = (byte & 0x3fu) | (*codep << 6);
+		}
+		else
+		{
+			// First byte of a sequence: the class selects how many of the
+			// leading marker bits to strip.
+			*codep = (0xff >> type) & (byte);
+		}
+
+		// Transition table follows the 256 class entries, 16 entries per state.
+		*state = utf8d[256 + *state*16 + type];
+		return *state;
+	}
+} // namespace utf8
+
+} // namespace crown

+ 21 - 0
src/core/utf8.h

@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2012-2016 Daniele Bartolini and individual contributors.
+ * License: https://github.com/taylor001/crown/blob/master/LICENSE
+ */
+
+#pragma once
+
+#include "types.h"
+
+/// Decoder state meaning no sequence is in progress: it is the state to start
+/// from and the state reported once a complete code point has been decoded.
+#define UTF8_ACCEPT 0
+/// Decoder state reported when the input is not valid UTF-8.
+#define UTF8_REJECT 1
+
+namespace crown
+{
+namespace utf8
+{
+	/// Advances the UTF-8 decoder by one input byte.
+	///
+	/// `state` holds the decoder state between calls and must be UTF8_ACCEPT
+	/// before the first byte of a sequence. `codep` receives the code point
+	/// as it is assembled. `byte` is the next byte of input (0..255).
+	///
+	/// Returns the updated state, which is also written to `*state`. The
+	/// value in `*codep` is a complete code point only when the result is
+	/// UTF8_ACCEPT.
+	///
+	/// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+	u32 decode(u32* state, u32* codep, u32 byte);
+
+} // namespace utf8
+} // namespace crown