misctest.cpp 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. #include "perftest.h"
  2. #if TEST_MISC
  3. class Misc : public PerfTest {
  4. };
// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
  7. #define UTF8_ACCEPT 0
  8. #define UTF8_REJECT 12
  9. static const unsigned char utf8d[] = {
  10. // The first part of the table maps bytes to character classes that
  11. // to reduce the size of the transition table and create bitmasks.
  12. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  13. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  14. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  15. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  16. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
  17. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  18. 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  19. 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
  20. // The second part is a transition table that maps a combination
  21. // of a state of the automaton and a character class to a state.
  22. 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  23. 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  24. 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  25. 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  26. 12,36,12,12,12,12,12,12,12,12,12,12,
  27. };
  28. static unsigned inline decode(unsigned* state, unsigned* codep, unsigned byte) {
  29. unsigned type = utf8d[byte];
  30. *codep = (*state != UTF8_ACCEPT) ?
  31. (byte & 0x3fu) | (*codep << 6) :
  32. (0xff >> type) & (byte);
  33. *state = utf8d[256 + *state + type];
  34. return *state;
  35. }
  36. static bool IsUTF8(unsigned char* s) {
  37. unsigned codepoint, state = 0;
  38. while (*s)
  39. decode(&state, &codepoint, *s++);
  40. return state == UTF8_ACCEPT;
  41. }
  42. TEST_F(Misc, Hoehrmann_IsUTF8) {
  43. for (int i = 0; i < kTrialCount; i++) {
  44. EXPECT_TRUE(IsUTF8((unsigned char*)json_));
  45. }
  46. }
#endif // TEST_MISC