utf8.h 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. // Copyright 2011 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License"); you
  4. // may not use this file except in compliance with the License. You
  5. // may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  12. // implied. See the License for the specific language governing
  13. // permissions and limitations under the License.
  14. #ifndef WEBGL_LOADER_UTF8_H_
  15. #define WEBGL_LOADER_UTF8_H_
  16. #include "base.h"
  17. #include "stream.h"
  18. namespace webgl_loader {
  19. const uint8 kUtf8MoreBytesPrefix = 0x80;
  20. const uint8 kUtf8TwoBytePrefix = 0xC0;
  21. const uint8 kUtf8ThreeBytePrefix = 0xE0;
  22. const uint16 kUtf8TwoByteLimit = 0x0800;
  23. const uint16 kUtf8SurrogatePairStart = 0xD800;
  24. const uint16 kUtf8SurrogatePairNum = 0x0800;
  25. const uint16 kUtf8EncodableEnd = 0x10000 - kUtf8SurrogatePairNum;
  26. const uint16 kUtf8MoreBytesMask = 0x3F;
  27. bool Uint16ToUtf8(uint16 word, ByteSinkInterface* sink) {
  28. if (word < 0x80) {
  29. sink->Put(static_cast<char>(word));
  30. } else if (word < kUtf8TwoByteLimit) {
  31. sink->Put(static_cast<char>(kUtf8TwoBytePrefix + (word >> 6)));
  32. sink->Put(static_cast<char>(kUtf8MoreBytesPrefix +
  33. (word & kUtf8MoreBytesMask)));
  34. } else if (word < kUtf8EncodableEnd) {
  35. // We can only encode 65535 - 2048 values because of illegal UTF-8
  36. // characters, such as surrogate pairs in [0xD800, 0xDFFF].
  37. if (word >= kUtf8SurrogatePairStart) {
  38. // Shift the result to avoid the surrogate pair range.
  39. word += kUtf8SurrogatePairNum;
  40. }
  41. sink->Put(static_cast<char>(kUtf8ThreeBytePrefix + (word >> 12)));
  42. sink->Put(static_cast<char>(kUtf8MoreBytesPrefix +
  43. ((word >> 6) & kUtf8MoreBytesMask)));
  44. sink->Put(static_cast<char>(kUtf8MoreBytesPrefix +
  45. (word & kUtf8MoreBytesMask)));
  46. } else {
  47. return false;
  48. }
  49. return true;
  50. }
  51. } // namespace webgl_loader
  52. #endif // WEBGL_LOADER_UTF8_H_