Utf8.hx 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. /*
  2. * Copyright (C)2005-2019 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. package neko;
  /**
      UTF-8 string helpers for the Neko target.

      Every operation is delegated to a `utf8_*` primitive loaded from the
      Neko "std" library. Static helpers hand the primitive the `__s` field
      of the Haxe `String` and wrap any string result back with `new String`.

      An instance of this class is an incremental builder: code points are
      appended with `addChar` and the accumulated text is read back with
      `toString`.
  **/
  @:coreApi
  class Utf8 {
      // Primitive bindings, resolved from the "std" library when the class is
      // initialised. The last argument of `neko.Lib.load` is the arity.
      static var utf8_buf_alloc = neko.Lib.load("std", "utf8_buf_alloc", 1);
      static var utf8_buf_add = neko.Lib.load("std", "utf8_buf_add", 2);
      static var utf8_buf_content = neko.Lib.load("std", "utf8_buf_content", 1);
      // Loaded but not referenced by any method of this class.
      static var utf8_buf_length = neko.Lib.load("std", "utf8_buf_length", 1);
      static var utf8_iter = neko.Lib.load("std", "utf8_iter", 2);
      static var utf8_get = neko.Lib.load("std", "utf8_get", 2);
      static var utf8_validate = neko.Lib.load("std", "utf8_validate", 1);
      static var utf8_length = neko.Lib.load("std", "utf8_length", 1);
      static var utf8_compare = neko.Lib.load("std", "utf8_compare", 2);
      static var utf8_sub = neko.Lib.load("std", "utf8_sub", 3);

      // Handle returned by `utf8_buf_alloc`; owned by this builder instance.
      var __b:Dynamic;

      /**
          Creates a builder.

          @param size Value forwarded to `utf8_buf_alloc`; when omitted
                      (`null`), `1` is used instead.
      **/
      public function new(?size:Int):Void {
          __b = utf8_buf_alloc(size == null ? 1 : size);
      }

      /**
          Appends `c` to the builder through `utf8_buf_add`.
      **/
      public function addChar(c:Int):Void {
          utf8_buf_add(__b, c);
      }

      /**
          Returns the builder content (`utf8_buf_content`) as a Haxe `String`.
      **/
      public function toString():String {
          var content = utf8_buf_content(__b);
          return new String(content);
      }

      /**
          Re-encodes `s` by feeding each of its bytes, taken as a character
          code, into a fresh UTF-8 buffer and returning the buffer content.
      **/
      public static function encode(s:String):String {
          var raw = untyped s.__s;
          var byteCount:Int = untyped __dollar__ssize(raw);
          var out:Dynamic = utf8_buf_alloc(byteCount);
          for (pos in 0...byteCount)
              utf8_buf_add(out, untyped __dollar__sget(raw, pos));
          return new String(utf8_buf_content(out));
      }

      /**
          Converts `s` to a string holding one byte per character reported by
          `utf8_iter`.

          - Code 0xFEFF (BOM) is dropped.
          - Code 8364 (euro symbol) is stored as 164.
          - Any other code above 255 throws the string
            `"Utf8::decode invalid character (<code>)"`.
      **/
      public static function decode(s:String):String {
          var raw = untyped s.__s;
          var capacity = untyped __dollar__ssize(raw);
          // Sized by the input byte count; trimmed to the written length below.
          var out = untyped __dollar__smake(capacity);
          var written = 0;
          utf8_iter(raw, function(code) {
              // BOM: skip without writing anything.
              if (code == 0xFEFF)
                  return;
              if (code == 8364) // euro symbol
                  code = 164;
              else if (code > 255)
                  throw "Utf8::decode invalid character (" + code + ")";
              untyped __dollar__sset(out, written, code);
              written += 1;
          });
          return new String(untyped __dollar__ssub(out, 0, written));
      }

      /**
          Passes `s` and the callback `chars` to `utf8_iter`.
      **/
      public static function iter(s:String, chars:Int->Void):Void {
          var raw = untyped s.__s;
          utf8_iter(raw, chars);
      }

      /**
          Returns the result of `utf8_get` for `s` at `index`.
      **/
      public static function charCodeAt(s:String, index:Int):Int {
          var raw = untyped s.__s;
          return utf8_get(raw, index);
      }

      /**
          Returns the result of `utf8_validate` for `s`.
      **/
      public static function validate(s:String):Bool {
          var raw = untyped s.__s;
          return utf8_validate(raw);
      }

      /**
          Returns the result of `utf8_length` for `s`.
      **/
      public static function length(s:String):Int {
          var raw = untyped s.__s;
          return utf8_length(raw);
      }

      /**
          Returns the result of `utf8_compare` for `a` and `b`.
      **/
      public static function compare(a:String, b:String):Int {
          var left = untyped a.__s;
          var right = untyped b.__s;
          return utf8_compare(left, right);
      }

      /**
          Returns the result of `utf8_sub` for `s`, `pos` and `len`, wrapped
          as a Haxe `String`.
      **/
      public static function sub(s:String, pos:Int, len:Int):String {
          var slice = utf8_sub(untyped s.__s, pos, len);
          return new String(slice);
      }
  }