Utf8.hx 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. /*
  2. * Copyright (c) 2005, The haXe Project Contributors
  3. * All rights reserved.
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. *
  7. * - Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * - Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
  17. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  18. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  19. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  20. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  21. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  22. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  23. * DAMAGE.
  24. */
  25. package neko;
  /**
   * UTF-8 helpers for the Neko target, implemented on top of the `utf8_*`
   * primitives loaded from the `std` library (see the static fields at the
   * end of the class).
   *
   * An instance wraps a native buffer to which characters are appended with
   * `addChar` and which is read back with `toString`. The static functions
   * work on existing strings and hand the underlying `__s` value of each
   * string to the primitives.
   */
  class Utf8 {

      // Opaque handle returned by `utf8_buf_alloc`. It is only stored here and
      // handed back to the `utf8_buf_*` primitives, so it is typed `Dynamic`
      // (`Void` denotes the absence of a value and cannot type a field that
      // holds one).
      var __b : Dynamic;

      /**
       * Allocates the native buffer.
       *
       * @param size value forwarded to `utf8_buf_alloc`; 1 is used when the
       *             argument is omitted (null).
       */
      public function new( ?size : Int ) {
          __b = utf8_buf_alloc(if( size == null ) 1 else size);
      }

      /**
       * Appends the character code `c` to the buffer through `utf8_buf_add`.
       */
      public function addChar( c : Int ) {
          utf8_buf_add(__b,c);
      }

      /**
       * Returns the current buffer content (`utf8_buf_content`) wrapped in a
       * new `String`.
       */
      public function toString() {
          return new String(utf8_buf_content(__b));
      }

      /**
       * Builds a UTF-8 string from `s` by adding every value read with
       * `__dollar__sget` (indices 0 to `__dollar__ssize(s) - 1`) to a fresh
       * buffer as one character.
       *
       * NOTE(review): presumably each value read is a single byte, i.e. the
       * input is treated as a one-byte-per-character string - confirm against
       * the Neko builtins documentation.
       */
      public static function encode( s : String ) : String {
          s = untyped s.__s;
          var sl = untyped __dollar__ssize(s);
          var buf = utf8_buf_alloc( sl );
          var i = 0;
          while( i < sl ) {
              utf8_buf_add(buf,untyped __dollar__sget(s,i));
              i += 1;
          }
          return new String( utf8_buf_content(buf) );
      }

      /**
       * Converts the UTF-8 string `s` into a string holding one value of at
       * most 255 per character.
       *
       * Character 8364 (the euro symbol) is stored as 164; any other character
       * above 255 makes the function throw the string
       * "Utf8::decode invalid character (<code>)".
       */
      public static function decode( s : String ) : String {
          s = untyped s.__s;
          var sl = untyped __dollar__ssize(s);
          // Allocated with the input size up front; trimmed to the number of
          // characters actually stored before returning.
          var ret = untyped __dollar__smake(sl);
          var i = 0;
          utf8_iter(s,function(c) {
              // euro symbol
              if( c == 8364 )
                  c = 164;
              else if( c > 255 )
                  throw "Utf8::decode invalid character ("+c+")";
              untyped __dollar__sset(ret,i,c);
              i += 1;
          });
          return new String( untyped __dollar__ssub(ret,0,i) );
      }

      /**
       * Passes `s` and the callback `chars` to `utf8_iter`; `decode` relies on
       * the same primitive invoking its callback with each character code.
       */
      public static function iter( s : String, chars : Int -> Void ) {
          utf8_iter(untyped s.__s,chars);
      }

      /**
       * Returns the result of `utf8_get` for `s` and `index`.
       */
      public static function charCodeAt( s : String, index : Int ) : Int {
          return utf8_get(untyped s.__s,index);
      }

      /**
       * Returns the result of `utf8_validate` for `s`.
       */
      public static function validate( s : String ) : Bool {
          return utf8_validate(untyped s.__s);
      }

      /**
       * Returns the result of `utf8_length` for `s`.
       */
      public static function length( s : String ) : Int {
          return utf8_length(untyped s.__s);
      }

      /**
       * Returns the result of `utf8_compare` for `a` and `b`.
       */
      public static function compare( a : String, b : String ) : Int {
          return utf8_compare(untyped a.__s,untyped b.__s);
      }

      /**
       * Returns a new `String` wrapping the result of `utf8_sub` for `s`,
       * `pos` and `len`.
       */
      public static function sub( s : String, pos : Int, len : Int ) : String {
          return new String(utf8_sub(untyped s.__s,pos,len));
      }

      // Primitives loaded from the `std` library. The last argument of each
      // `Lib.load` call equals the number of arguments the primitive is
      // called with in this class.
      static var utf8_buf_alloc = Lib.load("std","utf8_buf_alloc",1);
      static var utf8_buf_add = Lib.load("std","utf8_buf_add",2);
      static var utf8_buf_content = Lib.load("std","utf8_buf_content",1);
      // Loaded but not called anywhere in this class.
      static var utf8_buf_length = Lib.load("std","utf8_buf_length",1);
      static var utf8_iter = Lib.load("std","utf8_iter",2);
      static var utf8_get = Lib.load("std","utf8_get",2);
      static var utf8_validate = Lib.load("std","utf8_validate",1);
      static var utf8_length = Lib.load("std","utf8_length",1);
      static var utf8_compare = Lib.load("std","utf8_compare",2);
      static var utf8_sub = Lib.load("std","utf8_sub",3);
  }