string_unicode.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. package goja
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/dop251/goja/parser"
  6. "golang.org/x/text/cases"
  7. "golang.org/x/text/language"
  8. "hash"
  9. "io"
  10. "math"
  11. "reflect"
  12. "regexp"
  13. "strings"
  14. "unicode/utf16"
  15. "unicode/utf8"
  16. "unsafe"
  17. )
  18. type unicodeString []uint16
  19. type unicodeRuneReader struct {
  20. s unicodeString
  21. pos int
  22. }
  23. type runeReaderReplace struct {
  24. wrapped io.RuneReader
  25. }
  26. var (
  27. InvalidRuneError = errors.New("Invalid rune")
  28. )
  29. var (
  30. unicodeTrimRegexp = regexp.MustCompile("^[" + parser.WhitespaceChars + "]*(.*?)[" + parser.WhitespaceChars + "]*$")
  31. )
  32. func (rr runeReaderReplace) ReadRune() (r rune, size int, err error) {
  33. r, size, err = rr.wrapped.ReadRune()
  34. if err == InvalidRuneError {
  35. err = nil
  36. r = utf8.RuneError
  37. }
  38. return
  39. }
  40. func (rr *unicodeRuneReader) ReadRune() (r rune, size int, err error) {
  41. if rr.pos < len(rr.s) {
  42. r = rune(rr.s[rr.pos])
  43. if r != utf8.RuneError {
  44. if utf16.IsSurrogate(r) {
  45. if rr.pos+1 < len(rr.s) {
  46. r1 := utf16.DecodeRune(r, rune(rr.s[rr.pos+1]))
  47. size++
  48. rr.pos++
  49. if r1 == utf8.RuneError {
  50. err = InvalidRuneError
  51. } else {
  52. r = r1
  53. }
  54. } else {
  55. err = InvalidRuneError
  56. }
  57. }
  58. }
  59. size++
  60. rr.pos++
  61. } else {
  62. err = io.EOF
  63. }
  64. return
  65. }
  66. func (s unicodeString) reader(start int) io.RuneReader {
  67. return &unicodeRuneReader{
  68. s: s[start:],
  69. }
  70. }
  71. func (s unicodeString) ToInteger() int64 {
  72. return 0
  73. }
  74. func (s unicodeString) toString() valueString {
  75. return s
  76. }
  77. func (s unicodeString) ToPrimitiveString() Value {
  78. return s
  79. }
  80. func (s unicodeString) ToFloat() float64 {
  81. return math.NaN()
  82. }
  83. func (s unicodeString) ToBoolean() bool {
  84. return len(s) > 0
  85. }
  86. func (s unicodeString) toTrimmedUTF8() string {
  87. if len(s) == 0 {
  88. return ""
  89. }
  90. return unicodeTrimRegexp.FindStringSubmatch(s.String())[1]
  91. }
  92. func (s unicodeString) ToNumber() Value {
  93. return asciiString(s.toTrimmedUTF8()).ToNumber()
  94. }
  95. func (s unicodeString) ToObject(r *Runtime) *Object {
  96. return r._newString(s, r.global.StringPrototype)
  97. }
  98. func (s unicodeString) equals(other unicodeString) bool {
  99. if len(s) != len(other) {
  100. return false
  101. }
  102. for i, r := range s {
  103. if r != other[i] {
  104. return false
  105. }
  106. }
  107. return true
  108. }
  109. func (s unicodeString) SameAs(other Value) bool {
  110. if otherStr, ok := other.(unicodeString); ok {
  111. return s.equals(otherStr)
  112. }
  113. return false
  114. }
  115. func (s unicodeString) Equals(other Value) bool {
  116. if s.SameAs(other) {
  117. return true
  118. }
  119. if o, ok := other.(*Object); ok {
  120. return s.Equals(o.self.toPrimitive())
  121. }
  122. return false
  123. }
  124. func (s unicodeString) StrictEquals(other Value) bool {
  125. return s.SameAs(other)
  126. }
  127. func (s unicodeString) baseObject(r *Runtime) *Object {
  128. ss := r.stringSingleton
  129. ss.value = s
  130. ss.setLength()
  131. return ss.val
  132. }
  133. func (s unicodeString) charAt(idx int64) rune {
  134. return rune(s[idx])
  135. }
  136. func (s unicodeString) length() int64 {
  137. return int64(len(s))
  138. }
  139. func (s unicodeString) concat(other valueString) valueString {
  140. switch other := other.(type) {
  141. case unicodeString:
  142. return unicodeString(append(s, other...))
  143. case asciiString:
  144. b := make([]uint16, len(s)+len(other))
  145. copy(b, s)
  146. b1 := b[len(s):]
  147. for i := 0; i < len(other); i++ {
  148. b1[i] = uint16(other[i])
  149. }
  150. return unicodeString(b)
  151. default:
  152. panic(fmt.Errorf("Unknown string type: %T", other))
  153. }
  154. }
  155. func (s unicodeString) substring(start, end int64) valueString {
  156. ss := s[start:end]
  157. for _, c := range ss {
  158. if c >= utf8.RuneSelf {
  159. return unicodeString(ss)
  160. }
  161. }
  162. as := make([]byte, end-start)
  163. for i, c := range ss {
  164. as[i] = byte(c)
  165. }
  166. return asciiString(as)
  167. }
  168. func (s unicodeString) String() string {
  169. return string(utf16.Decode(s))
  170. }
  171. func (s unicodeString) compareTo(other valueString) int {
  172. return strings.Compare(s.String(), other.String())
  173. }
  174. func (s unicodeString) index(substr valueString, start int64) int64 {
  175. var ss []uint16
  176. switch substr := substr.(type) {
  177. case unicodeString:
  178. ss = substr
  179. case asciiString:
  180. ss = make([]uint16, len(substr))
  181. for i := 0; i < len(substr); i++ {
  182. ss[i] = uint16(substr[i])
  183. }
  184. default:
  185. panic(fmt.Errorf("Unknown string type: %T", substr))
  186. }
  187. // TODO: optimise
  188. end := int64(len(s) - len(ss))
  189. for start <= end {
  190. for i := int64(0); i < int64(len(ss)); i++ {
  191. if s[start+i] != ss[i] {
  192. goto nomatch
  193. }
  194. }
  195. return start
  196. nomatch:
  197. start++
  198. }
  199. return -1
  200. }
  201. func (s unicodeString) lastIndex(substr valueString, start int64) int64 {
  202. var ss []uint16
  203. switch substr := substr.(type) {
  204. case unicodeString:
  205. ss = substr
  206. case asciiString:
  207. ss = make([]uint16, len(substr))
  208. for i := 0; i < len(substr); i++ {
  209. ss[i] = uint16(substr[i])
  210. }
  211. default:
  212. panic(fmt.Errorf("Unknown string type: %T", substr))
  213. }
  214. if maxStart := int64(len(s) - len(ss)); start > maxStart {
  215. start = maxStart
  216. }
  217. // TODO: optimise
  218. for start >= 0 {
  219. for i := int64(0); i < int64(len(ss)); i++ {
  220. if s[start+i] != ss[i] {
  221. goto nomatch
  222. }
  223. }
  224. return start
  225. nomatch:
  226. start--
  227. }
  228. return -1
  229. }
  230. func (s unicodeString) toLower() valueString {
  231. caser := cases.Lower(language.Und)
  232. r := []rune(caser.String(s.String()))
  233. // Workaround
  234. ascii := true
  235. for i := 0; i < len(r)-1; i++ {
  236. if (i == 0 || r[i-1] != 0x3b1) && r[i] == 0x345 && r[i+1] == 0x3c2 {
  237. i++
  238. r[i] = 0x3c3
  239. }
  240. if r[i] >= utf8.RuneSelf {
  241. ascii = false
  242. }
  243. }
  244. if ascii {
  245. ascii = r[len(r)-1] < utf8.RuneSelf
  246. }
  247. if ascii {
  248. return asciiString(r)
  249. }
  250. return unicodeString(utf16.Encode(r))
  251. }
  252. func (s unicodeString) toUpper() valueString {
  253. caser := cases.Upper(language.Und)
  254. return newStringValue(caser.String(s.String()))
  255. }
  256. func (s unicodeString) Export() interface{} {
  257. return s.String()
  258. }
  259. func (s unicodeString) ExportType() reflect.Type {
  260. return reflectTypeString
  261. }
  262. func (s unicodeString) hash(hash hash.Hash64) uint64 {
  263. _, _ = hash.Write(*(*[]byte)(unsafe.Pointer(&s)))
  264. h := hash.Sum64()
  265. hash.Reset()
  266. return h
  267. }