string_unicode.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. package goja
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/dop251/goja/parser"
  6. "golang.org/x/text/cases"
  7. "golang.org/x/text/language"
  8. "io"
  9. "math"
  10. "reflect"
  11. "strings"
  12. "unicode/utf16"
  13. "unicode/utf8"
  14. )
  // unicodeString is a string value held as a slice of 16-bit code units.
  // Other code in this file decodes it with the unicode/utf16 package.
  type unicodeString []uint16
  16. type unicodeRuneReader struct {
  17. s unicodeString
  18. pos int
  19. }
  // runeReaderReplace decorates another io.RuneReader; its ReadRune method
  // turns InvalidRuneError results into utf8.RuneError runes.
  type runeReaderReplace struct {
  	// wrapped is the reader that actually produces the runes.
  	wrapped io.RuneReader
  }
  // InvalidRuneError is the sentinel returned by unicodeRuneReader.ReadRune
  // when it encounters a surrogate code unit that is not part of a valid pair.
  var InvalidRuneError = errors.New("Invalid rune")
  26. func (rr runeReaderReplace) ReadRune() (r rune, size int, err error) {
  27. r, size, err = rr.wrapped.ReadRune()
  28. if err == InvalidRuneError {
  29. err = nil
  30. r = utf8.RuneError
  31. }
  32. return
  33. }
  34. func (rr *unicodeRuneReader) ReadRune() (r rune, size int, err error) {
  35. if rr.pos < len(rr.s) {
  36. r = rune(rr.s[rr.pos])
  37. if r != utf8.RuneError {
  38. if utf16.IsSurrogate(r) {
  39. if rr.pos+1 < len(rr.s) {
  40. r1 := utf16.DecodeRune(r, rune(rr.s[rr.pos+1]))
  41. size++
  42. rr.pos++
  43. if r1 == utf8.RuneError {
  44. err = InvalidRuneError
  45. } else {
  46. r = r1
  47. }
  48. } else {
  49. err = InvalidRuneError
  50. }
  51. }
  52. }
  53. size++
  54. rr.pos++
  55. } else {
  56. err = io.EOF
  57. }
  58. return
  59. }
  60. func (s unicodeString) reader(start int) io.RuneReader {
  61. return &unicodeRuneReader{
  62. s: s[start:],
  63. }
  64. }
  65. func (s unicodeString) ToInteger() int64 {
  66. return 0
  67. }
  68. func (s unicodeString) ToString() valueString {
  69. return s
  70. }
  71. func (s unicodeString) ToFloat() float64 {
  72. return math.NaN()
  73. }
  74. func (s unicodeString) ToBoolean() bool {
  75. return len(s) > 0
  76. }
  77. func (s unicodeString) toTrimmedUTF8() string {
  78. if len(s) == 0 {
  79. return ""
  80. }
  81. return strings.Trim(s.String(), parser.WhitespaceChars)
  82. }
  83. func (s unicodeString) ToNumber() Value {
  84. return asciiString(s.toTrimmedUTF8()).ToNumber()
  85. }
  86. func (s unicodeString) ToObject(r *Runtime) *Object {
  87. return r._newString(s)
  88. }
  89. func (s unicodeString) equals(other unicodeString) bool {
  90. if len(s) != len(other) {
  91. return false
  92. }
  93. for i, r := range s {
  94. if r != other[i] {
  95. return false
  96. }
  97. }
  98. return true
  99. }
  100. func (s unicodeString) SameAs(other Value) bool {
  101. if otherStr, ok := other.(unicodeString); ok {
  102. return s.equals(otherStr)
  103. }
  104. return false
  105. }
  106. func (s unicodeString) Equals(other Value) bool {
  107. if s.SameAs(other) {
  108. return true
  109. }
  110. if _, ok := other.assertInt(); ok {
  111. return false
  112. }
  113. if _, ok := other.assertFloat(); ok {
  114. return false
  115. }
  116. if _, ok := other.(valueBool); ok {
  117. return false
  118. }
  119. if o, ok := other.(*Object); ok {
  120. return s.Equals(o.self.toPrimitive())
  121. }
  122. return false
  123. }
  124. func (s unicodeString) StrictEquals(other Value) bool {
  125. return s.SameAs(other)
  126. }
  127. func (s unicodeString) assertInt() (int64, bool) {
  128. return 0, false
  129. }
  130. func (s unicodeString) assertFloat() (float64, bool) {
  131. return 0, false
  132. }
  133. func (s unicodeString) assertString() (valueString, bool) {
  134. return s, true
  135. }
  136. func (s unicodeString) baseObject(r *Runtime) *Object {
  137. ss := r.stringSingleton
  138. ss.value = s
  139. ss.setLength()
  140. return ss.val
  141. }
  142. func (s unicodeString) charAt(idx int64) rune {
  143. return rune(s[idx])
  144. }
  145. func (s unicodeString) length() int64 {
  146. return int64(len(s))
  147. }
  148. func (s unicodeString) concat(other valueString) valueString {
  149. switch other := other.(type) {
  150. case unicodeString:
  151. return unicodeString(append(s, other...))
  152. case asciiString:
  153. b := make([]uint16, len(s)+len(other))
  154. copy(b, s)
  155. b1 := b[len(s):]
  156. for i := 0; i < len(other); i++ {
  157. b1[i] = uint16(other[i])
  158. }
  159. return unicodeString(b)
  160. default:
  161. panic(fmt.Errorf("Unknown string type: %T", other))
  162. }
  163. }
  164. func (s unicodeString) substring(start, end int64) valueString {
  165. ss := s[start:end]
  166. for _, c := range ss {
  167. if c >= utf8.RuneSelf {
  168. return unicodeString(ss)
  169. }
  170. }
  171. as := make([]byte, end-start)
  172. for i, c := range ss {
  173. as[i] = byte(c)
  174. }
  175. return asciiString(as)
  176. }
  177. func (s unicodeString) String() string {
  178. return string(utf16.Decode(s))
  179. }
  180. func (s unicodeString) compareTo(other valueString) int {
  181. return strings.Compare(s.String(), other.String())
  182. }
  183. func (s unicodeString) index(substr valueString, start int64) int64 {
  184. var ss []uint16
  185. switch substr := substr.(type) {
  186. case unicodeString:
  187. ss = substr
  188. case asciiString:
  189. ss = make([]uint16, len(substr))
  190. for i := 0; i < len(substr); i++ {
  191. ss[i] = uint16(substr[i])
  192. }
  193. default:
  194. panic(fmt.Errorf("Unknown string type: %T", substr))
  195. }
  196. // TODO: optimise
  197. end := int64(len(s) - len(ss))
  198. for start <= end {
  199. for i := int64(0); i < int64(len(ss)); i++ {
  200. if s[start+i] != ss[i] {
  201. goto nomatch
  202. }
  203. }
  204. return start
  205. nomatch:
  206. start++
  207. }
  208. return -1
  209. }
  210. func (s unicodeString) lastIndex(substr valueString, start int64) int64 {
  211. var ss []uint16
  212. switch substr := substr.(type) {
  213. case unicodeString:
  214. ss = substr
  215. case asciiString:
  216. ss = make([]uint16, len(substr))
  217. for i := 0; i < len(substr); i++ {
  218. ss[i] = uint16(substr[i])
  219. }
  220. default:
  221. panic(fmt.Errorf("Unknown string type: %T", substr))
  222. }
  223. if maxStart := int64(len(s) - len(ss)); start > maxStart {
  224. start = maxStart
  225. }
  226. // TODO: optimise
  227. for start >= 0 {
  228. for i := int64(0); i < int64(len(ss)); i++ {
  229. if s[start+i] != ss[i] {
  230. goto nomatch
  231. }
  232. }
  233. return start
  234. nomatch:
  235. start--
  236. }
  237. return -1
  238. }
  239. func (s unicodeString) toLower() valueString {
  240. caser := cases.Lower(language.Und)
  241. r := []rune(caser.String(s.String()))
  242. // Workaround
  243. ascii := true
  244. for i := 0; i < len(r)-1; i++ {
  245. if (i == 0 || r[i-1] != 0x3b1) && r[i] == 0x345 && r[i+1] == 0x3c2 {
  246. i++
  247. r[i] = 0x3c3
  248. }
  249. if r[i] >= utf8.RuneSelf {
  250. ascii = false
  251. }
  252. }
  253. if ascii {
  254. ascii = r[len(r)-1] < utf8.RuneSelf
  255. }
  256. if ascii {
  257. return asciiString(r)
  258. }
  259. return unicodeString(utf16.Encode(r))
  260. }
  261. func (s unicodeString) toUpper() valueString {
  262. caser := cases.Upper(language.Und)
  263. return newStringValue(caser.String(s.String()))
  264. }
  265. func (s unicodeString) Export() interface{} {
  266. return s.String()
  267. }
  268. func (s unicodeString) ExportType() reflect.Type {
  269. return reflectTypeString
  270. }