// textWrapping.test.ts
  1. import { wrapText, parseTokens } from "../src/textWrapping";
  2. import type { FontString } from "../src/types";
  3. describe("Test wrapText", () => {
  4. // font is irrelevant as jsdom does not support FontFace API
  5. // `measureText` width is mocked to return `text.length` by `jest-canvas-mock`
  6. // https://github.com/hustcc/jest-canvas-mock/blob/master/src/classes/TextMetrics.js
  7. const font = "10px Cascadia, Segoe UI Emoji" as FontString;
  it("should wrap the text correctly when word length is exactly equal to max width", () => {
    // The measured width of "Excalidraw" is 100, i.e. exactly equal to the
    // max width, so the word has to stay intact on its own line.
    const wrapped = wrapText("Hello Excalidraw", font, 100);

    expect(wrapped).toEqual(`Hello\nExcalidraw`);
  });
  it("should return the text as is if max width is invalid", () => {
    const text = "Hello Excalidraw";
    const invalidWidths = [NaN, -1, Infinity];

    // for every unusable width the input is expected back unchanged
    for (const maxWidth of invalidWidths) {
      expect(wrapText(text, font, maxWidth)).toEqual(text);
    }
  });
  it("should show the text correctly when max width reached", () => {
    // only one character fits per line; the emoji must not be split
    const wrapped = wrapText("Hello😀", font, 10);

    expect(wrapped).toBe("H\ne\nl\nl\no\n😀");
  });
  it("should not wrap number when wrapping line", () => {
    const input = "don't wrap this number 99,100.99";

    // the formatted number moves to the next line as a whole
    expect(wrapText(input, font, 300)).toBe(
      "don't wrap this number\n99,100.99",
    );
  });
  it("should trim all trailing whitespaces", () => {
    // the line is only wide enough for the word itself
    const wrapped = wrapText("Hello ", font, 50);

    expect(wrapped).toBe("Hello");
  });
  it("should trim all but one trailing whitespaces", () => {
    // NOTE(review): the title implies several trailing spaces in the input;
    // confirm the literal below was not collapsed to a single space.
    const wrapped = wrapText("Hello ", font, 60);

    // one trailing space still fits on the line and is kept
    expect(wrapped).toBe("Hello ");
  });
  it("should keep preceding whitespaces and trim all trailing whitespaces", () => {
    const input = " Hello World";

    // leading whitespace stays on the first line; the whitespace at the
    // wrap point is dropped
    expect(wrapText(input, font, 90)).toBe(" Hello\nWorld");
  });
  // Title typo fixed ("kep" -> "keep"); input and expectation are unchanged.
  it("should keep some preceding whitespaces, trim trailing whitespaces, but keep those that fit in the trailing line", () => {
    const text = " Hello World ";
    const maxWidth = 90;

    const res = wrapText(text, font, maxWidth);

    // the whitespace at the wrap point is dropped, while the trailing space
    // after "World" still fits on the last line and is preserved
    expect(res).toBe(" Hello\nWorld ");
  });
  // Title reworded: the original "should trim keep those whitespace ..." was
  // garbled. Input and expectation are unchanged.
  it("should keep those whitespaces that fit in the trailing line", () => {
    const text = "Hello Wo rl d ";
    const maxWidth = 100;

    const res = wrapText(text, font, maxWidth);

    // the space at the wrap point is dropped; the final trailing space fits
    // on the last line and is preserved
    expect(res).toBe("Hello Wo\nrl d ");
  });
  it("should support multiple (multi-codepoint) emojis", () => {
    const emojis = "😀🗺🔥👩🏽‍🦰👨‍👩‍👧‍👦🇨🇿";

    // with a width of 1 every emoji lands on its own line, and multi-codepoint
    // sequences must stay in one piece
    const wrapped = wrapText(emojis, font, 1);

    expect(wrapped).toBe("😀\n🗺\n🔥\n👩🏽‍🦰\n👨‍👩‍👧‍👦\n🇨🇿");
  });
  it("should wrap the text correctly when text contains hyphen", () => {
    // a sentence mixing a trailing hyphen ("Wikimedia-") with inner hyphens
    const sentence =
      "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";
    expect(wrapText(sentence, font, 110)).toBe(
      `Wikipedia\nis hosted\nby\nWikimedia-\nFoundation,\na non-\nprofit\norganizatio\nn that also\nhosts a\nrange-of\nother\nprojects`,
    );

    // a hyphenated word whose first part exactly fills the line
    const hyphenated = "Hello thereusing-now";
    expect(wrapText(hyphenated, font, 100)).toEqual(
      "Hello\nthereusing\n-now",
    );
  });
  it("should support wrapping nested lists", () => {
    // The input describes itself: one tab, then two tabs, then eight spaces.
    // The eight-space run is spelled out with `repeat` so it cannot be
    // silently collapsed into a single space by an editor or formatter
    // (with one space the "tabs" / "- 8 spaces" break expected below could
    // not occur at width 100).
    const text = `\tA) one tab\t\t- two tabs${" ".repeat(8)}- 8 spaces`;

    const maxWidth = 100;
    const res = wrapText(text, font, maxWidth);
    expect(res).toBe(`\tA) one\ntab\t\t- two\ntabs\n- 8 spaces`);

    // narrower container: the indentation runs are dropped at the wraps and
    // the last word is broken
    const maxWidth2 = 50;
    const res2 = wrapText(text, font, maxWidth2);
    expect(res2).toBe(`\tA)\none\ntab\n- two\ntabs\n- 8\nspace\ns`);
  });
  87. describe("When text is CJK", () => {
  it("should break each CJK character when width is very small", () => {
    // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi"
    const cjk = "안녕하세요こんにちは世界コンニチハ你好";

    const wrapped = wrapText(cjk, font, 10);

    // one character per line
    expect(wrapped).toBe(
      "안\n녕\n하\n세\n요\nこ\nん\nに\nち\nは\n世\n界\nコ\nン\nニ\nチ\nハ\n你\n好",
    );
  });
  it("should break CJK text into longer segments when width is larger", () => {
    // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi"
    const cjk = "안녕하세요こんにちは世界コンニチハ你好";

    const wrapped = wrapText(cjk, font, 30);

    // measureText is mocked, so this is not precisely what would happen in prod
    expect(wrapped).toBe("안녕하\n세요こ\nんにち\nは世界\nコンニ\nチハ你\n好");
  });
  it("should handle a combination of CJK, latin, emojis and whitespaces", () => {
    // NOTE(review): the whitespace runs in the input and in the expectations
    // look collapsed to single spaces — verify against the upstream file.
    const mixed = `a醫 醫 bb 你好 world-i-😀🗺🔥`;

    // wide container
    expect(wrapText(mixed, font, 150)).toBe(`a醫 醫 bb 你\n好 world-i-😀🗺\n🔥`);

    // medium container
    expect(wrapText(mixed, font, 50)).toBe(
      `a醫 醫\nbb 你\n好\nworld\n-i-😀\n🗺🔥`,
    );

    // narrow container
    expect(wrapText(mixed, font, 30)).toBe(
      `a醫\n醫\nbb\n你好\nwor\nld-\ni-\n😀\n🗺\n🔥`,
    );
  });
  it("should break before and after a regular CJK character", () => {
    const input = "HelloたWorld";

    // too narrow for "Helloた": break on both sides of the CJK character
    expect(wrapText(input, font, 50)).toBe("Hello\nた\nWorld");

    // one more character fits: only the break after it remains
    expect(wrapText(input, font, 60)).toBe("Helloた\nWorld");
  });
  it("should break before and after certain CJK symbols", () => {
    const input = "こんにちは〃世界";

    // break in front of the symbol
    expect(wrapText(input, font, 50)).toBe("こんにちは\n〃世界");

    // break behind the symbol
    expect(wrapText(input, font, 60)).toBe("こんにちは〃\n世界");
  });
  it("should break after, not before for certain CJK pairs", () => {
    // the closing "。" stays attached to the preceding character
    const wrapped = wrapText("Hello た。", font, 70);

    expect(wrapped).toBe("Hello\nた。");
  });
  it("should break before, not after for certain CJK pairs", () => {
    // the opening "「" stays attached to the following character
    const wrapped = wrapText("Hello「たWorld」", font, 60);

    expect(wrapped).toBe("Hello\n「た\nWorld」");
  });
  it("should break after, not before for certain CJK character pairs", () => {
    // the closing "」" stays attached to the preceding character
    const wrapped = wrapText("「Helloた」World", font, 70);

    expect(wrapped).toBe("「Hello\nた」World");
  });
  it("should break Chinese sentences", () => {
    // one array entry per input line; entries are joined with hard newlines
    const text = [
      "中国你好!这是一个测试。",
      "我们来看看:人民币¥1234「很贵」",
      "(括号)、逗号,句号。空格 换行 全角符号…—",
    ].join("\n");

    // "\n" inside an entry is a soft wrap produced by wrapText
    const expectedAt80 = [
      "中国你好!这是一\n个测试。",
      "我们来看看:人民\n币¥1234「很\n贵」",
      "(括号)、逗号,\n句号。空格 换行\n全角符号…—",
    ].join("\n");
    expect(wrapText(text, font, 80)).toBe(expectedAt80);

    const expectedAt50 = [
      "中国你好!\n这是一个测\n试。",
      "我们来看\n看:人民币\n¥1234\n「很贵」",
      "(括号)、\n逗号,句\n号。空格\n换行 全角\n符号…—",
    ].join("\n");
    expect(wrapText(text, font, 50)).toBe(expectedAt50);
  });
  it("should break Japanese sentences", () => {
    // one array entry per input line; entries are joined with hard newlines
    const text = [
      "日本こんにちは!これはテストです。",
      "見てみましょう:円¥1234「高い」",
      "(括弧)、読点、句点。",
      "空白 改行 全角記号…ー",
    ].join("\n");

    // "\n" inside an entry is a soft wrap produced by wrapText
    const expectedAt80 = [
      "日本こんにちは!\nこれはテストで\nす。",
      "見てみましょ\nう:円¥1234\n「高い」",
      "(括弧)、読\n点、句点。",
      "空白 改行\n全角記号…ー",
    ].join("\n");
    expect(wrapText(text, font, 80)).toBe(expectedAt80);

    const expectedAt50 = [
      "日本こんに\nちは!これ\nはテストで\nす。",
      "見てみ\nましょう:\n円\n¥1234\n「高い」",
      "(括\n弧)、読\n点、句点。",
      "空白\n改行 全角\n記号…ー",
    ].join("\n");
    expect(wrapText(text, font, 50)).toBe(expectedAt50);
  });
  it("should break Korean sentences", () => {
    // one array entry per input line; entries are joined with hard newlines
    const text = [
      "한국 안녕하세요! 이것은 테스트입니다.",
      "우리 보자: 원화₩1234「비싸다」",
      "(괄호), 쉼표, 마침표.",
      "공백 줄바꿈 전각기호…—",
    ].join("\n");

    // "\n" inside an entry is a soft wrap produced by wrapText
    const expectedAt80 = [
      "한국 안녕하세\n요! 이것은 테\n스트입니다.",
      "우리 보자: 원\n화₩1234「비\n싸다」",
      "(괄호), 쉼\n표, 마침표.",
      "공백 줄바꿈 전\n각기호…—",
    ].join("\n");
    expect(wrapText(text, font, 80)).toBe(expectedAt80);

    const expectedAt60 = [
      "한국 안녕하\n세요! 이것\n은 테스트입\n니다.",
      "우리 보자:\n원화\n₩1234\n「비싸다」",
      "(괄호),\n쉼표, 마침\n표.",
      "공백 줄바꿈\n전각기호…—",
    ].join("\n");
    expect(wrapText(text, font, 60)).toBe(expectedAt60);
  });
  204. });
  // Cases for input that starts with whitespace (spaces and a tab).
  // The third title had a typo ("whe words break"), fixed to "when".
  describe("When text contains leading whitespaces", () => {
    const text = " \t Hello world";

    it("should preserve leading whitespaces", () => {
      const maxWidth = 120;
      const res = wrapText(text, font, maxWidth);
      // the leading whitespace fits next to the first word and is kept
      expect(res).toBe(" \t Hello\nworld");
    });

    it("should break and collapse leading whitespaces when line breaks", () => {
      const maxWidth = 60;
      const res = wrapText(text, font, maxWidth);
      // the whitespace-only first line collapses to an empty line
      expect(res).toBe("\nHello\nworld");
    });

    it("should break and collapse leading whitespaces when words break", () => {
      const maxWidth = 30;
      const res = wrapText(text, font, maxWidth);
      // same collapse, and the words themselves are split as well
      expect(res).toBe("\nHel\nlo\nwor\nld");
    });
  });
  describe("When text contains trailing whitespaces", () => {
    // a long word followed by a short token, shared by the three wrap cases
    const longWord = "Hippopotomonstrosesquippedaliophobia ??????";

    it("shouldn't add new lines for trailing spaces", () => {
      const input = "Hello whats up ";

      // the whole input fits, so it comes back unchanged
      expect(wrapText(input, font, 190)).toBe(input);
    });

    it("should ignore trailing whitespaces when line breaks", () => {
      expect(wrapText(longWord, font, 400)).toBe(
        "Hippopotomonstrosesquippedaliophobia\n??????",
      );
    });

    it("should not ignore trailing whitespaces when word breaks", () => {
      expect(wrapText(longWord, font, 300)).toBe(
        "Hippopotomonstrosesquippedalio\nphobia ??????",
      );
    });

    it("should ignore trailing whitespaces when word breaks and line breaks", () => {
      expect(wrapText(longWord, font, 180)).toBe(
        "Hippopotomonstrose\nsquippedaliophobia\n??????",
      );
    });
  });
  describe("When text doesn't contain new lines", () => {
    const text = "Hello whats up";

    // each entry: title suffix, container width, expected wrapped output
    const cases = [
      {
        desc: "break all words when width of each word is less than container width",
        width: 70,
        res: `Hello\nwhats\nup`,
      },
      {
        desc: "break all characters when width of each character is less than container width",
        width: 15,
        res: `H\ne\nl\nl\no\nw\nh\na\nt\ns\nu\np`,
      },
      {
        desc: "break words as per the width",
        width: 130,
        res: `Hello whats\nup`,
      },
      {
        desc: "fit the container",
        width: 240,
        res: "Hello whats up",
      },
      {
        desc: "push the word if its equal to max width",
        width: 50,
        res: `Hello\nwhats\nup`,
      },
    ];

    // register one test per table entry
    for (const { desc, width, res } of cases) {
      it(`should ${desc}`, () => {
        expect(wrapText(text, font, width)).toEqual(res);
      });
    }
  });
  describe("When text contain new lines", () => {
    const text = `Hello\n whats up`;

    // each entry: title suffix, container width, expected wrapped output
    const cases = [
      {
        desc: "break all words when width of each word is less than container width",
        width: 70,
        res: `Hello\n whats\nup`,
      },
      {
        desc: "break all characters when width of each character is less than container width",
        width: 15,
        res: `H\ne\nl\nl\no\n\nw\nh\na\nt\ns\nu\np`,
      },
      {
        desc: "break words as per the width",
        width: 140,
        res: `Hello\n whats up`,
      },
    ];

    // register one test per table entry
    for (const { desc, width, res } of cases) {
      it(`should respect new lines and ${desc}`, () => {
        expect(wrapText(text, font, width)).toEqual(res);
      });
    }
  });
  describe("When text is long", () => {
    // one very long word followed by three short ones
    const text = `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg break it now`;

    // each entry: title suffix, container width, expected wrapped output
    const cases = [
      {
        desc: "fit characters of long string as per container width",
        width: 160,
        res: `hellolongtextthi\nsiswhatsupwithyo\nuIamtypingggggan\ndtypinggg break\nit now`,
      },
      {
        desc: "fit characters of long string as per container width and break words as per the width",
        width: 120,
        res: `hellolongtex\ntthisiswhats\nupwithyouIam\ntypingggggan\ndtypinggg\nbreak it now`,
      },
      {
        desc: "fit the long text when container width is greater than text length and move the rest to next line",
        width: 590,
        res: `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg\nbreak it now`,
      },
    ];

    // register one test per table entry
    for (const { desc, width, res } of cases) {
      it(`should ${desc}`, () => {
        expect(wrapText(text, font, width)).toEqual(res);
      });
    }
  });
  334. describe("Test parseTokens", () => {
  it("should tokenize latin", () => {
    // plain sentence: words and the single spaces between them
    const plain = "Excalidraw is a virtual collaborative whiteboard";
    expect(parseTokens(plain)).toEqual([
      "Excalidraw",
      " ",
      "is",
      " ",
      "a",
      " ",
      "virtual",
      " ",
      "collaborative",
      " ",
      "whiteboard",
    ]);

    // hyphenated sentence: a hyphen ends its token ("non-", "range-")
    const hyphenated =
      "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";
    expect(parseTokens(hyphenated)).toEqual([
      "Wikipedia",
      " ",
      "is",
      " ",
      "hosted",
      " ",
      "by",
      " ",
      "Wikimedia-",
      " ",
      "Foundation,",
      " ",
      "a",
      " ",
      "non-",
      "profit",
      " ",
      "organization",
      " ",
      "that",
      " ",
      "also",
      " ",
      "hosts",
      " ",
      "a",
      " ",
      "range-",
      "of",
      " ",
      "other",
      " ",
      "projects",
    ]);
  });
  it("should not tokenize number", () => {
    // separators inside a number must not split it
    expect(parseTokens("99,100.99")).toEqual(["99,100.99"]);
  });
  it("should tokenize joined emojis", () => {
    const emojis = `😬🌍🗺🔥☂️👩🏽‍🦰👨‍👩‍👧‍👦👩🏾‍🔬🏳️‍🌈🧔‍♀️🧑‍🤝‍🧑🙅🏽‍♂️✅0️⃣🇨🇿🦅`;

    // every emoji, including multi-codepoint sequences, is a single token
    const expected = [
      "😬",
      "🌍",
      "🗺",
      "🔥",
      "☂️",
      "👩🏽‍🦰",
      "👨‍👩‍👧‍👦",
      "👩🏾‍🔬",
      "🏳️‍🌈",
      "🧔‍♀️",
      "🧑‍🤝‍🧑",
      "🙅🏽‍♂️",
      "✅",
      "0️⃣",
      "🇨🇿",
      "🦅",
    ];

    expect(parseTokens(emojis)).toEqual(expected);
  });
  it("should tokenize emojis mixed with mixed text", () => {
    const mixed = `😬a🌍b🗺c🔥d☂️《👩🏽‍🦰》👨‍👩‍👧‍👦德👩🏾‍🔬こ🏳️‍🌈안🧔‍♀️g🧑‍🤝‍🧑h🙅🏽‍♂️e✅f0️⃣g🇨🇿10🦅#hash`;

    const expected = [
      "😬",
      "a",
      "🌍",
      "b",
      "🗺",
      "c",
      "🔥",
      "d",
      "☂️",
      "《",
      "👩🏽‍🦰",
      "》",
      "👨‍👩‍👧‍👦",
      "德",
      "👩🏾‍🔬",
      "こ",
      "🏳️‍🌈",
      "안",
      "🧔‍♀️",
      "g",
      "🧑‍🤝‍🧑",
      "h",
      "🙅🏽‍♂️",
      "e",
      "✅",
      // accepted trade-off: keycaps are not split off (less common) so that
      // hashes and numbers are not broken (more common)
      "f0️⃣g",
      "🇨🇿",
      // the number is not broken, although it's by default matched by \p{Emoji}
      "10",
      "🦅",
      // the hash is not broken, although it's by default matched by \p{Emoji}
      "#hash",
    ];

    expect(parseTokens(mixed)).toEqual(expected);
  });
  it("should tokenize decomposed chars into their composed variants", () => {
    // The composed/decomposed distinction is invisible in source code and is
    // easily altered by editors, formatters or copy & paste. Both forms are
    // therefore derived explicitly instead of trusting the literal's bytes.
    const composed = ["č", "で", "ä", "ぴ", "έ", "다", "й", "한"].map((char) =>
      char.normalize("NFC"),
    );

    // each input character is in a decomposed form
    const text = composed.join("").normalize("NFD");

    expect(text.normalize("NFC").length).toEqual(8);
    expect(text).toEqual(text.normalize("NFD"));
    // guard: the input really differs from its composed form
    expect(text).not.toEqual(text.normalize("NFC"));

    const tokens = parseTokens(text);

    expect(tokens.length).toEqual(8);
    expect(tokens).toEqual(composed);
  });
  it("should tokenize artificial CJK", () => {
    const text = `《道德經》醫-醫こんにちは世界!안녕하세요세계;요』,다.다...원/달(((다)))[[1]]〚({((한))>)〛(「た」)た…[Hello] \t World?ニューヨーク・¥3700.55す。090-1234-5678¥1,000〜$5,000「素晴らしい!」〔重要〕#1:Taro君30%は、(たなばた)〰¥110±¥570で20℃〜9:30〜10:00【一番】`;

    // Token listing kept from the original as a comment (presumably the full
    // tokenization of the text above):
    // [
    // '《道', '德', '經》', '醫-',
    // '醫', 'こ', 'ん', 'に',
    // 'ち', 'は', '世', '界!',
    // '안', '녕', '하', '세',
    // '요', '세', '계;', '요』,',
    // '다.', '다...', '원/', '달',
    // '(((다)))', '[[1]]', '〚({((한))>)〛', '(「た」)',
    // 'た…', '[Hello]', ' ', '\t',
    // ' ', 'World?', 'ニ', 'ュ',
    // 'ー', 'ヨ', 'ー', 'ク・',
    // '¥3700.55', 'す。', '090-', '1234-',
    // '5678', '¥1,000〜', '$5,000', '「素',
    // '晴', 'ら', 'し', 'い!」',
    // '〔重', '要〕', '#', '1:',
    // 'Taro', '君', '30%', 'は、',
    // '(た', 'な', 'ば', 'た)',
    // '〰', '¥110±', '¥570', 'で',
    // '20℃〜', '9:30〜', '10:00', '【一',
    // '番】'
    // ]
    const tokens = parseTokens(text);

    // tokens that must be present, grouped as in the original assertions
    const expectedTokens = [
      // Latin
      "[[1]]",
      "[Hello]",
      "World?",
      "Taro",
      // Chinese
      "《道",
      "德",
      "經》",
      "醫-",
      "醫",
      // Japanese
      "こ",
      "ん",
      "に",
      "ち",
      "は",
      "世",
      "ク・",
      "界!",
      "た…",
      "す。",
      "ュ",
      "「素",
      "晴",
      "ら",
      "し",
      "い!」",
      "君",
      "は、",
      "(た",
      "な",
      "ば",
      "た)",
      "で",
      "【一",
      "番】",
      // Korean
      "안",
      "녕",
      "하",
      "세",
      "요",
      "세",
      "계;",
      "요』,",
      "다.",
      "다...",
      "원/",
      "달",
      "(((다)))",
      "〚({((한))>)〛",
      "(「た」)",
      // Numbers and units
      "¥3700.55",
      "090-",
      "1234-",
      "5678",
      "¥1,000〜",
      "$5,000",
      "1:",
      "30%",
      "¥110±",
      "20℃〜",
      "9:30〜",
      "10:00",
      // Punctuation and symbols
      " ",
      "\t",
      " ",
      "ニ",
      "ー",
      "ヨ",
      "〰",
      "#",
    ];

    for (const expectedToken of expectedTokens) {
      expect(tokens).toContain(expectedToken);
    }
  });
  560. });
  561. });