Browse Source

chore: simplify line-break regexes, separate text wrapping (#8715)

Marcel Mraz 10 months ago
parent
commit
2734e646ca

+ 1 - 1
packages/excalidraw/components/App.tsx

@@ -340,7 +340,6 @@ import {
   isValidTextContainer,
   isValidTextContainer,
   measureText,
   measureText,
   normalizeText,
   normalizeText,
-  wrapText,
 } from "../element/textElement";
 } from "../element/textElement";
 import {
 import {
   showHyperlinkTooltip,
   showHyperlinkTooltip,
@@ -461,6 +460,7 @@ import {
   vectorNormalize,
   vectorNormalize,
 } from "../../math";
 } from "../../math";
 import { cropElement } from "../element/cropElement";
 import { cropElement } from "../element/cropElement";
+import { wrapText } from "../element/textWrapping";
 
 
 const AppContext = React.createContext<AppClassProperties>(null!);
 const AppContext = React.createContext<AppClassProperties>(null!);
 const AppPropsContext = React.createContext<AppProps>(null!);
 const AppPropsContext = React.createContext<AppProps>(null!);

+ 1 - 1
packages/excalidraw/element/embeddable.ts

@@ -4,7 +4,7 @@ import type { ExcalidrawProps } from "../types";
 import { getFontString, updateActiveTool } from "../utils";
 import { getFontString, updateActiveTool } from "../utils";
 import { setCursorForShape } from "../cursor";
 import { setCursorForShape } from "../cursor";
 import { newTextElement } from "./newElement";
 import { newTextElement } from "./newElement";
-import { wrapText } from "./textElement";
+import { wrapText } from "./textWrapping";
 import { isIframeElement } from "./typeChecks";
 import { isIframeElement } from "./typeChecks";
 import type {
 import type {
   ExcalidrawElement,
   ExcalidrawElement,

+ 1 - 1
packages/excalidraw/element/newElement.ts

@@ -34,9 +34,9 @@ import { getResizedElementAbsoluteCoords } from "./bounds";
 import {
 import {
   measureText,
   measureText,
   normalizeText,
   normalizeText,
-  wrapText,
   getBoundTextMaxWidth,
   getBoundTextMaxWidth,
 } from "./textElement";
 } from "./textElement";
+import { wrapText } from "./textWrapping";
 import {
 import {
   DEFAULT_ELEMENT_PROPS,
   DEFAULT_ELEMENT_PROPS,
   DEFAULT_FONT_FAMILY,
   DEFAULT_FONT_FAMILY,

+ 1 - 1
packages/excalidraw/element/resizeElements.ts

@@ -47,10 +47,10 @@ import {
   handleBindTextResize,
   handleBindTextResize,
   getBoundTextMaxWidth,
   getBoundTextMaxWidth,
   getApproxMinLineHeight,
   getApproxMinLineHeight,
-  wrapText,
   measureText,
   measureText,
   getMinTextElementWidth,
   getMinTextElementWidth,
 } from "./textElement";
 } from "./textElement";
+import { wrapText } from "./textWrapping";
 import { LinearElementEditor } from "./linearElementEditor";
 import { LinearElementEditor } from "./linearElementEditor";
 import { isInGroup } from "../groups";
 import { isInGroup } from "../groups";
 import { mutateElbowArrow } from "./routing";
 import { mutateElbowArrow } from "./routing";

+ 2 - 669
packages/excalidraw/element/textElement.test.ts

@@ -1,4 +1,4 @@
-import { BOUND_TEXT_PADDING, FONT_FAMILY } from "../constants";
+import { FONT_FAMILY } from "../constants";
 import { getLineHeight } from "../fonts";
 import { getLineHeight } from "../fonts";
 import { API } from "../tests/helpers/api";
 import { API } from "../tests/helpers/api";
 import {
 import {
@@ -6,677 +6,10 @@ import {
   getContainerCoords,
   getContainerCoords,
   getBoundTextMaxWidth,
   getBoundTextMaxWidth,
   getBoundTextMaxHeight,
   getBoundTextMaxHeight,
-  wrapText,
   detectLineHeight,
   detectLineHeight,
   getLineHeightInPx,
   getLineHeightInPx,
-  parseTokens,
 } from "./textElement";
 } from "./textElement";
-import type { ExcalidrawTextElementWithContainer, FontString } from "./types";
-
-describe("Test wrapText", () => {
-  // font is irrelevant as jsdom does not support FontFace API
-  // `measureText` width is mocked to return `text.length` by `jest-canvas-mock`
-  // https://github.com/hustcc/jest-canvas-mock/blob/master/src/classes/TextMetrics.js
-  const font = "10px Cascadia, Segoe UI Emoji" as FontString;
-
-  it("should wrap the text correctly when word length is exactly equal to max width", () => {
-    const text = "Hello Excalidraw";
-    // Length of "Excalidraw" is 100 and exactly equal to max width
-    const res = wrapText(text, font, 100);
-    expect(res).toEqual(`Hello\nExcalidraw`);
-  });
-
-  it("should return the text as is if max width is invalid", () => {
-    const text = "Hello Excalidraw";
-    expect(wrapText(text, font, NaN)).toEqual(text);
-    expect(wrapText(text, font, -1)).toEqual(text);
-    expect(wrapText(text, font, Infinity)).toEqual(text);
-  });
-
-  it("should show the text correctly when max width reached", () => {
-    const text = "Hello😀";
-    const maxWidth = 10;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("H\ne\nl\nl\no\n😀");
-  });
-
-  it("should not wrap number when wrapping line", () => {
-    const text = "don't wrap this number 99,100.99";
-    const maxWidth = 300;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("don't wrap this number\n99,100.99");
-  });
-
-  it("should trim all trailing whitespaces", () => {
-    const text = "Hello     ";
-    const maxWidth = 50;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("Hello");
-  });
-
-  it("should trim all but one trailing whitespaces", () => {
-    const text = "Hello     ";
-    const maxWidth = 60;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("Hello ");
-  });
-
-  it("should keep preceding whitespaces and trim all trailing whitespaces", () => {
-    const text = "  Hello  World";
-    const maxWidth = 90;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("  Hello\nWorld");
-  });
-
-  it("should keep some preceding whitespaces, trim trailing whitespaces, but kep those that fit in the trailing line", () => {
-    const text = "   Hello  World            ";
-    const maxWidth = 90;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("   Hello\nWorld    ");
-  });
-
-  it("should trim keep those whitespace that fit in the trailing line", () => {
-    const text = "Hello   Wo rl  d                     ";
-    const maxWidth = 100;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("Hello   Wo\nrl  d     ");
-  });
-
-  it("should support multiple (multi-codepoint) emojis", () => {
-    const text = "😀🗺🔥👩🏽‍🦰👨‍👩‍👧‍👦🇨🇿";
-    const maxWidth = 1;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe("😀\n🗺\n🔥\n👩🏽‍🦰\n👨‍👩‍👧‍👦\n🇨🇿");
-  });
-
-  it("should wrap the text correctly when text contains hyphen", () => {
-    let text =
-      "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";
-    const res = wrapText(text, font, 110);
-    expect(res).toBe(
-      `Wikipedia\nis hosted\nby\nWikimedia-\nFoundation,\na non-\nprofit\norganizatio\nn that also\nhosts a\nrange-of\nother\nprojects`,
-    );
-
-    text = "Hello thereusing-now";
-    expect(wrapText(text, font, 100)).toEqual("Hello\nthereusing\n-now");
-  });
-
-  it("should support wrapping nested lists", () => {
-    const text = `\tA) one tab\t\t- two tabs        - 8 spaces`;
-
-    const maxWidth = 100;
-    const res = wrapText(text, font, maxWidth);
-    expect(res).toBe(`\tA) one\ntab\t\t- two\ntabs\n- 8 spaces`);
-
-    const maxWidth2 = 50;
-    const res2 = wrapText(text, font, maxWidth2);
-    expect(res2).toBe(`\tA)\none\ntab\n- two\ntabs\n- 8\nspace\ns`);
-  });
-
-  describe("When text is CJK", () => {
-    it("should break each CJK character when width is very small", () => {
-      // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi"
-      const text = "안녕하세요こんにちは世界コンニチハ你好";
-      const maxWidth = 10;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe(
-        "안\n녕\n하\n세\n요\nこ\nん\nに\nち\nは\n世\n界\nコ\nン\nニ\nチ\nハ\n你\n好",
-      );
-    });
-
-    it("should break CJK text into longer segments when width is larger", () => {
-      // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi"
-      const text = "안녕하세요こんにちは世界コンニチハ你好";
-      const maxWidth = 30;
-      const res = wrapText(text, font, maxWidth);
-
-      // measureText is mocked, so it's not precisely what would happen in prod
-      expect(res).toBe("안녕하\n세요こ\nんにち\nは世界\nコンニ\nチハ你\n好");
-    });
-
-    it("should handle a combination of CJK, latin, emojis and whitespaces", () => {
-      const text = `a醫 醫      bb  你好  world-i-😀🗺🔥`;
-
-      const maxWidth = 150;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe(`a醫 醫      bb  你\n好  world-i-😀🗺\n🔥`);
-
-      const maxWidth2 = 50;
-      const res2 = wrapText(text, font, maxWidth2);
-      expect(res2).toBe(`a醫 醫\nbb  你\n好\nworld\n-i-😀\n🗺🔥`);
-
-      const maxWidth3 = 30;
-      const res3 = wrapText(text, font, maxWidth3);
-      expect(res3).toBe(`a醫\n醫\nbb\n你好\nwor\nld-\ni-\n😀\n🗺\n🔥`);
-    });
-
-    it("should break before and after a regular CJK character", () => {
-      const text = "HelloたWorld";
-      const maxWidth1 = 50;
-      const res1 = wrapText(text, font, maxWidth1);
-      expect(res1).toBe("Hello\nた\nWorld");
-
-      const maxWidth2 = 60;
-      const res2 = wrapText(text, font, maxWidth2);
-      expect(res2).toBe("Helloた\nWorld");
-    });
-
-    it("should break before and after certain CJK symbols", () => {
-      const text = "こんにちは〃世界";
-      const maxWidth1 = 50;
-      const res1 = wrapText(text, font, maxWidth1);
-      expect(res1).toBe("こんにちは\n〃世界");
-
-      const maxWidth2 = 60;
-      const res2 = wrapText(text, font, maxWidth2);
-      expect(res2).toBe("こんにちは〃\n世界");
-    });
-
-    it("should break after, not before for certain CJK pairs", () => {
-      const text = "Hello た。";
-      const maxWidth = 70;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("Hello\nた。");
-    });
-
-    it("should break before, not after for certain CJK pairs", () => {
-      const text = "Hello「たWorld」";
-      const maxWidth = 60;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("Hello\n「た\nWorld」");
-    });
-
-    it("should break after, not before for certain CJK character pairs", () => {
-      const text = "「Helloた」World";
-      const maxWidth = 70;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("「Hello\nた」World");
-    });
-
-    it("should break Chinese sentences", () => {
-      const text = `中国你好!这是一个测试。
-我们来看看:人民币¥1234「很贵」
-(括号)、逗号,句号。空格 换行 全角符号…—`;
-
-      const maxWidth1 = 80;
-      const res1 = wrapText(text, font, maxWidth1);
-      expect(res1).toBe(`中国你好!这是一\n个测试。
-我们来看看:人民\n币¥1234「很\n贵」
-(括号)、逗号,\n句号。空格 换行\n全角符号…—`);
-
-      const maxWidth2 = 50;
-      const res2 = wrapText(text, font, maxWidth2);
-      expect(res2).toBe(`中国你好!\n这是一个测\n试。
-我们来看\n看:人民币\n¥1234\n「很贵」
-(括号)、\n逗号,句\n号。空格\n换行 全角\n符号…—`);
-    });
-  });
-
-  it("should break Japanese sentences", () => {
-    const text = `日本こんにちは!これはテストです。
-  見てみましょう:円¥1234「高い」
-  (括弧)、読点、句点。
-  空白 改行 全角記号…ー`;
-
-    const maxWidth1 = 80;
-    const res1 = wrapText(text, font, maxWidth1);
-    expect(res1).toBe(`日本こんにちは!\nこれはテストで\nす。
-  見てみましょ\nう:円¥1234\n「高い」
-  (括弧)、読\n点、句点。
-  空白 改行\n全角記号…ー`);
-
-    const maxWidth2 = 50;
-    const res2 = wrapText(text, font, maxWidth2);
-    expect(res2).toBe(`日本こんに\nちは!これ\nはテストで\nす。
-  見てみ\nましょう:\n円\n¥1234\n「高い」
-  (括\n弧)、読\n点、句点。
-  空白\n改行 全角\n記号…ー`);
-  });
-
-  it("should break Korean sentences", () => {
-    const text = `한국 안녕하세요! 이것은 테스트입니다.
-우리 보자: 원화₩1234「비싸다」
-(괄호), 쉼표, 마침표.
-공백 줄바꿈 전각기호…—`;
-
-    const maxWidth1 = 80;
-    const res1 = wrapText(text, font, maxWidth1);
-    expect(res1).toBe(`한국 안녕하세\n요! 이것은 테\n스트입니다.
-우리 보자: 원\n화₩1234「비\n싸다」
-(괄호), 쉼\n표, 마침표.
-공백 줄바꿈 전\n각기호…—`);
-
-    const maxWidth2 = 60;
-    const res2 = wrapText(text, font, maxWidth2);
-    expect(res2).toBe(`한국 안녕하\n세요! 이것\n은 테스트입\n니다.
-우리 보자:\n원화\n₩1234\n「비싸다」
-(괄호),\n쉼표, 마침\n표.
-공백 줄바꿈\n전각기호…—`);
-  });
-
-  describe("When text contains leading whitespaces", () => {
-    const text = "  \t   Hello world";
-
-    it("should preserve leading whitespaces", () => {
-      const maxWidth = 120;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("  \t   Hello\nworld");
-    });
-
-    it("should break and collapse leading whitespaces when line breaks", () => {
-      const maxWidth = 60;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("\nHello\nworld");
-    });
-
-    it("should break and collapse leading whitespaces whe words break", () => {
-      const maxWidth = 30;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("\nHel\nlo\nwor\nld");
-    });
-  });
-
-  describe("When text contains trailing whitespaces", () => {
-    it("shouldn't add new lines for trailing spaces", () => {
-      const text = "Hello whats up     ";
-      const maxWidth = 200 - BOUND_TEXT_PADDING * 2;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe(text);
-    });
-
-    it("should ignore trailing whitespaces when line breaks", () => {
-      const text = "Hippopotomonstrosesquippedaliophobia        ??????";
-      const maxWidth = 400;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("Hippopotomonstrosesquippedaliophobia\n??????");
-    });
-
-    it("should not ignore trailing whitespaces when word breaks", () => {
-      const text = "Hippopotomonstrosesquippedaliophobia        ??????";
-      const maxWidth = 300;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("Hippopotomonstrosesquippedalio\nphobia        ??????");
-    });
-
-    it("should ignore trailing whitespaces when word breaks and line breaks", () => {
-      const text = "Hippopotomonstrosesquippedaliophobia        ??????";
-      const maxWidth = 180;
-      const res = wrapText(text, font, maxWidth);
-      expect(res).toBe("Hippopotomonstrose\nsquippedaliophobia\n??????");
-    });
-  });
-
-  describe("When text doesn't contain new lines", () => {
-    const text = "Hello whats up";
-
-    [
-      {
-        desc: "break all words when width of each word is less than container width",
-        width: 80,
-        res: `Hello\nwhats\nup`,
-      },
-      {
-        desc: "break all characters when width of each character is less than container width",
-        width: 25,
-        res: `H
-e
-l
-l
-o
-w
-h
-a
-t
-s
-u
-p`,
-      },
-      {
-        desc: "break words as per the width",
-
-        width: 140,
-        res: `Hello whats\nup`,
-      },
-      {
-        desc: "fit the container",
-
-        width: 250,
-        res: "Hello whats up",
-      },
-      {
-        desc: "should push the word if its equal to max width",
-        width: 60,
-        res: `Hello
-whats
-up`,
-      },
-    ].forEach((data) => {
-      it(`should ${data.desc}`, () => {
-        const res = wrapText(text, font, data.width - BOUND_TEXT_PADDING * 2);
-        expect(res).toEqual(data.res);
-      });
-    });
-  });
-
-  describe("When text contain new lines", () => {
-    const text = `Hello
-whats up`;
-    [
-      {
-        desc: "break all words when width of each word is less than container width",
-        width: 80,
-        res: `Hello\nwhats\nup`,
-      },
-      {
-        desc: "break all characters when width of each character is less than container width",
-        width: 25,
-        res: `H
-e
-l
-l
-o
-w
-h
-a
-t
-s
-u
-p`,
-      },
-      {
-        desc: "break words as per the width",
-
-        width: 150,
-        res: `Hello
-whats up`,
-      },
-      {
-        desc: "fit the container",
-
-        width: 250,
-        res: `Hello
-whats up`,
-      },
-    ].forEach((data) => {
-      it(`should respect new lines and ${data.desc}`, () => {
-        const res = wrapText(text, font, data.width - BOUND_TEXT_PADDING * 2);
-        expect(res).toEqual(data.res);
-      });
-    });
-  });
-
-  describe("When text is long", () => {
-    const text = `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg break it now`;
-    [
-      {
-        desc: "fit characters of long string as per container width",
-        width: 170,
-        res: `hellolongtextthi\nsiswhatsupwithyo\nuIamtypingggggan\ndtypinggg break\nit now`,
-      },
-      {
-        desc: "fit characters of long string as per container width and break words as per the width",
-
-        width: 130,
-        res: `hellolongtex
-tthisiswhats
-upwithyouIam
-typingggggan
-dtypinggg
-break it now`,
-      },
-      {
-        desc: "fit the long text when container width is greater than text length and move the rest to next line",
-
-        width: 600,
-        res: `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg\nbreak it now`,
-      },
-    ].forEach((data) => {
-      it(`should ${data.desc}`, () => {
-        const res = wrapText(text, font, data.width - BOUND_TEXT_PADDING * 2);
-        expect(res).toEqual(data.res);
-      });
-    });
-  });
-
-  describe("Test parseTokens", () => {
-    it("should tokenize latin", () => {
-      let text = "Excalidraw is a virtual collaborative whiteboard";
-
-      expect(parseTokens(text)).toEqual([
-        "Excalidraw",
-        " ",
-        "is",
-        " ",
-        "a",
-        " ",
-        "virtual",
-        " ",
-        "collaborative",
-        " ",
-        "whiteboard",
-      ]);
-
-      text =
-        "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";
-      expect(parseTokens(text)).toEqual([
-        "Wikipedia",
-        " ",
-        "is",
-        " ",
-        "hosted",
-        " ",
-        "by",
-        " ",
-        "Wikimedia-",
-        " ",
-        "Foundation,",
-        " ",
-        "a",
-        " ",
-        "non-",
-        "profit",
-        " ",
-        "organization",
-        " ",
-        "that",
-        " ",
-        "also",
-        " ",
-        "hosts",
-        " ",
-        "a",
-        " ",
-        "range-",
-        "of",
-        " ",
-        "other",
-        " ",
-        "projects",
-      ]);
-    });
-
-    it("should not tokenize number", () => {
-      const text = "99,100.99";
-      const tokens = parseTokens(text);
-      expect(tokens).toEqual(["99,100.99"]);
-    });
-
-    it("should tokenize joined emojis", () => {
-      const text = `😬🌍🗺🔥☂️👩🏽‍🦰👨‍👩‍👧‍👦👩🏾‍🔬🏳️‍🌈🧔‍♀️🧑‍🤝‍🧑🙅🏽‍♂️✅0️⃣🇨🇿🦅`;
-      const tokens = parseTokens(text);
-
-      expect(tokens).toEqual([
-        "😬",
-        "🌍",
-        "🗺",
-        "🔥",
-        "☂️",
-        "👩🏽‍🦰",
-        "👨‍👩‍👧‍👦",
-        "👩🏾‍🔬",
-        "🏳️‍🌈",
-        "🧔‍♀️",
-        "🧑‍🤝‍🧑",
-        "🙅🏽‍♂️",
-        "✅",
-        "0️⃣",
-        "🇨🇿",
-        "🦅",
-      ]);
-    });
-
-    it("should tokenize emojis mixed with mixed text", () => {
-      const text = `😬a🌍b🗺c🔥d☂️《👩🏽‍🦰》👨‍👩‍👧‍👦德👩🏾‍🔬こ🏳️‍🌈안🧔‍♀️g🧑‍🤝‍🧑h🙅🏽‍♂️e✅f0️⃣g🇨🇿10🦅#hash`;
-      const tokens = parseTokens(text);
-
-      expect(tokens).toEqual([
-        "😬",
-        "a",
-        "🌍",
-        "b",
-        "🗺",
-        "c",
-        "🔥",
-        "d",
-        "☂️",
-        "《",
-        "👩🏽‍🦰",
-        "》",
-        "👨‍👩‍👧‍👦",
-        "德",
-        "👩🏾‍🔬",
-        "こ",
-        "🏳️‍🌈",
-        "안",
-        "🧔‍♀️",
-        "g",
-        "🧑‍🤝‍🧑",
-        "h",
-        "🙅🏽‍♂️",
-        "e",
-        "✅",
-        "f0️⃣g", // bummer, but ok, as we traded kecaps not breaking (less common) for hash and numbers not breaking (more common)
-        "🇨🇿",
-        "10", // nice! do not break the number, as it's by default matched by \p{Emoji}
-        "🦅",
-        "#hash", // nice! do not break the hash, as it's by default matched by \p{Emoji}
-      ]);
-    });
-
-    it("should tokenize decomposed chars into their composed variants", () => {
-      // each input character is in a decomposed form
-      const text = "čでäぴέ다й한";
-      expect(text.normalize("NFC").length).toEqual(8);
-      expect(text).toEqual(text.normalize("NFD"));
-
-      const tokens = parseTokens(text);
-      expect(tokens.length).toEqual(8);
-      expect(tokens).toEqual(["č", "で", "ä", "ぴ", "έ", "다", "й", "한"]);
-    });
-
-    it("should tokenize artificial CJK", () => {
-      const text = `《道德經》醫-醫こんにちは世界!안녕하세요세계;다.다...원/달(((다)))[[1]]〚({((한))>)〛た…[Hello] World?ニューヨーク・¥3700.55す。090-1234-5678¥1,000〜$5,000「素晴らしい!」〔重要〕#1:Taro君30%は、(たなばた)〰¥110±¥570で20℃〜9:30〜10:00【一番】`;
-
-      // [
-      //   '《道',        '德',             '經》',    '醫-',
-      //   '醫',          'こ',             'ん',      'に',
-      //   'ち',          'は',             '世',      '界!',
-      //   '안',          '녕',             '하',      '세',
-      //   '요',          '세',             '계;',    '다.',
-      //   '다...',       '원/',            '달',      '(((다)))',
-      //   '[[1]]',       '〚({((한))>)〛', 'た…',     '[Hello]',
-      //   ' ',           'World?',        'ニ',      'ュ',
-      //   'ー',          'ヨ',             'ー',      'ク・',
-      //   '¥3700.55',   'す。',           '090-',    '1234-',
-      //   '5678¥1,000', '〜',             '$5,000', '「素',
-      //   '晴',          'ら',             'し',      'い!」',
-      //   '〔重',        '要〕',           '#',      '1:',
-      //   'Taro',        '君',             '30%',    'は、',
-      //   '(た',        'な',             'ば',      'た)',
-      //   '〰',          '¥110±',         '¥570',   'で',
-      //   '20℃',         '〜',             '9:30',    '〜',
-      //   '10:00',       '【一',           '番】'
-      // ]
-      const tokens = parseTokens(text);
-
-      // Latin
-      expect(tokens).toContain("[[1]]");
-      expect(tokens).toContain("[Hello]");
-      expect(tokens).toContain("World?");
-      expect(tokens).toContain("Taro");
-
-      // Chinese
-      expect(tokens).toContain("《道");
-      expect(tokens).toContain("德");
-      expect(tokens).toContain("經》");
-      expect(tokens).toContain("醫-");
-      expect(tokens).toContain("醫");
-
-      // Japanese
-      expect(tokens).toContain("こ");
-      expect(tokens).toContain("ん");
-      expect(tokens).toContain("に");
-      expect(tokens).toContain("ち");
-      expect(tokens).toContain("は");
-      expect(tokens).toContain("世");
-      expect(tokens).toContain("ニ");
-      expect(tokens).toContain("ク・");
-      expect(tokens).toContain("界!");
-      expect(tokens).toContain("た…");
-      expect(tokens).toContain("す。");
-      expect(tokens).toContain("ュ");
-      expect(tokens).toContain("ー");
-      expect(tokens).toContain("「素");
-      expect(tokens).toContain("晴");
-      expect(tokens).toContain("ら");
-      expect(tokens).toContain("し");
-      expect(tokens).toContain("い!」");
-      expect(tokens).toContain("君");
-      expect(tokens).toContain("は、");
-      expect(tokens).toContain("(た");
-      expect(tokens).toContain("な");
-      expect(tokens).toContain("ば");
-      expect(tokens).toContain("た)");
-      expect(tokens).toContain("で");
-      expect(tokens).toContain("【一");
-      expect(tokens).toContain("番】");
-
-      // Check for Korean
-      expect(tokens).toContain("안");
-      expect(tokens).toContain("녕");
-      expect(tokens).toContain("하");
-      expect(tokens).toContain("세");
-      expect(tokens).toContain("요");
-      expect(tokens).toContain("세");
-      expect(tokens).toContain("계;");
-      expect(tokens).toContain("다.");
-      expect(tokens).toContain("다...");
-      expect(tokens).toContain("원/");
-      expect(tokens).toContain("달");
-      expect(tokens).toContain("(((다)))");
-      expect(tokens).toContain("〚({((한))>)〛");
-
-      // Numbers and units
-      expect(tokens).toContain("¥3700.55");
-      expect(tokens).toContain("090-");
-      expect(tokens).toContain("1234-");
-      expect(tokens).toContain("5678¥1,000");
-      expect(tokens).toContain("$5,000");
-      expect(tokens).toContain("1:");
-      expect(tokens).toContain("30%");
-      expect(tokens).toContain("¥110±");
-      expect(tokens).toContain("¥570");
-      expect(tokens).toContain("20℃");
-      expect(tokens).toContain("9:30");
-      expect(tokens).toContain("10:00");
-
-      // Punctuation and symbols
-      expect(tokens).toContain("〜");
-      expect(tokens).toContain("〰");
-      expect(tokens).toContain("#");
-    });
-  });
-});
+import type { ExcalidrawTextElementWithContainer } from "./types";
 
 
 describe("Test measureText", () => {
 describe("Test measureText", () => {
   describe("Test getContainerCoords", () => {
   describe("Test getContainerCoords", () => {

+ 2 - 359
packages/excalidraw/element/textElement.ts

@@ -16,12 +16,12 @@ import {
   BOUND_TEXT_PADDING,
   BOUND_TEXT_PADDING,
   DEFAULT_FONT_FAMILY,
   DEFAULT_FONT_FAMILY,
   DEFAULT_FONT_SIZE,
   DEFAULT_FONT_SIZE,
-  ENV,
   TEXT_ALIGN,
   TEXT_ALIGN,
   VERTICAL_ALIGN,
   VERTICAL_ALIGN,
 } from "../constants";
 } from "../constants";
 import type { MaybeTransformHandleType } from "./transformHandles";
 import type { MaybeTransformHandleType } from "./transformHandles";
 import { isTextElement } from ".";
 import { isTextElement } from ".";
+import { wrapText } from "./textWrapping";
 import { isBoundToContainer, isArrowElement } from "./typeChecks";
 import { isBoundToContainer, isArrowElement } from "./typeChecks";
 import { LinearElementEditor } from "./linearElementEditor";
 import { LinearElementEditor } from "./linearElementEditor";
 import type { AppState } from "../types";
 import type { AppState } from "../types";
@@ -31,172 +31,6 @@ import {
 } from "./containerCache";
 } from "./containerCache";
 import type { ExtractSetType } from "../utility-types";
 import type { ExtractSetType } from "../utility-types";
 
 
-/**
- * Matches various emoji types.
- *
- * 1. basic emojis (😀, 🌍)
- * 2. flags (🇨🇿)
- * 3. multi-codepoint emojis:
- *    - skin tones (👍🏽)
- *    - variation selectors (☂️)
- *    - keycaps (1️⃣)
- *    - tag sequences (🏴󠁧󠁢󠁥󠁮󠁧󠁿)
- *    - emoji sequences (👨‍👩‍👧‍👦, 👩‍🚀, 🏳️‍🌈)
- *
- * Unicode points:
- * - \uFE0F: presentation selector
- * - \u20E3: enclosing keycap
- * - \u200D: ZWJ (zero width joiner)
- * - \u{E0020}-\u{E007E}: tags
- * - \u{E007F}: cancel tag
- *
- * @see https://unicode.org/reports/tr51/#EBNF_and_Regex, with changes:
- * - replaced \p{Emoji} with [\p{Extended_Pictographic}\p{Emoji_Presentation}], see more in `should tokenize emojis mixed with mixed text` test
- * - replaced \p{Emod} with \p{Emoji_Modifier} as some do not understand the abbreviation (i.e. https://devina.io/redos-checker)
- */
-const _EMOJI_CHAR =
-  /(\p{RI}\p{RI}|[\p{Extended_Pictographic}\p{Emoji_Presentation}](?:\p{Emoji_Modifier}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?(?:\u200D(?:\p{RI}\p{RI}|[\p{Emoji}](?:\p{Emoji_Modifier}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?))*)/u;
-
-/**
- * Detect a CJK char, though does not include every possible char used in CJK texts,
- * such as symbols and punctuations.
- *
- * By default every CJK is a breaking point, though CJK has additional breaking points,
- * including full width punctuations or symbols (Chinese and Japanese) and western punctuations (Korean).
- *
- * Additional CJK breaking point rules:
- * - expect a break before (lookahead), but not after (negative lookbehind), i.e. "(" or "（"
- * - expect a break after (lookbehind), but not before (negative lookahead), i.e. ")" or "）"
- * - expect a break always (lookahead and lookbehind), i.e. "〃"
- */
-const _CJK_CHAR =
-  /\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}/u;
-
-/**
- * Following characters break only with CJK, not with alphabetic characters.
- * This is essential for Korean, as it uses alphabetic punctuation, but expects CJK-like breaking points.
- *
- * Hello((た)) → ["Hello", "((た))"]
- * Hello((World)) → ["Hello((World))"]
- */
-const _CJK_BREAK_NOT_AFTER_BUT_BEFORE = /<\(\[\{/u;
-const _CJK_BREAK_NOT_BEFORE_BUT_AFTER = />\)\]\}.,:;\?!/u;
-const _CJK_BREAK_ALWAYS = / 〃〜~〰#&*+-ー/=|¬ ̄¦/u;
-const _CJK_SYMBOLS_AND_PUNCTUATION =
-  /()[]{}〈〉《》⦅⦆「」「」『』【】〖〗〔〕〘〙〚〛<>〝〞'〟・。゚゙,、.:;?!%ー/u;
-
-/**
- * Following characters break with any character, even though are mostly used with CJK.
- *
- * Hello た。→ ["Hello", "た。"]
- *        ↑ DON'T BREAK "た。" (negative lookahead)
- * Hello「た」 World → ["Hello", "「た」", "World"]
- *       ↑ DON'T BREAK "「た" (negative lookbehind)
- *        ↑ DON'T BREAK "た」"(negative lookahead)
- *      ↑ BREAK BEFORE "「" (lookahead)
- *         ↑ BREAK AFTER "」" (lookbehind)
- */
-const _ANY_BREAK_NOT_AFTER_BUT_BEFORE = /([{〈《⦅「「『【〖〔〘〚<〝/u;
-const _ANY_BREAK_NOT_BEFORE_BUT_AFTER =
-  /)]}〉》⦆」」』】〗〕〙〛>〞'〟・。゚゙,、.:;?!%±‥…\//u;
-
-/**
- * Natural breaking points for any grammars.
- *
- * Hello-world
- *       ↑ BREAK AFTER "-" → ["Hello-", "world"]
- * Hello world
- *      ↑ BREAK ALWAYS " " → ["Hello", " ", "world"]
- */
-const _ANY_BREAK_AFTER = /-/u;
-const _ANY_BREAK_ALWAYS = /\s/u;
-
-/**
- * Simple fallback for browsers (mainly Safari < 16.4) that don't support "Lookbehind assertion".
- *
- * Browser support as of 10/2024:
- * - 91% Lookbehind assertion https://caniuse.com/mdn-javascript_regular_expressions_lookbehind_assertion
- * - 94% Unicode character class escape https://caniuse.com/mdn-javascript_regular_expressions_unicode_character_class_escape
- *
- * Does not include advanced CJK breaking rules, but covers most of the core cases, especially for latin.
- */
-const BREAK_LINE_REGEX_SIMPLE = new RegExp(
-  `${_EMOJI_CHAR.source}|([${_ANY_BREAK_ALWAYS.source}${_CJK_CHAR.source}${_CJK_BREAK_ALWAYS.source}${_ANY_BREAK_AFTER.source}])`,
-  "u",
-);
-
-// Hello World → ["Hello", " World"]
-//      ↑ BREAK BEFORE " "
-// HelloたWorld → ["Hello", "たWorld"]
-//      ↑ BREAK BEFORE "た"
-// Hello「World」→ ["Hello", "「World」"]
-//      ↑ BREAK BEFORE "「"
-const getLookaheadBreakingPoints = () => {
-  const ANY_BREAKING_POINT = `(?<![${_ANY_BREAK_NOT_AFTER_BUT_BEFORE.source}])(?=[${_ANY_BREAK_NOT_AFTER_BUT_BEFORE.source}${_ANY_BREAK_ALWAYS.source}])`;
-  const CJK_BREAKING_POINT = `(?<![${_ANY_BREAK_NOT_AFTER_BUT_BEFORE.source}${_CJK_BREAK_NOT_AFTER_BUT_BEFORE.source}])(?=[${_CJK_BREAK_NOT_AFTER_BUT_BEFORE.source}]*[${_CJK_CHAR.source}${_CJK_BREAK_ALWAYS.source}])`;
-  return new RegExp(`(?:${ANY_BREAKING_POINT}|${CJK_BREAKING_POINT})`, "u");
-};
-
-// Hello World → ["Hello ", "World"]
-//       ↑ BREAK AFTER " "
-// Hello-World → ["Hello-", "World"]
-//       ↑ BREAK AFTER "-"
-// HelloたWorld → ["Helloた", "World"]
-//       ↑ BREAK AFTER "た"
-//「Hello」World → ["「Hello」", "World"]
-//       ↑ BREAK AFTER "」"
-const getLookbehindBreakingPoints = () => {
-  const ANY_BREAKING_POINT = `(?![${_ANY_BREAK_NOT_BEFORE_BUT_AFTER.source}])(?<=[${_ANY_BREAK_NOT_BEFORE_BUT_AFTER.source}${_ANY_BREAK_ALWAYS.source}${_ANY_BREAK_AFTER.source}])`;
-  const CJK_BREAKING_POINT = `(?![${_ANY_BREAK_NOT_BEFORE_BUT_AFTER.source}${_CJK_BREAK_NOT_BEFORE_BUT_AFTER.source}${_ANY_BREAK_AFTER.source}])(?<=[${_CJK_CHAR.source}${_CJK_BREAK_ALWAYS.source}][${_CJK_BREAK_NOT_BEFORE_BUT_AFTER.source}]*)`;
-  return new RegExp(`(?:${ANY_BREAKING_POINT}|${CJK_BREAKING_POINT})`, "u");
-};
-
-/**
- * Break a line based on the whitespaces, CJK / emoji chars and language specific breaking points,
- * like hyphen for alphabetic and various full-width codepoints for CJK - especially Japanese, e.g.:
- *
- *  "Hello 世界。🌎🗺" → ["Hello", " ", "世", "界。", "🌎", "🗺"]
- *  "Hello-world" → ["Hello-", "world"]
- *  "「Hello World」" → ["「Hello", " ", "World」"]
- */
-const getBreakLineRegexAdvanced = () =>
-  new RegExp(
-    `${_EMOJI_CHAR.source}|${getLookaheadBreakingPoints().source}|${
-      getLookbehindBreakingPoints().source
-    }`,
-    "u",
-  );
-
-let cachedBreakLineRegex: RegExp | undefined;
-
-// Lazy-load for browsers that don't support "Lookbehind assertion"
-const getBreakLineRegex = () => {
-  if (!cachedBreakLineRegex) {
-    try {
-      cachedBreakLineRegex = getBreakLineRegexAdvanced();
-    } catch {
-      cachedBreakLineRegex = BREAK_LINE_REGEX_SIMPLE;
-    }
-  }
-
-  return cachedBreakLineRegex;
-};
-
-const CJK_REGEX = new RegExp(
-  `[${_CJK_CHAR.source}${_CJK_BREAK_ALWAYS.source}${_CJK_SYMBOLS_AND_PUNCTUATION.source}]`,
-  "u",
-);
-
-const EMOJI_REGEX = new RegExp(`${_EMOJI_CHAR.source}`, "u");
-
-export const containsCJK = (text: string) => {
-  return CJK_REGEX.test(text);
-};
-
-export const containsEmoji = (text: string) => {
-  return EMOJI_REGEX.test(text);
-};
-
 export const normalizeText = (text: string) => {
 export const normalizeText = (text: string) => {
   return (
   return (
     normalizeEOL(text)
     normalizeEOL(text)
@@ -510,7 +344,7 @@ let canvas: HTMLCanvasElement | undefined;
  *
  *
  * `Math.ceil` of the final width adds additional buffer which stabilizes slight wrapping incosistencies.
  * `Math.ceil` of the final width adds additional buffer which stabilizes slight wrapping incosistencies.
  */
  */
-const getLineWidth = (
+export const getLineWidth = (
   text: string,
   text: string,
   font: FontString,
   font: FontString,
   forceAdvanceWidth?: true,
   forceAdvanceWidth?: true,
@@ -575,197 +409,6 @@ export const getTextHeight = (
   return getLineHeightInPx(fontSize, lineHeight) * lineCount;
   return getLineHeightInPx(fontSize, lineHeight) * lineCount;
 };
 };
 
 
-export const parseTokens = (line: string) => {
-  const breakLineRegex = getBreakLineRegex();
-
-  // normalizing to single-codepoint composed chars due to canonical equivalence of multi-codepoint versions for chars like č, で (~ so that we don't break a line in between c and ˇ)
-  // filtering due to multi-codepoint chars like 👨‍👩‍👧‍👦, 👩🏽‍🦰
-  return line.normalize("NFC").split(breakLineRegex).filter(Boolean);
-};
-
-// handles multi-byte chars (é, 中) and purposefully does not handle multi-codepoint char (👨‍👩‍👧‍👦, 👩🏽‍🦰)
-const isSingleCharacter = (maybeSingleCharacter: string) => {
-  return (
-    maybeSingleCharacter.codePointAt(0) !== undefined &&
-    maybeSingleCharacter.codePointAt(1) === undefined
-  );
-};
-
-const satisfiesWordInvariant = (word: string) => {
-  if (import.meta.env.MODE === ENV.TEST || import.meta.env.DEV) {
-    if (/\s/.test(word)) {
-      throw new Error("Word should not contain any whitespaces!");
-    }
-  }
-};
-
-const wrapWord = (
-  word: string,
-  font: FontString,
-  maxWidth: number,
-): Array<string> => {
-  // multi-codepoint emojis are already broken apart and shouldn't be broken further
-  if (EMOJI_REGEX.test(word)) {
-    return [word];
-  }
-
-  satisfiesWordInvariant(word);
-
-  const lines: Array<string> = [];
-  const chars = Array.from(word);
-
-  let currentLine = "";
-  let currentLineWidth = 0;
-
-  for (const char of chars) {
-    const _charWidth = charWidth.calculate(char, font);
-    const testLineWidth = currentLineWidth + _charWidth;
-
-    if (testLineWidth <= maxWidth) {
-      currentLine = currentLine + char;
-      currentLineWidth = testLineWidth;
-      continue;
-    }
-
-    if (currentLine) {
-      lines.push(currentLine);
-    }
-
-    currentLine = char;
-    currentLineWidth = _charWidth;
-  }
-
-  if (currentLine) {
-    lines.push(currentLine);
-  }
-
-  return lines;
-};
-
-const wrapLine = (
-  line: string,
-  font: FontString,
-  maxWidth: number,
-): string[] => {
-  const lines: Array<string> = [];
-  const tokens = parseTokens(line);
-  const tokenIterator = tokens[Symbol.iterator]();
-
-  let currentLine = "";
-  let currentLineWidth = 0;
-
-  let iterator = tokenIterator.next();
-
-  while (!iterator.done) {
-    const token = iterator.value;
-    const testLine = currentLine + token;
-
-    // cache single codepoint whitespace, CJK or emoji width calc. as kerning should not apply here
-    const testLineWidth = isSingleCharacter(token)
-      ? currentLineWidth + charWidth.calculate(token, font)
-      : getLineWidth(testLine, font, true);
-
-    // build up the current line, skipping length check for possibly trailing whitespaces
-    if (/\s/.test(token) || testLineWidth <= maxWidth) {
-      currentLine = testLine;
-      currentLineWidth = testLineWidth;
-      iterator = tokenIterator.next();
-      continue;
-    }
-
-    // current line is empty => just the token (word) is longer than `maxWidth` and needs to be wrapped
-    if (!currentLine) {
-      const wrappedWord = wrapWord(token, font, maxWidth);
-      const trailingLine = wrappedWord[wrappedWord.length - 1] ?? "";
-      const precedingLines = wrappedWord.slice(0, -1);
-
-      lines.push(...precedingLines);
-
-      // trailing line of the wrapped word might -still be joined with next token/s
-      currentLine = trailingLine;
-      currentLineWidth = getLineWidth(trailingLine, font, true);
-      iterator = tokenIterator.next();
-    } else {
-      // push & reset, but don't iterate on the next token, as we didn't use it yet!
-      lines.push(currentLine.trimEnd());
-
-      // purposefully not iterating and not setting `currentLine` to `token`, so that we could use a simple !currentLine check above
-      currentLine = "";
-      currentLineWidth = 0;
-    }
-  }
-
-  // iterator done, push the trailing line if exists
-  if (currentLine) {
-    const trailingLine = trimTrailingLine(currentLine, font, maxWidth);
-    lines.push(trailingLine);
-  }
-
-  return lines;
-};
-
-// similarly to browsers, does not trim all whitespaces, but only those exceeding the maxWidth
-const trimTrailingLine = (line: string, font: FontString, maxWidth: number) => {
-  const shouldTrimWhitespaces = getLineWidth(line, font, true) > maxWidth;
-
-  if (!shouldTrimWhitespaces) {
-    return line;
-  }
-
-  // defensively default to `trimeEnd` in case the regex does not match
-  let [, trimmedLine, whitespaces] = line.match(/^(.+?)(\s+)$/) ?? [
-    line,
-    line.trimEnd(),
-    "",
-  ];
-
-  let trimmedLineWidth = getLineWidth(trimmedLine, font, true);
-
-  for (const whitespace of Array.from(whitespaces)) {
-    const _charWidth = charWidth.calculate(whitespace, font);
-    const testLineWidth = trimmedLineWidth + _charWidth;
-
-    if (testLineWidth > maxWidth) {
-      break;
-    }
-
-    trimmedLine = trimmedLine + whitespace;
-    trimmedLineWidth = testLineWidth;
-  }
-
-  return trimmedLine;
-};
-
-export const wrapText = (
-  text: string,
-  font: FontString,
-  maxWidth: number,
-): string => {
-  // if maxWidth is not finite or NaN which can happen in case of bugs in
-  // computation, we need to make sure we don't continue as we'll end up
-  // in an infinite loop
-  if (!Number.isFinite(maxWidth) || maxWidth < 0) {
-    return text;
-  }
-
-  const lines: Array<string> = [];
-  const originalLines = text.split("\n");
-
-  for (const originalLine of originalLines) {
-    const currentLineWidth = getLineWidth(originalLine, font, true);
-
-    if (currentLineWidth <= maxWidth) {
-      lines.push(originalLine);
-      continue;
-    }
-
-    const wrappedLine = wrapLine(originalLine, font, maxWidth);
-    lines.push(...wrappedLine);
-  }
-
-  return lines.join("\n");
-};
-
 export const charWidth = (() => {
 export const charWidth = (() => {
   const cachedCharWidth: { [key: FontString]: Array<number> } = {};
   const cachedCharWidth: { [key: FontString]: Array<number> } = {};
 
 

+ 633 - 0
packages/excalidraw/element/textWrapping.test.ts

@@ -0,0 +1,633 @@
+import { wrapText, parseTokens } from "./textWrapping";
+import type { FontString } from "./types";
+
+describe("Test wrapText", () => {
+  // font is irrelevant as jsdom does not support FontFace API
+  // `measureText` width is mocked to return `text.length` by `jest-canvas-mock`
+  // https://github.com/hustcc/jest-canvas-mock/blob/master/src/classes/TextMetrics.js
+  const font = "10px Cascadia, Segoe UI Emoji" as FontString;
+
+  it("should wrap the text correctly when word length is exactly equal to max width", () => {
+    const text = "Hello Excalidraw";
+    // Length of "Excalidraw" is 100 and exactly equal to max width
+    const res = wrapText(text, font, 100);
+    expect(res).toEqual(`Hello\nExcalidraw`);
+  });
+
+  it("should return the text as is if max width is invalid", () => {
+    const text = "Hello Excalidraw";
+    expect(wrapText(text, font, NaN)).toEqual(text);
+    expect(wrapText(text, font, -1)).toEqual(text);
+    expect(wrapText(text, font, Infinity)).toEqual(text);
+  });
+
+  it("should show the text correctly when max width reached", () => {
+    const text = "Hello😀";
+    const maxWidth = 10;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("H\ne\nl\nl\no\n😀");
+  });
+
+  it("should not wrap number when wrapping line", () => {
+    const text = "don't wrap this number 99,100.99";
+    const maxWidth = 300;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("don't wrap this number\n99,100.99");
+  });
+
+  it("should trim all trailing whitespaces", () => {
+    const text = "Hello     ";
+    const maxWidth = 50;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("Hello");
+  });
+
+  it("should trim all but one trailing whitespaces", () => {
+    const text = "Hello     ";
+    const maxWidth = 60;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("Hello ");
+  });
+
+  it("should keep preceding whitespaces and trim all trailing whitespaces", () => {
+    const text = "  Hello  World";
+    const maxWidth = 90;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("  Hello\nWorld");
+  });
+
+  it("should keep some preceding whitespaces, trim trailing whitespaces, but kep those that fit in the trailing line", () => {
+    const text = "   Hello  World            ";
+    const maxWidth = 90;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("   Hello\nWorld    ");
+  });
+
+  it("should trim keep those whitespace that fit in the trailing line", () => {
+    const text = "Hello   Wo rl  d                     ";
+    const maxWidth = 100;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("Hello   Wo\nrl  d     ");
+  });
+
+  it("should support multiple (multi-codepoint) emojis", () => {
+    const text = "😀🗺🔥👩🏽‍🦰👨‍👩‍👧‍👦🇨🇿";
+    const maxWidth = 1;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe("😀\n🗺\n🔥\n👩🏽‍🦰\n👨‍👩‍👧‍👦\n🇨🇿");
+  });
+
+  it("should wrap the text correctly when text contains hyphen", () => {
+    let text =
+      "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";
+    const res = wrapText(text, font, 110);
+    expect(res).toBe(
+      `Wikipedia\nis hosted\nby\nWikimedia-\nFoundation,\na non-\nprofit\norganizatio\nn that also\nhosts a\nrange-of\nother\nprojects`,
+    );
+
+    text = "Hello thereusing-now";
+    expect(wrapText(text, font, 100)).toEqual("Hello\nthereusing\n-now");
+  });
+
+  it("should support wrapping nested lists", () => {
+    const text = `\tA) one tab\t\t- two tabs        - 8 spaces`;
+
+    const maxWidth = 100;
+    const res = wrapText(text, font, maxWidth);
+    expect(res).toBe(`\tA) one\ntab\t\t- two\ntabs\n- 8 spaces`);
+
+    const maxWidth2 = 50;
+    const res2 = wrapText(text, font, maxWidth2);
+    expect(res2).toBe(`\tA)\none\ntab\n- two\ntabs\n- 8\nspace\ns`);
+  });
+
+  describe("When text is CJK", () => {
+    it("should break each CJK character when width is very small", () => {
+      // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi"
+      const text = "안녕하세요こんにちは世界コンニチハ你好";
+      const maxWidth = 10;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe(
+        "안\n녕\n하\n세\n요\nこ\nん\nに\nち\nは\n世\n界\nコ\nン\nニ\nチ\nハ\n你\n好",
+      );
+    });
+
+    it("should break CJK text into longer segments when width is larger", () => {
+      // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi"
+      const text = "안녕하세요こんにちは世界コンニチハ你好";
+      const maxWidth = 30;
+      const res = wrapText(text, font, maxWidth);
+
+      // measureText is mocked, so it's not precisely what would happen in prod
+      expect(res).toBe("안녕하\n세요こ\nんにち\nは世界\nコンニ\nチハ你\n好");
+    });
+
+    it("should handle a combination of CJK, latin, emojis and whitespaces", () => {
+      const text = `a醫 醫      bb  你好  world-i-😀🗺🔥`;
+
+      const maxWidth = 150;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe(`a醫 醫      bb  你\n好  world-i-😀🗺\n🔥`);
+
+      const maxWidth2 = 50;
+      const res2 = wrapText(text, font, maxWidth2);
+      expect(res2).toBe(`a醫 醫\nbb  你\n好\nworld\n-i-😀\n🗺🔥`);
+
+      const maxWidth3 = 30;
+      const res3 = wrapText(text, font, maxWidth3);
+      expect(res3).toBe(`a醫\n醫\nbb\n你好\nwor\nld-\ni-\n😀\n🗺\n🔥`);
+    });
+
+    it("should break before and after a regular CJK character", () => {
+      const text = "HelloたWorld";
+      const maxWidth1 = 50;
+      const res1 = wrapText(text, font, maxWidth1);
+      expect(res1).toBe("Hello\nた\nWorld");
+
+      const maxWidth2 = 60;
+      const res2 = wrapText(text, font, maxWidth2);
+      expect(res2).toBe("Helloた\nWorld");
+    });
+
+    it("should break before and after certain CJK symbols", () => {
+      const text = "こんにちは〃世界";
+      const maxWidth1 = 50;
+      const res1 = wrapText(text, font, maxWidth1);
+      expect(res1).toBe("こんにちは\n〃世界");
+
+      const maxWidth2 = 60;
+      const res2 = wrapText(text, font, maxWidth2);
+      expect(res2).toBe("こんにちは〃\n世界");
+    });
+
+    it("should break after, not before for certain CJK pairs", () => {
+      const text = "Hello た。";
+      const maxWidth = 70;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("Hello\nた。");
+    });
+
+    it("should break before, not after for certain CJK pairs", () => {
+      const text = "Hello「たWorld」";
+      const maxWidth = 60;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("Hello\n「た\nWorld」");
+    });
+
+    it("should break after, not before for certain CJK character pairs", () => {
+      const text = "「Helloた」World";
+      const maxWidth = 70;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("「Hello\nた」World");
+    });
+
+    it("should break Chinese sentences", () => {
+      const text = `中国你好!这是一个测试。
+我们来看看:人民币¥1234「很贵」
+(括号)、逗号,句号。空格 换行 全角符号…—`;
+
+      const maxWidth1 = 80;
+      const res1 = wrapText(text, font, maxWidth1);
+      expect(res1).toBe(`中国你好!这是一\n个测试。
+我们来看看:人民\n币¥1234「很\n贵」
+(括号)、逗号,\n句号。空格 换行\n全角符号…—`);
+
+      const maxWidth2 = 50;
+      const res2 = wrapText(text, font, maxWidth2);
+      expect(res2).toBe(`中国你好!\n这是一个测\n试。
+我们来看\n看:人民币\n¥1234\n「很贵」
+(括号)、\n逗号,句\n号。空格\n换行 全角\n符号…—`);
+    });
+
+    it("should break Japanese sentences", () => {
+      const text = `日本こんにちは!これはテストです。
+  見てみましょう:円¥1234「高い」
+  (括弧)、読点、句点。
+  空白 改行 全角記号…ー`;
+
+      const maxWidth1 = 80;
+      const res1 = wrapText(text, font, maxWidth1);
+      expect(res1).toBe(`日本こんにちは!\nこれはテストで\nす。
+  見てみましょ\nう:円¥1234\n「高い」
+  (括弧)、読\n点、句点。
+  空白 改行\n全角記号…ー`);
+
+      const maxWidth2 = 50;
+      const res2 = wrapText(text, font, maxWidth2);
+      expect(res2).toBe(`日本こんに\nちは!これ\nはテストで\nす。
+  見てみ\nましょう:\n円\n¥1234\n「高い」
+  (括\n弧)、読\n点、句点。
+  空白\n改行 全角\n記号…ー`);
+    });
+
+    it("should break Korean sentences", () => {
+      const text = `한국 안녕하세요! 이것은 테스트입니다.
+우리 보자: 원화₩1234「비싸다」
+(괄호), 쉼표, 마침표.
+공백 줄바꿈 전각기호…—`;
+
+      const maxWidth1 = 80;
+      const res1 = wrapText(text, font, maxWidth1);
+      expect(res1).toBe(`한국 안녕하세\n요! 이것은 테\n스트입니다.
+우리 보자: 원\n화₩1234「비\n싸다」
+(괄호), 쉼\n표, 마침표.
+공백 줄바꿈 전\n각기호…—`);
+
+      const maxWidth2 = 60;
+      const res2 = wrapText(text, font, maxWidth2);
+      expect(res2).toBe(`한국 안녕하\n세요! 이것\n은 테스트입\n니다.
+우리 보자:\n원화\n₩1234\n「비싸다」
+(괄호),\n쉼표, 마침\n표.
+공백 줄바꿈\n전각기호…—`);
+    });
+  });
+
+  describe("When text contains leading whitespaces", () => {
+    const text = "  \t   Hello world";
+
+    it("should preserve leading whitespaces", () => {
+      const maxWidth = 120;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("  \t   Hello\nworld");
+    });
+
+    it("should break and collapse leading whitespaces when line breaks", () => {
+      const maxWidth = 60;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("\nHello\nworld");
+    });
+
+    it("should break and collapse leading whitespaces whe words break", () => {
+      const maxWidth = 30;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("\nHel\nlo\nwor\nld");
+    });
+  });
+
+  describe("When text contains trailing whitespaces", () => {
+    it("shouldn't add new lines for trailing spaces", () => {
+      const text = "Hello whats up     ";
+      const maxWidth = 190;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe(text);
+    });
+
+    it("should ignore trailing whitespaces when line breaks", () => {
+      const text = "Hippopotomonstrosesquippedaliophobia        ??????";
+      const maxWidth = 400;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("Hippopotomonstrosesquippedaliophobia\n??????");
+    });
+
+    it("should not ignore trailing whitespaces when word breaks", () => {
+      const text = "Hippopotomonstrosesquippedaliophobia        ??????";
+      const maxWidth = 300;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("Hippopotomonstrosesquippedalio\nphobia        ??????");
+    });
+
+    it("should ignore trailing whitespaces when word breaks and line breaks", () => {
+      const text = "Hippopotomonstrosesquippedaliophobia        ??????";
+      const maxWidth = 180;
+      const res = wrapText(text, font, maxWidth);
+      expect(res).toBe("Hippopotomonstrose\nsquippedaliophobia\n??????");
+    });
+  });
+
+  describe("When text doesn't contain new lines", () => {
+    const text = "Hello whats up";
+
+    [
+      {
+        desc: "break all words when width of each word is less than container width",
+        width: 70,
+        res: `Hello\nwhats\nup`,
+      },
+      {
+        desc: "break all characters when width of each character is less than container width",
+        width: 15,
+        res: `H\ne\nl\nl\no\nw\nh\na\nt\ns\nu\np`,
+      },
+      {
+        desc: "break words as per the width",
+
+        width: 130,
+        res: `Hello whats\nup`,
+      },
+      {
+        desc: "fit the container",
+
+        width: 240,
+        res: "Hello whats up",
+      },
+      {
+        desc: "push the word if its equal to max width",
+        width: 50,
+        res: `Hello\nwhats\nup`,
+      },
+    ].forEach((data) => {
+      it(`should ${data.desc}`, () => {
+        const res = wrapText(text, font, data.width);
+        expect(res).toEqual(data.res);
+      });
+    });
+  });
+
+  describe("When text contain new lines", () => {
+    const text = `Hello\n  whats up`;
+    [
+      {
+        desc: "break all words when width of each word is less than container width",
+        width: 70,
+        res: `Hello\n  whats\nup`,
+      },
+      {
+        desc: "break all characters when width of each character is less than container width",
+        width: 15,
+        res: `H\ne\nl\nl\no\n\nw\nh\na\nt\ns\nu\np`,
+      },
+      {
+        desc: "break words as per the width",
+        width: 140,
+        res: `Hello\n  whats up`,
+      },
+    ].forEach((data) => {
+      it(`should respect new lines and ${data.desc}`, () => {
+        const res = wrapText(text, font, data.width);
+        expect(res).toEqual(data.res);
+      });
+    });
+  });
+
+  describe("When text is long", () => {
+    const text = `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg break it now`;
+    [
+      {
+        desc: "fit characters of long string as per container width",
+        width: 160,
+        res: `hellolongtextthi\nsiswhatsupwithyo\nuIamtypingggggan\ndtypinggg break\nit now`,
+      },
+      {
+        desc: "fit characters of long string as per container width and break words as per the width",
+
+        width: 120,
+        res: `hellolongtex\ntthisiswhats\nupwithyouIam\ntypingggggan\ndtypinggg\nbreak it now`,
+      },
+      {
+        desc: "fit the long text when container width is greater than text length and move the rest to next line",
+
+        width: 590,
+        res: `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg\nbreak it now`,
+      },
+    ].forEach((data) => {
+      it(`should ${data.desc}`, () => {
+        const res = wrapText(text, font, data.width);
+        expect(res).toEqual(data.res);
+      });
+    });
+  });
+
+  describe("Test parseTokens", () => {
+    it("should tokenize latin", () => {
+      let text = "Excalidraw is a virtual collaborative whiteboard";
+
+      expect(parseTokens(text)).toEqual([
+        "Excalidraw",
+        " ",
+        "is",
+        " ",
+        "a",
+        " ",
+        "virtual",
+        " ",
+        "collaborative",
+        " ",
+        "whiteboard",
+      ]);
+
+      text =
+        "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";
+      expect(parseTokens(text)).toEqual([
+        "Wikipedia",
+        " ",
+        "is",
+        " ",
+        "hosted",
+        " ",
+        "by",
+        " ",
+        "Wikimedia-",
+        " ",
+        "Foundation,",
+        " ",
+        "a",
+        " ",
+        "non-",
+        "profit",
+        " ",
+        "organization",
+        " ",
+        "that",
+        " ",
+        "also",
+        " ",
+        "hosts",
+        " ",
+        "a",
+        " ",
+        "range-",
+        "of",
+        " ",
+        "other",
+        " ",
+        "projects",
+      ]);
+    });
+
+    it("should not tokenize number", () => {
+      const text = "99,100.99";
+      const tokens = parseTokens(text);
+      expect(tokens).toEqual(["99,100.99"]);
+    });
+
+    it("should tokenize joined emojis", () => {
+      const text = `😬🌍🗺🔥☂️👩🏽‍🦰👨‍👩‍👧‍👦👩🏾‍🔬🏳️‍🌈🧔‍♀️🧑‍🤝‍🧑🙅🏽‍♂️✅0️⃣🇨🇿🦅`;
+      const tokens = parseTokens(text);
+
+      expect(tokens).toEqual([
+        "😬",
+        "🌍",
+        "🗺",
+        "🔥",
+        "☂️",
+        "👩🏽‍🦰",
+        "👨‍👩‍👧‍👦",
+        "👩🏾‍🔬",
+        "🏳️‍🌈",
+        "🧔‍♀️",
+        "🧑‍🤝‍🧑",
+        "🙅🏽‍♂️",
+        "✅",
+        "0️⃣",
+        "🇨🇿",
+        "🦅",
+      ]);
+    });
+
+    it("should tokenize emojis mixed with mixed text", () => {
+      const text = `😬a🌍b🗺c🔥d☂️《👩🏽‍🦰》👨‍👩‍👧‍👦德👩🏾‍🔬こ🏳️‍🌈안🧔‍♀️g🧑‍🤝‍🧑h🙅🏽‍♂️e✅f0️⃣g🇨🇿10🦅#hash`;
+      const tokens = parseTokens(text);
+
+      expect(tokens).toEqual([
+        "😬",
+        "a",
+        "🌍",
+        "b",
+        "🗺",
+        "c",
+        "🔥",
+        "d",
+        "☂️",
+        "《",
+        "👩🏽‍🦰",
+        "》",
+        "👨‍👩‍👧‍👦",
+        "德",
+        "👩🏾‍🔬",
+        "こ",
+        "🏳️‍🌈",
+        "안",
+        "🧔‍♀️",
+        "g",
+        "🧑‍🤝‍🧑",
+        "h",
+        "🙅🏽‍♂️",
+        "e",
+        "✅",
+        "f0️⃣g", // bummer, but ok, as we traded keycaps not breaking (less common) for hash and numbers not breaking (more common)
+        "🇨🇿",
+        "10", // nice! do not break the number, as it's by default matched by \p{Emoji}
+        "🦅",
+        "#hash", // nice! do not break the hash, as it's by default matched by \p{Emoji}
+      ]);
+    });
+
+    it("should tokenize decomposed chars into their composed variants", () => {
+      // each input character is in a decomposed form
+      const text = "čでäぴέ다й한";
+      expect(text.normalize("NFC").length).toEqual(8);
+      expect(text).toEqual(text.normalize("NFD"));
+
+      const tokens = parseTokens(text);
+      expect(tokens.length).toEqual(8);
+      expect(tokens).toEqual(["č", "で", "ä", "ぴ", "έ", "다", "й", "한"]);
+    });
+
+    it("should tokenize artificial CJK", () => {
+      const text = `《道德經》醫-醫こんにちは世界!안녕하세요세계;요』,다.다...원/달(((다)))[[1]]〚({((한))>)〛(「た」)た…[Hello] \t World?ニューヨーク・¥3700.55す。090-1234-5678¥1,000〜$5,000「素晴らしい!」〔重要〕#1:Taro君30%は、(たなばた)〰¥110±¥570で20℃〜9:30〜10:00【一番】`;
+      // [
+      //   '《道',      '德',        '經》',           '醫-',
+      //   '醫',        'こ',        'ん',             'に',
+      //   'ち',        'は',        '世',             '界!',
+      //   '안',        '녕',        '하',             '세',
+      //   '요',        '세',        '계;',           '요』,',
+      //   '다.',       '다...',     '원/',            '달',
+      //   '(((다)))',  '[[1]]',     '〚({((한))>)〛', '(「た」)',
+      //   'た…',       '[Hello]',   ' ',              '\t',
+      //   ' ',        'World?',   'ニ',             'ュ',
+      //   'ー',        'ヨ',        'ー',             'ク・',
+      //   '¥3700.55', 'す。',      '090-',           '1234-',
+      //   '5678',      '¥1,000〜', '$5,000',        '「素',
+      //   '晴',        'ら',        'し',             'い!」',
+      //   '〔重',      '要〕',      '#',             '1:',
+      //   'Taro',      '君',        '30%',           'は、',
+      //   '(た',      'な',        'ば',             'た)',
+      //   '〰',        '¥110±',    '¥570',          'で',
+      //   '20℃〜',     '9:30〜',    '10:00',          '【一',
+      //   '番】'
+      // ]
+      const tokens = parseTokens(text);
+
+      // Latin
+      expect(tokens).toContain("[[1]]");
+      expect(tokens).toContain("[Hello]");
+      expect(tokens).toContain("World?");
+      expect(tokens).toContain("Taro");
+
+      // Chinese
+      expect(tokens).toContain("《道");
+      expect(tokens).toContain("德");
+      expect(tokens).toContain("經》");
+      expect(tokens).toContain("醫-");
+      expect(tokens).toContain("醫");
+
+      // Japanese
+      expect(tokens).toContain("こ");
+      expect(tokens).toContain("ん");
+      expect(tokens).toContain("に");
+      expect(tokens).toContain("ち");
+      expect(tokens).toContain("は");
+      expect(tokens).toContain("世");
+      expect(tokens).toContain("ク・");
+      expect(tokens).toContain("界!");
+      expect(tokens).toContain("た…");
+      expect(tokens).toContain("す。");
+      expect(tokens).toContain("ュ");
+      expect(tokens).toContain("「素");
+      expect(tokens).toContain("晴");
+      expect(tokens).toContain("ら");
+      expect(tokens).toContain("し");
+      expect(tokens).toContain("い!」");
+      expect(tokens).toContain("君");
+      expect(tokens).toContain("は、");
+      expect(tokens).toContain("(た");
+      expect(tokens).toContain("な");
+      expect(tokens).toContain("ば");
+      expect(tokens).toContain("た)");
+      expect(tokens).toContain("で");
+      expect(tokens).toContain("【一");
+      expect(tokens).toContain("番】");
+
+      // Check for Korean
+      expect(tokens).toContain("안");
+      expect(tokens).toContain("녕");
+      expect(tokens).toContain("하");
+      expect(tokens).toContain("세");
+      expect(tokens).toContain("요");
+      expect(tokens).toContain("세");
+      expect(tokens).toContain("계;");
+      expect(tokens).toContain("요』,");
+      expect(tokens).toContain("다.");
+      expect(tokens).toContain("다...");
+      expect(tokens).toContain("원/");
+      expect(tokens).toContain("달");
+      expect(tokens).toContain("(((다)))");
+      expect(tokens).toContain("〚({((한))>)〛");
+      expect(tokens).toContain("(「た」)");
+
+      // Numbers and units
+      expect(tokens).toContain("¥3700.55");
+      expect(tokens).toContain("090-");
+      expect(tokens).toContain("1234-");
+      expect(tokens).toContain("5678");
+      expect(tokens).toContain("¥1,000〜");
+      expect(tokens).toContain("$5,000");
+      expect(tokens).toContain("1:");
+      expect(tokens).toContain("30%");
+      expect(tokens).toContain("¥110±");
+      expect(tokens).toContain("20℃〜");
+      expect(tokens).toContain("9:30〜");
+      expect(tokens).toContain("10:00");
+
+      // Punctuation and symbols
+      expect(tokens).toContain(" ");
+      expect(tokens).toContain("\t");
+      expect(tokens).toContain(" ");
+      expect(tokens).toContain("ニ");
+      expect(tokens).toContain("ー");
+      expect(tokens).toContain("ヨ");
+      expect(tokens).toContain("〰");
+      expect(tokens).toContain("#");
+    });
+  });
+});

+ 568 - 0
packages/excalidraw/element/textWrapping.ts

@@ -0,0 +1,568 @@
+import { ENV } from "../constants";
+import { charWidth, getLineWidth } from "./textElement";
+import type { FontString } from "./types";
+
+let cachedCjkRegex: RegExp | undefined; // built on the first `containsCJK` call, then reused
+let cachedLineBreakRegex: RegExp | undefined; // built on the first `getLineBreakRegex` call, then reused
+let cachedEmojiRegex: RegExp | undefined; // built on the first `getEmojiRegex` call, then reused
+
+/**
+ * Tells whether the given text contains at least one CJK character
+ * (including CJK symbols, punctuation, currency, etc.).
+ *
+ * The character class is assembled from every entry of `CJK` on the first call
+ * and reused afterwards.
+ */
+export const containsCJK = (text: string) => {
+  const cjkRegex = cachedCjkRegex ?? Regex.class(...Object.values(CJK));
+  cachedCjkRegex = cjkRegex;
+
+  return cjkRegex.test(text);
+};
+
+/**
+ * Returns the (lazily built and cached) regex describing the line break opportunities.
+ *
+ * Prefers the advanced rules; when building them throws, falls back to the simple rules.
+ */
+const getLineBreakRegex = () => {
+  if (cachedLineBreakRegex) {
+    return cachedLineBreakRegex;
+  }
+
+  let lineBreakRegex: RegExp;
+
+  try {
+    lineBreakRegex = getLineBreakRegexAdvanced();
+  } catch {
+    lineBreakRegex = getLineBreakRegexSimple();
+  }
+
+  cachedLineBreakRegex = lineBreakRegex;
+
+  return lineBreakRegex;
+};
+
+/**
+ * Returns the (lazily built and cached) regex matching emojis.
+ */
+const getEmojiRegex = () => {
+  if (cachedEmojiRegex) {
+    return cachedEmojiRegex;
+  }
+
+  const emojiRegex = getEmojiRegexUnicode();
+  cachedEmojiRegex = emojiRegex;
+
+  return emojiRegex;
+};
+
+/**
+ * Common symbols used across different languages.
+ *
+ * Each value is a list of characters rather than a standalone pattern: the `Regex` and `Break`
+ * helpers read its `.source` and splice it into a `[...]` character class.
+ */
+const COMMON = {
+  /**
+   * Natural breaking points for any grammars.
+   *
+   * Hello world
+   *      ↑ BREAK ALWAYS " " → ["Hello", " ", "world"]
+   * Hello-world
+   *       ↑ BREAK AFTER "-" → ["Hello-", "world"]
+   */
+  WHITESPACE: /\s/u,
+  HYPHEN: /-/u,
+  /**
+   * Generally do not break, unless closed symbol is followed by an opening symbol.
+   *
+   * Also, western punctuation is often used in modern Korean and expects to be treated
+   * similarly to the CJK opening and closing symbols.
+   *
+   * Hello(한글)→ ["Hello", "(한", "글)"]
+   *      ↑ BREAK BEFORE "("
+   *          ↑ BREAK AFTER ")"
+   */
+  OPENING: /<\(\[\{/u,
+  CLOSING: />\)\]\}.,:;!\?…\//u,
+};
+
+/**
+ * Characters and symbols used in Chinese, Japanese and Korean.
+ *
+ * Each value is a list of characters rather than a standalone pattern: the `Regex` and `Break`
+ * helpers read its `.source` and splice it into a `[...]` character class.
+ *
+ * All symbols are written as `\u` escapes on purpose. Most of them are fullwidth / halfwidth
+ * forms (U+FF00–U+FFEF) and once such a character gets folded into its ASCII look-alike
+ * (by an editor, a formatter or a width-normalizing tool), the literal turns into regex syntax:
+ * "/" terminates the literal, "(" and "[" open a group or a class, ")" closes a group that was
+ * never opened and "+-ー" becomes a range. The ASCII counterparts are covered by `COMMON`.
+ */
+const CJK = {
+  /**
+   * Every CJK breaks before and after, unless it's paired with an opening or closing symbol.
+   *
+   * Does not include every possible char used in CJK texts, such as currency, parentheses or punctuation.
+   *
+   * Besides the four scripts, the listed symbols are (in order):
+   * fullwidth grave accent, apostrophe and circumflex, ditto mark (U+3003), wavy dash (U+3030),
+   * ideographic closing mark (U+3006), fullwidth number sign, ampersand, asterisk, plus and
+   * hyphen-minus, prolonged sound mark (U+30FC), fullwidth solidus, reverse solidus, equals sign,
+   * vertical line and broken bar, postal mark (U+3012), fullwidth not sign and macron.
+   */
+  CHAR: /\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}\uFF40\uFF07\uFF3E\u3003\u3030\u3006\uFF03\uFF06\uFF0A\uFF0B\uFF0D\u30FC\uFF0F\uFF3C\uFF1D\uFF5C\uFFE4\u3012\uFFE2\uFFE3/u,
+  /**
+   * Opening and closing CJK punctuation breaks before and after all such characters (in case of many),
+   * and creates pairs with neighboring characters.
+   *
+   * Hello た。→ ["Hello", "た。"]
+   *        ↑ DON'T BREAK "た。"
+   * * Hello「た」 World → ["Hello", "「た」", "World"]
+   *       ↑ DON'T BREAK "「た"
+   *        ↑ DON'T BREAK "た"
+   *      ↑ BREAK BEFORE "「"
+   *         ↑ BREAK AFTER "」"
+   *
+   * OPENING (in order): fullwidth "(", "[" and "{", U+3008, U+300A, fullwidth white parenthesis (U+FF5F),
+   * halfwidth corner bracket (U+FF62), U+300C, U+300E, U+3010, U+3016, U+3014, U+3018, U+301A,
+   * fullwidth "<" and U+301D.
+   *
+   * CLOSING (in order): the closing counterparts of the brackets above up to fullwidth ">", followed by
+   * ideographic full stop (U+3002), fullwidth full stop and comma, ideographic comma (U+3001), U+301F,
+   * two dot leader (U+2025), fullwidth "?", "!", ":" and ";", katakana middle dot (U+30FB),
+   * wave dash (U+301C) and U+301E.
+   */
+  OPENING: /\uFF08\uFF3B\uFF5B\u3008\u300A\uFF5F\uFF62\u300C\u300E\u3010\u3016\u3014\u3018\u301A\uFF1C\u301D/u,
+  CLOSING: /\uFF09\uFF3D\uFF5D\u3009\u300B\uFF60\uFF63\u300D\u300F\u3011\u3017\u3015\u3019\u301B\uFF1E\u3002\uFF0E\uFF0C\u3001\u301F\u2025\uFF1F\uFF01\uFF1A\uFF1B\u30FB\u301C\u301E/u,
+  /**
+   * Currency symbols break before, not after
+   *
+   * Price￥100 → ["Price", "￥100"]
+   *      ↑ BREAK BEFORE "￥"
+   *
+   * Listed (in order): fullwidth yen, won, pound, cent and dollar sign.
+   */
+  CURRENCY: /\uFFE5\uFFE6\uFFE1\uFFE0\uFF04/u,
+};
+
+const EMOJI = {
+  FLAG: /\p{RI}\p{RI}/u, // two consecutive regional indicators
+  JOINER:
+    /(?:\p{Emoji_Modifier}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?/u, // optional suffix: emoji modifier | presentation selector (+ optional keycap) | tags closed by a cancel tag
+  ZWJ: /\u200D/u, // zero width joiner
+  ANY: /[\p{Emoji}]/u, // only used after a zero width joiner in `getEmojiRegexUnicode`
+  MOST: /[\p{Extended_Pictographic}\p{Emoji_Presentation}]/u, // leading character of a non-flag emoji in `getEmojiRegexUnicode`
+};
+
+/**
+ * Simple fallback for browsers (mainly Safari < 16.4) that don't support "Lookbehind assertion".
+ *
+ * Browser support as of 10/2024:
+ * - 91% Lookbehind assertion https://caniuse.com/mdn-javascript_regular_expressions_lookbehind_assertion
+ * - 94% Unicode character class escape https://caniuse.com/mdn-javascript_regular_expressions_unicode_character_class_escape
+ *
+ * Does not include advanced CJK breaking rules, but covers most of the core cases, especially for latin.
+ * Relies only on matching the emojis and on matching a single hyphen, whitespace or CJK character.
+ */
+const getLineBreakRegexSimple = () => {
+  const emojiRule = getEmojiRegex();
+  const singleCharRule = Break.On(COMMON.HYPHEN, COMMON.WHITESPACE, CJK.CHAR);
+
+  return Regex.or(emojiRule, singleCharRule);
+};
+
+/**
+ * Specifies the line breaking rules for alphabetic-based languages,
+ * Chinese, Japanese, Korean and Emojis.
+ *
+ * "Hello-world" → ["Hello-", "world"]
+ * "Hello 「世界。」🌎🗺" → ["Hello", " ", "「世", "界。」", "🌎", "🗺"]
+ *
+ * The rules are joined as alternatives in the order in which they are listed below.
+ */
+const getLineBreakRegexAdvanced = () => {
+  // Unicode-defined regex for (multi-codepoint) Emojis
+  const emoji = getEmojiRegex();
+
+  // Rules for whitespace and hyphen
+  const beforeWhitespace = Break.Before(COMMON.WHITESPACE).Build();
+  const afterWhitespaceOrHyphen = Break.After(
+    COMMON.WHITESPACE,
+    COMMON.HYPHEN,
+  ).Build();
+
+  // Rules for CJK (chars, symbols, currency)
+  const beforeCjk = Break.Before(CJK.CHAR, CJK.CURRENCY)
+    .NotPrecededBy(COMMON.OPENING, CJK.OPENING)
+    .Build();
+  const afterCjk = Break.After(CJK.CHAR)
+    .NotFollowedBy(COMMON.HYPHEN, COMMON.CLOSING, CJK.CLOSING)
+    .Build();
+
+  // Rules for opening and closing punctuation
+  const beforeCjkOpening = Break.BeforeMany(CJK.OPENING)
+    .NotPrecededBy(COMMON.OPENING)
+    .Build();
+  const afterCjkClosing = Break.AfterMany(CJK.CLOSING)
+    .NotFollowedBy(COMMON.CLOSING)
+    .Build();
+  const betweenClosingAndOpening = Break.AfterMany(COMMON.CLOSING)
+    .FollowedBy(COMMON.OPENING)
+    .Build();
+
+  return Regex.or(
+    emoji,
+    beforeWhitespace,
+    afterWhitespaceOrHyphen,
+    beforeCjk,
+    afterCjk,
+    beforeCjkOpening,
+    afterCjkClosing,
+    betweenClosingAndOpening,
+  );
+};
+
+/**
+ * Matches various emoji types.
+ *
+ * 1. basic emojis (😀, 🌍)
+ * 2. flags (🇨🇿)
+ * 3. multi-codepoint emojis:
+ *    - skin tones (👍🏽)
+ *    - variation selectors (☂️)
+ *    - keycaps (1️⃣)
+ *    - tag sequences (🏴󠁧󠁢󠁥󠁮󠁧󠁿)
+ *    - emoji sequences (👨‍👩‍👧‍👦, 👩‍🚀, 🏳️‍🌈)
+ *
+ * Unicode points:
+ * - \uFE0F: presentation selector
+ * - \u20E3: enclosing keycap
+ * - \u200D: zero width joiner
+ * - \u{E0020}-\u{E007E}: tags
+ * - \u{E007F}: cancel tag
+ *
+ * Shape of the built pattern (a single capturing group):
+ *
+ *   ( FLAG | MOST JOINER ( ZWJ ( FLAG | ANY JOINER ) )* )
+ *
+ * where the names refer to the entries of `EMOJI` and the inner groups are non-capturing.
+ *
+ * @see https://unicode.org/reports/tr51/#EBNF_and_Regex, with changes:
+ * - replaced \p{Emoji} with [\p{Extended_Pictographic}\p{Emoji_Presentation}], see more in `should tokenize emojis mixed with mixed text` test
+ * - replaced \p{Emod} with \p{Emoji_Modifier} as some engines do not understand the abbreviation (i.e. https://devina.io/redos-checker)
+ */
+const getEmojiRegexUnicode = () =>
+  Regex.group(
+    Regex.or(
+      EMOJI.FLAG,
+      Regex.and(
+        EMOJI.MOST,
+        EMOJI.JOINER,
+        Regex.build(
+          `(?:${EMOJI.ZWJ.source}(?:${EMOJI.FLAG.source}|${EMOJI.ANY.source}${EMOJI.JOINER.source}))*`,
+        ),
+      ),
+    ),
+  );
+
+/**
+ * Small helpers for composing unicode-aware regexes out of other regexes' sources.
+ */
+const Regex = {
+  /**
+   * Compiles the given pattern source with the unicode ("u") flag.
+   */
+  build: (regex: string): RegExp => new RegExp(regex, "u"),
+  /**
+   * Concatenates the sources of the given regexes into a single string.
+   */
+  join: (...regexes: RegExp[]): string => {
+    let joined = "";
+
+    for (const { source } of regexes) {
+      joined += source;
+    }
+
+    return joined;
+  },
+  /**
+   * Builds a regex matching the given regexes one after another ("and").
+   */
+  and: (...regexes: RegExp[]): RegExp => {
+    const sequence = Regex.join(...regexes);
+    return Regex.build(sequence);
+  },
+  /**
+   * Builds a regex matching any one of the given regexes ("or").
+   */
+  or: (...regexes: RegExp[]): RegExp => {
+    const alternatives = regexes.map(({ source }) => source);
+    return Regex.build(alternatives.join("|"));
+  },
+  /**
+   * Builds a regex wrapping the concatenated regexes into a capturing group.
+   */
+  group: (...regexes: RegExp[]): RegExp => {
+    const content = Regex.join(...regexes);
+    return Regex.build("(" + content + ")");
+  },
+  /**
+   * Builds a regex wrapping the concatenated regexes into a character class.
+   */
+  class: (...regexes: RegExp[]): RegExp => {
+    const content = Regex.join(...regexes);
+    return Regex.build("[" + content + "]");
+  },
+};
+
+/**
+ * Human-readable lookahead and lookbehind utilities for defining line break
+ * opportunities between pairs of character classes.
+ *
+ * Every helper concatenates the sources of the passed regexes and puts them into a single
+ * `[...]` character class, so the arguments are expected to be lists of characters.
+ *
+ * `On` returns a regex right away (a capturing group around the class). The remaining helpers
+ * produce zero-width lookarounds and return a `Chain`, which allows attaching further
+ * lookarounds before the final `.Build()` call.
+ *
+ * The `as Omit<...>` casts only hide a chain method on the type level, the returned object
+ * is always the full `Chain`. Note that the chain method is really spelled `PreceededBy`.
+ */
+const Break = {
+  /**
+   * Break on the given class of characters.
+   *
+   * Builds `([...])`, a capturing group around the class.
+   */
+  On: (...regexes: RegExp[]) => {
+    const joined = Regex.join(...regexes);
+    return Regex.build(`([${joined}])`);
+  },
+  /**
+   * Break before the given class of characters.
+   *
+   * Builds the lookahead `(?=[...])`.
+   */
+  Before: (...regexes: RegExp[]) => {
+    const joined = Regex.join(...regexes);
+    const builder = () => Regex.build(`(?=[${joined}])`);
+    return Break.Chain(builder) as Omit<
+      ReturnType<typeof Break.Chain>,
+      "FollowedBy"
+    >;
+  },
+  /**
+   * Break after the given class of characters.
+   *
+   * Builds the lookbehind `(?<=[...])`.
+   */
+  After: (...regexes: RegExp[]) => {
+    const joined = Regex.join(...regexes);
+    const builder = () => Regex.build(`(?<=[${joined}])`);
+    return Break.Chain(builder) as Omit<
+      ReturnType<typeof Break.Chain>,
+      "PreceededBy"
+    >;
+  },
+  /**
+   * Break before one or multiple characters of the same class.
+   *
+   * Builds `(?<![...])(?=[...])`, which matches only in front of the first character of such a run.
+   */
+  BeforeMany: (...regexes: RegExp[]) => {
+    const joined = Regex.join(...regexes);
+    const builder = () => Regex.build(`(?<![${joined}])(?=[${joined}])`);
+    return Break.Chain(builder) as Omit<
+      ReturnType<typeof Break.Chain>,
+      "FollowedBy"
+    >;
+  },
+  /**
+   * Break after one or multiple character from the same class.
+   *
+   * Builds `(?<=[...])(?![...])`, which matches only behind the last character of such a run.
+   */
+  AfterMany: (...regexes: RegExp[]) => {
+    const joined = Regex.join(...regexes);
+    const builder = () => Regex.build(`(?<=[${joined}])(?![${joined}])`);
+    return Break.Chain(builder) as Omit<
+      ReturnType<typeof Break.Chain>,
+      "PreceededBy"
+    >;
+  },
+  /**
+   * Do not break before the given class of characters.
+   *
+   * Builds the negative lookahead `(?![...])`.
+   */
+  NotBefore: (...regexes: RegExp[]) => {
+    const joined = Regex.join(...regexes);
+    const builder = () => Regex.build(`(?![${joined}])`);
+    return Break.Chain(builder) as Omit<
+      ReturnType<typeof Break.Chain>,
+      "NotFollowedBy"
+    >;
+  },
+  /**
+   * Do not break after the given class of characters.
+   *
+   * Builds the negative lookbehind `(?<![...])`.
+   */
+  NotAfter: (...regexes: RegExp[]) => {
+    const joined = Regex.join(...regexes);
+    const builder = () => Regex.build(`(?<![${joined}])`);
+    return Break.Chain(builder) as Omit<
+      ReturnType<typeof Break.Chain>,
+      "NotPrecededBy"
+    >;
+  },
+  Chain: (rootBuilder: () => RegExp) => ({
+    /**
+     * Build the root regex.
+     *
+     * This is the builder the chain was created with; each chain method below wraps it
+     * into a new builder and returns a new chain.
+     */
+    Build: rootBuilder,
+    /**
+     * Specify additional class of characters that should precede the root regex.
+     *
+     * The lookbehind is placed in front of the root regex.
+     */
+    PreceededBy: (...regexes: RegExp[]) => {
+      const root = rootBuilder();
+      const preceeded = Break.After(...regexes).Build();
+      const builder = () => Regex.and(preceeded, root);
+      return Break.Chain(builder) as Omit<
+        ReturnType<typeof Break.Chain>,
+        "PreceededBy"
+      >;
+    },
+    /**
+     * Specify additional class of characters that should follow the root regex.
+     *
+     * The lookahead is placed behind the root regex.
+     */
+    FollowedBy: (...regexes: RegExp[]) => {
+      const root = rootBuilder();
+      const followed = Break.Before(...regexes).Build();
+      const builder = () => Regex.and(root, followed);
+      return Break.Chain(builder) as Omit<
+        ReturnType<typeof Break.Chain>,
+        "FollowedBy"
+      >;
+    },
+    /**
+     * Specify additional class of characters that should not precede the root regex.
+     *
+     * The negative lookbehind is placed in front of the root regex.
+     */
+    NotPrecededBy: (...regexes: RegExp[]) => {
+      const root = rootBuilder();
+      const notPreceeded = Break.NotAfter(...regexes).Build();
+      const builder = () => Regex.and(notPreceeded, root);
+      return Break.Chain(builder) as Omit<
+        ReturnType<typeof Break.Chain>,
+        "NotPrecededBy"
+      >;
+    },
+    /**
+     * Specify additional class of characters that should not follow the root regex.
+     *
+     * The negative lookahead is placed behind the root regex.
+     */
+    NotFollowedBy: (...regexes: RegExp[]) => {
+      const root = rootBuilder();
+      const notFollowed = Break.NotBefore(...regexes).Build();
+      const builder = () => Regex.and(root, notFollowed);
+      return Break.Chain(builder) as Omit<
+        ReturnType<typeof Break.Chain>,
+        "NotFollowedBy"
+      >;
+    },
+  }),
+};
+
+/**
+ * Breaks the line into the tokens based on the found line break opportunities.
+ */
+export const parseTokens = (line: string) => {
+  const lineBreakRegex = getLineBreakRegex();
+
+  // composing into single-codepoint chars first (canonical equivalence), so that chars
+  // like č or で are never split in between the base char and its combining mark
+  const composedLine = line.normalize("NFC");
+  const pieces = composedLine.split(lineBreakRegex);
+
+  // splitting around multi-codepoint chars like 👨‍👩‍👧‍👦, 👩🏽‍🦰 leaves falsy entries behind,
+  // which are not tokens and have to be dropped
+  return pieces.filter((piece) => Boolean(piece));
+};
+
+/**
+ * Wraps the original text into the lines based on the given width.
+ *
+ * Each "\n" separated line is handled on its own; lines that already fit are kept as they are.
+ * Returns the text untouched when `maxWidth` is negative or not a finite number.
+ */
+export const wrapText = (
+  text: string,
+  font: FontString,
+  maxWidth: number,
+): string => {
+  // a non-finite (or NaN) width might come out of a buggy computation and would
+  // send the wrapping below into an infinite loop, hence bailing out early
+  const isUsableWidth = Number.isFinite(maxWidth) && maxWidth >= 0;
+
+  if (!isUsableWidth) {
+    return text;
+  }
+
+  return text
+    .split("\n")
+    .flatMap((originalLine) =>
+      getLineWidth(originalLine, font, true) <= maxWidth
+        ? [originalLine]
+        : wrapLine(originalLine, font, maxWidth),
+    )
+    .join("\n");
+};
+
+/**
+ * Wraps the original line into the lines based on the given width.
+ *
+ * Walks the tokens of the line and keeps appending them to a pending line for as long as
+ * it fits; whitespace tokens are always appended, as they might end up being trimmed.
+ */
+const wrapLine = (
+  line: string,
+  font: FontString,
+  maxWidth: number,
+): string[] => {
+  const tokens = parseTokens(line);
+  const wrapped: Array<string> = [];
+
+  let pending = "";
+  let pendingWidth = 0;
+  let index = 0;
+
+  while (index < tokens.length) {
+    const token = tokens[index];
+    const candidate = pending + token;
+
+    // width of a single codepoint token (whitespace, CJK, ...) comes from the per-char
+    // calculation and is simply added up, as kerning should not apply here
+    const candidateWidth = isSingleCharacter(token)
+      ? pendingWidth + charWidth.calculate(token, font)
+      : getLineWidth(candidate, font, true);
+
+    // whitespaces skip the width check, as they are possibly trailing
+    const isAccepted = /\s/.test(token) || candidateWidth <= maxWidth;
+
+    if (isAccepted) {
+      pending = candidate;
+      pendingWidth = candidateWidth;
+      index++;
+      continue;
+    }
+
+    if (pending) {
+      // flush the pending line, but stay on the same token, as it wasn't used yet;
+      // the next pass sees an empty pending line and either accepts or wraps the token
+      wrapped.push(pending.trimEnd());
+      pending = "";
+      pendingWidth = 0;
+      continue;
+    }
+
+    // pending line is empty => the token (word) alone is longer than `maxWidth`
+    const pieces = wrapWord(token, font, maxWidth);
+    const lastPiece = pieces[pieces.length - 1] ?? "";
+
+    wrapped.push(...pieces.slice(0, -1));
+
+    // the last piece of the wrapped word might still be joined with the next token/s
+    pending = lastPiece;
+    pendingWidth = getLineWidth(lastPiece, font, true);
+    index++;
+  }
+
+  // all tokens consumed, flush what is left
+  if (pending) {
+    wrapped.push(trimLine(pending, font, maxWidth));
+  }
+
+  return wrapped;
+};
+
+/**
+ * Wraps the word into the lines based on the given width.
+ *
+ * The word is split codepoint by codepoint; a word matching the emoji regex is returned as is.
+ */
+const wrapWord = (
+  word: string,
+  font: FontString,
+  maxWidth: number,
+): Array<string> => {
+  // multi-codepoint emojis are already broken apart and shouldn't be broken further
+  const isEmoji = getEmojiRegex().test(word);
+
+  if (isEmoji) {
+    return [word];
+  }
+
+  satisfiesWordInvariant(word);
+
+  const pieces: Array<string> = [];
+
+  let pending = "";
+  let pendingWidth = 0;
+
+  // iterating a string yields whole codepoints
+  for (const char of word) {
+    const width = charWidth.calculate(char, font);
+
+    if (pendingWidth + width <= maxWidth) {
+      pending += char;
+      pendingWidth += width;
+    } else {
+      // the char does not fit anymore, so it starts a new piece
+      if (pending) {
+        pieces.push(pending);
+      }
+
+      pending = char;
+      pendingWidth = width;
+    }
+  }
+
+  if (pending) {
+    pieces.push(pending);
+  }
+
+  return pieces;
+};
+
+/**
+ * Similarly to browsers, does not trim all trailing whitespaces, but only those exceeding the `maxWidth`.
+ */
+const trimLine = (line: string, font: FontString, maxWidth: number) => {
+  const isOverflowing = getLineWidth(line, font, true) > maxWidth;
+
+  if (!isOverflowing) {
+    return line;
+  }
+
+  // defensively falling back to `trimEnd` in case the regex does not match
+  const match = /^(.+?)(\s+)$/.exec(line);
+  const trailingWhitespaces = match ? match[2] : "";
+
+  let result = match ? match[1] : line.trimEnd();
+  let resultWidth = getLineWidth(result, font, true);
+
+  // put back the trailing whitespaces one by one, for as long as they fit
+  for (const whitespace of trailingWhitespaces) {
+    const nextWidth = resultWidth + charWidth.calculate(whitespace, font);
+
+    if (nextWidth > maxWidth) {
+      break;
+    }
+
+    result += whitespace;
+    resultWidth = nextWidth;
+  }
+
+  return result;
+};
+
+/**
+ * Check if the given string is a single character, meaning exactly one UTF-16 code unit.
+ *
+ * Handles multi-byte chars (é, 中) and purposefully does not handle multi-codepoint char (👨‍👩‍👧‍👦, 👩🏽‍🦰).
+ * Chars encoded as a surrogate pair (i.e. 😀) take two code units and are not considered single either.
+ */
+const isSingleCharacter = (maybeSingleCharacter: string) =>
+  maybeSingleCharacter.length === 1;
+
+/**
+ * Invariant for the word wrapping algorithm: a word never contains a whitespace.
+ *
+ * Checked (and thrown on) only in the test mode and in the dev builds.
+ */
+const satisfiesWordInvariant = (word: string) => {
+  const isInvariantChecked =
+    import.meta.env.MODE === ENV.TEST || import.meta.env.DEV;
+
+  if (isInvariantChecked && /\s/.test(word)) {
+    throw new Error("Word should not contain any whitespaces!");
+  }
+};

+ 1 - 1
packages/excalidraw/element/textWysiwyg.tsx

@@ -27,13 +27,13 @@ import {
   getTextWidth,
   getTextWidth,
   normalizeText,
   normalizeText,
   redrawTextBoundingBox,
   redrawTextBoundingBox,
-  wrapText,
   getBoundTextMaxHeight,
   getBoundTextMaxHeight,
   getBoundTextMaxWidth,
   getBoundTextMaxWidth,
   computeContainerDimensionForBoundText,
   computeContainerDimensionForBoundText,
   computeBoundTextPosition,
   computeBoundTextPosition,
   getBoundTextElement,
   getBoundTextElement,
 } from "./textElement";
 } from "./textElement";
+import { wrapText } from "./textWrapping";
 import {
 import {
   actionDecreaseFontSize,
   actionDecreaseFontSize,
   actionIncreaseFontSize,
   actionIncreaseFontSize,

+ 2 - 5
packages/excalidraw/fonts/Fonts.ts

@@ -7,11 +7,8 @@ import {
   getFontFamilyFallbacks,
   getFontFamilyFallbacks,
 } from "../constants";
 } from "../constants";
 import { isTextElement } from "../element";
 import { isTextElement } from "../element";
-import {
-  charWidth,
-  containsCJK,
-  getContainerElement,
-} from "../element/textElement";
+import { charWidth, getContainerElement } from "../element/textElement";
+import { containsCJK } from "../element/textWrapping";
 import { ShapeCache } from "../scene/ShapeCache";
 import { ShapeCache } from "../scene/ShapeCache";
 import { getFontString, PromisePool, promiseTry } from "../utils";
 import { getFontString, PromisePool, promiseTry } from "../utils";
 import { ExcalidrawFontFace } from "./ExcalidrawFontFace";
 import { ExcalidrawFontFace } from "./ExcalidrawFontFace";

+ 1 - 1
packages/excalidraw/tests/linearElementEditor.test.tsx

@@ -20,7 +20,6 @@ import { LinearElementEditor } from "../element/linearElementEditor";
 import { act, queryByTestId, queryByText } from "@testing-library/react";
 import { act, queryByTestId, queryByText } from "@testing-library/react";
 import {
 import {
   getBoundTextElementPosition,
   getBoundTextElementPosition,
-  wrapText,
   getBoundTextMaxWidth,
   getBoundTextMaxWidth,
 } from "../element/textElement";
 } from "../element/textElement";
 import * as textElementUtils from "../element/textElement";
 import * as textElementUtils from "../element/textElement";
@@ -29,6 +28,7 @@ import { vi } from "vitest";
 import { arrayToMap } from "../utils";
 import { arrayToMap } from "../utils";
 import type { GlobalPoint } from "../../math";
 import type { GlobalPoint } from "../../math";
 import { pointCenter, pointFrom } from "../../math";
 import { pointCenter, pointFrom } from "../../math";
+import { wrapText } from "../element/textWrapping";
 
 
 const renderInteractiveScene = vi.spyOn(
 const renderInteractiveScene = vi.spyOn(
   InteractiveCanvas,
   InteractiveCanvas,