diff --git a/.eslintrc.js b/.eslintrc.js index 0f1335b586d..c6251ef722d 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -78,6 +78,11 @@ module.exports = { name: "matrix-react-sdk/", message: "Please use matrix-react-sdk/src/index instead", }, + { + name: "emojibase-regex", + message: + "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.", + }, ], patterns: [ { @@ -141,6 +146,11 @@ module.exports = { ], message: "Please use matrix-js-sdk/src/matrix instead", }, + { + group: ["emojibase-regex/emoji*"], + message: + "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.", + }, ], }, ], diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx index d8c154440bd..919236281e0 100644 --- a/src/HtmlUtils.tsx +++ b/src/HtmlUtils.tsx @@ -20,7 +20,6 @@ limitations under the License. import React, { LegacyRef, ReactNode } from "react"; import sanitizeHtml from "sanitize-html"; import classNames from "classnames"; -import EMOJIBASE_REGEX from "emojibase-regex"; import katex from "katex"; import { decode } from "html-entities"; import { IContent } from "matrix-js-sdk/src/matrix"; @@ -46,10 +45,26 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/; const SYMBOL_PATTERN = /([\u2100-\u2bff])/; // Regex pattern for non-emoji characters that can appear in an "all-emoji" message -// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace) -const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g; +// (Zero-Width Space, other whitespace) +const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g; + +// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional +// emoji presentation VS (U+FE0F), but not those sequences that are followed by +// a text presentation VS (U+FE0E). We also count lone regional indicators +// (U+1F1E6-U+1F1FF). Technically this regex produces false negatives for emoji +// followed by U+FE0E when the emoji doesn't have a text variant, but in +// practice this doesn't matter. +export let EMOJI_REGEX: RegExp +try { + // Per our support policy, v mode is available to us, but we still don't + // want the app to completely crash on older platforms. + EMOJI_REGEX = new RegExp("\\p{RGI_Emoji}(?!\\uFE0E)(?:(? { expect(html).toMatchInlineSnapshot(`"test foo <b>bar"`); }); + it("generates big emoji for emoji made of multiple characters", () => { + const { asFragment } = render(bodyToHtml({ body: "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ โ†”๏ธ ๐Ÿ‡ฎ๐Ÿ‡ธ", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + it("should generate big emoji for an emoji-only reply to a message", () => { const { asFragment } = render( bodyToHtml( @@ -132,6 +138,12 @@ describe("bodyToHtml", () => { expect(asFragment()).toMatchSnapshot(); }); + it("does not mistake characters in text presentation mode for emoji", () => { + const { asFragment } = render(bodyToHtml({ body: "โ†” โ—๏ธŽ", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + describe("feature_latex_maths", () => { beforeEach(() => { jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths"); diff --git a/test/__snapshots__/HtmlUtils-test.tsx.snap b/test/__snapshots__/HtmlUtils-test.tsx.snap index c33cc46433d..c69eaa7d952 100644 --- a/test/__snapshots__/HtmlUtils-test.tsx.snap +++ b/test/__snapshots__/HtmlUtils-test.tsx.snap @@ -1,5 +1,16 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP +exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = ` + + + โ†” โ—๏ธŽ + + +`; + exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"

hello

$\\xi$

world

"`; exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"

hello

world
"`; @@ -8,6 +19,36 @@ exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"

hel exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello ฮพ\\xi world"`; +exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = ` + + + + ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ + + + + โ†”๏ธ + + + + ๐Ÿ‡ฎ๐Ÿ‡ธ + + + +`; + exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `