diff --git a/.eslintrc.js b/.eslintrc.js index 0f1335b586d..c6251ef722d 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -78,6 +78,11 @@ module.exports = { name: "matrix-react-sdk/", message: "Please use matrix-react-sdk/src/index instead", }, + { + name: "emojibase-regex", + message: + "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.", + }, ], patterns: [ { @@ -141,6 +146,11 @@ module.exports = { ], message: "Please use matrix-js-sdk/src/matrix instead", }, + { + group: ["emojibase-regex/emoji*"], + message: + "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.", + }, ], }, ], diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx index d8c154440bd..888c30d76cc 100644 --- a/src/HtmlUtils.tsx +++ b/src/HtmlUtils.tsx @@ -20,7 +20,6 @@ limitations under the License. import React, { LegacyRef, ReactNode } from "react"; import sanitizeHtml from "sanitize-html"; import classNames from "classnames"; -import EMOJIBASE_REGEX from "emojibase-regex"; import katex from "katex"; import { decode } from "html-entities"; import { IContent } from "matrix-js-sdk/src/matrix"; @@ -46,10 +45,35 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/; const SYMBOL_PATTERN = /([\u2100-\u2bff])/; // Regex pattern for non-emoji characters that can appear in an "all-emoji" message -// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace) -const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g; +// (Zero-Width Space, other whitespace) +const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g; + +// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional +// emoji presentation VS (U+FE0F), but not those sequences that are followed by +// a text presentation VS (U+FE0E). We also count lone regional indicators +// (U+1F1E6-U+1F1FF). 
Technically this regex produces false negatives for emoji +// followed by U+FE0E when the emoji doesn't have a text variant, but in +// practice this doesn't matter. +export const EMOJI_REGEX = (() => { + try { + // Per our support policy, v mode is available to us, but we still don't + // want the app to completely crash on older platforms. We use the + // constructor here to avoid a syntax error on such platforms. + return new RegExp("\\p{RGI_Emoji}(?!\\uFE0E)(?:(? { + try { + return new RegExp(`^(${EMOJI_REGEX.source})+$`, "iv"); + } catch (_e) { + // Fall back, just like for EMOJI_REGEX + return /(?!)/; + } +})(); /* * Return true if the given string contains emoji @@ -266,7 +290,7 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea let key = 0; for (const data of graphemeSegmenter.segment(message)) { - if (EMOJIBASE_REGEX.test(data.segment)) { + if (EMOJI_REGEX.test(data.segment)) { if (text) { result.push(text); text = ""; diff --git a/src/components/views/rooms/SendMessageComposer.tsx b/src/components/views/rooms/SendMessageComposer.tsx index 0ea0bdf94c1..c5972ee86ac 100644 --- a/src/components/views/rooms/SendMessageComposer.tsx +++ b/src/components/views/rooms/SendMessageComposer.tsx @@ -15,7 +15,6 @@ limitations under the License. 
*/ import React, { createRef, KeyboardEvent, SyntheticEvent } from "react"; -import EMOJI_REGEX from "emojibase-regex"; import { IContent, MatrixEvent, @@ -70,6 +69,7 @@ import { doMaybeLocalRoomAction } from "../../../utils/local-room"; import { Caret } from "../../../editor/caret"; import { IDiff } from "../../../editor/diff"; import { getBlobSafeMimeType } from "../../../utils/blobs"; +import { EMOJI_REGEX } from "../../../HtmlUtils"; /** * Build the mentions information based on the editor model (and any related events): diff --git a/src/editor/parts.ts b/src/editor/parts.ts index 3f482357d1e..2b732a6dd1f 100644 --- a/src/editor/parts.ts +++ b/src/editor/parts.ts @@ -15,11 +15,10 @@ See the License for the specific language governing permissions and limitations under the License. */ -import EMOJIBASE_REGEX from "emojibase-regex"; import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix"; import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete"; -import { unicodeToShortcode } from "../HtmlUtils"; +import { EMOJI_REGEX, unicodeToShortcode } from "../HtmlUtils"; import * as Avatar from "../Avatar"; import defaultDispatcher from "../dispatcher/dispatcher"; import { Action } from "../dispatcher/actions"; @@ -197,7 +196,7 @@ abstract class BasePart { abstract class PlainBasePart extends BasePart { protected acceptsInsertion(chr: string, offset: number, inputType: string): boolean { - if (chr === "\n" || EMOJIBASE_REGEX.test(chr)) { + if (chr === "\n" || EMOJI_REGEX.test(chr)) { return false; } // when not pasting or dropping text, reject characters that should start a pill candidate @@ -375,7 +374,7 @@ class NewlinePart extends BasePart implements IBasePart { export class EmojiPart extends BasePart implements IBasePart { protected acceptsInsertion(chr: string, offset: number): boolean { - return EMOJIBASE_REGEX.test(chr); + return EMOJI_REGEX.test(chr); } protected acceptsRemoval(position: 
number, chr: string): boolean { @@ -573,7 +572,7 @@ export class PartCreator { case "\n": return new NewlinePart(); default: - if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) { + if (EMOJI_REGEX.test(getFirstGrapheme(input))) { return new EmojiPart(); } return new PlainPart(); @@ -650,7 +649,7 @@ export class PartCreator { let plainText = ""; for (const data of graphemeSegmenter.segment(text)) { - if (EMOJIBASE_REGEX.test(data.segment)) { + if (EMOJI_REGEX.test(data.segment)) { if (plainText) { parts.push(this.plain(plainText)); plainText = ""; diff --git a/test/HtmlUtils-test.tsx b/test/HtmlUtils-test.tsx index d9e75faaa99..ae12a717806 100644 --- a/test/HtmlUtils-test.tsx +++ b/test/HtmlUtils-test.tsx @@ -107,6 +107,12 @@ describe("bodyToHtml", () => { expect(html).toMatchInlineSnapshot(`"test foo <b>bar"`); }); + it("generates big emoji for emoji made of multiple characters", () => { + const { asFragment } = render(bodyToHtml({ body: "👨‍👩‍👧‍👦 ↔️ 🇮🇸", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + it("should generate big emoji for an emoji-only reply to a message", () => { const { asFragment } = render( bodyToHtml( @@ -132,6 +138,12 @@ describe("bodyToHtml", () => { expect(asFragment()).toMatchSnapshot(); }); + it("does not mistake characters in text presentation mode for emoji", () => { + const { asFragment } = render(bodyToHtml({ body: "↔ ❗︎", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + describe("feature_latex_maths", () => { beforeEach(() => { jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths"); diff --git a/test/__snapshots__/HtmlUtils-test.tsx.snap b/test/__snapshots__/HtmlUtils-test.tsx.snap index c33cc46433d..c69eaa7d952 100644 --- a/test/__snapshots__/HtmlUtils-test.tsx.snap +++ b/test/__snapshots__/HtmlUtils-test.tsx.snap @@ -1,5 +1,16 @@ // Jest Snapshot v1, 
https://goo.gl/fbAQLP +exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = ` + + + ↔ ❗︎ + + +`; + exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"

hello

$\\xi$

world

"`; exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"

hello

world
"`; @@ -8,6 +19,36 @@ exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"

hel exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello ξ\\xi world"`; +exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = ` + + + + 👨‍👩‍👧‍👦 + + + + ↔️ + + + + 🇮🇸 + + + +`; + exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `