diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index eeb894ec4..95cb74d3f 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -324,6 +324,18 @@ export default class Tokenizer { this.sectionStart = this._index; } } + /** + * Checks whether `c` may start a tag name. HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (see {@link https://www.w3.org/TR/REC-xml/#NT-NameStartChar}). + * In `xmlMode` we allow anything that wouldn't end the tag: any character that is not whitespace (per `whitespace()`), `/` or `>`. + */ + private isTagStartChar(c: string) { + return ( + isASCIIAlpha(c) || + (this.xmlMode && !whitespace(c) && c !== "/" && c !== ">") + ); + } private stateBeforeTagName(c: string) { if (c === "/") { this._state = State.BeforeClosingTagName; @@ -342,7 +354,7 @@ export default class Tokenizer { } else if (c === "?") { this._state = State.InProcessingInstruction; this.sectionStart = this._index + 1; - } else if (!isASCIIAlpha(c)) { + } else if (!this.isTagStartChar(c)) { this._state = State.Text; } else { this._state = @@ -378,7 +390,7 @@ export default class Tokenizer { this._state = State.Text; this._index--; } - } else if (!isASCIIAlpha(c)) { + } else if (!this.isTagStartChar(c)) { this._state = State.InSpecialComment; this.sectionStart = this._index; } else { diff --git a/src/__fixtures__/Events/20-xml_entities.json b/src/__fixtures__/Events/20-xml_entities copy.json similarity index 100% rename from src/__fixtures__/Events/20-xml_entities.json rename to src/__fixtures__/Events/20-xml_entities copy.json diff --git a/src/__fixtures__/Events/40-xml_tags.json b/src/__fixtures__/Events/40-xml_tags.json new file mode 100644 index 000000000..bfa58c2a9 --- /dev/null +++ b/src/__fixtures__/Events/40-xml_tags.json @@ -0,0 +1,33 @@ +{ + "name": "XML tags", + "options": { + "parser": { "xmlMode": true } + }, + "html": "<:foo><_bar>", + "expected": [ + { + "data": [":foo"], + "event": "opentagname" + }, + { + "data": [":foo", {}], + "event": "opentag" + }, + { + "data": ["_bar"], + "event": "opentagname" + }, + { + 
"data": ["_bar", {}], + "event": "opentag" + }, + { + "data": ["_bar"], + "event": "closetag" + }, + { + "data": [":foo"], + "event": "closetag" + } + ] +}