Skip to content

Commit

Permalink
tokenizer: fix parse bug when tag name is not ASCII alpha (#497)
Browse files Browse the repository at this point in the history
Co-authored-by: zhuyujie <zhuyujie@zhuyujiedeMacBook-Pro.local>
  • Loading branch information
Zuckjet and zhuyujie committed Aug 13, 2020
1 parent 9b7b63f commit bc010de
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
21 changes: 21 additions & 0 deletions src/Tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const enum State {
//comments
BeforeComment,
InComment,
InSpecialComment,
AfterComment1,
AfterComment2,

Expand Down Expand Up @@ -99,6 +100,10 @@ function whitespace(c: string): boolean {
return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
}

/**
 * Reports whether `c` falls within the ASCII letter ranges `a`-`z` or `A`-`Z`.
 *
 * The check uses plain string comparison, so anything that does not sort
 * inside one of those two ranges (digits, punctuation, the empty string)
 * yields `false`.
 *
 * @param c The string to classify.
 * @returns `true` when `c` compares within `a`..`z` or `A`..`Z`.
 */
function isASCIIAlpha(c: string): boolean {
    const inLowerRange = "a" <= c && c <= "z";
    if (inLowerRange) {
        return true;
    }
    return "A" <= c && c <= "Z";
}

interface Callbacks {
onattribdata(value: string): void; //TODO implement the new event
onattribend(): void;
Expand Down Expand Up @@ -305,6 +310,8 @@ export default class Tokenizer {
} else if (c === "?") {
this._state = State.InProcessingInstruction;
this._sectionStart = this._index + 1;
} else if (!isASCIIAlpha(c)) {
this._state = State.Text;
} else {
this._state =
!this._xmlMode && (c === "s" || c === "S")
Expand Down Expand Up @@ -336,6 +343,9 @@ export default class Tokenizer {
this._state = State.Text;
this._index--;
}
} else if (!isASCIIAlpha(c)) {
this._state = State.InSpecialComment;
this._sectionStart = this._index;
} else {
this._state = State.InClosingTagName;
this._sectionStart = this._index;
Expand Down Expand Up @@ -481,6 +491,15 @@ export default class Tokenizer {
/**
 * Handles one character while in the `InComment` state.
 *
 * A `-` moves the tokenizer to `AfterComment1`; every other character
 * leaves the state unchanged.
 */
_stateInComment(c: string) {
    if (c !== "-") {
        return;
    }
    this._state = State.AfterComment1;
}
/**
 * Handles one character while in the `InSpecialComment` state.
 *
 * Nothing happens until a `>` is seen. At that point the buffered text from
 * `_sectionStart` up to (not including) the current index is reported through
 * `oncomment`, the tokenizer returns to the `Text` state, and the next section
 * is set to begin right after the `>`.
 */
_stateInSpecialComment(c: string) {
    if (c !== ">") {
        return;
    }

    const commentText = this._buffer.substring(
        this._sectionStart,
        this._index
    );
    this._cbs.oncomment(commentText);

    this._state = State.Text;
    // Skip past the closing ">" so it is not included in the following text.
    this._sectionStart = this._index + 1;
}
_stateAfterComment1(c: string) {
if (c === "-") {
this._state = State.AfterComment2;
Expand Down Expand Up @@ -718,6 +737,8 @@ export default class Tokenizer {
this._stateInAttributeName(c);
} else if (this._state === State.InComment) {
this._stateInComment(c);
} else if (this._state === State.InSpecialComment) {
this._stateInSpecialComment(c);
} else if (this._state === State.BeforeAttributeName) {
this._stateBeforeAttributeName(c);
} else if (this._state === State.InTagName) {
Expand Down
12 changes: 12 additions & 0 deletions src/__fixtures__/Events/34-not-alpha-tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"name": "tag names are not ASCII alpha",
"options": {
"parser": {}
},
"html": "<12>text</12>",
"expected": [
{ "event": "text", "data": ["<12>text"] },
{ "event": "comment", "data": ["12"] },
{ "event": "commentend", "data": [] }
]
}

0 comments on commit bc010de

Please sign in to comment.