Skip to content

Commit

Permalink
Fix class selector with colon (#464)
Browse files Browse the repository at this point in the history
This is related to the following:

#458
#411

I decided to move the "cleaning" (unescaping `\:` and `\.`) into the lexer,
but I think that for more complex escaping rules we may need to move it
back to the Elixir parser.
  • Loading branch information
philss authored Jun 2, 2023
1 parent 7998d89 commit ac13707
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 6 deletions.
2 changes: 1 addition & 1 deletion lib/floki/selector/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ defmodule Floki.Selector.Parser do
end

# Handles a `:hash` token: converts its value with `to_string/1`, stores it as
# the selector's `id`, and continues parsing the remaining tokens `t`.
defp do_parse([{:hash, _, id} | t], selector) do
# NOTE(review): the two calls below look like the before/after pair of this
# diff hunk (the first also replaces "\\." with "." here, the second does not)
# — confirm which one is the live line before relying on either.
do_parse(t, %{selector | id: to_string(id) |> String.replace("\\.", ".")})
do_parse(t, %{selector | id: to_string(id)})
end

defp do_parse([{:class, _, class} | t], selector) do
Expand Down
14 changes: 10 additions & 4 deletions src/floki_selector_lexer.xrl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Definitions.

IDENTIFIER = [-A-Za-z0-9_]+(\\\.[-A-Za-z0-9_]+)*
ESCAPED = \\[:.]
IDENTIFIER = [-A-Za-z0-9_]+(({ESCAPED})?[-A-Za-z0-9_]+)*
QUOTED = (\"[^"]*\"|\'[^']*\')
PARENTESIS = \([^)]*\)
INT = [0-9]+
Expand All @@ -18,9 +19,8 @@ Rules.
{QUOTED} : {token, {quoted, TokenLine, remove_wrapper(TokenChars)}}.
{ATTRIBUTE_IDENTIFIER} : {token, {attribute_identifier, TokenLine, TokenChars}}.
{SYMBOL} : {token, {TokenChars, TokenLine}}.
#{IDENTIFIER} : {token, {hash, TokenLine, tail(TokenChars)}}.
\.{IDENTIFIER} : {token, {class, TokenLine, tail(TokenChars)}}.
\.{IDENTIFIER}\\:{IDENTIFIER} : {token, {class, TokenLine, tail(TokenChars)}}.
#{IDENTIFIER} : {token, {hash, TokenLine, unescape_inside_id_name(tail(TokenChars))}}.
\.{IDENTIFIER} : {token, {class, TokenLine, unescape_inside_class_name(tail(TokenChars))}}.
\:{NOT}\( : {token, {pseudo_not, TokenLine}}.
\:{IDENTIFIER} : {token, {pseudo, TokenLine, tail(TokenChars)}}.
\({INT}\) : {token, {pseudo_class_int, TokenLine, list_to_integer(remove_wrapper(TokenChars))}}.
Expand Down Expand Up @@ -51,3 +51,9 @@ remove_wrapper(Chars) ->
%% Drops the first character of a token and returns the rest.
%% The rules use it to strip the leading marker (`#`, `.` or `:`) from the
%% matched token characters. Only matches non-empty lists.
tail([_Marker | Rest]) ->
  Rest.
%% Removes the escaping backslash from a class name captured by the lexer.
%%
%% The ESCAPED definition accepts both `\:` and `\.` inside an identifier, so
%% both sequences are unescaped here. Leaving either backslash in place would
%% yield a class name that can never match the class attribute in the document.
%%
%% Chars is the token text without its leading `.`; the result is a flat
%% character list.
unescape_inside_class_name(Chars) ->
  %% The replacement text contains no backslash, so running the two passes in
  %% sequence cannot create a new escape sequence.
  WithoutEscapedColons = string:replace(Chars, "\\:", ":", all),
  lists:flatten(string:replace(WithoutEscapedColons, "\\.", ".", all)).
%% Removes the escaping backslash from an id name captured by the lexer.
%%
%% The ESCAPED definition accepts both `\.` and `\:` inside an identifier, so
%% both sequences are unescaped here. Leaving either backslash in place would
%% yield an id that can never match the id attribute in the document.
%%
%% Chars is the token text without its leading `#`; the result is a flat
%% character list.
unescape_inside_id_name(Chars) ->
  %% The replacement text contains no backslash, so running the two passes in
  %% sequence cannot create a new escape sequence.
  WithoutEscapedDots = string:replace(Chars, "\\.", ".", all),
  lists:flatten(string:replace(WithoutEscapedDots, "\\:", ":", all)).
8 changes: 7 additions & 1 deletion test/floki/selector/parser_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ defmodule Floki.Selector.ParserTest do
tokens = tokenize("a.xs\\:red-500")

assert Parser.parse(tokens) == [
%Selector{type: "a", classes: ["xs\\:red-500"], pseudo_classes: []}
%Selector{type: "a", classes: ["xs:red-500"], pseudo_classes: []}
]

tokens = tokenize("a.xs\\:red-500\\:big")

assert Parser.parse(tokens) == [
%Selector{type: "a", classes: ["xs:red-500:big"], pseudo_classes: []}
]
end

Expand Down
25 changes: 25 additions & 0 deletions test/floki_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,31 @@ defmodule FlokiTest do
assert Floki.find(document!(@html), class_selector) == []
end

test "find elements with colon in class names" do
  # Only the second <div> carries both `xl:flex-row` and `md:space-y-20`, so it
  # is the single expected match. The colons are escaped in the selector.
  markup =
    html_body("""
    <div class="w-56 flex justify-end astro-SCKKX6R4"></div>
    <div class="m-auto max-w-7xl px-4 pt-12 pb-20 flex flex-col xl:flex-row space-y-16
    md:space-y-20 xl:space-y-0"></div>
    <section class="flex flex-col xl:flex-row"></section>
    """)

  html = document!(markup)

  matching_div =
    {
      "div",
      [
        {
          "class",
          "m-auto max-w-7xl px-4 pt-12 pb-20 flex flex-col xl:flex-row space-y-16\nmd:space-y-20 xl:space-y-0"
        }
      ],
      []
    }

  assert Floki.find(html, ".xl\\:flex-row.md\\:space-y-20") == [matching_div]
end

# Floki.find/2 - Tag name

test "select elements by tag name" do
Expand Down

0 comments on commit ac13707

Please sign in to comment.