Skip to content

Commit

Permalink
git push origin masterMerge branch 'ChenNingCong-fixselector'
Browse files Browse the repository at this point in the history
  • Loading branch information
aviks committed Mar 18, 2020
2 parents 12f3a92 + d2af1ea commit 81cdb2e
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 24 deletions.
16 changes: 16 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name = "Cascadia"
uuid = "54eefc05-d75b-58de-a785-1a3403f0919f"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"

[compat]
julia = "≥ 0.7.0"

[extras]
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["JSON", "Test"]
21 changes: 15 additions & 6 deletions src/selector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,13 @@ end
#// found, or false if no match is found.
function hasDescendantMatch(n::HTMLNode, a::Selector)
for c in Gumbo.children(n)
if typeof(c) == HTMLText; return a(c); end
if typeof(c) == HTMLText
if a(c)
return true
else
continue
end
end
for cc in PostOrderDFS(c)
if a(cc); return true; end
end
Expand Down Expand Up @@ -139,6 +145,7 @@ function typeSelector(tg) #->Selector
tg=lowercase(tg)
return Selector() do n::HTMLNode
if !(typeof(n) <: HTMLElement); return false; end
#@info tg n string(tag(n))
lowercase(string(tag(n))) == tg
end
end
Expand Down Expand Up @@ -244,7 +251,7 @@ end
#// the attribute named key matches the regular expression rx
function attributeRegexSelector(key::AbstractString, rx::Regex) #->Selector
    # `s` is whatever attributeSelector hands to the callback for the attribute
    # named `key` (presumably the attribute's value -- confirm against
    # attributeSelector). The regex must be tested against `s`; testing it
    # against `key` would only ever check the attribute *name*, so the result
    # would not depend on the element's attribute value at all.
    return attributeSelector(key) do s
        return occursin(rx, s)
    end
end

Expand Down Expand Up @@ -384,14 +391,15 @@ function nthChildSelector(a::Int, b::Int, last::Bool, ofType::Bool) #->Selector
if parent == NullNode; return false; end
i=-1
count=0
temp = nothing
for c in Gumbo.children(parent)
if typeof(c) != HTMLElement || (ofType && tag(c) != tag(n))
if !(typeof(c) <: HTMLElement) || (ofType && tag(c) != tag(n))
continue
end
count += 1
if c == n
i=count
if c !== children(parent)[end]
if !last
break
end
end
Expand All @@ -400,7 +408,7 @@ function nthChildSelector(a::Int, b::Int, last::Bool, ofType::Bool) #->Selector
# This shouldn't happen, since n should always be one of its parent's children.
return false
end
if c===children(parent)[end]
if last
i = count -i + 1
end

Expand All @@ -419,12 +427,13 @@ end
#// If ofType is true, it implements :only-of-type instead.
function onlyChildSelector(ofType::Bool) #-> Selector
return Selector() do n::HTMLNode
#@info n
if !(typeof(n) <: HTMLElement); return false; end
parent = n.parent
if parent == nothing || parent == NullNode; return false; end
count = 0
for c in Gumbo.children(parent)
if typeof(c) != HTMLElement || (oftype && tag(c) != tag(n))
if !(typeof(c) <: HTMLElement) || (ofType && tag(c) != tag(n))
continue
end
count += 1
Expand Down
30 changes: 17 additions & 13 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,18 @@ using Test
using JSON
using Gumbo

# write your own tests here
@test 1 == 1
# Render the opening tag of `elem` as a string: the tag name `T` followed by
# every entry of `elem.attributes` as ` name="value"`, ordered by attribute
# name so the output does not depend on the container's iteration order.
function checkstring(elem::HTMLElement{T}) where {T}
    attrs = sort!(collect(elem.attributes); by=first)
    rendered = join(" $key=\"$val\"" for (key, val) in attrs)
    return string("<", T, rendered, ">")
end

# Test shorthand: wrap `x` in a `Cascadia.Parser` so the parser-level tests
# below can be written compactly.
function P(x)
    return Cascadia.Parser(x)
end

@testset "Basic Tests" begin
@test Cascadia.parseName(P("abc")) == "abc"
@test Cascadia.parseName(P("x")) == "x"
@test Cascadia.parseIdentifier(P("abc")) == "abc"
Expand All @@ -29,26 +35,24 @@ P(x) = Cascadia.Parser(x)
@test Cascadia.parseString(P("'x\\\r\nx'")) == "xx"
@test Cascadia.parseString(P("\"a\\\"b\"")) == "a\"b"

Cascadia.parseInteger(P("90:")) == 90
@test Cascadia.parseInteger(P("90:")) == 90

end

###Selector tests. Load data from file.

selectorTests=JSON.parsefile(joinpath(dirname(@__FILE__), "selectorTests.json"))

cnt = 0
for (i, d) in enumerate(selectorTests)
@testset "Selector Test $(d["Selector"])" for d in selectorTests
c = Selector(d["Selector"])
@test typeof(c) == Selector
n=parsehtml(d["HTML"])
r=eachmatch(c, n.root)
l=length(r)
e = length(d["Results"])
if l != e
global cnt += 1
println("Test Failure (known) for $(d["Selector"]) Expected $e, got $l")
else
println("Test Success for $(d["Selector"])")
e = d["Results"]
@test length(r) == length(e)

for i in 1:length(r)
@test lowercase(checkstring(r[i])) == e[i]
end
end
@test cnt <= 19
println("Total test failures: $cnt / $(length(selectorTests))")
10 changes: 5 additions & 5 deletions test/selectorTests.json
Original file line number Diff line number Diff line change
Expand Up @@ -437,23 +437,23 @@
"HTML": "<ul>\n\t\t\t<li><a id=\"a1\" href=\"http://www.google.com/finance\"/>\n\t\t\t<li><a id=\"a2\" href=\"http://finance.yahoo.com/\"/>\n\t\t\t<li><a id=\"a2\" href=\"http://finance.untrusted.com/\"/>\n\t\t\t<li><a id=\"a3\" href=\"https://www.google.com/news\"/>\n\t\t\t<li><a id=\"a4\" href=\"http://news.yahoo.com\"/>\n\t\t</ul>",
"Selector": "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])",
"Results": [
"<a id=\"a1\" href=\"http://www.google.com/finance\">",
"<a id=\"a2\" href=\"http://finance.yahoo.com/\">"
"<a href=\"http://www.google.com/finance\" id=\"a1\">",
"<a href=\"http://finance.yahoo.com/\" id=\"a2\">"
]
},
{
"HTML": "<ul>\n\t\t\t<li><a id=\"a1\" href=\"http://www.google.com/finance\"/>\n\t\t\t<li><a id=\"a2\" href=\"http://finance.yahoo.com/\"/>\n\t\t\t<li><a id=\"a3\" href=\"https://www.google.com/news\"/>\n\t\t\t<li><a id=\"a4\" href=\"http://news.yahoo.com\"/>\n\t\t</ul>",
"Selector": "[href#=(^https:\\/\\/[^\\/]*\\/?news)]",
"Results": [
"<a id=\"a3\" href=\"https://www.google.com/news\">"
"<a href=\"https://www.google.com/news\" id=\"a3\">"
]
},
{
"HTML": "<form>\n\t\t\t<label>Username <input type=\"text\" name=\"username\" /></label>\n\t\t\t<label>Password <input type=\"password\" name=\"password\" /></label>\n\t\t\t<label>Country\n\t\t\t\t<select name=\"country\">\n\t\t\t\t\t<option value=\"ca\">Canada</option>\n\t\t\t\t\t<option value=\"us\">United States</option>\n\t\t\t\t</select>\n\t\t\t</label>\n\t\t\t<label>Bio <textarea name=\"bio\"></textarea></label>\n\t\t\t<button>Sign up</button>\n\t\t</form>",
"Selector": ":input",
"Results": [
"<input type=\"text\" name=\"username\">",
"<input type=\"password\" name=\"password\">",
"<input name=\"username\" type=\"text\">",
"<input name=\"password\" type=\"password\">",
"<select name=\"country\">",
"<textarea name=\"bio\">",
"<button>"
Expand Down

0 comments on commit 81cdb2e

Please sign in to comment.