# Levenshtein Distance
# taken from https://github.com/JuliaLang/julia/blob/b5af119a6c608de43d6591a6c4129e9369239898/stdlib/REPL/src/docview.jl#L760-L776
#
# Return, as an `Int`, the minimum number of single-element insertions,
# deletions and substitutions needed to turn `s1` into `s2`.
# Both arguments are materialized with `collect`, so any iterable works;
# for strings the comparison is done character by character.
function _levenshtein(s1, s2)
    src = collect(s1)
    dst = collect(s2)
    nsrc, ndst = length(src), length(dst)

    # cost[i + 1, j + 1] is the distance between the first `i` elements of
    # `src` and the first `j` elements of `dst`.
    cost = Matrix{Int}(undef, nsrc + 1, ndst + 1)

    # Borders: reaching or leaving an empty prefix costs one edit per element.
    for i in 0:nsrc
        cost[i + 1, 1] = i
    end
    for j in 0:ndst
        cost[1, j + 1] = j
    end

    for i in 1:nsrc
        for j in 1:ndst
            deletion = cost[i, j + 1] + 1
            insertion = cost[i + 1, j] + 1
            # A `Bool` adds as 0 or 1, so equal elements are free.
            substitution = cost[i, j] + (src[i] != dst[j])
            cost[i + 1, j + 1] = min(deletion, insertion, substitution)
        end
    end

    return cost[nsrc + 1, ndst + 1]
end
maximum Levenshtein distance is 2 @@ -302,7 +322,7 @@ end # Returns candidates ordered by (distance, name) pair function fuzzymatch(l::Dict{Symbol, Int}, idx::Symbol) idxs = uppercase(string(idx)) - dist = [(REPL.levenshtein(uppercase(string(x)), idxs), x) for x in keys(l)] + dist = [(_levenshtein(uppercase(string(x)), idxs), x) for x in keys(l)] sort!(dist) c = [count(x -> x[1] <= i, dist) for i in 0:2] maxd = max(0, searchsortedlast(c, 8) - 1)