Skip to content

Commit

Permalink
add zscore functions. Close JuliaLang#75.
Browse files Browse the repository at this point in the history
  • Loading branch information
lindahua committed Jul 15, 2014
1 parent 08437dd commit fe549ee
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/StatsBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ module StatsBase
mode, # find a mode from data (the first one)
modes, # find all modes from data

zscore, # compute Z-scores
zscore!, # compute Z-scores inplace or to a pre-allocated array

percentile, # quantile using percentage (instead of fraction) as argument
nquantile, # quantiles at [0:n]/n

Expand Down
83 changes: 83 additions & 0 deletions src/scalarstats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,89 @@ mad{T<:Real}(v::Range{T}) = mad!([v])
iqr{T<:Real}(v::AbstractArray{T}) = (q = quantile(v, [.25, .75]); q[2] - q[1])


#############################
#
# Z-scores
#
#############################

function _zscore!(Z::AbstractArray, X::AbstractArray, μ::Real, σ::Real)
# Z and X are assumed to have the same size
= inv(σ)
if μ == zero(μ)
for i = 1 : length(X)
@inbounds Z[i] = X[i] *
end
else
for i = 1 : length(X)
@inbounds Z[i] = (X[i] - μ) *
end
end
return Z
end

@ngenerate N typeof(Z) function _zscore!{S,T,N}(Z::AbstractArray{S,N}, X::AbstractArray{T,N}, μ::AbstractArray, σ::AbstractArray)
# Z and X are assumed to have the same size
# μ and σ are assumed to have the same size, that is compatible with size(X)
siz1 = size(X, 1)
@nextract N ud d->size(μ, d)
if size(μ, 1) == 1 && siz1 > 1
@nloops N i d->(d>1 ? (1:size(X,d)) : (1:1)) d->(j_d = ud_d ==1 ? 1 : i_d) begin
v = (@nref N μ j)
c = inv(@nref N σ j)
for i_1 = 1:siz1
(@nref N Z i) = ((@nref N X i) - v) * c
end
end
else
@nloops N i X d->(j_d = ud_d ==1 ? 1 : i_d) begin
(@nref N Z i) = ((@nref N X i) - (@nref N μ j)) / (@nref N σ j)
end
end
return Z
end

function _zscore_chksize(X::AbstractArray, μ::AbstractArray, σ::AbstractArray)
size(μ) == size(σ) || throw(DimensionMismatch("μ and σ should have the same size."))
for i=1:ndims(X)
dμ_i = size(μ,i)
(dμ_i == 1 || dμ_i == size(X,i)) || throw(DimensionMismatch("X and μ have incompatible sizes."))
end
end

function zscore!{ZT<:FloatingPoint,T<:Real}(Z::AbstractArray{ZT}, X::AbstractArray{T}, μ::Real, σ::Real)
size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size."))
_zscore!(Z, X, μ, σ)
end

function zscore!{ZT<:FloatingPoint,T<:Real,U<:Real,S<:Real}(Z::AbstractArray{ZT}, X::AbstractArray{T},
μ::AbstractArray{U}, σ::AbstractArray{S})
size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size."))
_zscore_chksize(X, μ, σ)
_zscore!(Z, X, μ, σ)
end

zscore!{T<:FloatingPoint}(X::AbstractArray{T}, μ::Real, σ::Real) = _zscore!(X, X, μ, σ)

zscore!{T<:FloatingPoint,U<:Real,S<:Real}(X::AbstractArray{T}, μ::AbstractArray{U}, σ::AbstractArray{S}) =
(_zscore_chksize(X, μ, σ); _zscore!(X, X, μ, σ))

function zscore{T<:Real}(X::AbstractArray{T}, μ::Real, σ::Real)
ZT = typeof((zero(T) - zero(μ)) / one(σ))
_zscore!(Array(ZT, size(X)), X, μ, σ)
end

function zscore{T<:Real,U<:Real,S<:Real}(X::AbstractArray{T}, μ::AbstractArray{U}, σ::AbstractArray{S})
_zscore_chksize(X, μ, σ)
ZT = typeof((zero(T) - zero(U)) / one(S))
_zscore!(Array(ZT, size(X)), X, μ, σ)
end

zscore{T<:Real}(X::AbstractArray{T}) = ((μ, σ) = mean_and_std(X); zscore(X, μ, σ))
zscore{T<:Real}(X::AbstractArray{T}, dim::Int) = ((μ, σ) = mean_and_std(X, dim); zscore(X, μ, σ))



#############################
#
# entropy and friends
Expand Down
23 changes: 23 additions & 0 deletions test/scalarstats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,29 @@ using Base.Test
@test modes([1, 2, 3, 3, 2, 2, 1]) == [2]
@test sort(modes([1, 3, 2, 3, 3, 2, 2, 1])) == [2, 3]

## zscores

@test zscore([-3:3], 1.5, 0.5) == [-9.0:2.0:3.0]

a = [3 4 5 6; 7 8 1 2; 6 9 3 0]
z1 = [4. 6. 8. 10.; 5. 6. -1. 0.; 1.5 3.0 0.0 -1.5]
z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.]

@test_approx_eq zscore(a, [1, 2, 3], [0.5, 1.0, 2.0]) z1
@test_approx_eq zscore(a, [1 3 2 4], [0.25 0.5 1.0 2.0]) z2

@test zscore!(float64([-3:3]), 1.5, 0.5) == [-9.0:2.0:3.0]
@test_approx_eq zscore!(float64(a), [1, 2, 3], [0.5, 1.0, 2.0]) z1
@test_approx_eq zscore!(float64(a), [1 3 2 4], [0.25 0.5 1.0 2.0]) z2

@test zscore!(zeros(7), [-3:3], 1.5, 0.5) == [-9.0:2.0:3.0]
@test_approx_eq zscore!(zeros(size(a)), a, [1, 2, 3], [0.5, 1.0, 2.0]) z1
@test_approx_eq zscore!(zeros(size(a)), a, [1 3 2 4], [0.25 0.5 1.0 2.0]) z2

@test_approx_eq zscore(a) zscore(a, mean(a), std(a))
@test_approx_eq zscore(a, 1) zscore(a, mean(a,1), std(a,1))
@test_approx_eq zscore(a, 2) zscore(a, mean(a,2), std(a,2))


###### quantile & friends

Expand Down

0 comments on commit fe549ee

Please sign in to comment.