move to grad.jl, no other changes

FluxML · Jan 29, 2021 · c6c501b · c6c501b
1 parent be93f84
commit c6c501b
Show file tree

Hide file tree

Showing 2 changed files with 175 additions and 175 deletions.
diff --git a/src/lib/grad.jl b/src/lib/grad.jl
@@ -38,3 +38,178 @@ function Zygote._pullback(ctx::Zygote.AContext, ::typeof(checkpointed), f, xs...
     end
     return y, pullback_checkpointed
 end
+
+"""
+    hessian(f, x)
+
+Construct the Hessian `∂²f/∂x²`, where `x` is a real number or an array,
+and `f(x)` is a real number. When `x` is an array, the result is a matrix
+`H[i,j] = ∂²f/∂x[i]∂x[j]`, using linear indexing `x[i]` even if the argument
+is higher-dimensional.
+
+This uses forward over reverse, ForwardDiff over Zygote, calling `hessian_dual(f, x)`.
+See [`hessian_reverse`](@ref) for an all-Zygote alternative.
+
+# Examples
+
+```jldoctest; setup=:(using Zygote)
+julia> Zygote.hessian(x -> x[1]*x[2], randn(2))
+2×2 Array{Float64,2}:
+ 0.0  1.0
+ 1.0  0.0
+
+julia> Zygote.hessian(x -> sum(x.^3), [1 2; 3 4])  # uses linear indexing of x
+4×4 Array{$Int,2}:
+ 6   0   0   0
+ 0  18   0   0
+ 0   0  12   0
+ 0   0   0  24
+
+julia> Zygote.hessian(sin, pi/2)
+-1.0
+```
+"""
+hessian(f, x) = hessian_dual(f, x)
+
+hessian_dual(f, x::AbstractArray) = forward_jacobian(x -> gradient(f, x)[1], x)[2]
+
+hessian_dual(f, x::Number) = ForwardDiff.derivative(x -> gradient(f, x)[1], x)
+
+"""
+    hessian_reverse(f, x)
+
+This should be equivalent to [`hessian(f, x)`](@ref hessian),
+but implemented using reverse over reverse mode, all Zygote.
+(This is usually much slower, and more likely to find errors.)
+"""
+hessian_reverse(f, x::AbstractArray) = jacobian(x -> gradient(f, x)[1], x)[1]
+
+hessian_reverse(f, x::Number) = gradient(x -> gradient(f, x)[1], x)[1]
+
+
+"""
+    jacobian(f, args...) -> Tuple
+
+For each array `a ∈ args` this returns a matrix with `Ja[k,i] = ∂y[k]/∂a[i]`
+where `y = f(args...)` is usually a vector.
+Arrays of higher dimension are treated like `vec(a)`, or `vec(y)` for output.
+
+For scalar `x::Number ∈ args`, the result `Jx[k,1] = ∂y[k]/∂x` is a vector,
+while for scalar `y` all results have just one row.
+
+With any other argument type, no result is produced, even if [`gradient`](@ref) would work.
+
+This reverse-mode Jacobian needs to evaluate the pullback once for each element of `y`.
+Doing so is usually only efficient when `length(y)` is small compared to `length(a)`,
+otherwise forward mode is likely to be better.
+
+See also [`hessian`](@ref), [`hessian_reverse`](@ref).
+
+# Examples
+
+```jldoctest; setup=:(using Zygote)
+julia> jacobian(a -> 100*a[1:3].^2, 1:7)[1]  # first index (rows) is output
+3×7 Array{$Int,2}:
+ 200    0    0  0  0  0  0
+   0  400    0  0  0  0  0
+   0    0  600  0  0  0  0
+
+julia> jacobian((a,x) -> a.^2 .* x, [1,2,3], 1)  # scalar argument has vector jacobian
+([2 0 0; 0 4 0; 0 0 6], [1, 4, 9])
+
+julia> jacobian((a,d) -> prod(a, dims=d), [1 2; 3 4; 5 6], 2)
+([2 0 … 0 0; 0 4 … 3 0; 0 0 … 0 5], [0, 0, 0])
+```
+
+!!! warning
+    For arguments of any type except `Number` & `AbstractArray`, the result is `nothing`.
+
+```jldoctest; setup=:(using Zygote)
+julia> jacobian((a,s) -> a.^length(s), [1,2,3], "str")
+([3 0 0; 0 12 0; 0 0 27], nothing)
+
+julia> jacobian((a,t) -> sum(a .* t[1]) + t[2], [1,2,3], (4,5))
+([4 4 4], nothing)
+
+julia> gradient((a,t) -> sum(a .* t[1]) + t[2], [1,2,3], (4,5))  # gradient undersands the tuple
+([4, 4, 4], (6, 1))
+```
+"""
+function jacobian(f, args...)
+  y, back = pullback(_jvec∘f, args...)
+  out = map(args) do x
+    T = promote_type(eltype(x), eltype(y))
+    dx = x isa AbstractArray ? similar(x, T, length(y), length(x)) :
+      x isa Number ? similar(y, T, length(y)) :
+      nothing
+  end
+  delta = _eyelike(y)
+  for k in LinearIndices(y)
+    grads = back(delta[:,k])
+    for (dx, grad) in zip(out, grads)
+      dx isa AbstractArray || continue
+      _gradcopy!(view(dx,k,:), grad)
+    end
+  end
+  out
+end
+
+_jvec(x::AbstractArray) = vec(x)
+_jvec(x::Number) = _jvec(vcat(x))
+_jvec(x) = throw(ArgumentError("jacobian expected a function which returns an array, or a scalar, got $(typeof(x))"))
+_jvec(x::AbstractArray{<:Complex}) = throw(ArgumentError("jacobian does not accept complex output"))
+
+_eyelike(y::Vector) = Matrix{eltype(y)}(I, length(y), length(y))
+function _eyelike(y::AbstractVector) # version which works on GPU
+  out = fill!(similar(y, length(y), length(y)), 0)
+  out[LinearAlgebra.diagind(out)] .= 1
+  out
+end
+
+_gradcopy!(dst::AbstractArray, src::AbstractArray{<:Number}) = copyto!(dst, src)
+_gradcopy!(dst::AbstractArray, src::Number) = copyto!(dst, src)
+_gradcopy!(dst::AbstractArray, src::Nothing) = dst .= 0
+_gradcopy!(dst::AbstractArray, src::AbstractArray) = copyto!(dst, g isa Number ? g : 0 for g in src) # e.g. Union{Nothing,Float64}
+
+"""
+    jacobian(loss, ::Params)
+
+Like [`gradient`](@ref) with implicit parameters, this method takes a zero-argument function
+and returns an `IdDict`-like object, now containing the Jacobian for each parameter.
+
+# Examples
+```jldoctest; setup=:(using Zygote)
+julia> xs = [1 2; 3 4]; ys = [5,7,9];
+
+julia> Jxy = jacobian(() -> ys[1:2] .+ sum(xs.^2), Params([xs, ys]))
+Grads(...)
+
+julia> Jxy[ys]
+2×3 Array{$Int,2}:
+ 1  0  0
+ 0  1  0
+
+julia> Jxy[xs]
+2×4 Array{$Int,2}:
+ 2  6  4  8
+ 2  6  4  8
+```
+"""
+function jacobian(f, pars::Params)
+  y, back = pullback(_jvec∘f, pars)
+  out = IdDict()
+  for p in pars
+    T = Base.promote_type(eltype(p), eltype(y))
+    J = similar(y, T, length(y), length(p))
+    out[p] = J
+  end
+  delta = _eyelike(y)
+  for k in LinearIndices(y)
+    grads = back(delta[:,k])
+    for p in pars
+      out[p] isa AbstractArray || continue
+      _gradcopy!(view(out[p],k,:), grads[p])
+    end
+  end
+  Grads(out, pars)
+end
diff --git a/src/lib/utils.jl b/src/lib/utils.jl
@@ -98,178 +98,3 @@ macro showgrad(x)
       x̄
     end)
 end
-
-"""
-    hessian(f, x)
-
-Construct the Hessian `∂²f/∂x²`, where `x` is a real number or an array,
-and `f(x)` is a real number. When `x` is an array, the result is a matrix
-`H[i,j] = ∂²f/∂x[i]∂x[j]`, using linear indexing `x[i]` even if the argument
-is higher-dimensional.
-
-This uses forward over reverse, ForwardDiff over Zygote, calling `hessian_dual(f, x)`.
-See [`hessian_reverse`](@ref) for an all-Zygote alternative.
-
-# Examples
-
-```jldoctest; setup=:(using Zygote)
-julia> Zygote.hessian(x -> x[1]*x[2], randn(2))
-2×2 Array{Float64,2}:
- 0.0  1.0
- 1.0  0.0
-
-julia> Zygote.hessian(x -> sum(x.^3), [1 2; 3 4])  # uses linear indexing of x
-4×4 Array{$Int,2}:
- 6   0   0   0
- 0  18   0   0
- 0   0  12   0
- 0   0   0  24
-
-julia> Zygote.hessian(sin, pi/2)
--1.0
-```
-"""
-hessian(f, x) = hessian_dual(f, x)
-
-hessian_dual(f, x::AbstractArray) = forward_jacobian(x -> gradient(f, x)[1], x)[2]
-
-hessian_dual(f, x::Number) = ForwardDiff.derivative(x -> gradient(f, x)[1], x)
-
-"""
-    hessian_reverse(f, x)
-
-This should be equivalent to [`hessian(f, x)`](@ref hessian),
-but implemented using reverse over reverse mode, all Zygote.
-(This is usually much slower, and more likely to find errors.)
-"""
-hessian_reverse(f, x::AbstractArray) = jacobian(x -> gradient(f, x)[1], x)[1]
-
-hessian_reverse(f, x::Number) = gradient(x -> gradient(f, x)[1], x)[1]
-
-
-"""
-    jacobian(f, args...) -> Tuple
-
-For each array `a ∈ args` this returns a matrix with `Ja[k,i] = ∂y[k]/∂a[i]`
-where `y = f(args...)` is usually a vector.
-Arrays of higher dimension are treated like `vec(a)`, or `vec(y)` for output.
-
-For scalar `x::Number ∈ args`, the result `Jx[k,1] = ∂y[k]/∂x` is a vector,
-while for scalar `y` all results have just one row.
-
-With any other argument type, no result is produced, even if [`gradient`](@ref) would work.
-
-This reverse-mode Jacobian needs to evaluate the pullback once for each element of `y`.
-Doing so is usually only efficient when `length(y)` is small compared to `length(a)`,
-otherwise forward mode is likely to be better.
-
-See also [`hessian`](@ref), [`hessian_reverse`](@ref).
-
-# Examples
-
-```jldoctest; setup=:(using Zygote)
-julia> jacobian(a -> 100*a[1:3].^2, 1:7)[1]  # first index (rows) is output
-3×7 Array{$Int,2}:
- 200    0    0  0  0  0  0
-   0  400    0  0  0  0  0
-   0    0  600  0  0  0  0
-
-julia> jacobian((a,x) -> a.^2 .* x, [1,2,3], 1)  # scalar argument has vector jacobian
-([2 0 0; 0 4 0; 0 0 6], [1, 4, 9])
-
-julia> jacobian((a,d) -> prod(a, dims=d), [1 2; 3 4; 5 6], 2)
-([2 0 … 0 0; 0 4 … 3 0; 0 0 … 0 5], [0, 0, 0])
-```
-
-!!! warning
-    For arguments of any type except `Number` & `AbstractArray`, the result is `nothing`.
-
-```jldoctest; setup=:(using Zygote)
-julia> jacobian((a,s) -> a.^length(s), [1,2,3], "str")
-([3 0 0; 0 12 0; 0 0 27], nothing)
-
-julia> jacobian((a,t) -> sum(a .* t[1]) + t[2], [1,2,3], (4,5))
-([4 4 4], nothing)
-
-julia> gradient((a,t) -> sum(a .* t[1]) + t[2], [1,2,3], (4,5))  # gradient undersands the tuple
-([4, 4, 4], (6, 1))
-```
-"""
-function jacobian(f, args...)
-  y, back = pullback(_jvec∘f, args...)
-  out = map(args) do x
-    T = promote_type(eltype(x), eltype(y))
-    dx = x isa AbstractArray ? similar(x, T, length(y), length(x)) :
-      x isa Number ? similar(y, T, length(y)) :
-      nothing
-  end
-  delta = _eyelike(y)
-  for k in LinearIndices(y)
-    grads = back(delta[:,k])
-    for (dx, grad) in zip(out, grads)
-      dx isa AbstractArray || continue
-      _gradcopy!(view(dx,k,:), grad)
-    end
-  end
-  out
-end
-
-_jvec(x::AbstractArray) = vec(x)
-_jvec(x::Number) = _jvec(vcat(x))
-_jvec(x) = throw(ArgumentError("jacobian expected a function which returns an array, or a scalar, got $(typeof(x))"))
-_jvec(x::AbstractArray{<:Complex}) = throw(ArgumentError("jacobian does not accept complex output"))
-
-_eyelike(y::Vector) = Matrix{eltype(y)}(I, length(y), length(y))
-function _eyelike(y::AbstractVector) # version which works on GPU
-  out = fill!(similar(y, length(y), length(y)), 0)
-  out[LinearAlgebra.diagind(out)] .= 1
-  out
-end
-
-_gradcopy!(dst::AbstractArray, src::AbstractArray{<:Number}) = copyto!(dst, src)
-_gradcopy!(dst::AbstractArray, src::Number) = copyto!(dst, src)
-_gradcopy!(dst::AbstractArray, src::Nothing) = dst .= 0
-_gradcopy!(dst::AbstractArray, src::AbstractArray) = copyto!(dst, g isa Number ? g : 0 for g in src) # e.g. Union{Nothing,Float64}
-
-"""
-    jacobian(loss, ::Params)
-
-Like [`gradient`](@ref) with implicit parameters, this method takes a zero-argument function
-and returns an `IdDict`-like object, now containing the Jacobian for each parameter.
-
-# Examples
-```jldoctest; setup=:(using Zygote)
-julia> xs = [1 2; 3 4]; ys = [5,7,9];
-
-julia> Jxy = jacobian(() -> ys[1:2] .+ sum(xs.^2), Params([xs, ys]))
-Grads(...)
-
-julia> Jxy[ys]
-2×3 Array{$Int,2}:
- 1  0  0
- 0  1  0
-
-julia> Jxy[xs]
-2×4 Array{$Int,2}:
- 2  6  4  8
- 2  6  4  8
-```
-"""
-function jacobian(f, pars::Params)
-  y, back = pullback(_jvec∘f, pars)
-  out = IdDict()
-  for p in pars
-    T = Base.promote_type(eltype(p), eltype(y))
-    J = similar(y, T, length(y), length(p))
-    out[p] = J
-  end
-  delta = _eyelike(y)
-  for k in LinearIndices(y)
-    grads = back(delta[:,k])
-    for p in pars
-      out[p] isa AbstractArray || continue
-      _gradcopy!(view(out[p],k,:), grads[p])
-    end
-  end
-  Grads(out, pars)
-end