From f41651f2d49e9e51a8065bfc02d4025f2bf8e94e Mon Sep 17 00:00:00 2001
From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com>
Date: Thu, 18 Apr 2024 10:44:02 +0200
Subject: [PATCH] Clarify linear solvers and vector requirements (#140)

* Clarify linear solvers and vector requirements

* Fix parsing
---
 docs/src/faq.md          |  29 +++-------
 src/implicit_function.jl | 116 +++++++++++++++++++++++++++------------
 test/systematic.jl       |   2 +-
 test/utils.jl            |  20 +++----
 4 files changed, 97 insertions(+), 70 deletions(-)

diff --git a/docs/src/faq.md b/docs/src/faq.md
index c9c614d..046114d 100644
--- a/docs/src/faq.md
+++ b/docs/src/faq.md
@@ -18,22 +18,22 @@ You can override the default with the `conditions_x_backend` and `conditions_y_b
 
 ### Arrays
 
-Functions that eat or spit out arbitrary arrays are supported, as long as the forward mapping _and_ conditions return arrays of the same size.
+Functions that eat or spit out arbitrary vectors are supported, as long as the forward mapping _and_ conditions return vectors of the same size.
 
-If you deal with small arrays (say, less than 100 elements), consider using [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl) for increased performance.
+If you deal with small vectors (say, less than 100 elements), consider using [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl) for increased performance.
 
 ### Scalars
 
 Functions that eat or spit out a single number are not supported.
-The forward mapping _and_ conditions need arrays: instead of returning `val` you should return `[val]` (a 1-element `Vector`).
+The forward mapping _and_ conditions need vectors: instead of returning `val` you should return `[val]` (a 1-element `Vector`).
 Or better yet, wrap it in a static vector: `SVector(val)`.
 
-### Sparse arrays
+### Sparse vectors
 
 !!! danger "Danger"
-    Sparse arrays are not supported and might give incorrect values or `NaN`s!
+    Sparse vectors are not supported and might give incorrect values or `NaN`s!
 
-With ForwardDiff.jl, differentiation of sparse arrays will often give wrong results due to [sparsity pattern cancellation](https://github.com/JuliaDiff/ForwardDiff.jl/issues/658).
+With ForwardDiff.jl, differentiation of sparse vectors will often give wrong results due to [sparsity pattern cancellation](https://github.com/JuliaDiff/ForwardDiff.jl/issues/658).
 That is why we do not test behavior for sparse inputs.
 
 ## Number of inputs and outputs
@@ -51,7 +51,7 @@ We now detail each of these options.
 
 ### Multiple inputs or outputs | Derivatives needed
 
-Say your forward mapping takes multiple input arrays and returns multiple output arrays, such that you want derivatives for all of them.
+Say your forward mapping takes multiple inputs and returns multiple outputs, such that you want derivatives for all of them.
 The trick is to leverage [ComponentArrays.jl](https://github.com/jonniedie/ComponentArrays.jl) to wrap all the inputs inside a single `ComponentVector`, and do the same for all the outputs.
 See the examples for a demonstration.
 
@@ -92,21 +92,6 @@ This is mainly useful when the solution procedure creates objects such as Jacobi
 In that case, you may want to write the conditions differentiation rules yourself.
 A more advanced application is given by [DifferentiableFrankWolfe.jl](https://github.com/gdalle/DifferentiableFrankWolfe.jl).
 
-## Linear system
-
-### Lazy or dense
-
-Usually, dense Jacobians are more efficient in small dimension, while lazy operators become necessary in high dimension.
-This choice is made via the `lazy` type parameter of [`ImplicitFunction`](@ref), with `lazy = true` being the default.
-
-### Picking a solver
-
-The right linear solver to use depends on the Jacobian representation.
-You can usually stick to the default settings:
-
-- the direct solver `\` for dense Jacobians
-- an iterative solver from [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) for lazy operators
-
 ## Modeling tips
 
 ### Writing conditions
diff --git a/src/implicit_function.jl b/src/implicit_function.jl
index 6202bdd..ab353b7 100644
--- a/src/implicit_function.jl
+++ b/src/implicit_function.jl
@@ -1,11 +1,31 @@
-struct DefaultLinearSolver end
+"""
+    KrylovLinearSolver
+
+Callable object that can solve linear systems `As = b` and `AS = B` in the same way as `\`.
+Uses an iterative solver from [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) under the hood.
+
+# Note
 
-function (::DefaultLinearSolver)(A, b::AbstractVector)
+This name is not exported, and thus not part of the public API, but it is used in the [`ImplicitFunction`](@ref) constructors.
+"""
+struct KrylovLinearSolver end
+
+"""
+    (::KrylovLinearSolver)(A, b::AbstractVector)
+
+Solve a linear system with a single right-hand side.
+"""
+function (::KrylovLinearSolver)(A, b::AbstractVector)
     x, stats = gmres(A, b)
     return x
 end
 
-function (::DefaultLinearSolver)(A, B::AbstractMatrix)
+"""
+    (::KrylovLinearSolver)(A, B::AbstractMatrix)
+
+Solve a linear system with multiple right-hand sides.
+"""
+function (::KrylovLinearSolver)(A, B::AbstractMatrix)
     # X, stats = block_gmres(A, B) # https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues/854
     X = mapreduce(hcat, eachcol(B)) do b
         first(gmres(A, b))
@@ -25,44 +45,28 @@ When a derivative is queried, the Jacobian of `y` is computed using the implicit
 This requires solving a linear system `A * J = -B` where `A = ∂c/∂y`, `B = ∂c/∂x` and `J = ∂y/∂x`.
 
+# Type parameters
+
+- `lazy::Bool`: whether to represent `A` and `B` with a `LinearOperator` from [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) (`lazy = true`) or a dense Jacobian matrix (`lazy = false`)
+
+Usually, dense Jacobians are more efficient in small dimension, while lazy operators become necessary in high dimension.
+The value of `lazy` must be chosen together with the `linear_solver`, see below.
+
 # Fields
 
 - `forward`: a callable computing `y(x)`, does not need to be compatible with automatic differentiation
 - `conditions`: a callable computing `c(x, y)`, must be compatible with automatic differentiation
-- `linear_solver`: a callable to solve the linear system `A * J = -B`
+- `linear_solver`: a callable to solve the linear system
 - `conditions_x_backend`: defines how the conditions will be differentiated with respect to the first argument `x`
 - `conditions_y_backend`: defines how the conditions will be differentiated with respect to the second argument `y`
 
-# Type parameters
-
-- `lazy`: whether to use a `LinearOperator` from [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) (`lazy = true`) or a dense Jacobian matrix (`lazy = false`) for `A` and `B`
-
-# Constructors
-
-    ImplicitFunction{lazy}(
-        forward, conditions;
-        linear_solver, conditions_x_backend, conditions_x_backend
-    )
-
-    ImplicitFunction(
-        forward, conditions;
-        linear_solver, conditions_x_backend, conditions_x_backend
-    )
-
-Default values:
-
-- `lazy = true`
-- `linear_solver`: the direct solver `\` for dense Jacobians, or an iterative solver from [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) for lazy operators
-- `conditions_x_backend = nothing`
-- `conditions_y_backend = nothing`
-
 # Function signatures
 
 There are two possible signatures for `forward` and `conditions`, which must be consistent with one another:
 
 | standard | byproduct |
 |:---|:---|
-| `forward(x, args...; kwargs...) = y` | `conditions(x, y, args...; kwargs...) = c` |
+| `forward(x, args...; kwargs...) = y` | `conditions(x, y, args...; kwargs...) = c` |
 | `forward(x, args...; kwargs...) = (y, z)` | `conditions(x, y, z, args...; kwargs...) = c` |
 
 In both cases, `x`, `y` and `c` must be `AbstractVector`s, with `length(y) = length(c)`.
 
@@ -75,13 +79,16 @@ The byproduct `z` and the other positional arguments `args...` beyond `x` are co
 The provided `linear_solver` object needs to be callable, with two methods:
 
 - `(A, b::AbstractVector) -> s::AbstractVector` such that `A * s = b`
-- `(A, B::AbstractVector) -> S::AbstractMatrix` such that `A * S = B`
+- `(A, B::AbstractMatrix) -> S::AbstractMatrix` such that `A * S = B`
+
+It can be either a direct solver (like `\`) or an iterative one (like [`KrylovLinearSolver`](@ref)).
+Typically, direct solvers work best with dense Jacobians (`lazy = false`) while iterative solvers work best with operators (`lazy = true`).
 
 # Condition backends
 
 The provided `conditions_x_backend` and `conditions_y_backend` can be either:
 
-- `nothing`, in which case the outer backend (the one differentiating through the `ImplicitFunction`) is used
-- an object subtyping `AbstractADType` from [ADTypes.jl](https://github.com/SciML/ADTypes.jl).
+- an object subtyping `AbstractADType` from [ADTypes.jl](https://github.com/SciML/ADTypes.jl);
+- `nothing`, in which case the outer backend (the one differentiating through the `ImplicitFunction`) is used.
 """
 struct ImplicitFunction{
     lazy,F,C,L,B1<:Union{Nothing,AbstractADType},B2<:Union{Nothing,AbstractADType}
@@ -93,10 +100,28 @@ struct ImplicitFunction{
     conditions_y_backend::B2
 end
 
+"""
+    ImplicitFunction{lazy}(
+        forward, conditions;
+        linear_solver=if lazy
+            KrylovLinearSolver()
+        else
+            \
+        end,
+        conditions_x_backend=nothing,
+        conditions_y_backend=nothing,
+    )
+
+Constructor for an [`ImplicitFunction`](@ref) which picks the `linear_solver` automatically based on the `lazy` parameter.
+"""
 function ImplicitFunction{lazy}(
     forward::F,
     conditions::C;
-    linear_solver::L=lazy ? DefaultLinearSolver() : \,
+    linear_solver::L=if lazy
+        KrylovLinearSolver()
+    else
+        \
+    end,
     conditions_x_backend::B1=nothing,
     conditions_y_backend::B2=nothing,
 ) where {lazy,F,C,L,B1,B2}
@@ -105,8 +130,29 @@ function ImplicitFunction{lazy}(
     )
 end
 
-function ImplicitFunction(forward, conditions; kwargs...)
-    return ImplicitFunction{true}(forward, conditions; kwargs...)
+"""
+    ImplicitFunction(
+        forward, conditions;
+        linear_solver=KrylovLinearSolver(),
+        conditions_x_backend=nothing,
+        conditions_y_backend=nothing,
+    )
+
+Constructor for an [`ImplicitFunction`](@ref) which picks the `lazy` parameter automatically based on the `linear_solver`, using the following heuristic:
+
+    lazy = linear_solver != \
+"""
+function ImplicitFunction(
+    forward,
+    conditions;
+    linear_solver=KrylovLinearSolver(),
+    conditions_x_backend=nothing,
+    conditions_y_backend=nothing,
+)
+    lazy = linear_solver != \
+    return ImplicitFunction{lazy}(
+        forward, conditions; linear_solver, conditions_x_backend, conditions_y_backend
+    )
 end
 
 function Base.show(io::IO, implicit::ImplicitFunction{lazy}) where {lazy}
@@ -119,7 +165,7 @@ function Base.show(io::IO, implicit::ImplicitFunction{lazy}) where {lazy}
 end
 
 """
-    (implicit::ImplicitFunction)(x::AbstractArray, args...; kwargs...)
+    (implicit::ImplicitFunction)(x::AbstractVector, args...; kwargs...)
 
 Return `implicit.forward(x, args...; kwargs...)`, which can be either an `AbstractVector` `y` or a tuple `(y, z)`.
 
diff --git a/test/systematic.jl b/test/systematic.jl
index 5ad16a7..8056bd8 100644
--- a/test/systematic.jl
+++ b/test/systematic.jl
@@ -18,7 +18,7 @@ backends = [
 
 linear_solver_candidates = (
     \, #
-    ID.DefaultLinearSolver(),
+    ID.KrylovLinearSolver(),
 )
 
 conditions_backend_candidates = (
diff --git a/test/utils.jl b/test/utils.jl
index 4286624..373a503 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -31,35 +31,31 @@ end
 
 ## Various signatures
 
-function make_implicit_sqrt(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt(; kwargs...)
     forward(x) = mysqrt(x)
     conditions(x, y) = abs2.(y) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
 
-function make_implicit_sqrt_byproduct(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt_byproduct(; kwargs...)
     forward(x) = one(eltype(x)) .* mysqrt(x), one(eltype(x))
     conditions(x, y, z) = abs2.(y ./ z) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; linear_solver, kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
 
-function make_implicit_sqrt_args(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt_args(; kwargs...)
     forward(x, p) = p .* mysqrt(x)
     conditions(x, y, p) = abs2.(y ./ p) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; linear_solver, kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
 
-function make_implicit_sqrt_kwargs(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt_kwargs(; kwargs...)
     forward(x; p) = p .* mysqrt(x)
     conditions(x, y; p) = abs2.(y ./ p) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; linear_solver, kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
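
---

A minimal usage sketch (not part of the patch) of how the two constructors behave after this change, based on the docstrings and test helpers above. The `mysqrt` forward mapping, the positive test input, and the use of ForwardDiff.jl as the outer backend are assumptions made for illustration, not something this PR introduces.

```julia
using ImplicitDifferentiation
using ForwardDiff

# Forward mapping treated as a black box: componentwise square root.
mysqrt(x::AbstractVector) = sqrt.(x)

# Optimality conditions c(x, y) = y .^ 2 .- x, which vanish at y = sqrt.(x).
conditions(x, y) = abs2.(y) .- x

# Default linear solver is KrylovLinearSolver(), so the heuristic picks lazy = true
# (A and B represented as lazy operators, solved iteratively).
implicit_lazy = ImplicitFunction(mysqrt, conditions)

# Passing the direct solver `\` flips the heuristic to lazy = false (dense Jacobians).
implicit_dense = ImplicitFunction(mysqrt, conditions; linear_solver=(\))

x = rand(3) .+ 1
y = implicit_lazy(x)                        # same values as mysqrt(x)
J = ForwardDiff.jacobian(implicit_lazy, x)  # ≈ diagm(1 ./ (2 .* sqrt.(x)))
```

Either constructor should give the same Jacobian here; the choice only affects how the linear system `A * J = -B` from the docstring is represented and solved.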