From f41651f2d49e9e51a8065bfc02d4025f2bf8e94e Mon Sep 17 00:00:00 2001
From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com>
Date: Thu, 18 Apr 2024 10:44:02 +0200
Subject: [PATCH] Clarify linear solvers and vector requirements (#140)

* Clarify linear solvers and vector requirements

* Fix parsing
---
 docs/src/faq.md          |  29 +++-------
 src/implicit_function.jl | 116 +++++++++++++++++++++++++++------------
 test/systematic.jl       |   2 +-
 test/utils.jl            |  20 +++----
 4 files changed, 97 insertions(+), 70 deletions(-)

diff --git a/docs/src/faq.md b/docs/src/faq.md
index c9c614d..046114d 100644
--- a/docs/src/faq.md
+++ b/docs/src/faq.md
@@ -18,22 +18,22 @@ You can override the default with the `conditions_x_backend` and `conditions_y_b
 
 ### Arrays
 
-Functions that eat or spit out arbitrary arrays are supported, as long as the forward mapping _and_ conditions return arrays of the same size.
+Functions that eat or spit out arbitrary vectors are supported, as long as the forward mapping _and_ conditions return vectors of the same size.
 
-If you deal with small arrays (say, less than 100 elements), consider using [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl) for increased performance.
+If you deal with small vectors (say, less than 100 elements), consider using [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl) for increased performance.
 
 ### Scalars
 
 Functions that eat or spit out a single number are not supported.
-The forward mapping _and_ conditions need arrays: instead of returning `val` you should return `[val]` (a 1-element `Vector`).
+The forward mapping _and_ conditions need vectors: instead of returning `val` you should return `[val]` (a 1-element `Vector`).
 Or better yet, wrap it in a static vector: `SVector(val)`.
 
-### Sparse arrays
+### Sparse vectors
 
 !!! danger "Danger"
-    Sparse arrays are not supported and might give incorrect values or `NaN`s!
+    Sparse vectors are not supported and might give incorrect values or `NaN`s!
 
-With ForwardDiff.jl, differentiation of sparse arrays will often give wrong results due to [sparsity pattern cancellation](https://github.com/JuliaDiff/ForwardDiff.jl/issues/658).
+With ForwardDiff.jl, differentiation of sparse vectors will often give wrong results due to [sparsity pattern cancellation](https://github.com/JuliaDiff/ForwardDiff.jl/issues/658).
 That is why we do not test behavior for sparse inputs.
 
 ## Number of inputs and outputs
@@ -51,7 +51,7 @@ We now detail each of these options.
 
 ### Multiple inputs or outputs | Derivatives needed
 
-Say your forward mapping takes multiple input arrays and returns multiple output arrays, such that you want derivatives for all of them.
+Say your forward mapping takes multiple inputs and returns multiple outputs, such that you want derivatives for all of them.
 The trick is to leverage [ComponentArrays.jl](https://github.com/jonniedie/ComponentArrays.jl) to wrap all the inputs inside a single `ComponentVector`, and do the same for all the outputs.
 See the examples for a demonstration.
 
@@ -92,21 +92,6 @@ This is mainly useful when the solution procedure creates objects such as Jacobi
 In that case, you may want to write the conditions differentiation rules yourself.
 A more advanced application is given by [DifferentiableFrankWolfe.jl](https://github.com/gdalle/DifferentiableFrankWolfe.jl).
 
-## Linear system
-
-### Lazy or dense
-
-Usually, dense Jacobians are more efficient in small dimension, while lazy operators become necessary in high dimension.
-This choice is made via the `lazy` type parameter of [`ImplicitFunction`](@ref), with `lazy = true` being the default.
-
-### Picking a solver
-
-The right linear solver to use depends on the Jacobian representation.
-You can usually stick to the default settings:
-
-- the direct solver `\` for dense Jacobians
-- an iterative solver from [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) for lazy operators
-
 ## Modeling tips
 
 ### Writing conditions
diff --git a/src/implicit_function.jl b/src/implicit_function.jl
index 6202bdd..ab353b7 100644
--- a/src/implicit_function.jl
+++ b/src/implicit_function.jl
@@ -1,11 +1,31 @@
-struct DefaultLinearSolver end
+"""
+    KrylovLinearSolver
+
+Callable object that can solve linear systems `As = b` and `AS = B` in the same way as `\`.
+Uses an iterative solver from [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) under the hood.
+
+# Note
 
-function (::DefaultLinearSolver)(A, b::AbstractVector)
+This name is not exported, and thus not part of the public API, but it is used in the [`ImplicitFunction`](@ref) constructors.
+"""
+struct KrylovLinearSolver end
+
+"""
+    (::KrylovLinearSolver)(A, b::AbstractVector)
+
+Solve a linear system with a single right-hand side.
+"""
+function (::KrylovLinearSolver)(A, b::AbstractVector)
     x, stats = gmres(A, b)
     return x
 end
 
-function (::DefaultLinearSolver)(A, B::AbstractMatrix)
+"""
+    (::KrylovLinearSolver)(A, B::AbstractMatrix)
+
+Solve a linear system with multiple right-hand sides.
+"""
+function (::KrylovLinearSolver)(A, B::AbstractMatrix)
     # X, stats = block_gmres(A, B) # https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues/854
     X = mapreduce(hcat, eachcol(B)) do b
         first(gmres(A, b))
@@ -25,44 +45,28 @@ When a derivative is queried, the Jacobian of `y` is computed using the implicit
 This requires solving a linear system `A * J = -B` where `A = ∂c/∂y`, `B = ∂c/∂x` and `J = ∂y/∂x`.
 
+# Type parameters
+
+- `lazy::Bool`: whether to represent `A` and `B` with a `LinearOperator` from [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) (`lazy = true`) or a dense Jacobian matrix (`lazy = false`)
+
+Usually, dense Jacobians are more efficient in small dimension, while lazy operators become necessary in high dimension.
+The value of `lazy` must be chosen together with the `linear_solver`, see below.
+
 # Fields
 
 - `forward`: a callable computing `y(x)`, does not need to be compatible with automatic differentiation
 - `conditions`: a callable computing `c(x, y)`, must be compatible with automatic differentiation
-- `linear_solver`: a callable to solve the linear system `A * J = -B`
+- `linear_solver`: a callable to solve the linear system
 - `conditions_x_backend`: defines how the conditions will be differentiated with respect to the first argument `x`
 - `conditions_y_backend`: defines how the conditions will be differentiated with respect to the second argument `y`
 
-# Type parameters
-
-- `lazy`: whether to use a `LinearOperator` from [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) (`lazy = true`) or a dense Jacobian matrix (`lazy = false`) for `A` and `B`
-
-# Constructors
-
-    ImplicitFunction{lazy}(
-        forward, conditions;
-        linear_solver, conditions_x_backend, conditions_x_backend
-    )
-
-    ImplicitFunction(
-        forward, conditions;
-        linear_solver, conditions_x_backend, conditions_x_backend
-    )
-
-Default values:
-
-- `lazy = true`
-- `linear_solver`: the direct solver `\` for dense Jacobians, or an iterative solver from [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) for lazy operators
-- `conditions_x_backend = nothing`
-- `conditions_y_backend = nothing`
-
 # Function signatures
 
 There are two possible signatures for `forward` and `conditions`, which must be consistent with one another:
 
 | standard | byproduct |
 |:---|:---|
-| `forward(x, args...; kwargs...) = y` | `conditions(x, y, args...; kwargs...) = c` |
+| `forward(x, args...; kwargs...) = y` | `conditions(x, y, args...; kwargs...) = c` |
 | `forward(x, args...; kwargs...) = (y, z)` | `conditions(x, y, z, args...; kwargs...) = c` |
 
 In both cases, `x`, `y` and `c` must be `AbstractVector`s, with `length(y) = length(c)`.
 
@@ -75,13 +79,16 @@ The byproduct `z` and the other positional arguments `args...` beyond `x` are co
 The provided `linear_solver` object needs to be callable, with two methods:
 
 - `(A, b::AbstractVector) -> s::AbstractVector` such that `A * s = b`
-- `(A, B::AbstractVector) -> S::AbstractMatrix` such that `A * S = B`
+- `(A, B::AbstractMatrix) -> S::AbstractMatrix` such that `A * S = B`
+
+It can be either a direct solver (like `\`) or an iterative one (like [`KrylovLinearSolver`](@ref)).
+Typically, direct solvers work best with dense Jacobians (`lazy = false`) while iterative solvers work best with operators (`lazy = true`).
 
 # Condition backends
 
 The provided `conditions_x_backend` and `conditions_y_backend` can be either:
 
-- `nothing`, in which case the outer backend (the one differentiating through the `ImplicitFunction`) is used
-- an object subtyping `AbstractADType` from [ADTypes.jl](https://github.com/SciML/ADTypes.jl).
+- an object subtyping `AbstractADType` from [ADTypes.jl](https://github.com/SciML/ADTypes.jl);
+- `nothing`, in which case the outer backend (the one differentiating through the `ImplicitFunction`) is used.
 """
 struct ImplicitFunction{
     lazy,F,C,L,B1<:Union{Nothing,AbstractADType},B2<:Union{Nothing,AbstractADType}
@@ -93,10 +100,28 @@ struct ImplicitFunction{
     conditions_y_backend::B2
 end
 
+"""
+    ImplicitFunction{lazy}(
+        forward, conditions;
+        linear_solver=if lazy
+            KrylovLinearSolver()
+        else
+            \
+        end,
+        conditions_x_backend=nothing,
+        conditions_y_backend=nothing,
+    )
+
+Constructor for an [`ImplicitFunction`](@ref) which picks the `linear_solver` automatically based on the `lazy` parameter.
+"""
 function ImplicitFunction{lazy}(
     forward::F,
     conditions::C;
-    linear_solver::L=lazy ? DefaultLinearSolver() : \,
+    linear_solver::L=if lazy
+        KrylovLinearSolver()
+    else
+        \
+    end,
     conditions_x_backend::B1=nothing,
     conditions_y_backend::B2=nothing,
 ) where {lazy,F,C,L,B1,B2}
@@ -105,8 +130,29 @@ function ImplicitFunction{lazy}(
     )
 end
 
-function ImplicitFunction(forward, conditions; kwargs...)
-    return ImplicitFunction{true}(forward, conditions; kwargs...)
+"""
+    ImplicitFunction(
+        forward, conditions;
+        linear_solver=KrylovLinearSolver(),
+        conditions_x_backend=nothing,
+        conditions_y_backend=nothing,
+    )
+
+Constructor for an [`ImplicitFunction`](@ref) which picks the `lazy` parameter automatically based on the `linear_solver`, using the following heuristic:
+
+    lazy = linear_solver != \
+"""
+function ImplicitFunction(
+    forward,
+    conditions;
+    linear_solver=KrylovLinearSolver(),
+    conditions_x_backend=nothing,
+    conditions_y_backend=nothing,
+)
+    lazy = linear_solver != \
+    return ImplicitFunction{lazy}(
+        forward, conditions; linear_solver, conditions_x_backend, conditions_y_backend
+    )
 end
 
 function Base.show(io::IO, implicit::ImplicitFunction{lazy}) where {lazy}
@@ -119,7 +165,7 @@ function Base.show(io::IO, implicit::ImplicitFunction{lazy}) where {lazy}
 end
 
 """
-    (implicit::ImplicitFunction)(x::AbstractArray, args...; kwargs...)
+    (implicit::ImplicitFunction)(x::AbstractVector, args...; kwargs...)
 
 Return `implicit.forward(x, args...; kwargs...)`, which can be either an `AbstractVector` `y` or a tuple `(y, z)`.
 
diff --git a/test/systematic.jl b/test/systematic.jl
index 5ad16a7..8056bd8 100644
--- a/test/systematic.jl
+++ b/test/systematic.jl
@@ -18,7 +18,7 @@ backends = [
 
 linear_solver_candidates = (
     \, #
-    ID.DefaultLinearSolver(),
+    ID.KrylovLinearSolver(),
 )
 
 conditions_backend_candidates = (
diff --git a/test/utils.jl b/test/utils.jl
index 4286624..373a503 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -31,35 +31,31 @@ end
 
 ## Various signatures
 
-function make_implicit_sqrt(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt(; kwargs...)
     forward(x) = mysqrt(x)
     conditions(x, y) = abs2.(y) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
 
-function make_implicit_sqrt_byproduct(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt_byproduct(; kwargs...)
     forward(x) = one(eltype(x)) .* mysqrt(x), one(eltype(x))
     conditions(x, y, z) = abs2.(y ./ z) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; linear_solver, kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
 
-function make_implicit_sqrt_args(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt_args(; kwargs...)
     forward(x, p) = p .* mysqrt(x)
     conditions(x, y, p) = abs2.(y ./ p) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; linear_solver, kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
 
-function make_implicit_sqrt_kwargs(; linear_solver, kwargs...)
-    lazy = !(linear_solver isa typeof(\))
+function make_implicit_sqrt_kwargs(; kwargs...)
     forward(x; p) = p .* mysqrt(x)
     conditions(x, y; p) = abs2.(y ./ p) .- abs.(x)
-    implicit = ImplicitFunction{lazy}(forward, conditions; linear_solver, kwargs...)
+    implicit = ImplicitFunction(forward, conditions; kwargs...)
     return implicit
 end
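
---

A minimal usage sketch (not part of the patch) of how the two constructors behave after this change, based on the docstrings and test helpers above. The `mysqrt` forward mapping, the positive test input, and the use of ForwardDiff.jl as the outer backend are assumptions made for illustration, not something this PR introduces.

```julia
using ImplicitDifferentiation
using ForwardDiff

# Forward mapping treated as a black box: componentwise square root.
mysqrt(x::AbstractVector) = sqrt.(x)

# Optimality conditions c(x, y) = y .^ 2 .- x, which vanish at y = sqrt.(x).
conditions(x, y) = abs2.(y) .- x

# Default linear solver is KrylovLinearSolver(), so the heuristic picks lazy = true
# (A and B represented as lazy operators, solved iteratively).
implicit_lazy = ImplicitFunction(mysqrt, conditions)

# Passing the direct solver `\` flips the heuristic to lazy = false (dense Jacobians).
implicit_dense = ImplicitFunction(mysqrt, conditions; linear_solver=(\))

x = rand(3) .+ 1
y = implicit_lazy(x)                        # same values as mysqrt(x)
J = ForwardDiff.jacobian(implicit_lazy, x)  # ≈ diagm(1 ./ (2 .* sqrt.(x)))
```

Either constructor should give the same Jacobian here; the choice only affects how the linear system `A * J = -B` from the docstring is represented and solved.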