
Commit 5ae466d

yebai, github-actions[bot], and Red-Portal authored
Remove the Enzyme extension, prepare gradient (#166)
* Remove the Enzyme extension. For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/TuringLang/AdvancedVI.jl?shareId=XXXX-XXXX-XXXX-XXXX).
* Update ad.jl
* Update test/interface/ad.jl
* Update repgradelbo.jl
* Update test/interface/repgradelbo.jl
* Update AdvancedVI.jl
* Avoid type piracy
* Implement #101
* Update src/objectives/elbo/repgradelbo.jl
* Update src/AdvancedVI.jl
* Update scoregradelbo_locationscale.jl
* Apply suggestions from code review
* Add prepare gradient; change order of arguments in DI wrappers

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Kyurae Kim <[email protected]>
1 parent b020976 commit 5ae466d

File tree: 10 files changed (+143, −54 lines)

Project.toml

Lines changed: 0 additions & 4 deletions

@@ -21,11 +21,9 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [weakdeps]
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
-Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 
 [extensions]
 AdvancedVIBijectorsExt = "Bijectors"
-AdvancedVIEnzymeExt = "Enzyme"
 
 [compat]
 ADTypes = "1"
@@ -36,7 +34,6 @@ DiffResults = "1"
 DifferentiationInterface = "0.6"
 Distributions = "0.25.111"
 DocStringExtensions = "0.8, 0.9"
-Enzyme = "0.13"
 FillArrays = "1.3"
 Functors = "0.4, 0.5"
 LinearAlgebra = "1"
@@ -49,7 +46,6 @@ julia = "1.10, 1.11.2"
 
 [extras]
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
-Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

ext/AdvancedVIEnzymeExt.jl

Lines changed: 0 additions & 31 deletions
This file was deleted.
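
With the extension deleted, no AdvancedVI-side Enzyme glue remains: the backend is configured entirely through ADTypes, using exactly the construction that the docstring and tests below now require. A minimal sketch, assuming Enzyme is loaded alongside AdvancedVI:

# The AutoEnzyme configuration required by the new docs and tests,
# built purely from ADTypes and Enzyme; no package extension involved.
using ADTypes, Enzyme

adtype = AutoEnzyme(;
    mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
    function_annotation=Enzyme.Const,
)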

src/AdvancedVI.jl

Lines changed: 50 additions & 6 deletions

@@ -26,27 +26,63 @@ using StatsBase
 
 # Derivatives
 """
-    value_and_gradient!(ad, f, x, aux, out)
+    _value_and_gradient!(f, out, ad, x, aux)
+    _value_and_gradient!(f, out, prep, ad, x, aux)
 
 Evaluate the value and gradient of a function `f` at `x` using the automatic differentiation backend `ad` and store the result in `out`.
 `f` may receive auxiliary input as `f(x,aux)`.
 
 # Arguments
-- `ad::ADTypes.AbstractADType`: Automatic differentiation backend.
+- `ad::ADTypes.AbstractADType`: Automatic differentiation backend. Currently supports
+  `ADTypes.AutoZygote()`, `ADTypes.AutoForwardDiff()`, `ADTypes.AutoReverseDiff()`,
+  `ADTypes.AutoMooncake()`, and
+  `ADTypes.AutoEnzyme(;
+      mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
+      function_annotation=Enzyme.Const,
+  )`.
+  To use `AutoEnzyme`, make sure to include `set_runtime_activity` and `function_annotation` as shown above.
 - `f`: Function subject to differentiation.
 - `x`: The point to evaluate the gradient.
 - `aux`: Auxiliary input passed to `f`.
+- `prep`: Output of `DifferentiationInterface.prepare_gradient`.
 - `out::DiffResults.MutableDiffResult`: Buffer to contain the output gradient and function value.
 """
-function value_and_gradient!(
-    ad::ADTypes.AbstractADType, f, x, aux, out::DiffResults.MutableDiffResult
+function _value_and_gradient!(
+    f, out::DiffResults.MutableDiffResult, ad::ADTypes.AbstractADType, x, aux
 )
     grad_buf = DiffResults.gradient(out)
     y, _ = DifferentiationInterface.value_and_gradient!(f, grad_buf, ad, x, Constant(aux))
     DiffResults.value!(out, y)
     return out
 end
 
+function _value_and_gradient!(
+    f, out::DiffResults.MutableDiffResult, prep, ad::ADTypes.AbstractADType, x, aux
+)
+    grad_buf = DiffResults.gradient(out)
+    y, _ = DifferentiationInterface.value_and_gradient!(
+        f, grad_buf, prep, ad, x, Constant(aux)
+    )
+    DiffResults.value!(out, y)
+    return out
+end
+
+"""
+    _prepare_gradient(f, ad, x, aux)
+
+Prepare the automatic differentiation backend `ad` for taking gradients of a function `f` at `x`.
+
+# Arguments
+- `ad::ADTypes.AbstractADType`: Automatic differentiation backend.
+- `f`: Function subject to differentiation.
+- `x`: The point to evaluate the gradient.
+- `aux`: Auxiliary input passed to `f`.
+"""
+function _prepare_gradient(f, ad::ADTypes.AbstractADType, x, aux)
+    return DifferentiationInterface.prepare_gradient(f, ad, x, Constant(aux))
+end
+
 """
     restructure_ad_forward(adtype, restructure, params)
 
@@ -74,18 +110,26 @@ If the estimator is stateful, it can implement `init` to initialize the state.
 abstract type AbstractVariationalObjective end
 
 """
-    init(rng, obj, prob, params, restructure)
+    init(rng, obj, adtype, prob, params, restructure)
 
 Initialize the state of the variational objective `obj` given the initial variational parameters `λ`.
 This function needs to be implemented only if `obj` is stateful.
 
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
 - `obj::AbstractVariationalObjective`: Variational objective.
+- `adtype::ADTypes.AbstractADType`: Automatic differentiation backend.
 - `params`: Initial variational parameters.
 - `restructure`: Function that reconstructs the variational approximation from `λ`.
 """
-init(::Random.AbstractRNG, ::AbstractVariationalObjective, ::Any, ::Any, ::Any) = nothing
+init(
+    ::Random.AbstractRNG,
+    ::AbstractVariationalObjective,
+    ::ADTypes.AbstractADType,
+    ::Any,
+    ::Any,
+    ::Any,
+) = nothing
 
 """
     estimate_objective([rng,] obj, q, prob; kwargs...)
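
The new wrappers put the differentiated function first and the DiffResults buffer second, mirroring DifferentiationInterface's own argument order, with an optional `prep` slot between `out` and `ad`. A minimal sketch of both call forms, assuming ForwardDiff as the backend and a toy objective (note that these `_`-prefixed helpers are internal, not public API):

# Toy quadratic with auxiliary input, differentiated via the new wrappers.
using ADTypes, DiffResults
import AdvancedVI, ForwardDiff

f(x, aux) = sum(abs2, x) / 2 + sum(aux.b .* x)

x   = randn(5)
aux = (b=randn(5),)
ad  = AutoForwardDiff()
out = DiffResults.GradientResult(x)

# One-shot form:
AdvancedVI._value_and_gradient!(f, out, ad, x, aux)

# Prepared form: build the preparation once, then reuse it across calls.
prep = AdvancedVI._prepare_gradient(f, ad, x, aux)
AdvancedVI._value_and_gradient!(f, out, prep, ad, x, aux)

DiffResults.value(out), DiffResults.gradient(out)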

src/objectives/elbo/repgradelbo.jl

Lines changed: 27 additions & 2 deletions

@@ -33,6 +33,28 @@ struct RepGradELBO{EntropyEst<:AbstractEntropyEstimator} <: AbstractVariationalObjective
     n_samples::Int
 end
 
+function init(
+    rng::Random.AbstractRNG,
+    obj::RepGradELBO,
+    adtype::ADTypes.AbstractADType,
+    prob,
+    params,
+    restructure,
+)
+    q_stop = restructure(params)
+    aux = (
+        rng=rng,
+        adtype=adtype,
+        obj=obj,
+        problem=prob,
+        restructure=restructure,
+        q_stop=q_stop,
+    )
+    return AdvancedVI._prepare_gradient(
+        estimate_repgradelbo_ad_forward, adtype, params, aux
+    )
+end
+
 function RepGradELBO(n_samples::Int; entropy::AbstractEntropyEstimator=ClosedFormEntropy())
     return RepGradELBO(entropy, n_samples)
 end
@@ -129,6 +151,7 @@ function estimate_gradient!(
     restructure,
     state,
 )
+    prep = state
     q_stop = restructure(params)
     aux = (
         rng=rng,
@@ -138,8 +161,10 @@
         restructure=restructure,
         q_stop=q_stop,
     )
-    value_and_gradient!(adtype, estimate_repgradelbo_ad_forward, params, aux, out)
+    AdvancedVI._value_and_gradient!(
+        estimate_repgradelbo_ad_forward, out, prep, adtype, params, aux
+    )
     nelbo = DiffResults.value(out)
     stat = (elbo=-nelbo,)
-    return out, nothing, stat
+    return out, state, stat
 end
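
This is the pattern downstream objectives now follow: `init` receives the AD backend, builds the DifferentiationInterface preparation, and hands it back as the objective state, which `estimate_gradient!` reuses and returns unchanged. For a third-party stateful objective, the contract looks roughly like the sketch below (hypothetical `MyObj` and `my_loss`, not part of AdvancedVI):

using ADTypes, Random
import AdvancedVI

struct MyObj <: AdvancedVI.AbstractVariationalObjective end

# A stand-in differentiable loss; real objectives differentiate an ELBO estimator.
my_loss(params, aux) = sum(abs2, params)

# `init` now receives `adtype`, so the preparation can be built once up front
# and threaded back into every `estimate_gradient!` call as the state.
function AdvancedVI.init(
    rng::Random.AbstractRNG,
    obj::MyObj,
    adtype::ADTypes.AbstractADType,
    prob,
    params,
    restructure,
)
    aux = (problem=prob, restructure=restructure)
    return AdvancedVI._prepare_gradient(my_loss, adtype, params, aux)
end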

src/objectives/elbo/scoregradelbo.jl

Lines changed: 21 additions & 3 deletions

@@ -16,6 +16,23 @@ struct ScoreGradELBO <: AbstractVariationalObjective
     n_samples::Int
 end
 
+function init(
+    rng::Random.AbstractRNG,
+    obj::ScoreGradELBO,
+    adtype::ADTypes.AbstractADType,
+    prob,
+    params,
+    restructure,
+)
+    q = restructure(params)
+    samples = rand(rng, q, obj.n_samples)
+    ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples))
+    aux = (adtype=adtype, logprob_stop=ℓπ, samples_stop=samples, restructure=restructure)
+    return AdvancedVI._prepare_gradient(
+        estimate_scoregradelbo_ad_forward, adtype, params, aux
+    )
+end
+
 function Base.show(io::IO, obj::ScoreGradELBO)
     print(io, "ScoreGradELBO(n_samples=")
     print(io, obj.n_samples)
@@ -71,14 +88,15 @@ function AdvancedVI.estimate_gradient!(
     state,
 )
     q = restructure(params)
+    prep = state
     samples = rand(rng, q, obj.n_samples)
     ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples))
     aux = (adtype=adtype, logprob_stop=ℓπ, samples_stop=samples, restructure=restructure)
-    AdvancedVI.value_and_gradient!(
-        adtype, estimate_scoregradelbo_ad_forward, params, aux, out
+    AdvancedVI._value_and_gradient!(
+        estimate_scoregradelbo_ad_forward, out, prep, adtype, params, aux
     )
     ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples))
     elbo = mean(ℓπ - ℓq)
     stat = (elbo=elbo,)
-    return out, nothing, stat
+    return out, state, stat
 end
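
Note that `init` builds the preparation with one draw of samples while later `estimate_gradient!` calls differentiate at fresh samples; this is sound because a DifferentiationInterface preparation depends on the types and shapes of the arguments, not their values. A small self-contained sketch of that property, in plain DifferentiationInterface and independent of AdvancedVI:

using ADTypes, DifferentiationInterface
import ForwardDiff

g(x, c) = sum(abs2, x .- c)
backend = AutoForwardDiff()

# Prepare once at placeholder inputs...
prep = prepare_gradient(g, backend, zeros(3), Constant(zeros(3)))

# ...then reuse the preparation at different values of the same shape.
for _ in 1:5
    x, c = randn(3), randn(3)
    val, grad = value_and_gradient(g, prep, backend, x, Constant(c))
end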

src/optimize.jl

Lines changed: 3 additions & 1 deletion

@@ -68,7 +68,9 @@ function optimize(
 )
     params, restructure = Optimisers.destructure(deepcopy(q_init))
     opt_st = maybe_init_optimizer(state_init, optimizer, params)
-    obj_st = maybe_init_objective(state_init, rng, objective, problem, params, restructure)
+    obj_st = maybe_init_objective(
+        state_init, rng, objective, adtype, problem, params, restructure
+    )
     avg_st = maybe_init_averager(state_init, averager, params)
     grad_buf = DiffResults.DiffResult(zero(eltype(params)), similar(params))
     stats = NamedTuple[]

src/utils.jl

Lines changed: 2 additions & 1 deletion

@@ -25,14 +25,15 @@ function maybe_init_objective(
     state_init::NamedTuple,
     rng::Random.AbstractRNG,
     objective::AbstractVariationalObjective,
+    adtype::ADTypes.AbstractADType,
     problem,
     params,
     restructure,
 )
     if haskey(state_init, :objective)
         state_init.objective
     else
-        init(rng, objective, problem, params, restructure)
+        init(rng, objective, adtype, problem, params, restructure)
     end
 end
 

test/inference/scoregradelbo_locationscale.jl

Lines changed: 6 additions & 1 deletion

@@ -1,6 +1,11 @@
 
 AD_scoregradelbo_locationscale = if TEST_GROUP == "Enzyme"
-    Dict(:Enzyme => AutoEnzyme())
+    Dict(
+        :Enzyme => AutoEnzyme(;
+            mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
+            function_annotation=Enzyme.Const,
+        ),
+    )
 else
     Dict(
         :ForwarDiff => AutoForwardDiff(),

test/interface/ad.jl

Lines changed: 26 additions & 2 deletions

@@ -2,7 +2,12 @@
 using Test
 
 AD_interface = if TEST_GROUP == "Enzyme"
-    Dict(:Enzyme => AutoEnzyme())
+    Dict(
+        :Enzyme => AutoEnzyme(;
+            mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
+            function_annotation=Enzyme.Const,
+        ),
+    )
 else
     Dict(
         :ForwarDiff => AutoForwardDiff(),
@@ -20,7 +25,26 @@ end
     b = randn(D)
     grad_buf = DiffResults.GradientResult(λ)
     f(λ′, aux) = λ′' * A * λ′ / 2 + dot(aux.b, λ′)
-    AdvancedVI.value_and_gradient!(adtype, f, λ, (b=b,), grad_buf)
+    AdvancedVI._value_and_gradient!(f, grad_buf, adtype, λ, (b=b,))
+    ∇ = DiffResults.gradient(grad_buf)
+    f = DiffResults.value(grad_buf)
+    @test ∇ ≈ (A + A') * λ / 2 + b
+    @test f ≈ λ' * A * λ / 2 + dot(b, λ)
+end
+
+@testset "$(adname) with prep" for (adname, adtype) in AD_interface
+    D = 10
+    λ = randn(D)
+    A = randn(D, D)
+    grad_buf = DiffResults.GradientResult(λ)
+
+    b_prep = randn(D)
+    f(λ′, aux) = λ′' * A * λ′ / 2 + dot(aux.b, λ′)
+    prep = AdvancedVI._prepare_gradient(f, adtype, λ, (b=b_prep,))
+
+    b = randn(D)
+    AdvancedVI._value_and_gradient!(f, grad_buf, prep, adtype, λ, (b=b,))
+
     ∇ = DiffResults.gradient(grad_buf)
     f = DiffResults.value(grad_buf)
     @test ∇ ≈ (A + A') * λ / 2 + b

test/interface/repgradelbo.jl

Lines changed: 8 additions & 3 deletions

@@ -1,6 +1,11 @@
 
 AD_repgradelbo_interface = if TEST_GROUP == "Enzyme"
-    [AutoEnzyme()]
+    [
+        AutoEnzyme(;
+            mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
+            function_annotation=Enzyme.Const,
+        ),
+    ]
 else
     [
         AutoForwardDiff(),
@@ -71,8 +76,8 @@ end
     aux = (
         rng=rng, obj=obj, problem=model, restructure=re, q_stop=q_true, adtype=adtype
    )
-    AdvancedVI.value_and_gradient!(
-        adtype, AdvancedVI.estimate_repgradelbo_ad_forward, params, aux, out
+    AdvancedVI._value_and_gradient!(
+        AdvancedVI.estimate_repgradelbo_ad_forward, out, adtype, params, aux
     )
     grad = DiffResults.gradient(out)
     @test norm(grad) ≈ 0 atol = 1e-5
