@@ -64,27 +64,28 @@ struct CouplingLayerGlow <: NeuralNetLayer
    C::Conv1x1
    RB::Union{ResidualBlock, FluxBlock}
    logdet::Bool
+    activation::ActivationFunction
end

@Flux.functor CouplingLayerGlow

# Constructor from 1x1 convolution and residual block
- function CouplingLayerGlow(C::Conv1x1, RB::ResidualBlock; logdet=false)
+ function CouplingLayerGlow(C::Conv1x1, RB::ResidualBlock; logdet=false, activation::ActivationFunction=SigmoidLayer())
    RB.fan == false && throw("Set ResidualBlock.fan == true")
-    return CouplingLayerGlow(C, RB, logdet)
+    return CouplingLayerGlow(C, RB, logdet, activation)
end

# Constructor from 1x1 convolution and residual Flux block
- CouplingLayerGlow(C::Conv1x1, RB::FluxBlock; logdet=false) = CouplingLayerGlow(C, RB, logdet)
+ CouplingLayerGlow(C::Conv1x1, RB::FluxBlock; logdet=false, activation::ActivationFunction=SigmoidLayer()) = CouplingLayerGlow(C, RB, logdet, activation)

# Constructor from input dimensions
- function CouplingLayerGlow(n_in::Int64, n_hidden::Int64; k1=3, k2=1, p1=1, p2=0, s1=1, s2=1, logdet=false, ndims=2)
+ function CouplingLayerGlow(n_in::Int64, n_hidden::Int64; k1=3, k2=1, p1=1, p2=0, s1=1, s2=1, logdet=false, activation::ActivationFunction=SigmoidLayer(), ndims=2)

    # 1x1 Convolution and residual block for invertible layer
    C = Conv1x1(n_in)
    RB = ResidualBlock(Int(n_in/2), n_hidden; k1=k1, k2=k2, p1=p1, p2=p2, s1=s1, s2=s2, fan=true, ndims=ndims)

-    return CouplingLayerGlow(C, RB, logdet)
+    return CouplingLayerGlow(C, RB, logdet, activation)
end

CouplingLayerGlow3D(args...; kw...) = CouplingLayerGlow(args...; kw..., ndims=3)
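
For context, a minimal construction sketch with the new keyword (hedged: the module name `InvertibleNetworks`, the sizes 4/16, and the explicit `SigmoidLayer()` argument are illustrative, not part of this patch):

using InvertibleNetworks

# Default construction is unchanged; `activation` falls back to SigmoidLayer()
L = CouplingLayerGlow(4, 16; logdet=true)

# The activation can now be passed explicitly through any of the constructors above
L_sig = CouplingLayerGlow(4, 16; logdet=true, activation=SigmoidLayer())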
@@ -100,9 +101,10 @@ function forward(X::AbstractArray{T, 4}, L::CouplingLayerGlow) where T
    Y2 = copy(X2)
    logS_T = L.RB.forward(X2)
-    Sm = Sigmoid(logS_T[:,:,1:k,:])
+    Sm = L.activation.forward(logS_T[:,:,1:k,:])
    Tm = logS_T[:, :, k+1:end, :]
    Y1 = Sm.*X1 + Tm
+
    Y = tensor_cat(Y1, Y2)

    L.logdet == true ? (return Y, glow_logdet_forward(Sm)) : (return Y)
@@ -117,9 +119,10 @@ function inverse(Y::AbstractArray{T, 4}, L::CouplingLayerGlow; save=false) where
    X2 = copy(Y2)
    logS_T = L.RB.forward(X2)
-    Sm = Sigmoid(logS_T[:,:,1:k,:])
+    Sm = L.activation.forward(logS_T[:,:,1:k,:])
    Tm = logS_T[:, :, k+1:end, :]
    X1 = (Y1 - Tm) ./ (Sm .+ eps(T)) # add epsilon to avoid division by 0
+
    X_ = tensor_cat(X1, X2)
    X = L.C.inverse(X_)
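
A hedged round-trip sketch of the forward/inverse pair patched above, calling the methods defined in this file directly (the array sizes are arbitrary; `n_in` must be even because the input is split channel-wise):

X = randn(Float32, 16, 16, 4, 2)         # nx × ny × n_in × batch
L = CouplingLayerGlow(4, 16; logdet=true)

Y, logdet = forward(X, L)                # logdet=true also returns glow_logdet_forward(Sm)
X_rec = inverse(Y, L)                    # recovers X up to the eps(T) guard above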
@@ -143,10 +146,10 @@ function backward(ΔY::AbstractArray{T, 4}, Y::AbstractArray{T, 4}, L::CouplingL
    ΔX1 = ΔY1 .* S
    if set_grad
-        ΔX2 = L.RB.backward(cat(SigmoidGrad(ΔS, S), ΔT; dims=3), X2) + ΔY2
+        ΔX2 = L.RB.backward(cat(L.activation.backward(ΔS, S), ΔT; dims=3), X2) + ΔY2
    else
-        ΔX2, Δθrb = L.RB.backward(cat(SigmoidGrad(ΔS, S), ΔT; dims=3), X2; set_grad=set_grad)
-        _, ∇logdet = L.RB.backward(cat(SigmoidGrad(ΔS_, S), 0 .* ΔT; dims=3), X2; set_grad=set_grad)
+        ΔX2, Δθrb = L.RB.backward(cat(L.activation.backward(ΔS, S), ΔT; dims=3), X2; set_grad=set_grad)
+        _, ∇logdet = L.RB.backward(cat(L.activation.backward(ΔS_, S), 0f0 .* ΔT; dims=3), X2; set_grad=set_grad)
        ΔX2 += ΔY2
    end
    ΔX_ = tensor_cat(ΔX1, ΔX2)
@@ -179,20 +182,20 @@ function jacobian(ΔX::AbstractArray{T, 4}, Δθ::Array{Parameter, 1}, X, L::Cou
    Y2 = copy(X2)
    ΔY2 = copy(ΔX2)
    ΔlogS_T, logS_T = L.RB.jacobian(ΔX2, Δθ[4:end], X2)
-    S = Sigmoid(logS_T[:,:,1:k,:])
-    ΔS = SigmoidGrad(ΔlogS_T[:,:,1:k,:], nothing; x=logS_T[:,:,1:k,:])
+    Sm = L.activation.forward(logS_T[:,:,1:k,:])
+    ΔS = L.activation.backward(ΔlogS_T[:,:,1:k,:], nothing; x=logS_T[:,:,1:k,:])
    Tm = logS_T[:, :, k+1:end, :]
    ΔT = ΔlogS_T[:, :, k+1:end, :]
-    Y1 = S .* X1 + Tm
-    ΔY1 = ΔS.*X1 + S.*ΔX1 + ΔT
+    Y1 = Sm .* X1 + Tm
+    ΔY1 = ΔS.*X1 + Sm.*ΔX1 + ΔT
    Y = tensor_cat(Y1, Y2)
    ΔY = tensor_cat(ΔY1, ΔY2)

    # Gauss-Newton approximation of logdet terms
    JΔθ = L.RB.jacobian(cuzeros(ΔX2, size(ΔX2)), Δθ[4:end], X2)[1][:, :, 1:k, :]
-    GNΔθ = cat(0*Δθ[1:3], -L.RB.adjointJacobian(tensor_cat(SigmoidGrad(JΔθ, S), zeros(Float32, size(S))), X2)[2]; dims=1)
+    GNΔθ = cat(0f0*Δθ[1:3], -L.RB.adjointJacobian(tensor_cat(L.activation.backward(JΔθ, Sm), zeros(Float32, size(Sm))), X2)[2]; dims=1)

-    L.logdet ? (return ΔY, Y, glow_logdet_forward(S), GNΔθ) : (return ΔY, Y)
+    L.logdet ? (return ΔY, Y, glow_logdet_forward(Sm), GNΔθ) : (return ΔY, Y)
end

function adjointJacobian(ΔY::AbstractArray{T, N}, Y::AbstractArray{T, N}, L::CouplingLayerGlow) where {T, N}
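
Finally, a hedged sketch of the gradient path that now routes through `L.activation.backward` instead of `SigmoidGrad` (assumption: `backward` returns the input-side pair `(ΔX, X)`; that return statement lies outside the hunks shown here):

ΔY = randn(Float32, size(Y))
ΔX, X_ = backward(ΔY, Y, L)              # internally calls L.activation.backward(ΔS, S)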