@@ -68,6 +68,10 @@ def map_estimate(embedding, data=None, ns_data=None, data_generator=None, N=None
         ns_data = data
 
     opt = tf.keras.optimizers.Adam(learning_rate=0.001)
+    opt_theta = opt.add_variable_from_reference(embedding.theta, "theta")  # , initial_value=embedding.theta)
+    opt.build([opt_theta])
+    opt_theta.assign(embedding.theta)
+
     e = embedding
     if valid_data is not None:
         if not isinstance(valid_data, tf.Tensor):
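Note on the hunk above: the newer `add_variable_from_reference(reference_variable, name)` signature (Keras 3) no longer takes `initial_value`, so the optimizer-side copy of the parameter starts zeros-initialized and has to be seeded with an explicit `assign` after `build`. A minimal sketch of the same pattern on a toy variable (the name and shape are illustrative, not from this repo):

```python
import tensorflow as tf

# Toy parameter standing in for embedding.theta (shape is illustrative).
theta = tf.Variable(tf.random.normal([4, 2]), name="theta")

opt = tf.keras.optimizers.Adam(learning_rate=0.001)

# Keras 3: creates a zeros-initialized variable with theta's shape/dtype.
opt_theta = opt.add_variable_from_reference(theta, "theta")
opt.build([opt_theta])   # allocates Adam's momentum/velocity slots
opt_theta.assign(theta)  # seed with the current parameter values
```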
@@ -107,13 +111,17 @@ def map_estimate(embedding, data=None, ns_data=None, data_generator=None, N=None
             i, j, x = next(data_generator)
         else:
             i, j, x = generate_batch(data, model=model, ws=ws, ns=ns, batch_size=batch_size, start_ix=start_ix, ns_data=ns_data)
-        if model == "sgns":
-            objective = lambda: -tf.reduce_sum(sgns_likelihood(e, i, j, x=x)) - e.log_prob(batch_size, N)
-        elif model == "cbow":
-            objective = lambda: -tf.reduce_sum(cbow_likelihood(e, i, j, x=x)) - e.log_prob(batch_size, N)
-        _ = opt.minimize(objective, [embedding.theta])
+        with tf.GradientTape() as tape:
+            if model == "sgns":
+                objective = -tf.reduce_sum(sgns_likelihood(e, i, j, x=x)) - e.log_prob(batch_size, N)
+            elif model == "cbow":
+                objective = -tf.reduce_sum(cbow_likelihood(e, i, j, x=x)) - e.log_prob(batch_size, N)
+        d_l_d_theta = -tape.gradient(objective, e.theta)
+
+        opt.update_step(d_l_d_theta, opt_theta, 0.001)
+        embedding.theta.assign(opt_theta)
         if training_loss:
-            epoch_training_loss.append(objective() / len(i))
+            epoch_training_loss.append(objective / len(i))
             batch_no = len(epoch_training_loss)
             if batch_no % 250 == 0:
                 logger.log(logging.TRAIN, f"Epoch {epoch} mean training loss after {batch_no} batches: {np.mean(epoch_training_loss)}")
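With `opt.minimize` gone, the objective is now a tensor computed under `tf.GradientTape`, and the step is applied by hand via `update_step(gradient, variable, learning_rate)`, which mutates the optimizer-side variable that is then copied back into `embedding.theta`. A stripped-down sketch of that loop on a toy quadratic loss, assuming the Keras 3 `update_step` signature (all names here are illustrative; this sketch passes the loss gradient directly, i.e. plain descent):

```python
import tensorflow as tf

w = tf.Variable([3.0, -2.0])
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
opt_w = opt.add_variable_from_reference(w, "w")
opt.build([opt_w])
opt_w.assign(w)

for _ in range(100):
    with tf.GradientTape() as tape:
        loss = tf.reduce_sum(w ** 2)     # toy objective, minimized at w = 0
    grad = tape.gradient(loss, w)
    opt.update_step(grad, opt_w, 0.001)  # Keras 3: (gradient, variable, lr)
    w.assign(opt_w)                      # mirror the optimizer copy back
```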
@@ -155,7 +163,7 @@ def mean_field_vi(embedding, data=None, data_generator=None, N=None, model="cbow
     if model not in ["sgns", "cbow"]:
         raise ValueError("model must be 'sgns' or 'cbow'")
 
-    optimizer = tf.keras.optimizers.experimental.Adam(learning_rate=0.001)
+    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
     e = embedding
 
     if words_to_fix_rotation:
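`tf.keras.optimizers.experimental.Adam` was the staging home of the rewritten optimizers around TF 2.9–2.10; later releases expose the same class as `tf.keras.optimizers.Adam` and drop the experimental alias. If older TF versions still need to be supported, a small shim along these lines could work (an assumption on my part, not something this commit adds):

```python
import tensorflow as tf

try:
    # TF 2.9-2.10: rewritten optimizers lived under the experimental namespace
    Adam = tf.keras.optimizers.experimental.Adam
except AttributeError:
    # TF 2.11+ / Keras 3: the experimental alias is gone
    Adam = tf.keras.optimizers.Adam

optimizer = Adam(learning_rate=0.001)
```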
@@ -177,10 +185,13 @@ def mean_field_vi(embedding, data=None, data_generator=None, N=None, model="cbow
     logger.info(f"Init std: {init_std}")
     q_std_log = tf.Variable(init_std)
 
-    opt_mean_var = optimizer.add_variable_from_reference(q_mean, "q_mean", initial_value=q_mean)
-    opt_std_var = optimizer.add_variable_from_reference(q_std_log, "q_std_log", initial_value=q_std_log)
+    opt_mean_var = optimizer.add_variable_from_reference(q_mean, "q_mean")
+    opt_std_var = optimizer.add_variable_from_reference(q_std_log, "q_std_log")
     optimizer.build([opt_mean_var, opt_std_var])
 
+    opt_mean_var.assign(q_mean)
+    opt_std_var.assign(q_std_log)
+
     elbos = []
     for epoch in range(epochs):
         logger.log(logging.TRAIN, f"Epoch {epoch}")
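Same migration as in `map_estimate`: without `initial_value`, the optimizer copies start at zeros, so `q_mean` and `q_std_log` are seeded explicitly after `build`. A quick way to see why the `assign` matters (illustrative check, not repo code):

```python
import tensorflow as tf

q_mean = tf.Variable([1.0, 2.0])
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
opt_mean_var = optimizer.add_variable_from_reference(q_mean, "q_mean")

print(opt_mean_var.numpy())  # [0. 0.] -- zeros-initialized, not a copy
opt_mean_var.assign(q_mean)
print(opt_mean_var.numpy())  # [1. 2.] -- now seeded with the current values
```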
@@ -216,8 +227,8 @@ def mean_field_vi(embedding, data=None, data_generator=None, N=None, model="cbow
             # Add the entropy term
             d_l_q_std_log = d_l_q_std_log - tf.ones(d_l_q_std_log.shape, dtype=tf.float64)
 
-            optimizer.update_step(d_l_d_q_mean, opt_mean_var)
-            optimizer.update_step(d_l_q_std_log, opt_std_var)
+            optimizer.update_step(d_l_d_q_mean, opt_mean_var, 0.001)
+            optimizer.update_step(d_l_q_std_log, opt_std_var, 0.001)
 
 
             std_numerical_stability_constant = 10.0
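The new third positional argument is the learning rate, which the Keras 3 `update_step(gradient, variable, learning_rate)` signature expects from the caller. Hard-coding `0.001` duplicates the constructor value; reading it off the optimizer would keep the two in sync (a hypothetical variant, not what this commit does):

```python
import tensorflow as tf

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
v = tf.Variable([1.0, -1.0])
opt_v = optimizer.add_variable_from_reference(v, "v")
optimizer.build([opt_v])
opt_v.assign(v)

grad = tf.constant([0.5, -0.5])
# Pull the rate from the optimizer so the manual step stays in sync with the
# constructor value, instead of repeating the literal 0.001 at every call site.
optimizer.update_step(grad, opt_v, optimizer.learning_rate)
```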