-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathrun_DenoisingAutoencoder.m
236 lines (174 loc) · 9 KB
/
run_DenoisingAutoencoder.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
%%
% Demo script: trains denoising autoencoders on MNIST, exercising a
% range of architectures and training options section by section.
%
%% Load data
addpath(genpath('data/'));   % expose the MNIST loader and data files
images = loadMNISTImages('data/train-images.idx3-ubyte');
%% Sanity check: verify the analytic gradient against a numerical estimate
% A tiny network on random data keeps the check cheap.
opts0             = struct;
opts0.visibleSize = 20;           % input size
opts0.hiddenSize  = 3;            % hidden size
opts0.drop        = 0;            % drop level
opts0.lambda      = 0;            % no weight decay
opts0.x           = rand(20,10);  % random toy input
DA0 = DenoisingAutoencoder(opts0);
% The reported difference should be very small, we obtained 4.6655e-10
DA0.check_numerical_gradient;
clear DA0 opts0
%% Baseline: a standard denoising autoencoder with default settings,
% trained by SGD with momentum for 100 epochs. Hidden and output
% activations are both sigmoid; the cost function is mse.
opts1             = struct;
opts1.visibleSize = 28*28;             % input size
opts1.hiddenSize  = 196;               % hidden size
opts1.drop        = .5;                % drop level
opts1.momentum    = .7;                % grad speed
opts1.alpha       = 1;                 % learning rate
opts1.x           = images(:,1:10000); % training subset
DA1 = DenoisingAutoencoder(opts1);
% Fit the model
DA1.train_model;
% Visualize the learned hidden-layer filters
Utility2.display_images(DA1.W1');
clear DA1 opts1
%% Same default model, but optimized with a second-order method
% (l-BFGS via minFunc). Sigmoid activations, mse cost.
addpath(genpath('minFunc_2012'));      % optimizer lives here
opts2             = struct;
opts2.visibleSize = 28*28;             % input size
opts2.hiddenSize  = 196;               % hidden size
opts2.lambda      = .03;               % weight decay
opts2.drop        = 0.4;               % drop level
opts2.minfunc     = @(funJ,theta)minFunc(funJ, theta, struct('maxIter',100));
opts2.x           = images(:,1:10000); % training subset
DA2 = DenoisingAutoencoder(opts2);
% Fit the model
DA2.train_model;
% Visualize the learned hidden-layer filters
Utility2.display_images(DA2.W1');
clear DA2 opts2
%% Tied-weights variant: the decoder reuses the encoder weights (W2 = W1)
opts3             = struct;
opts3.visibleSize = 28*28;             % input size
opts3.hiddenSize  = 196;               % hidden size
opts3.drop        = .5;                % drop level
opts3.momentum    = .6;                % grad speed
opts3.alpha       = 1;                 % learning rate
opts3.tiedWeights = true;              % W2 = W1
opts3.x           = images(:,1:10000); % training subset
DA3 = DenoisingAutoencoder(opts3);
% Fit the model
DA3.train_model;
% Visualize the learned hidden-layer filters
Utility2.display_images(DA3.W1');
clear DA3 opts3
%% Tied weights again, this time trained with adadelta step sizes
opts3_2             = struct;
opts3_2.visibleSize = 28*28;             % input size
opts3_2.hiddenSize  = 196;               % hidden size
opts3_2.drop        = .4;                % drop level
opts3_2.momentum    = .4;                % grad speed
opts3_2.adaDeltaRho = .99;               % decay rate
opts3_2.useAdaDelta = true;              % employ adadelta
opts3_2.tiedWeights = true;              % W2 = W1
opts3_2.x           = images(:,1:10000); % training subset
DA3_2 = DenoisingAutoencoder(opts3_2);
% Fit the model
DA3_2.train_model;
% Training statistics: error curve and feature correlations
% (no semicolon below: the outputs are intentionally echoed)
DA3_2.plot_training_error;
[h, Frobenius_norm, L1_dist_to_I] = DA3_2.plot_feature_corrs
% Visualize the learned hidden-layer filters
Utility2.display_images(DA3_2.W1');
clear DA3_2 opts3_2
%% Cross-entropy cost combined with tied weights
opts4             = struct;
opts4.visibleSize = 28*28;             % input size
opts4.hiddenSize  = 196;               % hidden size
opts4.drop        = .5;                % drop level
opts4.momentum    = .5;                % grad speed
opts4.alpha       = 1;                 % learning rate
opts4.tiedWeights = true;              % W2 = W1
opts4.errFun      = 1;                 % 1-cross entropy
opts4.x           = images(:,1:10000); % training subset
DA4 = DenoisingAutoencoder(opts4);
% Fit the model
DA4.train_model;
% Visualize the learned hidden-layer filters
Utility2.display_images(DA4.W1');
clear DA4 opts4
%% A linear autoencoder: obtained either by setting 'vActFun' to 3
% or 'isLinearCost' to true (the former is used here).
opts5             = struct;
opts5.visibleSize = 28*28;             % input size
opts5.hiddenSize  = 196;               % hidden size
opts5.drop        = .2;                % drop level
opts5.momentum    = 0;                 % grad speed
opts5.alpha       = 1;                 % learning rate
opts5.tiedWeights = true;              % W2 = W1
opts5.vActFun     = 3;                 % linear output
opts5.x           = images(:,1:10000); % training subset
DA5 = DenoisingAutoencoder(opts5);
% Fit the model
DA5.train_model;
% Visualize the learned hidden-layer filters
Utility2.display_images(DA5.W1');
clear DA5 opts5
%% Next, let us analyse the Jacobian and its singular values to assess
% the contraction amount of the encoding
% Initialize sparse autoencoder object
DA6 = DenoisingAutoencoder(struct( 'visibleSize',28*28,... % input size
    'hiddenSize' ,196,... % hidden size
    'drop'       ,.5,...  % drop level
    'momentum'   ,.9,...  % grad speed
    'adaDeltaRho',.95,... % decay rate
    'useAdaDelta',true,... % employ adadelta
    'tiedWeights',true,... % W2 = W1
    'x'          ,images(:,1:5000)));
% Start training our DA (timed)
tic
DA6.train_model;
toc
% plot some stats (no semicolon: outputs are intentionally echoed)
DA6.plot_training_error;
[h, Frobenius_norm, L1_dist_to_I] = DA6.plot_feature_corrs
% Display resulting hidden layer features
Utility2.display_images(DA6.W1');
% Get the Jacobian and plot its singular values. Only the singular
% values are needed, so use the single-output form of svd -- it returns
% them directly as a vector and avoids computing the unused U and V
% factors (cheaper than [U,S,V] = svd(Jac) followed by diag(S)).
Jac = DA6.get_jacobian;
singular_values = svd(Jac);
figure, plot(singular_values);
clear DA6 Jac singular_values
%% The previous model once more, now running on the GPU
opts7             = struct;
opts7.visibleSize = 28*28;            % input size
opts7.hiddenSize  = 196;              % hidden size
opts7.drop        = .5;               % drop level
opts7.momentum    = .9;               % grad speed
opts7.adaDeltaRho = .95;              % decay rate
opts7.useAdaDelta = true;             % employ adadelta
opts7.tiedWeights = true;             % W2 = W1
opts7.useGPU      = true;             % as the name refers
opts7.x           = images(:,1:5000); % training subset
DA7 = DenoisingAutoencoder(opts7);
% Fit the model (timed, for comparison against the CPU run)
tic
DA7.train_model;
toc
% Visualize the learned hidden-layer filters
Utility2.display_images(DA7.W1');
clear DA7 opts7
%% Finally, add a sparsity term to the cost function
opts8             = struct;
opts8.visibleSize = 28*28;            % input size
opts8.hiddenSize  = 196;              % hidden size
opts8.hActFun     = 0;                % hidden activation selector
opts8.drop        = .5;               % drop level
opts8.momentum    = .9;               % grad speed
opts8.adaDeltaRho = .95;              % decay rate
opts8.useAdaDelta = true;             % employ adadelta
opts8.tiedWeights = true;             % W2 = W1
opts8.rho         = .03;              % sparsity target
opts8.beta        = .25;              % sparsity term effect
opts8.x           = images(:,1:5000); % training subset
DA8 = DenoisingAutoencoder(opts8);
% Fit the model (timed)
tic
DA8.train_model;
toc
% Visualize the learned hidden-layer filters
Utility2.display_images(DA8.W1');
clear DA8 opts8