1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "name" : " lstm" ,
7
+ "provenance" : [],
8
+ "include_colab_link" : true
9
+ },
10
+ "kernelspec" : {
11
+ "name" : " python3" ,
12
+ "display_name" : " Python 3"
13
+ }
14
+ },
15
+ "cells" : [
16
+ {
17
+ "cell_type" : " markdown" ,
18
+ "metadata" : {
19
+ "id" : " view-in-github" ,
20
+ "colab_type" : " text"
21
+ },
22
+ "source" : [
23
+ " <a href=\" https://colab.research.google.com/github/tinhb92/relax_ml/blob/master/deep_learning/lstm.ipynb\" target=\" _parent\" ><img src=\" https://colab.research.google.com/assets/colab-badge.svg\" alt=\" Open In Colab\" /></a>"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type" : " code" ,
28
+ "metadata" : {
29
+ "id" : " 414DjyHwHBH7" ,
30
+ "colab_type" : " code" ,
31
+ "colab" : {}
32
+ },
33
+ "source" : [
34
+ " import torch \n " ,
35
+ " import torch.nn as nn\n " ,
36
+ " import torchvision\n " ,
37
+ " import torchvision.transforms as transforms\n " ,
38
+ " \n " ,
# Device configuration: use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reproducibility: seed PyTorch's global RNG before any weights are created or
# any shuffled DataLoader is iterated, so a fresh "Restart & Run All" starts
# from the same random state every time.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact reruns on CUDA are required.
seed = 42
torch.manual_seed(seed)

# Hyper-parameters
sequence_length = 28   # time steps per sample (second axis of the reshape before the model call)
input_size = 28        # features per time step (last axis of that reshape, LSTM input width)
hidden_size = 128      # width of the LSTM hidden state
num_layers = 2         # number of stacked LSTM layers
num_classes = 10       # size of the final linear layer's output
batch_size = 100       # samples per DataLoader batch
num_epochs = 2         # full passes over the training loader
learning_rate = 0.01   # step size handed to the Adam optimizer
51
+ ],
52
+ "execution_count" : 0 ,
53
+ "outputs" : []
54
+ },
55
+ {
56
+ "cell_type" : " code" ,
57
+ "metadata" : {
58
+ "id" : " e2NCj8qbGI2r" ,
59
+ "colab_type" : " code" ,
60
+ "colab" : {}
61
+ },
62
+ "source" : [
# MNIST dataset
# Both splits share the same storage root and ToTensor() transform; only the
# training split asks torchvision to download the files.
mnist_common = dict(root='../../data/', transform=transforms.ToTensor())
train_dataset = torchvision.datasets.MNIST(train=True, download=True, **mnist_common)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_common)

# Data loader
# Training batches are shuffled; the test split is read in its stored order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
81
+ ],
82
+ "execution_count" : 0 ,
83
+ "outputs" : []
84
+ },
85
+ {
86
+ "cell_type" : " code" ,
87
+ "metadata" : {
88
+ "id" : " o55sVKwpGMLx" ,
89
+ "colab_type" : " code" ,
90
+ "colab" : {}
91
+ },
92
+ "source" : [
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features in each time step of the input.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of outputs produced by the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs/outputs are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one score vector per sample.

        Args:
            x: tensor of shape (batch_size, seq_length, input_size).

        Returns:
            Tensor of shape (batch_size, num_classes).
        """
        # Zero initial hidden and cell states. They are created on the same
        # device and with the same dtype as the input, so the module does not
        # depend on a notebook-level `device` variable and keeps working
        # after .double()/.to(...) on the model and its inputs.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        # Forward propagate LSTM
        # out: tensor of shape (batch_size, seq_length, hidden_size)
        out, _ = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])
113
+ " \n " ,
# Build the network from the notebook-level hyper-parameters, then move its
# parameters to the selected device (Module.to returns the module itself).
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)
115
+ ],
116
+ "execution_count" : 0 ,
117
+ "outputs" : []
118
+ },
119
+ {
120
+ "cell_type" : " code" ,
121
+ "metadata" : {
122
+ "id" : " EeQKqwxAGOod" ,
123
+ "colab_type" : " code" ,
124
+ "colab" : {
125
+ "base_uri" : " https://localhost:8080/" ,
126
+ "height" : 218
127
+ },
128
+ "outputId" : " 5e6bf8ff-34ff-4d39-8e93-c2741f1cf6f8"
129
+ },
130
+ "source" : [
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)  # batches per epoch, used only in the progress line
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Present every image to the LSTM as `sequence_length` steps of
        # `input_size` features each, on the same device as the model.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize: clear stale gradients, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress on every 100th batch only.
        if (i + 1) % 100 != 0:
            continue
        progress = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
            epoch + 1, num_epochs, i + 1, total_step, loss.item())
        print(progress)
154
+ ],
155
+ "execution_count" : 22 ,
156
+ "outputs" : [
157
+ {
158
+ "output_type" : " stream" ,
159
+ "text" : [
160
+ " Epoch [1/2], Step [100/600], Loss: 0.4692\n " ,
161
+ " Epoch [1/2], Step [200/600], Loss: 0.2797\n " ,
162
+ " Epoch [1/2], Step [300/600], Loss: 0.1271\n " ,
163
+ " Epoch [1/2], Step [400/600], Loss: 0.2750\n " ,
164
+ " Epoch [1/2], Step [500/600], Loss: 0.1792\n " ,
165
+ " Epoch [1/2], Step [600/600], Loss: 0.0991\n " ,
166
+ " Epoch [2/2], Step [100/600], Loss: 0.0826\n " ,
167
+ " Epoch [2/2], Step [200/600], Loss: 0.1674\n " ,
168
+ " Epoch [2/2], Step [300/600], Loss: 0.1562\n " ,
169
+ " Epoch [2/2], Step [400/600], Loss: 0.1447\n " ,
170
+ " Epoch [2/2], Step [500/600], Loss: 0.0842\n " ,
171
+ " Epoch [2/2], Step [600/600], Loss: 0.0283\n "
172
+ ],
173
+ "name" : " stdout"
174
+ }
175
+ ]
176
+ },
177
+ {
178
+ "cell_type" : " code" ,
179
+ "metadata" : {
180
+ "id" : " j-5yxyHfEa7z" ,
181
+ "colab_type" : " code" ,
182
+ "colab" : {
183
+ "base_uri" : " https://localhost:8080/" ,
184
+ "height" : 34
185
+ },
186
+ "outputId" : " 84f5134c-0b9e-42e3-fcaf-42f528a3f363"
187
+ },
188
+ "source" : [
# Test the model
# Evaluation is inference only: put the model in eval mode (so any
# train-only layer behaviour is disabled) and turn off gradient tracking.
# The previous mode is restored afterwards so re-running the training cell
# is unaffected by this cell.
was_training = model.training
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score along the class axis.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
model.train(was_training)

print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
# torch.save(model.state_dict(), 'model.ckpt')
205
+ ],
206
+ "execution_count" : 23 ,
207
+ "outputs" : [
208
+ {
209
+ "output_type" : " stream" ,
210
+ "text" : [
211
+ " Test Accuracy of the model on the 10000 test images: 97.55 %\n "
212
+ ],
213
+ "name" : " stdout"
214
+ }
215
+ ]
216
+ },
217
+ {
218
+ "cell_type" : " code" ,
219
+ "metadata" : {
220
+ "id" : " cs2NL18qEaz2" ,
221
+ "colab_type" : " code" ,
222
+ "colab" : {}
223
+ },
224
+ "source" : [
225
+ " "
226
+ ],
227
+ "execution_count" : 0 ,
228
+ "outputs" : []
229
+ },
230
+ {
231
+ "cell_type" : " code" ,
232
+ "metadata" : {
233
+ "id" : " 1BY0qUqTCrMq" ,
234
+ "colab_type" : " code" ,
235
+ "colab" : {}
236
+ },
237
+ "source" : [
238
+ " "
239
+ ],
240
+ "execution_count" : 0 ,
241
+ "outputs" : []
242
+ }
243
+ ]
244
+ }
0 commit comments