1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "name" : " lgb.ipynb" ,
7
+ "version" : " 0.3.2" ,
8
+ "provenance" : [],
9
+ "collapsed_sections" : []
10
+ },
11
+ "kernelspec" : {
12
+ "name" : " python3" ,
13
+ "display_name" : " Python 3"
14
+ }
15
+ },
16
+ "cells" : [
17
+ {
18
+ "cell_type" : " code" ,
19
+ "metadata" : {
20
+ "id" : " 0WlPr1Xpwp4J" ,
21
+ "colab_type" : " code" ,
22
+ "outputId" : " 1a2ca986-bc9b-4778-92d8-ade1647853cf" ,
23
+ "colab" : {
24
+ "base_uri" : " https://localhost:8080/" ,
25
+ "height" : 34
26
+ }
27
+ },
28
+ "source" : [
29
+ " import lightgbm as lgb\n " ,
30
+ " import numpy as np\n " ,
31
+ " import pandas as pd\n " ,
32
+ " from sklearn.metrics import mean_squared_error\n " ,
33
+ " df = pd.DataFrame(np.random.RandomState(42).randint(0, 100, size=(100, 4)), columns=['y', 'X1', 'X2', 'X3'])  # seeded generator: the synthetic 100x4 frame is identical on every run"
34
+ ],
35
+ "execution_count" : 0 ,
36
+ "outputs" : [
37
+ {
38
+ "output_type" : " stream" ,
39
+ "text" : [
40
+ " Loading data...\n "
41
+ ],
42
+ "name" : " stdout"
43
+ }
44
+ ]
45
+ },
46
+ {
47
+ "cell_type" : " code" ,
48
+ "metadata" : {
49
+ "id" : " ZwuyE3a6w8T7" ,
50
+ "colab_type" : " code" ,
51
+ "outputId" : " 01aadd3c-b0fa-4d42-c040-fc06c1512896" ,
52
+ "colab" : {
53
+ "base_uri" : " https://localhost:8080/" ,
54
+ "height" : 195
55
+ }
56
+ },
57
+ "source" : [
58
+ " df.head(n=5)  # preview the first five rows of the synthetic frame"
59
+ ],
60
+ "execution_count" : 0 ,
61
+ "outputs" : [
62
+ {
63
+ "output_type" : " execute_result" ,
64
+ "data" : {
65
+ "text/html" : [
66
+ " <div>\n " ,
67
+ " <style scoped>\n " ,
68
+ " .dataframe tbody tr th:only-of-type {\n " ,
69
+ " vertical-align: middle;\n " ,
70
+ " }\n " ,
71
+ " \n " ,
72
+ " .dataframe tbody tr th {\n " ,
73
+ " vertical-align: top;\n " ,
74
+ " }\n " ,
75
+ " \n " ,
76
+ " .dataframe thead th {\n " ,
77
+ " text-align: right;\n " ,
78
+ " }\n " ,
79
+ " </style>\n " ,
80
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
81
+ " <thead>\n " ,
82
+ " <tr style=\" text-align: right;\" >\n " ,
83
+ " <th></th>\n " ,
84
+ " <th>y</th>\n " ,
85
+ " <th>X1</th>\n " ,
86
+ " <th>X2</th>\n " ,
87
+ " <th>X3</th>\n " ,
88
+ " </tr>\n " ,
89
+ " </thead>\n " ,
90
+ " <tbody>\n " ,
91
+ " <tr>\n " ,
92
+ " <th>0</th>\n " ,
93
+ " <td>56</td>\n " ,
94
+ " <td>81</td>\n " ,
95
+ " <td>41</td>\n " ,
96
+ " <td>94</td>\n " ,
97
+ " </tr>\n " ,
98
+ " <tr>\n " ,
99
+ " <th>1</th>\n " ,
100
+ " <td>61</td>\n " ,
101
+ " <td>69</td>\n " ,
102
+ " <td>50</td>\n " ,
103
+ " <td>30</td>\n " ,
104
+ " </tr>\n " ,
105
+ " <tr>\n " ,
106
+ " <th>2</th>\n " ,
107
+ " <td>25</td>\n " ,
108
+ " <td>27</td>\n " ,
109
+ " <td>67</td>\n " ,
110
+ " <td>9</td>\n " ,
111
+ " </tr>\n " ,
112
+ " <tr>\n " ,
113
+ " <th>3</th>\n " ,
114
+ " <td>34</td>\n " ,
115
+ " <td>65</td>\n " ,
116
+ " <td>24</td>\n " ,
117
+ " <td>2</td>\n " ,
118
+ " </tr>\n " ,
119
+ " <tr>\n " ,
120
+ " <th>4</th>\n " ,
121
+ " <td>4</td>\n " ,
122
+ " <td>73</td>\n " ,
123
+ " <td>57</td>\n " ,
124
+ " <td>75</td>\n " ,
125
+ " </tr>\n " ,
126
+ " </tbody>\n " ,
127
+ " </table>\n " ,
128
+ " </div>"
129
+ ],
130
+ "text/plain" : [
131
+ " y X1 X2 X3\n " ,
132
+ " 0 56 81 41 94\n " ,
133
+ " 1 61 69 50 30\n " ,
134
+ " 2 25 27 67 9\n " ,
135
+ " 3 34 65 24 2\n " ,
136
+ " 4 4 73 57 75"
137
+ ]
138
+ },
139
+ "metadata" : {
140
+ "tags" : []
141
+ },
142
+ "execution_count" : 6
143
+ }
144
+ ]
145
+ },
146
+ {
147
+ "cell_type" : " code" ,
148
+ "metadata" : {
149
+ "id" : " PrM4fDO3xUjf" ,
150
+ "colab_type" : " code" ,
151
+ "colab" : {}
152
+ },
153
+ "source" : [
154
+ " df_train, df_test = df.iloc[:80], df.iloc[80:]  # positional 80/20 split; iloc is end-exclusive so the halves do not overlap\n " ,
155
+ " y_train, y_test = df_train['y'], df_test['y']  # the target column is labelled 'y'\n " ,
156
+ " X_train = df_train.drop(columns='y')  # features are every column except the target\n " ,
157
+ " X_test = df_test.drop(columns='y')"
158
+ ],
159
+ "execution_count" : 0 ,
160
+ "outputs" : []
161
+ },
162
+ {
163
+ "cell_type" : " code" ,
164
+ "metadata" : {
165
+ "id" : " rnOBZM8GxXr_" ,
166
+ "colab_type" : " code" ,
167
+ "colab" : {}
168
+ },
169
+ "source" : [
170
+ " y_train, y_test = df['y'].iloc[:80], df['y'].iloc[80:]  # .iloc is end-exclusive, so row 80 is only in the test split (label-based .loc[:80] would also put it in train)\n " ,
171
+ " X_train, X_test = df[['X1', 'X2', 'X3']].iloc[:80], df[['X1', 'X2', 'X3']].iloc[80:]"
172
+ ],
173
+ "execution_count" : 0 ,
174
+ "outputs" : []
175
+ },
176
+ {
177
+ "cell_type" : " code" ,
178
+ "metadata" : {
179
+ "id" : " BP0FBk8Syfpg" ,
180
+ "colab_type" : " code" ,
181
+ "colab" : {}
182
+ },
183
+ "source" : [
184
+ " # wrap the train and validation splits as LightGBM datasets\n " ,
185
+ " lgb_train = lgb.Dataset(data=X_train, label=y_train)\n " ,
186
+ " lgb_eval = lgb.Dataset(data=X_test, label=y_test, reference=lgb_train)"
187
+ ],
188
+ "execution_count" : 0 ,
189
+ "outputs" : []
190
+ },
191
+ {
192
+ "cell_type" : " code" ,
193
+ "metadata" : {
194
+ "id" : " 2h0GmTXzyjMZ" ,
195
+ "colab_type" : " code" ,
196
+ "colab" : {}
197
+ },
198
+ "source" : [
199
+ " # LightGBM training configuration; metrics are a list so their order is the same on every run\n " ,
200
+ " params = {\n " ,
201
+ "     'boosting_type': 'gbdt',\n " ,
202
+ "     'objective': 'regression',\n " ,
203
+ "     'metric': ['l2', 'l1'],\n " ,
204
+ "     'num_leaves': 31,\n " ,
205
+ "     'learning_rate': 0.05,\n " ,
206
+ "     'feature_fraction': 0.9,\n " ,
207
+ "     'bagging_fraction': 0.8,\n " ,
208
+ "     'bagging_freq': 5,\n " ,
209
+ "     'verbose': 0\n " ,
210
+ " }"
211
+ ],
212
+ "execution_count" : 0 ,
213
+ "outputs" : []
214
+ },
215
+ {
216
+ "cell_type" : " code" ,
217
+ "metadata" : {
218
+ "id" : " jcQ17FYswKUD" ,
219
+ "colab_type" : " code" ,
220
+ "outputId" : " d1086f35-b126-4efb-82c1-819f0bd2cda8" ,
221
+ "colab" : {
222
+ "base_uri" : " https://localhost:8080/" ,
223
+ "height" : 235
224
+ }
225
+ },
226
+ "source" : [
227
+ " print('Starting training...')\n " ,
228
+ " # train; early stopping goes through a callback because lgb.train no longer accepts early_stopping_rounds (removed in LightGBM 4.0)\n " ,
229
+ " gbm = lgb.train(params,\n " ,
230
+ "                 lgb_train,\n " ,
231
+ "                 num_boost_round=20,\n " ,
232
+ "                 valid_sets=[lgb_eval],\n " ,
233
+ "                 callbacks=[lgb.early_stopping(stopping_rounds=5)])\n " ,
234
+ " \n " ,
235
+ " print('Saving model...')\n " ,
236
+ " # save model to file\n " ,
237
+ " gbm.save_model('model.txt')\n " ,
238
+ " \n " ,
239
+ " print('Starting predicting...')\n " ,
240
+ " # predict with the iteration count selected by early stopping\n " ,
241
+ " y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)\n " ,
242
+ " # eval: RMSE is the square root of the mean squared error\n " ,
243
+ " print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)"
244
+ ],
245
+ "execution_count" : 0 ,
246
+ "outputs" : [
247
+ {
248
+ "output_type" : " stream" ,
249
+ "text" : [
250
+ " Starting training...\n " ,
251
+ " [1]\t valid_0's l2: 631.201\t valid_0's l1: 19.7541\n " ,
252
+ " Training until validation scores don't improve for 5 rounds.\n " ,
253
+ " [2]\t valid_0's l2: 634.602\t valid_0's l1: 19.8091\n " ,
254
+ " [3]\t valid_0's l2: 638.174\t valid_0's l1: 19.8614\n " ,
255
+ " [4]\t valid_0's l2: 637.171\t valid_0's l1: 19.9141\n " ,
256
+ " [5]\t valid_0's l2: 636.317\t valid_0's l1: 19.9641\n " ,
257
+ " [6]\t valid_0's l2: 634.499\t valid_0's l1: 19.931\n " ,
258
+ " Early stopping, best iteration is:\n " ,
259
+ " [1]\t valid_0's l2: 631.201\t valid_0's l1: 19.7541\n " ,
260
+ " Saving model...\n " ,
261
+ " Starting predicting...\n " ,
262
+ " The rmse of prediction is: 25.123705584627015\n "
263
+ ],
264
+ "name" : " stdout"
265
+ }
266
+ ]
267
+ },
268
+ {
269
+ "cell_type" : " code" ,
270
+ "metadata" : {
271
+ "id" : " UQ42Mxy4xw7g" ,
272
+ "colab_type" : " code" ,
273
+ "colab" : {}
274
+ },
275
+ "source" : [
276
+ " "
277
+ ],
278
+ "execution_count" : 0 ,
279
+ "outputs" : []
280
+ }
281
+ ]
282
+ }
0 commit comments