Skip to content

Commit ecc8262

Browse files
authored
Add files via upload
1 parent c4863da commit ecc8262

File tree

1 file changed

+282
-0
lines changed

1 file changed

+282
-0
lines changed

lgb.ipynb

+282
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"name": "lgb.ipynb",
7+
"version": "0.3.2",
8+
"provenance": [],
9+
"collapsed_sections": []
10+
},
11+
"kernelspec": {
12+
"name": "python3",
13+
"display_name": "Python 3"
14+
}
15+
},
16+
"cells": [
17+
{
18+
"cell_type": "code",
19+
"metadata": {
20+
"id": "0WlPr1Xpwp4J",
21+
"colab_type": "code",
22+
"outputId": "1a2ca986-bc9b-4778-92d8-ade1647853cf",
23+
"colab": {
24+
"base_uri": "https://localhost:8080/",
25+
"height": 34
26+
}
27+
},
28+
"source": [
29+
"import lightgbm as lgb\n",
30+
"import numpy as np\n",
31+
"import pandas as pd\n",
32+
"# Seed NumPy's global RNG so the synthetic data (and every metric below) is reproducible.\n",
"np.random.seed(42)\n",
"df = pd.DataFrame(np.random.randint(0,100,size=(100, 4)), columns=['y', 'X1', 'X2', 'X3'])\n",
33+
"from sklearn.metrics import mean_squared_error"
34+
],
35+
"execution_count": 0,
36+
"outputs": [
37+
{
38+
"output_type": "stream",
39+
"text": [
40+
"Loading data...\n"
41+
],
42+
"name": "stdout"
43+
}
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"metadata": {
49+
"id": "ZwuyE3a6w8T7",
50+
"colab_type": "code",
51+
"outputId": "01aadd3c-b0fa-4d42-c040-fc06c1512896",
52+
"colab": {
53+
"base_uri": "https://localhost:8080/",
54+
"height": 195
55+
}
56+
},
57+
"source": [
58+
"df.head()"
59+
],
60+
"execution_count": 0,
61+
"outputs": [
62+
{
63+
"output_type": "execute_result",
64+
"data": {
65+
"text/html": [
66+
"<div>\n",
67+
"<style scoped>\n",
68+
" .dataframe tbody tr th:only-of-type {\n",
69+
" vertical-align: middle;\n",
70+
" }\n",
71+
"\n",
72+
" .dataframe tbody tr th {\n",
73+
" vertical-align: top;\n",
74+
" }\n",
75+
"\n",
76+
" .dataframe thead th {\n",
77+
" text-align: right;\n",
78+
" }\n",
79+
"</style>\n",
80+
"<table border=\"1\" class=\"dataframe\">\n",
81+
" <thead>\n",
82+
" <tr style=\"text-align: right;\">\n",
83+
" <th></th>\n",
84+
" <th>y</th>\n",
85+
" <th>X1</th>\n",
86+
" <th>X2</th>\n",
87+
" <th>X3</th>\n",
88+
" </tr>\n",
89+
" </thead>\n",
90+
" <tbody>\n",
91+
" <tr>\n",
92+
" <th>0</th>\n",
93+
" <td>56</td>\n",
94+
" <td>81</td>\n",
95+
" <td>41</td>\n",
96+
" <td>94</td>\n",
97+
" </tr>\n",
98+
" <tr>\n",
99+
" <th>1</th>\n",
100+
" <td>61</td>\n",
101+
" <td>69</td>\n",
102+
" <td>50</td>\n",
103+
" <td>30</td>\n",
104+
" </tr>\n",
105+
" <tr>\n",
106+
" <th>2</th>\n",
107+
" <td>25</td>\n",
108+
" <td>27</td>\n",
109+
" <td>67</td>\n",
110+
" <td>9</td>\n",
111+
" </tr>\n",
112+
" <tr>\n",
113+
" <th>3</th>\n",
114+
" <td>34</td>\n",
115+
" <td>65</td>\n",
116+
" <td>24</td>\n",
117+
" <td>2</td>\n",
118+
" </tr>\n",
119+
" <tr>\n",
120+
" <th>4</th>\n",
121+
" <td>4</td>\n",
122+
" <td>73</td>\n",
123+
" <td>57</td>\n",
124+
" <td>75</td>\n",
125+
" </tr>\n",
126+
" </tbody>\n",
127+
"</table>\n",
128+
"</div>"
129+
],
130+
"text/plain": [
131+
" y X1 X2 X3\n",
132+
"0 56 81 41 94\n",
133+
"1 61 69 50 30\n",
134+
"2 25 27 67 9\n",
135+
"3 34 65 24 2\n",
136+
"4 4 73 57 75"
137+
]
138+
},
139+
"metadata": {
140+
"tags": []
141+
},
142+
"execution_count": 6
143+
}
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"metadata": {
149+
"id": "PrM4fDO3xUjf",
150+
"colab_type": "code",
151+
"colab": {}
152+
},
153+
"source": [
154+
"# df_train / df_test are not defined anywhere else in this notebook, so derive them\n",
"# from df here (rows 0-79 train, rows 80-99 test) to keep the cell runnable on a fresh kernel.\n",
"# The target column is labelled 'y'; there is no column labelled 0.\n",
"df_train, df_test = df.iloc[:80], df.iloc[80:]\n",
"y_train = df_train['y']\n",
155+
"y_test = df_test['y']\n",
156+
"X_train = df_train.drop('y', axis=1)\n",
157+
"X_test = df_test.drop('y', axis=1)"
158+
],
159+
"execution_count": 0,
160+
"outputs": []
161+
},
162+
{
163+
"cell_type": "code",
164+
"metadata": {
165+
"id": "rnOBZM8GxXr_",
166+
"colab_type": "code",
167+
"colab": {}
168+
},
169+
"source": [
170+
"# Split by position: iloc slices are half-open, so rows 0-79 are train and rows 80-99 are test.\n",
"# (Label-based df.loc[:80] is end-inclusive and would put row 80 in both sets.)\n",
"feature_cols = ['X1', 'X2', 'X3']\n",
"y_train, y_test = df['y'].iloc[:80], df['y'].iloc[80:]\n",
171+
"X_train, X_test = df[feature_cols].iloc[:80], df[feature_cols].iloc[80:]"
172+
],
173+
"execution_count": 0,
174+
"outputs": []
175+
},
176+
{
177+
"cell_type": "code",
178+
"metadata": {
179+
"id": "BP0FBk8Syfpg",
180+
"colab_type": "code",
181+
"colab": {}
182+
},
183+
"source": [
184+
"# create dataset for lightgbm\n",
185+
"lgb_train = lgb.Dataset(X_train, y_train)\n",
186+
"lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)"
187+
],
188+
"execution_count": 0,
189+
"outputs": []
190+
},
191+
{
192+
"cell_type": "code",
193+
"metadata": {
194+
"id": "2h0GmTXzyjMZ",
195+
"colab_type": "code",
196+
"colab": {}
197+
},
198+
"source": [
199+
"# specify your configurations as a dict\n",
200+
"params = {\n",
201+
" 'boosting_type': 'gbdt',\n",
202+
" 'objective': 'regression',\n",
203+
" 'metric': {'l2', 'l1'},\n",
204+
" 'num_leaves': 31,\n",
205+
" 'learning_rate': 0.05,\n",
206+
" 'feature_fraction': 0.9,\n",
207+
" 'bagging_fraction': 0.8,\n",
208+
" 'bagging_freq': 5,\n",
209+
" 'verbose': 0\n",
210+
"}"
211+
],
212+
"execution_count": 0,
213+
"outputs": []
214+
},
215+
{
216+
"cell_type": "code",
217+
"metadata": {
218+
"id": "jcQ17FYswKUD",
219+
"colab_type": "code",
220+
"outputId": "d1086f35-b126-4efb-82c1-819f0bd2cda8",
221+
"colab": {
222+
"base_uri": "https://localhost:8080/",
223+
"height": 235
224+
}
225+
},
226+
"source": [
227+
"print('Starting training...')\n",
228+
"# Train for at most 20 rounds, validating on lgb_eval. Early stopping is supplied as a\n",
"# callback because the early_stopping_rounds keyword was removed from lgb.train() in LightGBM 4.0.\n",
229+
"gbm = lgb.train(params,\n",
230+
" lgb_train,\n",
231+
" num_boost_round=20,\n",
232+
" valid_sets=lgb_eval,\n",
233+
" callbacks=[lgb.early_stopping(stopping_rounds=5)])\n",
234+
"\n",
235+
"print('Saving model...')\n",
236+
"# save model to file\n",
237+
"gbm.save_model('model.txt')\n",
238+
"\n",
239+
"print('Starting predicting...')\n",
240+
"# predict\n",
241+
"y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)\n",
242+
"# eval\n",
243+
"print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)"
244+
],
245+
"execution_count": 0,
246+
"outputs": [
247+
{
248+
"output_type": "stream",
249+
"text": [
250+
"Starting training...\n",
251+
"[1]\tvalid_0's l2: 631.201\tvalid_0's l1: 19.7541\n",
252+
"Training until validation scores don't improve for 5 rounds.\n",
253+
"[2]\tvalid_0's l2: 634.602\tvalid_0's l1: 19.8091\n",
254+
"[3]\tvalid_0's l2: 638.174\tvalid_0's l1: 19.8614\n",
255+
"[4]\tvalid_0's l2: 637.171\tvalid_0's l1: 19.9141\n",
256+
"[5]\tvalid_0's l2: 636.317\tvalid_0's l1: 19.9641\n",
257+
"[6]\tvalid_0's l2: 634.499\tvalid_0's l1: 19.931\n",
258+
"Early stopping, best iteration is:\n",
259+
"[1]\tvalid_0's l2: 631.201\tvalid_0's l1: 19.7541\n",
260+
"Saving model...\n",
261+
"Starting predicting...\n",
262+
"The rmse of prediction is: 25.123705584627015\n"
263+
],
264+
"name": "stdout"
265+
}
266+
]
267+
},
268+
{
269+
"cell_type": "code",
270+
"metadata": {
271+
"id": "UQ42Mxy4xw7g",
272+
"colab_type": "code",
273+
"colab": {}
274+
},
275+
"source": [
276+
""
277+
],
278+
"execution_count": 0,
279+
"outputs": []
280+
}
281+
]
282+
}

0 commit comments

Comments
 (0)