Add files via upload

tinhb92 · web-flow · commit ecc8262c56cc · 2019-09-17T12:17:29.000+08:00
diff --git a/lgb.ipynb b/lgb.ipynb
@@ -0,0 +1,282 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "lgb.ipynb",
+      "version": "0.3.2",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0WlPr1Xpwp4J",
+        "colab_type": "code",
+        "outputId": "1a2ca986-bc9b-4778-92d8-ade1647853cf",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        }
+      },
+      "source": [
+        "import lightgbm as lgb\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "df = pd.DataFrame(np.random.randint(0,100,size=(100, 4)), columns=['y', 'X1', 'X2', 'X3'])\n",
+        "from sklearn.metrics import mean_squared_error"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Loading data...\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ZwuyE3a6w8T7",
+        "colab_type": "code",
+        "outputId": "01aadd3c-b0fa-4d42-c040-fc06c1512896",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 195
+        }
+      },
+      "source": [
+        "df.head()"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>y</th>\n",
+              "      <th>X1</th>\n",
+              "      <th>X2</th>\n",
+              "      <th>X3</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>56</td>\n",
+              "      <td>81</td>\n",
+              "      <td>41</td>\n",
+              "      <td>94</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>61</td>\n",
+              "      <td>69</td>\n",
+              "      <td>50</td>\n",
+              "      <td>30</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>25</td>\n",
+              "      <td>27</td>\n",
+              "      <td>67</td>\n",
+              "      <td>9</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>34</td>\n",
+              "      <td>65</td>\n",
+              "      <td>24</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>4</td>\n",
+              "      <td>73</td>\n",
+              "      <td>57</td>\n",
+              "      <td>75</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "    y  X1  X2  X3\n",
+              "0  56  81  41  94\n",
+              "1  61  69  50  30\n",
+              "2  25  27  67   9\n",
+              "3  34  65  24   2\n",
+              "4   4  73  57  75"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 6
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "PrM4fDO3xUjf",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "y_train = df_train[0]\n",
+        "y_test = df_test[0]\n",
+        "X_train = df_train.drop(0, axis=1)\n",
+        "X_test = df_test.drop(0, axis=1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "rnOBZM8GxXr_",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "y_train, y_test = df.loc[:80, 'y'], df.loc[80:, 'y']\n",
+        "X_train, X_test = df.loc[:80, ['X1', 'X2', 'X3']], df.loc[80:, ['X1', 'X2', 'X3']]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "BP0FBk8Syfpg",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# create dataset for lightgbm\n",
+        "lgb_train = lgb.Dataset(X_train, y_train)\n",
+        "lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "2h0GmTXzyjMZ",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# specify your configurations as a dict\n",
+        "params = {\n",
+        "    'boosting_type': 'gbdt',\n",
+        "    'objective': 'regression',\n",
+        "    'metric': {'l2', 'l1'},\n",
+        "    'num_leaves': 31,\n",
+        "    'learning_rate': 0.05,\n",
+        "    'feature_fraction': 0.9,\n",
+        "    'bagging_fraction': 0.8,\n",
+        "    'bagging_freq': 5,\n",
+        "    'verbose': 0\n",
+        "}"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "jcQ17FYswKUD",
+        "colab_type": "code",
+        "outputId": "d1086f35-b126-4efb-82c1-819f0bd2cda8",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 235
+        }
+      },
+      "source": [
+        "print('Starting training...')\n",
+        "# train\n",
+        "gbm = lgb.train(params,\n",
+        "                lgb_train,\n",
+        "                num_boost_round=20,\n",
+        "                valid_sets=lgb_eval,\n",
+        "                early_stopping_rounds=5)\n",
+        "\n",
+        "print('Saving model...')\n",
+        "# save model to file\n",
+        "gbm.save_model('model.txt')\n",
+        "\n",
+        "print('Starting predicting...')\n",
+        "# predict\n",
+        "y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)\n",
+        "# eval\n",
+        "print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Starting training...\n",
+            "[1]\tvalid_0's l2: 631.201\tvalid_0's l1: 19.7541\n",
+            "Training until validation scores don't improve for 5 rounds.\n",
+            "[2]\tvalid_0's l2: 634.602\tvalid_0's l1: 19.8091\n",
+            "[3]\tvalid_0's l2: 638.174\tvalid_0's l1: 19.8614\n",
+            "[4]\tvalid_0's l2: 637.171\tvalid_0's l1: 19.9141\n",
+            "[5]\tvalid_0's l2: 636.317\tvalid_0's l1: 19.9641\n",
+            "[6]\tvalid_0's l2: 634.499\tvalid_0's l1: 19.931\n",
+            "Early stopping, best iteration is:\n",
+            "[1]\tvalid_0's l2: 631.201\tvalid_0's l1: 19.7541\n",
+            "Saving model...\n",
+            "Starting predicting...\n",
+            "The rmse of prediction is: 25.123705584627015\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "UQ42Mxy4xw7g",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}