{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "\n", "from sklearn.model_selection import train_test_split\n", "import pandas as pd\n", "import numpy\n", "\n", "from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder\n", "\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.compose import ColumnTransformer\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.linear_model import LinearRegression\n", "from catboost import CatBoostRegressor\n", "\n", "from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 547701 entries, 313199 to 690900\n", "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 price 547701 non-null int64 \n", " 1 date 547701 non-null object \n", " 2 time 547701 non-null object \n", " 3 geo_lat 547701 non-null float32 \n", " 4 geo_lon 547701 non-null float32 \n", " 5 region 547701 non-null category\n", " 6 building_type 547701 non-null category\n", " 7 level 547701 non-null int8 \n", " 8 levels 547701 non-null int8 \n", " 9 rooms 547701 non-null int8 \n", " 10 area 547701 non-null float16 \n", " 11 kitchen_area 547701 non-null float16 \n", " 12 object_type 547701 non-null category\n", "dtypes: category(3), float16(2), float32(2), int64(1), int8(3), object(2)\n", "memory usage: 26.1+ MB\n" ] } ], "source": [ "# Work on a 10% sample so the model trains quickly during the lecture.\n", "# NOTE: read_pickle can execute arbitrary code while loading; only open trusted files.\n", "df = pd.read_pickle('data/clean_data.pkl').sample(frac=0.1, random_state=2)\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Rename the label column and drop the raw date/time columns in one chained step.\n", "# errors='ignore' lets the cell be re-run after the columns are already gone\n", "# (a plain drop would raise KeyError on the second execution).\n", "df = (\n", "    df.rename(columns={'price': 'target'})\n", "    .drop(columns=['date', 'time'], errors='ignore')\n", ")" ] }, { "cell_type": "code", 
"execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
targetgeo_latgeo_lonregionbuilding_typelevellevelsroomsareakitchen_areaobject_type
313199499999959.95845830.21553026613813136.000007.1992191
2437764215000045.07267441.9369962900355152.0000015.0000001
4949072860000059.93935830.437069266121122137.093759.7968751
4109465510000059.74047930.5695402661129374.500009.5000001
2187702347000056.32406244.005390287121126254.000008.00000011
....................................
5188085230000057.75060340.8664674189323138.0000011.0000001
4542014670000055.91172037.73741981325266.375008.0000001
3306731385000051.70451039.273037207221018389.5000014.2031251
520293187888554.94357782.95886296541110387.7500012.92187511
690900409735059.88270230.45124626612623136.0937516.20312511
\n", "

547701 rows × 11 columns

\n", "
" ], "text/plain": [ " target geo_lat geo_lon region building_type level levels \\\n", "313199 4999999 59.958458 30.215530 2661 3 8 13 \n", "2437764 2150000 45.072674 41.936996 2900 3 5 5 \n", "4949072 8600000 59.939358 30.437069 2661 2 11 22 \n", "4109465 5100000 59.740479 30.569540 2661 1 2 9 \n", "2187702 3470000 56.324062 44.005390 2871 2 11 26 \n", "... ... ... ... ... ... ... ... \n", "5188085 2300000 57.750603 40.866467 4189 3 2 3 \n", "4542014 6700000 55.911720 37.737419 81 3 2 5 \n", "3306731 3850000 51.704510 39.273037 2072 2 10 18 \n", "520293 1878885 54.943577 82.958862 9654 1 1 10 \n", "690900 4097350 59.882702 30.451246 2661 2 6 23 \n", "\n", " rooms area kitchen_area object_type \n", "313199 1 36.00000 7.199219 1 \n", "2437764 1 52.00000 15.000000 1 \n", "4949072 1 37.09375 9.796875 1 \n", "4109465 3 74.50000 9.500000 1 \n", "2187702 2 54.00000 8.000000 11 \n", "... ... ... ... ... \n", "5188085 1 38.00000 11.000000 1 \n", "4542014 2 66.37500 8.000000 1 \n", "3306731 3 89.50000 14.203125 1 \n", "520293 3 87.75000 12.921875 11 \n", "690900 1 36.09375 16.203125 11 \n", "\n", "[547701 rows x 11 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    df.drop(columns='target'),\n", "    df['target'],\n", "    test_size=0.25,\n", "    random_state=2,\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "geo_lat 56.327686\n", "geo_lon 43.928062\n", "region 2871.000000\n", "building_type 1.000000\n", "level 8.000000\n", "levels 10.000000\n", "rooms 2.000000\n", "area 56.000000\n", "kitchen_area 8.500000\n", "object_type 1.000000\n", "Name: 879487, dtype: float64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.iloc[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['region', 'building_type', 'object_type']" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Columns stored with category/object dtype are treated as categorical features.\n", "cat_features = X_train.select_dtypes(include=['category', 'object']).columns.to_list()\n", "cat_features" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['geo_lat', 'geo_lon', 'level', 'levels', 'rooms', 'area', 'kitchen_area']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Every numeric-dtype column is treated as a numerical feature.\n", "num_features = X_train.select_dtypes(include=['number']).columns.to_list()\n", "num_features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "https://scikit-learn.org/stable/api/sklearn.preprocessing.html - разные способы кодирования и скалирования" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "s_scaler = StandardScaler()\n", "# Categories unseen during fit are encoded as unknown_value; choose it deliberately so it\n", "# cannot collide with a real ordinal code.\n", "l_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=99999999)\n", "# verbose=100 prints one progress line per 100 iterations instead of one per iteration,\n", "# which otherwise floods the cell output with the full training log.\n", "regressor = CatBoostRegressor(verbose=100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Column transformer" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Route each group of columns through its own transformer.\n", "preprocessor = ColumnTransformer(\n", "    transformers=[\n", "        ('num', s_scaler, num_features),   # scale the numerical features\n", "        ('cat', l_encoder, cat_features),  # ordinal-encode the categorical features\n", "    ],\n", "    remainder='drop',  # discard columns that no transformer was assigned to\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Learning rate set to 0.105957\n", "0:\tlearn: 22102085.4544239\ttotal: 67.8ms\tremaining: 1m 7s\n", "1:\tlearn: 
21994630.3403412\ttotal: 87.1ms\tremaining: 43.4s\n", "2:\tlearn: 21906687.8196027\ttotal: 105ms\tremaining: 34.8s\n", "3:\tlearn: 21834890.5050552\ttotal: 124ms\tremaining: 30.9s\n", "4:\tlearn: 21770820.6751194\ttotal: 143ms\tremaining: 28.5s\n", "5:\tlearn: 21719543.9330108\ttotal: 163ms\tremaining: 27s\n", "6:\tlearn: 21676510.1666598\ttotal: 183ms\tremaining: 25.9s\n", "7:\tlearn: 21641355.8079016\ttotal: 202ms\tremaining: 25.1s\n", "8:\tlearn: 21612289.0494648\ttotal: 223ms\tremaining: 24.5s\n", "9:\tlearn: 21583808.7061085\ttotal: 242ms\tremaining: 24s\n", "10:\tlearn: 21559288.9618040\ttotal: 266ms\tremaining: 23.9s\n", "11:\tlearn: 21537048.9920531\ttotal: 286ms\tremaining: 23.5s\n", "12:\tlearn: 21444526.1629239\ttotal: 306ms\tremaining: 23.3s\n", "13:\tlearn: 21426349.3370315\ttotal: 324ms\tremaining: 22.8s\n", "14:\tlearn: 21411901.2338278\ttotal: 344ms\tremaining: 22.6s\n", "15:\tlearn: 21399279.8023459\ttotal: 360ms\tremaining: 22.1s\n", "16:\tlearn: 21299421.1434822\ttotal: 382ms\tremaining: 22.1s\n", "17:\tlearn: 21288560.2595435\ttotal: 400ms\tremaining: 21.8s\n", "18:\tlearn: 21277368.8876877\ttotal: 421ms\tremaining: 21.8s\n", "19:\tlearn: 21229205.2938305\ttotal: 441ms\tremaining: 21.6s\n", "20:\tlearn: 21220238.4828158\ttotal: 462ms\tremaining: 21.5s\n", "21:\tlearn: 21212849.7885410\ttotal: 485ms\tremaining: 21.6s\n", "22:\tlearn: 21205304.4132821\ttotal: 507ms\tremaining: 21.5s\n", "23:\tlearn: 21198813.8508479\ttotal: 534ms\tremaining: 21.7s\n", "24:\tlearn: 21184627.2326983\ttotal: 550ms\tremaining: 21.4s\n", "25:\tlearn: 21172748.3410688\ttotal: 569ms\tremaining: 21.3s\n", "26:\tlearn: 21103305.4766520\ttotal: 582ms\tremaining: 21s\n", "27:\tlearn: 21096636.4037750\ttotal: 594ms\tremaining: 20.6s\n", "28:\tlearn: 21082202.2892557\ttotal: 607ms\tremaining: 20.3s\n", "29:\tlearn: 21077185.5274954\ttotal: 619ms\tremaining: 20s\n", "30:\tlearn: 21071613.1691098\ttotal: 632ms\tremaining: 19.7s\n", "31:\tlearn: 21067654.8502386\ttotal: 
644ms\tremaining: 19.5s\n", "32:\tlearn: 21053425.8947843\ttotal: 659ms\tremaining: 19.3s\n", "33:\tlearn: 21038024.0563140\ttotal: 670ms\tremaining: 19s\n", "34:\tlearn: 20961357.9814339\ttotal: 682ms\tremaining: 18.8s\n", "35:\tlearn: 20946027.4479676\ttotal: 695ms\tremaining: 18.6s\n", "36:\tlearn: 20866676.4104322\ttotal: 707ms\tremaining: 18.4s\n", "37:\tlearn: 20863078.3182449\ttotal: 717ms\tremaining: 18.1s\n", "38:\tlearn: 20859910.3609500\ttotal: 728ms\tremaining: 17.9s\n", "39:\tlearn: 20853462.2703730\ttotal: 739ms\tremaining: 17.7s\n", "40:\tlearn: 20851610.3209036\ttotal: 749ms\tremaining: 17.5s\n", "41:\tlearn: 20847674.0809285\ttotal: 760ms\tremaining: 17.3s\n", "42:\tlearn: 20845384.9263391\ttotal: 772ms\tremaining: 17.2s\n", "43:\tlearn: 20843256.7428906\ttotal: 784ms\tremaining: 17s\n", "44:\tlearn: 20841580.8594834\ttotal: 797ms\tremaining: 16.9s\n", "45:\tlearn: 20819301.2718345\ttotal: 810ms\tremaining: 16.8s\n", "46:\tlearn: 20812094.5913582\ttotal: 823ms\tremaining: 16.7s\n", "47:\tlearn: 20808932.0866915\ttotal: 834ms\tremaining: 16.5s\n", "48:\tlearn: 20763172.9200413\ttotal: 851ms\tremaining: 16.5s\n", "49:\tlearn: 20729084.6574594\ttotal: 863ms\tremaining: 16.4s\n", "50:\tlearn: 20721820.5403996\ttotal: 876ms\tremaining: 16.3s\n", "51:\tlearn: 20715664.3732084\ttotal: 887ms\tremaining: 16.2s\n", "52:\tlearn: 20712658.7025295\ttotal: 897ms\tremaining: 16s\n", "53:\tlearn: 20704254.1704930\ttotal: 908ms\tremaining: 15.9s\n", "54:\tlearn: 20690967.9220470\ttotal: 919ms\tremaining: 15.8s\n", "55:\tlearn: 20686546.8978473\ttotal: 929ms\tremaining: 15.7s\n", "56:\tlearn: 20682362.4255777\ttotal: 941ms\tremaining: 15.6s\n", "57:\tlearn: 20680744.8113421\ttotal: 951ms\tremaining: 15.4s\n", "58:\tlearn: 20677926.0871267\ttotal: 962ms\tremaining: 15.3s\n", "59:\tlearn: 20658478.3098789\ttotal: 974ms\tremaining: 15.3s\n", "60:\tlearn: 20641964.4472246\ttotal: 987ms\tremaining: 15.2s\n", "61:\tlearn: 20639551.4216654\ttotal: 999ms\tremaining: 
15.1s\n", "62:\tlearn: 20638344.8919341\ttotal: 1.01s\tremaining: 15s\n", "63:\tlearn: 20635991.3894815\ttotal: 1.02s\tremaining: 15s\n", "64:\tlearn: 20595846.8116432\ttotal: 1.03s\tremaining: 14.9s\n", "65:\tlearn: 20592198.9483046\ttotal: 1.05s\tremaining: 14.8s\n", "66:\tlearn: 20565316.0060422\ttotal: 1.06s\tremaining: 14.8s\n", "67:\tlearn: 20563073.6783517\ttotal: 1.07s\tremaining: 14.7s\n", "68:\tlearn: 20553650.4649650\ttotal: 1.08s\tremaining: 14.6s\n", "69:\tlearn: 20545510.8230653\ttotal: 1.09s\tremaining: 14.5s\n", "70:\tlearn: 20544114.9272186\ttotal: 1.1s\tremaining: 14.5s\n", "71:\tlearn: 20541689.8802451\ttotal: 1.11s\tremaining: 14.4s\n", "72:\tlearn: 20538792.7074671\ttotal: 1.13s\tremaining: 14.3s\n", "73:\tlearn: 20517134.0713648\ttotal: 1.14s\tremaining: 14.3s\n", "74:\tlearn: 20510477.9089445\ttotal: 1.15s\tremaining: 14.2s\n", "75:\tlearn: 20494649.9067257\ttotal: 1.17s\tremaining: 14.2s\n", "76:\tlearn: 20490851.9879851\ttotal: 1.18s\tremaining: 14.1s\n", "77:\tlearn: 20488939.9621874\ttotal: 1.19s\tremaining: 14.1s\n", "78:\tlearn: 20432532.8171644\ttotal: 1.2s\tremaining: 14s\n", "79:\tlearn: 20428397.7107150\ttotal: 1.22s\tremaining: 14s\n", "80:\tlearn: 20421638.7734419\ttotal: 1.23s\tremaining: 13.9s\n", "81:\tlearn: 20421021.7388457\ttotal: 1.24s\tremaining: 13.9s\n", "82:\tlearn: 20406404.2376730\ttotal: 1.26s\tremaining: 13.9s\n", "83:\tlearn: 20021682.5008511\ttotal: 1.28s\tremaining: 13.9s\n", "84:\tlearn: 20018322.6048631\ttotal: 1.3s\tremaining: 14s\n", "85:\tlearn: 20004841.3476490\ttotal: 1.31s\tremaining: 14s\n", "86:\tlearn: 19985666.0092745\ttotal: 1.33s\tremaining: 13.9s\n", "87:\tlearn: 19983778.1947243\ttotal: 1.34s\tremaining: 13.9s\n", "88:\tlearn: 19982460.1107908\ttotal: 1.36s\tremaining: 13.9s\n", "89:\tlearn: 19979128.5494690\ttotal: 1.37s\tremaining: 13.8s\n", "90:\tlearn: 19974094.9707357\ttotal: 1.38s\tremaining: 13.8s\n", "91:\tlearn: 19972006.9431031\ttotal: 1.4s\tremaining: 13.8s\n", "92:\tlearn: 
19970846.2845466\ttotal: 1.41s\tremaining: 13.7s\n", "93:\tlearn: 19968858.0073042\ttotal: 1.42s\tremaining: 13.7s\n", "94:\tlearn: 19921720.6252972\ttotal: 1.44s\tremaining: 13.7s\n", "95:\tlearn: 19916568.5707839\ttotal: 1.45s\tremaining: 13.6s\n", "96:\tlearn: 19913228.5247508\ttotal: 1.46s\tremaining: 13.6s\n", "97:\tlearn: 19901982.4625895\ttotal: 1.48s\tremaining: 13.6s\n", "98:\tlearn: 19836107.7247888\ttotal: 1.49s\tremaining: 13.6s\n", "99:\tlearn: 19834724.7455166\ttotal: 1.5s\tremaining: 13.5s\n", "100:\tlearn: 19832811.9745741\ttotal: 1.52s\tremaining: 13.5s\n", "101:\tlearn: 19818491.2851567\ttotal: 1.53s\tremaining: 13.5s\n", "102:\tlearn: 19815779.3719026\ttotal: 1.55s\tremaining: 13.5s\n", "103:\tlearn: 19814215.0962787\ttotal: 1.56s\tremaining: 13.5s\n", "104:\tlearn: 19782274.6892663\ttotal: 1.57s\tremaining: 13.4s\n", "105:\tlearn: 19777945.6507456\ttotal: 1.59s\tremaining: 13.4s\n", "106:\tlearn: 19770488.9772154\ttotal: 1.6s\tremaining: 13.4s\n", "107:\tlearn: 19769758.0023174\ttotal: 1.61s\tremaining: 13.3s\n", "108:\tlearn: 19767541.9303017\ttotal: 1.63s\tremaining: 13.3s\n", "109:\tlearn: 19766992.0126300\ttotal: 1.64s\tremaining: 13.2s\n", "110:\tlearn: 19765032.8837298\ttotal: 1.65s\tremaining: 13.2s\n", "111:\tlearn: 19705204.6771073\ttotal: 1.66s\tremaining: 13.2s\n", "112:\tlearn: 19703649.0394020\ttotal: 1.68s\tremaining: 13.2s\n", "113:\tlearn: 19693038.0415419\ttotal: 1.69s\tremaining: 13.1s\n", "114:\tlearn: 19690294.4304072\ttotal: 1.7s\tremaining: 13.1s\n", "115:\tlearn: 19686529.4709294\ttotal: 1.71s\tremaining: 13.1s\n", "116:\tlearn: 19684887.8267152\ttotal: 1.72s\tremaining: 13s\n", "117:\tlearn: 19369465.6970761\ttotal: 1.74s\tremaining: 13s\n", "118:\tlearn: 19368868.0416380\ttotal: 1.75s\tremaining: 13s\n", "119:\tlearn: 19334590.5868513\ttotal: 1.77s\tremaining: 13s\n", "120:\tlearn: 19332200.0832597\ttotal: 1.78s\tremaining: 12.9s\n", "121:\tlearn: 19320130.9244745\ttotal: 1.79s\tremaining: 12.9s\n", "122:\tlearn: 
19318220.9448337\ttotal: 1.81s\tremaining: 12.9s\n", "123:\tlearn: 18941546.2095714\ttotal: 1.82s\tremaining: 12.9s\n", "124:\tlearn: 18941056.2836883\ttotal: 1.84s\tremaining: 12.9s\n", "125:\tlearn: 18939637.9662976\ttotal: 1.85s\tremaining: 12.8s\n", "126:\tlearn: 18938172.4621610\ttotal: 1.86s\tremaining: 12.8s\n", "127:\tlearn: 18935889.3619752\ttotal: 1.88s\tremaining: 12.8s\n", "128:\tlearn: 18928784.7025346\ttotal: 1.9s\tremaining: 12.8s\n", "129:\tlearn: 18926981.6933453\ttotal: 1.91s\tremaining: 12.8s\n", "130:\tlearn: 18830178.3173696\ttotal: 1.93s\tremaining: 12.8s\n", "131:\tlearn: 18828102.3918672\ttotal: 1.94s\tremaining: 12.8s\n", "132:\tlearn: 18825755.9987015\ttotal: 1.95s\tremaining: 12.7s\n", "133:\tlearn: 18793049.5462155\ttotal: 1.97s\tremaining: 12.7s\n", "134:\tlearn: 18791452.8400128\ttotal: 1.98s\tremaining: 12.7s\n", "135:\tlearn: 18484591.4924421\ttotal: 1.99s\tremaining: 12.7s\n", "136:\tlearn: 18482373.1605741\ttotal: 2s\tremaining: 12.6s\n", "137:\tlearn: 18414571.2543321\ttotal: 2.02s\tremaining: 12.6s\n", "138:\tlearn: 18412913.4160574\ttotal: 2.03s\tremaining: 12.6s\n", "139:\tlearn: 18409214.1141794\ttotal: 2.04s\tremaining: 12.6s\n", "140:\tlearn: 18395140.1008086\ttotal: 2.06s\tremaining: 12.5s\n", "141:\tlearn: 18390939.2248151\ttotal: 2.07s\tremaining: 12.5s\n", "142:\tlearn: 18377925.8298573\ttotal: 2.08s\tremaining: 12.5s\n", "143:\tlearn: 18371775.1291009\ttotal: 2.09s\tremaining: 12.4s\n", "144:\tlearn: 18370251.1042623\ttotal: 2.1s\tremaining: 12.4s\n", "145:\tlearn: 18332707.1499911\ttotal: 2.12s\tremaining: 12.4s\n", "146:\tlearn: 18330693.2665230\ttotal: 2.13s\tremaining: 12.3s\n", "147:\tlearn: 18329408.2952767\ttotal: 2.14s\tremaining: 12.3s\n", "148:\tlearn: 18321783.9892793\ttotal: 2.15s\tremaining: 12.3s\n", "149:\tlearn: 18321270.4958267\ttotal: 2.16s\tremaining: 12.2s\n", "150:\tlearn: 18310325.1681801\ttotal: 2.17s\tremaining: 12.2s\n", "151:\tlearn: 18299986.9413893\ttotal: 2.18s\tremaining: 12.2s\n", 
"152:\tlearn: 18290217.7479708\ttotal: 2.2s\tremaining: 12.2s\n", "153:\tlearn: 18280975.8537910\ttotal: 2.21s\tremaining: 12.1s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "154:\tlearn: 18272215.6509019\ttotal: 2.22s\tremaining: 12.1s\n", "155:\tlearn: 18263878.2178516\ttotal: 2.23s\tremaining: 12.1s\n", "156:\tlearn: 18256009.4859248\ttotal: 2.25s\tremaining: 12.1s\n", "157:\tlearn: 18248529.7799856\ttotal: 2.26s\tremaining: 12.1s\n", "158:\tlearn: 18241388.0845094\ttotal: 2.28s\tremaining: 12s\n", "159:\tlearn: 18234700.5127085\ttotal: 2.29s\tremaining: 12s\n", "160:\tlearn: 18228095.5839778\ttotal: 2.3s\tremaining: 12s\n", "161:\tlearn: 18222087.5153066\ttotal: 2.31s\tremaining: 12s\n", "162:\tlearn: 18215963.2971261\ttotal: 2.33s\tremaining: 12s\n", "163:\tlearn: 18210272.5545163\ttotal: 2.34s\tremaining: 11.9s\n", "164:\tlearn: 18208920.7703569\ttotal: 2.35s\tremaining: 11.9s\n", "165:\tlearn: 18204704.7145239\ttotal: 2.37s\tremaining: 11.9s\n", "166:\tlearn: 18187135.8260335\ttotal: 2.38s\tremaining: 11.9s\n", "167:\tlearn: 18183064.7135734\ttotal: 2.39s\tremaining: 11.9s\n", "168:\tlearn: 18177887.1670860\ttotal: 2.41s\tremaining: 11.8s\n", "169:\tlearn: 18173022.2110313\ttotal: 2.42s\tremaining: 11.8s\n", "170:\tlearn: 18168573.4167384\ttotal: 2.44s\tremaining: 11.8s\n", "171:\tlearn: 18165036.1971623\ttotal: 2.45s\tremaining: 11.8s\n", "172:\tlearn: 18161841.9822954\ttotal: 2.46s\tremaining: 11.8s\n", "173:\tlearn: 18129860.2061383\ttotal: 2.48s\tremaining: 11.8s\n", "174:\tlearn: 18127931.5161091\ttotal: 2.49s\tremaining: 11.7s\n", "175:\tlearn: 18124997.7778403\ttotal: 2.5s\tremaining: 11.7s\n", "176:\tlearn: 18122975.2084322\ttotal: 2.51s\tremaining: 11.7s\n", "177:\tlearn: 18120855.5325733\ttotal: 2.53s\tremaining: 11.7s\n", "178:\tlearn: 18117907.6019994\ttotal: 2.54s\tremaining: 11.7s\n", "179:\tlearn: 18116674.0864027\ttotal: 2.56s\tremaining: 11.7s\n", "180:\tlearn: 18114086.9287957\ttotal: 2.57s\tremaining: 11.6s\n", 
"181:\tlearn: 18087100.0827926\ttotal: 2.59s\tremaining: 11.6s\n", "182:\tlearn: 18071944.2213105\ttotal: 2.6s\tremaining: 11.6s\n", "183:\tlearn: 17952691.4261792\ttotal: 2.61s\tremaining: 11.6s\n", "184:\tlearn: 17950298.6715866\ttotal: 2.63s\tremaining: 11.6s\n", "185:\tlearn: 17949031.8169417\ttotal: 2.64s\tremaining: 11.6s\n", "186:\tlearn: 17937943.5186847\ttotal: 2.66s\tremaining: 11.5s\n", "187:\tlearn: 17937014.8027177\ttotal: 2.67s\tremaining: 11.5s\n", "188:\tlearn: 17936493.5945773\ttotal: 2.68s\tremaining: 11.5s\n", "189:\tlearn: 17935386.0093649\ttotal: 2.69s\tremaining: 11.5s\n", "190:\tlearn: 17934203.8644718\ttotal: 2.7s\tremaining: 11.5s\n", "191:\tlearn: 17928336.5184065\ttotal: 2.72s\tremaining: 11.4s\n", "192:\tlearn: 17925443.1940046\ttotal: 2.73s\tremaining: 11.4s\n", "193:\tlearn: 17924535.5533845\ttotal: 2.75s\tremaining: 11.4s\n", "194:\tlearn: 17917225.8802206\ttotal: 2.76s\tremaining: 11.4s\n", "195:\tlearn: 17904437.4148190\ttotal: 2.77s\tremaining: 11.4s\n", "196:\tlearn: 17902915.3467923\ttotal: 2.79s\tremaining: 11.4s\n", "197:\tlearn: 17900924.7512305\ttotal: 2.8s\tremaining: 11.3s\n", "198:\tlearn: 17899976.2262471\ttotal: 2.81s\tremaining: 11.3s\n", "199:\tlearn: 17896573.5977064\ttotal: 2.83s\tremaining: 11.3s\n", "200:\tlearn: 17894480.1301072\ttotal: 2.84s\tremaining: 11.3s\n", "201:\tlearn: 17891369.5414483\ttotal: 2.85s\tremaining: 11.3s\n", "202:\tlearn: 17853776.3679239\ttotal: 2.86s\tremaining: 11.2s\n", "203:\tlearn: 17851457.0828592\ttotal: 2.88s\tremaining: 11.2s\n", "204:\tlearn: 17849621.6767992\ttotal: 2.89s\tremaining: 11.2s\n", "205:\tlearn: 17848392.5509482\ttotal: 2.9s\tremaining: 11.2s\n", "206:\tlearn: 17845597.2428619\ttotal: 2.91s\tremaining: 11.1s\n", "207:\tlearn: 17841951.2763157\ttotal: 2.92s\tremaining: 11.1s\n", "208:\tlearn: 17829332.8912371\ttotal: 2.93s\tremaining: 11.1s\n", "209:\tlearn: 17825984.1152963\ttotal: 2.95s\tremaining: 11.1s\n", "210:\tlearn: 17821360.2498463\ttotal: 2.96s\tremaining: 
11.1s\n", "211:\tlearn: 17816041.9633158\ttotal: 2.97s\tremaining: 11s\n", "212:\tlearn: 17815089.0154101\ttotal: 2.98s\tremaining: 11s\n", "213:\tlearn: 17812260.4222221\ttotal: 2.99s\tremaining: 11s\n", "214:\tlearn: 17811642.1796060\ttotal: 3s\tremaining: 11s\n", "215:\tlearn: 17811104.8656724\ttotal: 3.01s\tremaining: 10.9s\n", "216:\tlearn: 17810456.2984828\ttotal: 3.02s\tremaining: 10.9s\n", "217:\tlearn: 17809982.4909707\ttotal: 3.04s\tremaining: 10.9s\n", "218:\tlearn: 17809543.7803178\ttotal: 3.05s\tremaining: 10.9s\n", "219:\tlearn: 17809136.8325569\ttotal: 3.06s\tremaining: 10.9s\n", "220:\tlearn: 17808758.7315278\ttotal: 3.07s\tremaining: 10.8s\n", "221:\tlearn: 17808406.9145618\ttotal: 3.09s\tremaining: 10.8s\n", "222:\tlearn: 17806754.0179687\ttotal: 3.1s\tremaining: 10.8s\n", "223:\tlearn: 17806262.4885592\ttotal: 3.11s\tremaining: 10.8s\n", "224:\tlearn: 17805319.3776209\ttotal: 3.13s\tremaining: 10.8s\n", "225:\tlearn: 17805011.6013482\ttotal: 3.14s\tremaining: 10.7s\n", "226:\tlearn: 17804724.0362310\ttotal: 3.15s\tremaining: 10.7s\n", "227:\tlearn: 17793961.7547867\ttotal: 3.16s\tremaining: 10.7s\n", "228:\tlearn: 17793044.3976904\ttotal: 3.18s\tremaining: 10.7s\n", "229:\tlearn: 17791876.3449986\ttotal: 3.19s\tremaining: 10.7s\n", "230:\tlearn: 17770039.2877531\ttotal: 3.21s\tremaining: 10.7s\n", "231:\tlearn: 17769759.3423197\ttotal: 3.22s\tremaining: 10.7s\n", "232:\tlearn: 17769498.1846872\ttotal: 3.23s\tremaining: 10.6s\n", "233:\tlearn: 17769106.6516586\ttotal: 3.24s\tremaining: 10.6s\n", "234:\tlearn: 17765866.7512613\ttotal: 3.25s\tremaining: 10.6s\n", "235:\tlearn: 17763818.0836765\ttotal: 3.27s\tremaining: 10.6s\n", "236:\tlearn: 17761637.5687877\ttotal: 3.29s\tremaining: 10.6s\n", "237:\tlearn: 17755293.6166299\ttotal: 3.3s\tremaining: 10.6s\n", "238:\tlearn: 17749597.6285121\ttotal: 3.32s\tremaining: 10.6s\n", "239:\tlearn: 17731193.4780969\ttotal: 3.33s\tremaining: 10.6s\n", "240:\tlearn: 17730941.1840209\ttotal: 3.35s\tremaining: 
10.5s\n", "241:\tlearn: 17730651.4109866\ttotal: 3.36s\tremaining: 10.5s\n", "242:\tlearn: 17729951.1772204\ttotal: 3.38s\tremaining: 10.5s\n", "243:\tlearn: 17725674.6169533\ttotal: 3.39s\tremaining: 10.5s\n", "244:\tlearn: 17724397.3837970\ttotal: 3.41s\tremaining: 10.5s\n", "245:\tlearn: 17723085.9667878\ttotal: 3.42s\tremaining: 10.5s\n", "246:\tlearn: 17716068.0643361\ttotal: 3.44s\tremaining: 10.5s\n", "247:\tlearn: 17685621.7941613\ttotal: 3.45s\tremaining: 10.5s\n", "248:\tlearn: 17684272.6716694\ttotal: 3.46s\tremaining: 10.4s\n", "249:\tlearn: 17683390.0888279\ttotal: 3.48s\tremaining: 10.4s\n", "250:\tlearn: 17683052.4845925\ttotal: 3.49s\tremaining: 10.4s\n", "251:\tlearn: 17678624.0868252\ttotal: 3.51s\tremaining: 10.4s\n", "252:\tlearn: 17665657.9640584\ttotal: 3.52s\tremaining: 10.4s\n", "253:\tlearn: 17664624.5487132\ttotal: 3.54s\tremaining: 10.4s\n", "254:\tlearn: 17663925.0646167\ttotal: 3.55s\tremaining: 10.4s\n", "255:\tlearn: 17653813.6196925\ttotal: 3.56s\tremaining: 10.4s\n", "256:\tlearn: 17636698.5157040\ttotal: 3.58s\tremaining: 10.3s\n", "257:\tlearn: 17634671.9750893\ttotal: 3.59s\tremaining: 10.3s\n", "258:\tlearn: 17633930.6422340\ttotal: 3.61s\tremaining: 10.3s\n", "259:\tlearn: 17633026.0861171\ttotal: 3.62s\tremaining: 10.3s\n", "260:\tlearn: 17632489.1254856\ttotal: 3.63s\tremaining: 10.3s\n", "261:\tlearn: 17628474.9187765\ttotal: 3.65s\tremaining: 10.3s\n", "262:\tlearn: 17627320.9817928\ttotal: 3.66s\tremaining: 10.3s\n", "263:\tlearn: 17626116.4772868\ttotal: 3.67s\tremaining: 10.2s\n", "264:\tlearn: 17623329.0754817\ttotal: 3.69s\tremaining: 10.2s\n", "265:\tlearn: 17622243.1901613\ttotal: 3.7s\tremaining: 10.2s\n", "266:\tlearn: 17550321.8250878\ttotal: 3.71s\tremaining: 10.2s\n", "267:\tlearn: 17549755.3651767\ttotal: 3.73s\tremaining: 10.2s\n", "268:\tlearn: 17545607.1212430\ttotal: 3.74s\tremaining: 10.2s\n", "269:\tlearn: 17541242.2629221\ttotal: 3.75s\tremaining: 10.1s\n", "270:\tlearn: 17499407.7313592\ttotal: 
3.76s\tremaining: 10.1s\n", "271:\tlearn: 17499145.8282321\ttotal: 3.77s\tremaining: 10.1s\n", "272:\tlearn: 17498934.5535116\ttotal: 3.78s\tremaining: 10.1s\n", "273:\tlearn: 17498347.2546318\ttotal: 3.79s\tremaining: 10.1s\n", "274:\tlearn: 17498149.7061684\ttotal: 3.8s\tremaining: 10s\n", "275:\tlearn: 17497860.3337909\ttotal: 3.81s\tremaining: 10s\n", "276:\tlearn: 17497134.2565818\ttotal: 3.83s\tremaining: 9.98s\n", "277:\tlearn: 17496943.1446578\ttotal: 3.84s\tremaining: 9.96s\n", "278:\tlearn: 17495461.7397646\ttotal: 3.85s\tremaining: 9.95s\n", "279:\tlearn: 17492860.8467310\ttotal: 3.86s\tremaining: 9.94s\n", "280:\tlearn: 17492256.7750564\ttotal: 3.88s\tremaining: 9.92s\n", "281:\tlearn: 17491315.8920024\ttotal: 3.89s\tremaining: 9.91s\n", "282:\tlearn: 17488802.8492737\ttotal: 3.9s\tremaining: 9.89s\n", "283:\tlearn: 17479802.6541152\ttotal: 3.92s\tremaining: 9.87s\n", "284:\tlearn: 17477169.5331720\ttotal: 3.93s\tremaining: 9.86s\n", "285:\tlearn: 17474743.6190942\ttotal: 3.94s\tremaining: 9.84s\n", "286:\tlearn: 17468342.7955232\ttotal: 3.96s\tremaining: 9.83s\n", "287:\tlearn: 17467579.9985437\ttotal: 3.97s\tremaining: 9.82s\n", "288:\tlearn: 17467009.9684055\ttotal: 3.98s\tremaining: 9.8s\n", "289:\tlearn: 17464125.0260113\ttotal: 4s\tremaining: 9.79s\n", "290:\tlearn: 17463508.0564477\ttotal: 4.01s\tremaining: 9.77s\n", "291:\tlearn: 17453183.2620432\ttotal: 4.02s\tremaining: 9.75s\n", "292:\tlearn: 17452971.0671546\ttotal: 4.03s\tremaining: 9.73s\n", "293:\tlearn: 17452198.5884342\ttotal: 4.05s\tremaining: 9.72s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "294:\tlearn: 17450925.6159031\ttotal: 4.06s\tremaining: 9.7s\n", "295:\tlearn: 17450685.1155343\ttotal: 4.07s\tremaining: 9.68s\n", "296:\tlearn: 17447975.7379237\ttotal: 4.08s\tremaining: 9.66s\n", "297:\tlearn: 17446417.7251561\ttotal: 4.1s\tremaining: 9.65s\n", "298:\tlearn: 17446166.7629704\ttotal: 4.11s\tremaining: 9.63s\n", "299:\tlearn: 17445963.1442260\ttotal: 
4.12s\tremaining: 9.61s\n", "300:\tlearn: 17445745.7958927\ttotal: 4.13s\tremaining: 9.58s\n", "301:\tlearn: 17444963.9290154\ttotal: 4.14s\tremaining: 9.57s\n", "302:\tlearn: 17432650.1591210\ttotal: 4.15s\tremaining: 9.55s\n", "303:\tlearn: 17430525.1210288\ttotal: 4.17s\tremaining: 9.54s\n", "304:\tlearn: 17418414.4601453\ttotal: 4.18s\tremaining: 9.52s\n", "305:\tlearn: 17417977.4735651\ttotal: 4.19s\tremaining: 9.5s\n", "306:\tlearn: 17335624.2943914\ttotal: 4.2s\tremaining: 9.49s\n", "307:\tlearn: 17323558.9233681\ttotal: 4.21s\tremaining: 9.47s\n", "308:\tlearn: 17323047.3527617\ttotal: 4.22s\tremaining: 9.45s\n", "309:\tlearn: 17322403.3488620\ttotal: 4.24s\tremaining: 9.43s\n", "310:\tlearn: 17322187.6973801\ttotal: 4.25s\tremaining: 9.41s\n", "311:\tlearn: 17320898.8497406\ttotal: 4.26s\tremaining: 9.4s\n", "312:\tlearn: 17312668.7000429\ttotal: 4.27s\tremaining: 9.38s\n", "313:\tlearn: 17299277.5985403\ttotal: 4.29s\tremaining: 9.36s\n", "314:\tlearn: 17298175.9786240\ttotal: 4.3s\tremaining: 9.35s\n", "315:\tlearn: 17296005.0430765\ttotal: 4.31s\tremaining: 9.33s\n", "316:\tlearn: 17295834.3986842\ttotal: 4.33s\tremaining: 9.32s\n", "317:\tlearn: 17295646.8271436\ttotal: 4.33s\tremaining: 9.3s\n", "318:\tlearn: 17295412.2240763\ttotal: 4.35s\tremaining: 9.28s\n", "319:\tlearn: 17295269.3891063\ttotal: 4.36s\tremaining: 9.26s\n", "320:\tlearn: 17294720.1427139\ttotal: 4.37s\tremaining: 9.24s\n", "321:\tlearn: 17280405.8179874\ttotal: 4.38s\tremaining: 9.22s\n", "322:\tlearn: 17279788.6705542\ttotal: 4.39s\tremaining: 9.2s\n", "323:\tlearn: 17259578.2219214\ttotal: 4.4s\tremaining: 9.19s\n", "324:\tlearn: 17258995.8851109\ttotal: 4.41s\tremaining: 9.16s\n", "325:\tlearn: 17256802.0040208\ttotal: 4.42s\tremaining: 9.15s\n", "326:\tlearn: 17245667.9352932\ttotal: 4.44s\tremaining: 9.13s\n", "327:\tlearn: 17245157.2383849\ttotal: 4.45s\tremaining: 9.12s\n", "328:\tlearn: 17244420.0505767\ttotal: 4.46s\tremaining: 9.11s\n", "329:\tlearn: 
17240620.9311856\ttotal: 4.48s\tremaining: 9.09s\n", "330:\tlearn: 17240126.6382259\ttotal: 4.49s\tremaining: 9.07s\n", "331:\tlearn: 17239554.3263042\ttotal: 4.5s\tremaining: 9.06s\n", "332:\tlearn: 17239249.4122676\ttotal: 4.52s\tremaining: 9.05s\n", "333:\tlearn: 17237315.5959603\ttotal: 4.53s\tremaining: 9.04s\n", "334:\tlearn: 17237170.4183008\ttotal: 4.54s\tremaining: 9.02s\n", "335:\tlearn: 17235498.1709182\ttotal: 4.55s\tremaining: 9s\n", "336:\tlearn: 17154286.9322136\ttotal: 4.57s\tremaining: 8.98s\n", "337:\tlearn: 17152860.5403583\ttotal: 4.58s\tremaining: 8.96s\n", "338:\tlearn: 17139897.5803445\ttotal: 4.59s\tremaining: 8.95s\n", "339:\tlearn: 17139685.6194353\ttotal: 4.6s\tremaining: 8.93s\n", "340:\tlearn: 17129406.8909698\ttotal: 4.61s\tremaining: 8.92s\n", "341:\tlearn: 17126386.5318429\ttotal: 4.63s\tremaining: 8.9s\n", "342:\tlearn: 17125338.5826429\ttotal: 4.64s\tremaining: 8.89s\n", "343:\tlearn: 17124937.1764028\ttotal: 4.65s\tremaining: 8.87s\n", "344:\tlearn: 17124773.5128614\ttotal: 4.66s\tremaining: 8.85s\n", "345:\tlearn: 17123822.0085471\ttotal: 4.67s\tremaining: 8.84s\n", "346:\tlearn: 17122604.8415169\ttotal: 4.68s\tremaining: 8.82s\n", "347:\tlearn: 17121767.5370013\ttotal: 4.7s\tremaining: 8.8s\n", "348:\tlearn: 17109471.1428348\ttotal: 4.71s\tremaining: 8.79s\n", "349:\tlearn: 17092688.7777393\ttotal: 4.73s\tremaining: 8.78s\n", "350:\tlearn: 17081854.5539987\ttotal: 4.74s\tremaining: 8.76s\n", "351:\tlearn: 17081117.2220910\ttotal: 4.75s\tremaining: 8.75s\n", "352:\tlearn: 17079431.1991192\ttotal: 4.76s\tremaining: 8.73s\n", "353:\tlearn: 17065749.4676464\ttotal: 4.78s\tremaining: 8.72s\n", "354:\tlearn: 17050839.2238400\ttotal: 4.79s\tremaining: 8.7s\n", "355:\tlearn: 17050106.8831270\ttotal: 4.8s\tremaining: 8.69s\n", "356:\tlearn: 17046033.2332065\ttotal: 4.82s\tremaining: 8.67s\n", "357:\tlearn: 17043704.2415802\ttotal: 4.83s\tremaining: 8.66s\n", "358:\tlearn: 17034226.2631681\ttotal: 4.84s\tremaining: 8.64s\n", "359:\tlearn: 
17019515.6806659\ttotal: 4.85s\tremaining: 8.63s\n", "360:\tlearn: 17018472.9763746\ttotal: 4.87s\tremaining: 8.61s\n", "361:\tlearn: 17017909.7121151\ttotal: 4.88s\tremaining: 8.6s\n", "362:\tlearn: 17017463.3942640\ttotal: 4.89s\tremaining: 8.58s\n", "363:\tlearn: 17016467.4317116\ttotal: 4.9s\tremaining: 8.56s\n", "364:\tlearn: 17016320.3746025\ttotal: 4.91s\tremaining: 8.55s\n", "365:\tlearn: 17014043.0108512\ttotal: 4.93s\tremaining: 8.53s\n", "366:\tlearn: 17013536.3710672\ttotal: 4.94s\tremaining: 8.51s\n", "367:\tlearn: 17011993.2014165\ttotal: 4.95s\tremaining: 8.5s\n", "368:\tlearn: 17011849.5641841\ttotal: 4.96s\tremaining: 8.48s\n", "369:\tlearn: 17011403.7126883\ttotal: 4.97s\tremaining: 8.46s\n", "370:\tlearn: 17009763.5741945\ttotal: 4.98s\tremaining: 8.45s\n", "371:\tlearn: 17009382.7519630\ttotal: 4.99s\tremaining: 8.43s\n", "372:\tlearn: 17008464.7915054\ttotal: 5s\tremaining: 8.41s\n", "373:\tlearn: 17008143.8161261\ttotal: 5.01s\tremaining: 8.39s\n", "374:\tlearn: 16996814.2215431\ttotal: 5.03s\tremaining: 8.38s\n", "375:\tlearn: 16996377.3351825\ttotal: 5.04s\tremaining: 8.36s\n", "376:\tlearn: 16996037.5806770\ttotal: 5.05s\tremaining: 8.34s\n", "377:\tlearn: 16991953.6478199\ttotal: 5.06s\tremaining: 8.33s\n", "378:\tlearn: 16961328.6727692\ttotal: 5.08s\tremaining: 8.32s\n", "379:\tlearn: 16957664.4831621\ttotal: 5.09s\tremaining: 8.3s\n", "380:\tlearn: 16956856.4526881\ttotal: 5.1s\tremaining: 8.29s\n", "381:\tlearn: 16947754.5891887\ttotal: 5.12s\tremaining: 8.28s\n", "382:\tlearn: 16937471.3061729\ttotal: 5.13s\tremaining: 8.26s\n", "383:\tlearn: 16910717.2697228\ttotal: 5.14s\tremaining: 8.25s\n", "384:\tlearn: 16883021.8749316\ttotal: 5.15s\tremaining: 8.23s\n", "385:\tlearn: 16874077.6620256\ttotal: 5.17s\tremaining: 8.22s\n", "386:\tlearn: 16859663.0508862\ttotal: 5.18s\tremaining: 8.2s\n", "387:\tlearn: 16843794.6984628\ttotal: 5.19s\tremaining: 8.19s\n", "388:\tlearn: 16843670.2191430\ttotal: 5.2s\tremaining: 8.17s\n", "389:\tlearn: 
16833049.2556840\ttotal: 5.21s\tremaining: 8.15s\n", "390:\tlearn: 16821522.4443567\ttotal: 5.23s\tremaining: 8.14s\n", "391:\tlearn: 16818181.1766856\ttotal: 5.24s\tremaining: 8.13s\n", "392:\tlearn: 16817749.5049150\ttotal: 5.25s\tremaining: 8.12s\n", "393:\tlearn: 16817402.3614282\ttotal: 5.27s\tremaining: 8.1s\n", "394:\tlearn: 16815679.7151727\ttotal: 5.28s\tremaining: 8.09s\n", "395:\tlearn: 16810641.8717564\ttotal: 5.29s\tremaining: 8.07s\n", "396:\tlearn: 16810291.1871768\ttotal: 5.3s\tremaining: 8.06s\n", "397:\tlearn: 16808056.2422004\ttotal: 5.32s\tremaining: 8.04s\n", "398:\tlearn: 16807804.2454334\ttotal: 5.33s\tremaining: 8.03s\n", "399:\tlearn: 16799998.1957230\ttotal: 5.34s\tremaining: 8.02s\n", "400:\tlearn: 16799220.2656080\ttotal: 5.36s\tremaining: 8s\n", "401:\tlearn: 16798913.0252067\ttotal: 5.37s\tremaining: 7.98s\n", "402:\tlearn: 16798319.2545577\ttotal: 5.38s\tremaining: 7.97s\n", "403:\tlearn: 16796848.5752647\ttotal: 5.39s\tremaining: 7.95s\n", "404:\tlearn: 16757656.8985529\ttotal: 5.4s\tremaining: 7.93s\n", "405:\tlearn: 16745513.4381725\ttotal: 5.41s\tremaining: 7.92s\n", "406:\tlearn: 16735416.8114581\ttotal: 5.43s\tremaining: 7.91s\n", "407:\tlearn: 16734295.1424370\ttotal: 5.44s\tremaining: 7.89s\n", "408:\tlearn: 16733140.3781664\ttotal: 5.45s\tremaining: 7.88s\n", "409:\tlearn: 16723800.8980695\ttotal: 5.46s\tremaining: 7.86s\n", "410:\tlearn: 16721200.9625357\ttotal: 5.48s\tremaining: 7.85s\n", "411:\tlearn: 16720027.8472987\ttotal: 5.49s\tremaining: 7.83s\n", "412:\tlearn: 16717199.5760035\ttotal: 5.5s\tremaining: 7.82s\n", "413:\tlearn: 16713362.4492616\ttotal: 5.52s\tremaining: 7.81s\n", "414:\tlearn: 16712806.0473182\ttotal: 5.53s\tremaining: 7.8s\n", "415:\tlearn: 16711241.9902750\ttotal: 5.55s\tremaining: 7.79s\n", "416:\tlearn: 16710626.7325455\ttotal: 5.56s\tremaining: 7.77s\n", "417:\tlearn: 16644768.4542531\ttotal: 5.57s\tremaining: 7.75s\n", "418:\tlearn: 16644403.8081224\ttotal: 5.58s\tremaining: 7.74s\n", 
"419:\tlearn: 16644106.9601552\ttotal: 5.59s\tremaining: 7.72s\n", "420:\tlearn: 16643628.6346956\ttotal: 5.6s\tremaining: 7.71s\n", "421:\tlearn: 16640073.3813320\ttotal: 5.62s\tremaining: 7.69s\n", "422:\tlearn: 16639549.7950808\ttotal: 5.63s\tremaining: 7.68s\n", "423:\tlearn: 16639069.1006878\ttotal: 5.64s\tremaining: 7.66s\n", "424:\tlearn: 16638481.2382327\ttotal: 5.65s\tremaining: 7.65s\n", "425:\tlearn: 16638208.9073863\ttotal: 5.66s\tremaining: 7.63s\n", "426:\tlearn: 16609090.9227109\ttotal: 5.67s\tremaining: 7.61s\n", "427:\tlearn: 16607897.8537223\ttotal: 5.69s\tremaining: 7.6s\n", "428:\tlearn: 16607613.0069443\ttotal: 5.7s\tremaining: 7.58s\n", "429:\tlearn: 16603866.7848843\ttotal: 5.71s\tremaining: 7.57s\n", "430:\tlearn: 16566652.4020620\ttotal: 5.72s\tremaining: 7.55s\n", "431:\tlearn: 16566149.6048169\ttotal: 5.74s\tremaining: 7.54s\n", "432:\tlearn: 16564672.1011733\ttotal: 5.75s\tremaining: 7.53s\n", "433:\tlearn: 16564610.7741058\ttotal: 5.76s\tremaining: 7.51s\n", "434:\tlearn: 16564198.8911273\ttotal: 5.77s\tremaining: 7.49s\n", "435:\tlearn: 16559675.2968062\ttotal: 5.78s\tremaining: 7.48s\n", "436:\tlearn: 16558753.2346339\ttotal: 5.79s\tremaining: 7.46s\n", "437:\tlearn: 16558452.1907641\ttotal: 5.8s\tremaining: 7.45s\n", "438:\tlearn: 16546587.2383006\ttotal: 5.82s\tremaining: 7.43s\n", "439:\tlearn: 16543823.0847287\ttotal: 5.83s\tremaining: 7.42s\n", "440:\tlearn: 16542126.8424469\ttotal: 5.84s\tremaining: 7.4s\n", "441:\tlearn: 16541624.1632076\ttotal: 5.85s\tremaining: 7.39s\n", "442:\tlearn: 16540326.5322872\ttotal: 5.86s\tremaining: 7.37s\n", "443:\tlearn: 16530336.2084291\ttotal: 5.88s\tremaining: 7.36s\n", "444:\tlearn: 16530167.9665629\ttotal: 5.89s\tremaining: 7.34s\n", "445:\tlearn: 16528821.2477933\ttotal: 5.9s\tremaining: 7.33s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "446:\tlearn: 16528766.2012617\ttotal: 5.91s\tremaining: 7.31s\n", "447:\tlearn: 16518018.7193100\ttotal: 5.92s\tremaining: 7.3s\n", 
"448:\tlearn: 16508723.6897544\ttotal: 5.94s\tremaining: 7.29s\n", "449:\tlearn: 16508487.2637814\ttotal: 5.95s\tremaining: 7.27s\n", "450:\tlearn: 16473955.6540161\ttotal: 5.96s\tremaining: 7.26s\n", "451:\tlearn: 16453172.0203944\ttotal: 5.98s\tremaining: 7.25s\n", "452:\tlearn: 16451483.6324413\ttotal: 5.99s\tremaining: 7.23s\n", "453:\tlearn: 16451257.8036014\ttotal: 6s\tremaining: 7.22s\n", "454:\tlearn: 16448369.9508352\ttotal: 6.02s\tremaining: 7.21s\n", "455:\tlearn: 16446719.1385193\ttotal: 6.03s\tremaining: 7.19s\n", "456:\tlearn: 16420736.1369659\ttotal: 6.04s\tremaining: 7.18s\n", "457:\tlearn: 16420629.2824606\ttotal: 6.05s\tremaining: 7.16s\n", "458:\tlearn: 16420336.6729748\ttotal: 6.07s\tremaining: 7.15s\n", "459:\tlearn: 16420155.4584530\ttotal: 6.08s\tremaining: 7.13s\n", "460:\tlearn: 16419734.8233202\ttotal: 6.09s\tremaining: 7.12s\n", "461:\tlearn: 16419517.6225944\ttotal: 6.1s\tremaining: 7.1s\n", "462:\tlearn: 16406145.7183320\ttotal: 6.12s\tremaining: 7.09s\n", "463:\tlearn: 16404609.0651931\ttotal: 6.13s\tremaining: 7.08s\n", "464:\tlearn: 16404332.0732862\ttotal: 6.14s\tremaining: 7.07s\n", "465:\tlearn: 16404019.7507952\ttotal: 6.16s\tremaining: 7.05s\n", "466:\tlearn: 16403507.0137349\ttotal: 6.17s\tremaining: 7.04s\n", "467:\tlearn: 16402993.5886996\ttotal: 6.18s\tremaining: 7.03s\n", "468:\tlearn: 16385955.8460101\ttotal: 6.19s\tremaining: 7.01s\n", "469:\tlearn: 16373237.2004642\ttotal: 6.2s\tremaining: 7s\n", "470:\tlearn: 16373038.3665164\ttotal: 6.21s\tremaining: 6.98s\n", "471:\tlearn: 16372801.5860356\ttotal: 6.23s\tremaining: 6.97s\n", "472:\tlearn: 16360759.6605520\ttotal: 6.24s\tremaining: 6.95s\n", "473:\tlearn: 16360169.9657388\ttotal: 6.25s\tremaining: 6.94s\n", "474:\tlearn: 16351841.0373273\ttotal: 6.26s\tremaining: 6.92s\n", "475:\tlearn: 16349809.4004009\ttotal: 6.28s\tremaining: 6.91s\n", "476:\tlearn: 16344483.1074475\ttotal: 6.3s\tremaining: 6.9s\n", "477:\tlearn: 16340922.7262468\ttotal: 6.31s\tremaining: 6.89s\n", 
"478:\tlearn: 16334736.4373107\ttotal: 6.32s\tremaining: 6.88s\n", "479:\tlearn: 16334043.7402281\ttotal: 6.33s\tremaining: 6.86s\n", "480:\tlearn: 16333745.0129155\ttotal: 6.35s\tremaining: 6.85s\n", "481:\tlearn: 16332170.0024156\ttotal: 6.36s\tremaining: 6.83s\n", "482:\tlearn: 16331680.4256261\ttotal: 6.37s\tremaining: 6.82s\n", "483:\tlearn: 16321943.5880137\ttotal: 6.38s\tremaining: 6.81s\n", "484:\tlearn: 16313566.1128530\ttotal: 6.4s\tremaining: 6.79s\n", "485:\tlearn: 16312784.3783495\ttotal: 6.41s\tremaining: 6.78s\n", "486:\tlearn: 16304256.8971602\ttotal: 6.42s\tremaining: 6.76s\n", "487:\tlearn: 16299338.9360929\ttotal: 6.43s\tremaining: 6.75s\n", "488:\tlearn: 16298399.2768748\ttotal: 6.45s\tremaining: 6.74s\n", "489:\tlearn: 16282861.5959599\ttotal: 6.46s\tremaining: 6.72s\n", "490:\tlearn: 16278027.7798172\ttotal: 6.47s\tremaining: 6.71s\n", "491:\tlearn: 16262455.7433251\ttotal: 6.49s\tremaining: 6.7s\n", "492:\tlearn: 16254609.6670435\ttotal: 6.5s\tremaining: 6.68s\n", "493:\tlearn: 16250306.9197526\ttotal: 6.51s\tremaining: 6.67s\n", "494:\tlearn: 16249855.9315045\ttotal: 6.53s\tremaining: 6.66s\n", "495:\tlearn: 16248555.7562997\ttotal: 6.54s\tremaining: 6.64s\n", "496:\tlearn: 16247555.1566330\ttotal: 6.55s\tremaining: 6.63s\n", "497:\tlearn: 16247235.5993966\ttotal: 6.56s\tremaining: 6.62s\n", "498:\tlearn: 16246264.7483105\ttotal: 6.57s\tremaining: 6.6s\n", "499:\tlearn: 16246007.7491962\ttotal: 6.58s\tremaining: 6.58s\n", "500:\tlearn: 16222867.6954421\ttotal: 6.59s\tremaining: 6.57s\n", "501:\tlearn: 16222688.8853061\ttotal: 6.61s\tremaining: 6.55s\n", "502:\tlearn: 16217885.3385915\ttotal: 6.62s\tremaining: 6.54s\n", "503:\tlearn: 16217409.1580145\ttotal: 6.63s\tremaining: 6.52s\n", "504:\tlearn: 16216838.3191240\ttotal: 6.64s\tremaining: 6.51s\n", "505:\tlearn: 16216329.9777509\ttotal: 6.65s\tremaining: 6.5s\n", "506:\tlearn: 16201534.4156055\ttotal: 6.67s\tremaining: 6.48s\n", "507:\tlearn: 16198138.1904772\ttotal: 6.68s\tremaining: 
6.47s\n", "508:\tlearn: 16197904.2583705\ttotal: 6.69s\tremaining: 6.45s\n", "509:\tlearn: 16193656.6407621\ttotal: 6.7s\tremaining: 6.44s\n", "510:\tlearn: 16180805.8618897\ttotal: 6.71s\tremaining: 6.42s\n", "511:\tlearn: 16176908.1769610\ttotal: 6.73s\tremaining: 6.41s\n", "512:\tlearn: 16168261.0438871\ttotal: 6.74s\tremaining: 6.4s\n", "513:\tlearn: 16167754.4165306\ttotal: 6.75s\tremaining: 6.38s\n", "514:\tlearn: 16166295.0362243\ttotal: 6.77s\tremaining: 6.37s\n", "515:\tlearn: 16166058.4053693\ttotal: 6.78s\tremaining: 6.36s\n", "516:\tlearn: 16155412.7707338\ttotal: 6.79s\tremaining: 6.34s\n", "517:\tlearn: 16152266.1742558\ttotal: 6.8s\tremaining: 6.33s\n", "518:\tlearn: 16151552.8907870\ttotal: 6.82s\tremaining: 6.32s\n", "519:\tlearn: 16140281.4351978\ttotal: 6.83s\tremaining: 6.3s\n", "520:\tlearn: 16133450.4403783\ttotal: 6.84s\tremaining: 6.29s\n", "521:\tlearn: 16132209.1334220\ttotal: 6.85s\tremaining: 6.27s\n", "522:\tlearn: 16118104.6552795\ttotal: 6.86s\tremaining: 6.26s\n", "523:\tlearn: 16108764.2393062\ttotal: 6.87s\tremaining: 6.25s\n", "524:\tlearn: 16108234.0634605\ttotal: 6.89s\tremaining: 6.23s\n", "525:\tlearn: 16107619.6760099\ttotal: 6.9s\tremaining: 6.22s\n", "526:\tlearn: 16104870.7442280\ttotal: 6.91s\tremaining: 6.2s\n", "527:\tlearn: 16102428.3934069\ttotal: 6.92s\tremaining: 6.19s\n", "528:\tlearn: 16102157.2857565\ttotal: 6.93s\tremaining: 6.17s\n", "529:\tlearn: 16101584.7403855\ttotal: 6.95s\tremaining: 6.16s\n", "530:\tlearn: 16101480.3344969\ttotal: 6.96s\tremaining: 6.15s\n", "531:\tlearn: 16100595.6548675\ttotal: 6.97s\tremaining: 6.13s\n", "532:\tlearn: 16097511.0825233\ttotal: 6.99s\tremaining: 6.12s\n", "533:\tlearn: 16096615.9743637\ttotal: 7s\tremaining: 6.11s\n", "534:\tlearn: 16096369.6922988\ttotal: 7.01s\tremaining: 6.09s\n", "535:\tlearn: 16095946.1647864\ttotal: 7.02s\tremaining: 6.08s\n", "536:\tlearn: 16095637.6185090\ttotal: 7.03s\tremaining: 6.06s\n", "537:\tlearn: 16094682.0243853\ttotal: 
7.04s\tremaining: 6.05s\n", "538:\tlearn: 16094291.9050311\ttotal: 7.05s\tremaining: 6.03s\n", "539:\tlearn: 16093984.5280001\ttotal: 7.07s\tremaining: 6.02s\n", "540:\tlearn: 16090374.6401334\ttotal: 7.08s\tremaining: 6.01s\n", "541:\tlearn: 16090226.8772271\ttotal: 7.09s\tremaining: 5.99s\n", "542:\tlearn: 16090050.1805201\ttotal: 7.1s\tremaining: 5.98s\n", "543:\tlearn: 16069181.1048944\ttotal: 7.12s\tremaining: 5.96s\n", "544:\tlearn: 16068504.9399291\ttotal: 7.13s\tremaining: 5.95s\n", "545:\tlearn: 16068245.3744393\ttotal: 7.14s\tremaining: 5.94s\n", "546:\tlearn: 16065773.4114093\ttotal: 7.15s\tremaining: 5.92s\n", "547:\tlearn: 16051662.5046318\ttotal: 7.17s\tremaining: 5.91s\n", "548:\tlearn: 16035327.2446945\ttotal: 7.18s\tremaining: 5.9s\n", "549:\tlearn: 16035199.2858857\ttotal: 7.2s\tremaining: 5.89s\n", "550:\tlearn: 16033842.9666151\ttotal: 7.21s\tremaining: 5.88s\n", "551:\tlearn: 15995073.4381976\ttotal: 7.22s\tremaining: 5.86s\n", "552:\tlearn: 15994812.5505379\ttotal: 7.23s\tremaining: 5.85s\n", "553:\tlearn: 15994595.9921031\ttotal: 7.24s\tremaining: 5.83s\n", "554:\tlearn: 15992248.3834318\ttotal: 7.26s\tremaining: 5.82s\n", "555:\tlearn: 15992027.4484601\ttotal: 7.27s\tremaining: 5.8s\n", "556:\tlearn: 15990566.0719983\ttotal: 7.28s\tremaining: 5.79s\n", "557:\tlearn: 15985609.0920187\ttotal: 7.29s\tremaining: 5.78s\n", "558:\tlearn: 15984517.8156083\ttotal: 7.3s\tremaining: 5.76s\n", "559:\tlearn: 15958775.9803743\ttotal: 7.32s\tremaining: 5.75s\n", "560:\tlearn: 15958166.8639855\ttotal: 7.33s\tremaining: 5.73s\n", "561:\tlearn: 15949224.5334582\ttotal: 7.34s\tremaining: 5.72s\n", "562:\tlearn: 15948769.9101270\ttotal: 7.35s\tremaining: 5.71s\n", "563:\tlearn: 15930009.9576761\ttotal: 7.36s\tremaining: 5.69s\n", "564:\tlearn: 15917439.6202170\ttotal: 7.38s\tremaining: 5.68s\n", "565:\tlearn: 15908669.4567536\ttotal: 7.39s\tremaining: 5.67s\n", "566:\tlearn: 15908084.2939630\ttotal: 7.4s\tremaining: 5.65s\n", "567:\tlearn: 
15906697.1590494\ttotal: 7.41s\tremaining: 5.64s\n", "568:\tlearn: 15906522.4609846\ttotal: 7.43s\tremaining: 5.63s\n", "569:\tlearn: 15906139.9138507\ttotal: 7.44s\tremaining: 5.61s\n", "570:\tlearn: 15905855.0642382\ttotal: 7.45s\tremaining: 5.6s\n", "571:\tlearn: 15897372.3501416\ttotal: 7.46s\tremaining: 5.58s\n", "572:\tlearn: 15893536.4661240\ttotal: 7.47s\tremaining: 5.57s\n", "573:\tlearn: 15893206.2810918\ttotal: 7.49s\tremaining: 5.56s\n", "574:\tlearn: 15892918.2703602\ttotal: 7.5s\tremaining: 5.54s\n", "575:\tlearn: 15892752.8029869\ttotal: 7.51s\tremaining: 5.53s\n", "576:\tlearn: 15885169.7413434\ttotal: 7.52s\tremaining: 5.51s\n", "577:\tlearn: 15884936.8745209\ttotal: 7.53s\tremaining: 5.5s\n", "578:\tlearn: 15876877.1991641\ttotal: 7.54s\tremaining: 5.49s\n", "579:\tlearn: 15865774.4061534\ttotal: 7.56s\tremaining: 5.47s\n", "580:\tlearn: 15859212.9207966\ttotal: 7.57s\tremaining: 5.46s\n", "581:\tlearn: 15858807.6511813\ttotal: 7.58s\tremaining: 5.45s\n", "582:\tlearn: 15850129.9468116\ttotal: 7.6s\tremaining: 5.43s\n", "583:\tlearn: 15845554.5689368\ttotal: 7.61s\tremaining: 5.42s\n", "584:\tlearn: 15844986.6765475\ttotal: 7.62s\tremaining: 5.41s\n", "585:\tlearn: 15844796.1180439\ttotal: 7.63s\tremaining: 5.39s\n", "586:\tlearn: 15844586.1630771\ttotal: 7.65s\tremaining: 5.38s\n", "587:\tlearn: 15827685.4584540\ttotal: 7.66s\tremaining: 5.37s\n", "588:\tlearn: 15826910.6044821\ttotal: 7.68s\tremaining: 5.36s\n", "589:\tlearn: 15824060.9875073\ttotal: 7.69s\tremaining: 5.34s\n", "590:\tlearn: 15818523.6912985\ttotal: 7.7s\tremaining: 5.33s\n", "591:\tlearn: 15810640.6921394\ttotal: 7.72s\tremaining: 5.32s\n", "592:\tlearn: 15795481.4197185\ttotal: 7.73s\tremaining: 5.3s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "593:\tlearn: 15795256.4491006\ttotal: 7.74s\tremaining: 5.29s\n", "594:\tlearn: 15784420.7363473\ttotal: 7.76s\tremaining: 5.28s\n", "595:\tlearn: 15784290.1819258\ttotal: 7.78s\tremaining: 5.27s\n", "596:\tlearn: 
15783955.0773924\ttotal: 7.79s\tremaining: 5.26s\n", "597:\tlearn: 15781518.5372107\ttotal: 7.8s\tremaining: 5.25s\n", "598:\tlearn: 15779547.4210947\ttotal: 7.82s\tremaining: 5.24s\n", "599:\tlearn: 15777334.3663340\ttotal: 7.83s\tremaining: 5.22s\n", "600:\tlearn: 15774774.7721883\ttotal: 7.85s\tremaining: 5.21s\n", "601:\tlearn: 15774672.2356339\ttotal: 7.86s\tremaining: 5.2s\n", "602:\tlearn: 15773528.0736833\ttotal: 7.87s\tremaining: 5.18s\n", "603:\tlearn: 15768721.3649454\ttotal: 7.89s\tremaining: 5.17s\n", "604:\tlearn: 15768502.0877019\ttotal: 7.9s\tremaining: 5.16s\n", "605:\tlearn: 15768057.4929247\ttotal: 7.92s\tremaining: 5.15s\n", "606:\tlearn: 15767950.4285043\ttotal: 7.93s\tremaining: 5.14s\n", "607:\tlearn: 15767445.1324607\ttotal: 7.95s\tremaining: 5.12s\n", "608:\tlearn: 15767269.0628064\ttotal: 7.96s\tremaining: 5.11s\n", "609:\tlearn: 15767020.8174624\ttotal: 7.97s\tremaining: 5.1s\n", "610:\tlearn: 15762309.5160245\ttotal: 7.99s\tremaining: 5.09s\n", "611:\tlearn: 15757527.7718093\ttotal: 8s\tremaining: 5.07s\n", "612:\tlearn: 15757150.7731734\ttotal: 8.02s\tremaining: 5.06s\n", "613:\tlearn: 15756885.6252756\ttotal: 8.03s\tremaining: 5.05s\n", "614:\tlearn: 15755584.6816303\ttotal: 8.05s\tremaining: 5.04s\n", "615:\tlearn: 15755485.6737331\ttotal: 8.06s\tremaining: 5.02s\n", "616:\tlearn: 15754432.0517599\ttotal: 8.07s\tremaining: 5.01s\n", "617:\tlearn: 15744535.8203508\ttotal: 8.09s\tremaining: 5s\n", "618:\tlearn: 15740683.5538600\ttotal: 8.1s\tremaining: 4.99s\n", "619:\tlearn: 15736903.5667213\ttotal: 8.12s\tremaining: 4.97s\n", "620:\tlearn: 15736355.4210963\ttotal: 8.13s\tremaining: 4.96s\n", "621:\tlearn: 15729940.4032081\ttotal: 8.15s\tremaining: 4.95s\n", "622:\tlearn: 15729775.7542976\ttotal: 8.17s\tremaining: 4.94s\n", "623:\tlearn: 15726578.4125003\ttotal: 8.18s\tremaining: 4.93s\n", "624:\tlearn: 15713451.5317183\ttotal: 8.2s\tremaining: 4.92s\n", "625:\tlearn: 15712116.7478338\ttotal: 8.22s\tremaining: 4.91s\n", "626:\tlearn: 
15712039.0336448\ttotal: 8.23s\tremaining: 4.9s\n", "627:\tlearn: 15711687.4136682\ttotal: 8.25s\tremaining: 4.89s\n", "628:\tlearn: 15711298.6681597\ttotal: 8.27s\tremaining: 4.88s\n", "629:\tlearn: 15705228.0021081\ttotal: 8.28s\tremaining: 4.86s\n", "630:\tlearn: 15705060.0247650\ttotal: 8.29s\tremaining: 4.85s\n", "631:\tlearn: 15702513.1910574\ttotal: 8.31s\tremaining: 4.84s\n", "632:\tlearn: 15702203.6145508\ttotal: 8.32s\tremaining: 4.82s\n", "633:\tlearn: 15698975.2951288\ttotal: 8.33s\tremaining: 4.81s\n", "634:\tlearn: 15694674.2341421\ttotal: 8.34s\tremaining: 4.8s\n", "635:\tlearn: 15686913.9104937\ttotal: 8.36s\tremaining: 4.78s\n", "636:\tlearn: 15686809.9586513\ttotal: 8.37s\tremaining: 4.77s\n", "637:\tlearn: 15685604.3787689\ttotal: 8.38s\tremaining: 4.76s\n", "638:\tlearn: 15685081.4917552\ttotal: 8.4s\tremaining: 4.74s\n", "639:\tlearn: 15676541.2826685\ttotal: 8.41s\tremaining: 4.73s\n", "640:\tlearn: 15672855.0180760\ttotal: 8.42s\tremaining: 4.72s\n", "641:\tlearn: 15666780.7593096\ttotal: 8.44s\tremaining: 4.7s\n", "642:\tlearn: 15659438.3508408\ttotal: 8.45s\tremaining: 4.69s\n", "643:\tlearn: 15653755.4598701\ttotal: 8.46s\tremaining: 4.68s\n", "644:\tlearn: 15652034.4638985\ttotal: 8.48s\tremaining: 4.67s\n", "645:\tlearn: 15645095.6489597\ttotal: 8.49s\tremaining: 4.65s\n", "646:\tlearn: 15641226.9420905\ttotal: 8.5s\tremaining: 4.64s\n", "647:\tlearn: 15639833.9184524\ttotal: 8.52s\tremaining: 4.63s\n", "648:\tlearn: 15639581.1651510\ttotal: 8.53s\tremaining: 4.61s\n", "649:\tlearn: 15635923.3848062\ttotal: 8.54s\tremaining: 4.6s\n", "650:\tlearn: 15635813.1152459\ttotal: 8.55s\tremaining: 4.58s\n", "651:\tlearn: 15635469.3555938\ttotal: 8.56s\tremaining: 4.57s\n", "652:\tlearn: 15635363.9174910\ttotal: 8.57s\tremaining: 4.56s\n", "653:\tlearn: 15633936.7433448\ttotal: 8.59s\tremaining: 4.54s\n", "654:\tlearn: 15633839.2271448\ttotal: 8.6s\tremaining: 4.53s\n", "655:\tlearn: 15633735.8610291\ttotal: 8.61s\tremaining: 4.52s\n", 
"656:\tlearn: 15633309.8063070\ttotal: 8.63s\tremaining: 4.5s\n", "657:\tlearn: 15632683.8986677\ttotal: 8.64s\tremaining: 4.49s\n", "658:\tlearn: 15632461.2639014\ttotal: 8.65s\tremaining: 4.48s\n", "659:\tlearn: 15627123.1765533\ttotal: 8.66s\tremaining: 4.46s\n", "660:\tlearn: 15626996.0787558\ttotal: 8.68s\tremaining: 4.45s\n", "661:\tlearn: 15624291.0204091\ttotal: 8.69s\tremaining: 4.44s\n", "662:\tlearn: 15617684.3098363\ttotal: 8.71s\tremaining: 4.42s\n", "663:\tlearn: 15611967.2176796\ttotal: 8.72s\tremaining: 4.41s\n", "664:\tlearn: 15598472.2546786\ttotal: 8.73s\tremaining: 4.4s\n", "665:\tlearn: 15597526.0470563\ttotal: 8.74s\tremaining: 4.38s\n", "666:\tlearn: 15597430.3920481\ttotal: 8.75s\tremaining: 4.37s\n", "667:\tlearn: 15596422.7059295\ttotal: 8.77s\tremaining: 4.36s\n", "668:\tlearn: 15591400.2242411\ttotal: 8.78s\tremaining: 4.34s\n", "669:\tlearn: 15585199.5277811\ttotal: 8.79s\tremaining: 4.33s\n", "670:\tlearn: 15585003.5063693\ttotal: 8.8s\tremaining: 4.32s\n", "671:\tlearn: 15578765.7193891\ttotal: 8.81s\tremaining: 4.3s\n", "672:\tlearn: 15577252.0151364\ttotal: 8.83s\tremaining: 4.29s\n", "673:\tlearn: 15576511.8797514\ttotal: 8.84s\tremaining: 4.28s\n", "674:\tlearn: 15576120.7606092\ttotal: 8.85s\tremaining: 4.26s\n", "675:\tlearn: 15574398.5273782\ttotal: 8.86s\tremaining: 4.25s\n", "676:\tlearn: 15565660.7493905\ttotal: 8.88s\tremaining: 4.24s\n", "677:\tlearn: 15561009.3437211\ttotal: 8.89s\tremaining: 4.22s\n", "678:\tlearn: 15548878.4770401\ttotal: 8.9s\tremaining: 4.21s\n", "679:\tlearn: 15527713.9632219\ttotal: 8.91s\tremaining: 4.19s\n", "680:\tlearn: 15519745.2151864\ttotal: 8.93s\tremaining: 4.18s\n", "681:\tlearn: 15519391.2760902\ttotal: 8.94s\tremaining: 4.17s\n", "682:\tlearn: 15514461.8611265\ttotal: 8.95s\tremaining: 4.15s\n", "683:\tlearn: 15514296.1001141\ttotal: 8.96s\tremaining: 4.14s\n", "684:\tlearn: 15514204.8658979\ttotal: 8.97s\tremaining: 4.13s\n", "685:\tlearn: 15513977.3554214\ttotal: 8.98s\tremaining: 
4.11s\n", "686:\tlearn: 15513906.5046745\ttotal: 9s\tremaining: 4.1s\n", "687:\tlearn: 15513701.8112778\ttotal: 9.01s\tremaining: 4.09s\n", "688:\tlearn: 15513602.4959013\ttotal: 9.02s\tremaining: 4.07s\n", "689:\tlearn: 15513510.7910896\ttotal: 9.03s\tremaining: 4.06s\n", "690:\tlearn: 15513352.2070048\ttotal: 9.04s\tremaining: 4.04s\n", "691:\tlearn: 15513238.8204588\ttotal: 9.05s\tremaining: 4.03s\n", "692:\tlearn: 15513154.0618557\ttotal: 9.06s\tremaining: 4.01s\n", "693:\tlearn: 15512878.9114412\ttotal: 9.07s\tremaining: 4s\n", "694:\tlearn: 15509248.2055515\ttotal: 9.09s\tremaining: 3.99s\n", "695:\tlearn: 15508734.7327170\ttotal: 9.1s\tremaining: 3.97s\n", "696:\tlearn: 15508495.7881550\ttotal: 9.11s\tremaining: 3.96s\n", "697:\tlearn: 15508349.6872134\ttotal: 9.12s\tremaining: 3.95s\n", "698:\tlearn: 15508190.6588965\ttotal: 9.13s\tremaining: 3.93s\n", "699:\tlearn: 15508018.8419773\ttotal: 9.14s\tremaining: 3.92s\n", "700:\tlearn: 15507826.3791202\ttotal: 9.16s\tremaining: 3.9s\n", "701:\tlearn: 15507705.8100928\ttotal: 9.17s\tremaining: 3.89s\n", "702:\tlearn: 15507533.3512682\ttotal: 9.18s\tremaining: 3.88s\n", "703:\tlearn: 15501571.2913355\ttotal: 9.19s\tremaining: 3.87s\n", "704:\tlearn: 15495921.0773672\ttotal: 9.21s\tremaining: 3.85s\n", "705:\tlearn: 15495385.0875416\ttotal: 9.22s\tremaining: 3.84s\n", "706:\tlearn: 15495191.8032918\ttotal: 9.23s\tremaining: 3.83s\n", "707:\tlearn: 15494128.9589635\ttotal: 9.24s\tremaining: 3.81s\n", "708:\tlearn: 15493806.9566177\ttotal: 9.26s\tremaining: 3.8s\n", "709:\tlearn: 15493694.0465547\ttotal: 9.27s\tremaining: 3.78s\n", "710:\tlearn: 15493305.1729869\ttotal: 9.28s\tremaining: 3.77s\n", "711:\tlearn: 15487948.0399475\ttotal: 9.3s\tremaining: 3.76s\n", "712:\tlearn: 15487843.0916850\ttotal: 9.31s\tremaining: 3.75s\n", "713:\tlearn: 15482765.7669785\ttotal: 9.32s\tremaining: 3.73s\n", "714:\tlearn: 15474767.3580796\ttotal: 9.33s\tremaining: 3.72s\n", "715:\tlearn: 15472407.8166003\ttotal: 9.35s\tremaining: 
3.71s\n", "716:\tlearn: 15467592.3874842\ttotal: 9.36s\tremaining: 3.69s\n", "717:\tlearn: 15467435.4901525\ttotal: 9.37s\tremaining: 3.68s\n", "718:\tlearn: 15462871.2869120\ttotal: 9.38s\tremaining: 3.67s\n", "719:\tlearn: 15462771.0380185\ttotal: 9.39s\tremaining: 3.65s\n", "720:\tlearn: 15462475.2715024\ttotal: 9.4s\tremaining: 3.64s\n", "721:\tlearn: 15454885.7938423\ttotal: 9.42s\tremaining: 3.63s\n", "722:\tlearn: 15450557.5824215\ttotal: 9.43s\tremaining: 3.61s\n", "723:\tlearn: 15446455.2317749\ttotal: 9.44s\tremaining: 3.6s\n", "724:\tlearn: 15445711.0004476\ttotal: 9.46s\tremaining: 3.59s\n", "725:\tlearn: 15441822.5331613\ttotal: 9.47s\tremaining: 3.57s\n", "726:\tlearn: 15441182.6843715\ttotal: 9.48s\tremaining: 3.56s\n", "727:\tlearn: 15441088.8915881\ttotal: 9.49s\tremaining: 3.55s\n", "728:\tlearn: 15441002.5272406\ttotal: 9.51s\tremaining: 3.53s\n", "729:\tlearn: 15440884.2830869\ttotal: 9.52s\tremaining: 3.52s\n", "730:\tlearn: 15440734.2579995\ttotal: 9.53s\tremaining: 3.51s\n", "731:\tlearn: 15440611.8887909\ttotal: 9.54s\tremaining: 3.49s\n", "732:\tlearn: 15440249.4221271\ttotal: 9.55s\tremaining: 3.48s\n", "733:\tlearn: 15440158.8154476\ttotal: 9.56s\tremaining: 3.46s\n", "734:\tlearn: 15436472.7845071\ttotal: 9.57s\tremaining: 3.45s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "735:\tlearn: 15433672.4484876\ttotal: 9.59s\tremaining: 3.44s\n", "736:\tlearn: 15433490.1840146\ttotal: 9.6s\tremaining: 3.43s\n", "737:\tlearn: 15433308.6881010\ttotal: 9.61s\tremaining: 3.41s\n", "738:\tlearn: 15433042.7409848\ttotal: 9.63s\tremaining: 3.4s\n", "739:\tlearn: 15432541.8769518\ttotal: 9.64s\tremaining: 3.39s\n", "740:\tlearn: 15431856.6761047\ttotal: 9.66s\tremaining: 3.38s\n", "741:\tlearn: 15431804.7359345\ttotal: 9.67s\tremaining: 3.36s\n", "742:\tlearn: 15427355.2392047\ttotal: 9.68s\tremaining: 3.35s\n", "743:\tlearn: 15427218.1028185\ttotal: 9.69s\tremaining: 3.33s\n", "744:\tlearn: 15424332.1093472\ttotal: 9.7s\tremaining: 
3.32s\n", "745:\tlearn: 15388321.8033125\ttotal: 9.71s\tremaining: 3.31s\n", "746:\tlearn: 15377267.9048803\ttotal: 9.73s\tremaining: 3.29s\n", "747:\tlearn: 15374625.6198420\ttotal: 9.74s\tremaining: 3.28s\n", "748:\tlearn: 15370386.0426691\ttotal: 9.75s\tremaining: 3.27s\n", "749:\tlearn: 15359901.4299089\ttotal: 9.77s\tremaining: 3.25s\n", "750:\tlearn: 15358774.7332579\ttotal: 9.78s\tremaining: 3.24s\n", "751:\tlearn: 15358651.8711020\ttotal: 9.79s\tremaining: 3.23s\n", "752:\tlearn: 15358300.8764559\ttotal: 9.8s\tremaining: 3.21s\n", "753:\tlearn: 15357884.8170886\ttotal: 9.81s\tremaining: 3.2s\n", "754:\tlearn: 15357643.0994172\ttotal: 9.83s\tremaining: 3.19s\n", "755:\tlearn: 15357565.0887636\ttotal: 9.84s\tremaining: 3.17s\n", "756:\tlearn: 15351820.0777339\ttotal: 9.85s\tremaining: 3.16s\n", "757:\tlearn: 15351414.2517094\ttotal: 9.86s\tremaining: 3.15s\n", "758:\tlearn: 15349501.7532204\ttotal: 9.88s\tremaining: 3.14s\n", "759:\tlearn: 15348526.7586048\ttotal: 9.89s\tremaining: 3.12s\n", "760:\tlearn: 15348352.7244253\ttotal: 9.9s\tremaining: 3.11s\n", "761:\tlearn: 15347292.4488773\ttotal: 9.91s\tremaining: 3.1s\n", "762:\tlearn: 15347207.8865499\ttotal: 9.93s\tremaining: 3.08s\n", "763:\tlearn: 15342959.4790246\ttotal: 9.94s\tremaining: 3.07s\n", "764:\tlearn: 15342697.6483068\ttotal: 9.95s\tremaining: 3.06s\n", "765:\tlearn: 15313028.1511576\ttotal: 9.96s\tremaining: 3.04s\n", "766:\tlearn: 15310947.6382018\ttotal: 9.98s\tremaining: 3.03s\n", "767:\tlearn: 15263340.9642127\ttotal: 9.99s\tremaining: 3.02s\n", "768:\tlearn: 15259807.2026083\ttotal: 10s\tremaining: 3s\n", "769:\tlearn: 15259504.1148296\ttotal: 10s\tremaining: 2.99s\n", "770:\tlearn: 15259395.2637694\ttotal: 10s\tremaining: 2.98s\n", "771:\tlearn: 15256045.7141942\ttotal: 10s\tremaining: 2.96s\n", "772:\tlearn: 15252870.9021417\ttotal: 10.1s\tremaining: 2.95s\n", "773:\tlearn: 15248430.3201074\ttotal: 10.1s\tremaining: 2.94s\n", "774:\tlearn: 15246752.4177458\ttotal: 10.1s\tremaining: 
2.93s\n", "775:\tlearn: 15245960.1417687\ttotal: 10.1s\tremaining: 2.91s\n", "776:\tlearn: 15245917.6645107\ttotal: 10.1s\tremaining: 2.9s\n", "777:\tlearn: 15245817.5185452\ttotal: 10.1s\tremaining: 2.89s\n", "778:\tlearn: 15245619.8351855\ttotal: 10.1s\tremaining: 2.87s\n", "779:\tlearn: 15244869.5667520\ttotal: 10.1s\tremaining: 2.86s\n", "780:\tlearn: 15244818.8943236\ttotal: 10.2s\tremaining: 2.85s\n", "781:\tlearn: 15244254.3637038\ttotal: 10.2s\tremaining: 2.83s\n", "782:\tlearn: 15243818.2939855\ttotal: 10.2s\tremaining: 2.82s\n", "783:\tlearn: 15243668.1645179\ttotal: 10.2s\tremaining: 2.81s\n", "784:\tlearn: 15240656.8617467\ttotal: 10.2s\tremaining: 2.8s\n", "785:\tlearn: 15237802.6637690\ttotal: 10.2s\tremaining: 2.78s\n", "786:\tlearn: 15235097.3769887\ttotal: 10.2s\tremaining: 2.77s\n", "787:\tlearn: 15231063.4576018\ttotal: 10.3s\tremaining: 2.76s\n", "788:\tlearn: 15224406.4239600\ttotal: 10.3s\tremaining: 2.74s\n", "789:\tlearn: 15220791.2846445\ttotal: 10.3s\tremaining: 2.73s\n", "790:\tlearn: 15220221.4180094\ttotal: 10.3s\tremaining: 2.72s\n", "791:\tlearn: 15220121.7499013\ttotal: 10.3s\tremaining: 2.71s\n", "792:\tlearn: 15218396.9325757\ttotal: 10.3s\tremaining: 2.69s\n", "793:\tlearn: 15213830.8844557\ttotal: 10.3s\tremaining: 2.68s\n", "794:\tlearn: 15212644.4009126\ttotal: 10.3s\tremaining: 2.67s\n", "795:\tlearn: 15212570.5272286\ttotal: 10.4s\tremaining: 2.65s\n", "796:\tlearn: 15172874.1552397\ttotal: 10.4s\tremaining: 2.64s\n", "797:\tlearn: 15164671.3501787\ttotal: 10.4s\tremaining: 2.63s\n", "798:\tlearn: 15162711.8871221\ttotal: 10.4s\tremaining: 2.62s\n", "799:\tlearn: 15162618.0050229\ttotal: 10.4s\tremaining: 2.6s\n", "800:\tlearn: 15161186.8924011\ttotal: 10.4s\tremaining: 2.59s\n", "801:\tlearn: 15160994.4738412\ttotal: 10.4s\tremaining: 2.58s\n", "802:\tlearn: 15159385.3831268\ttotal: 10.4s\tremaining: 2.56s\n", "803:\tlearn: 15159166.1576231\ttotal: 10.5s\tremaining: 2.55s\n", "804:\tlearn: 15156764.1801770\ttotal: 
10.5s\tremaining: 2.54s\n", "805:\tlearn: 15146691.8394282\ttotal: 10.5s\tremaining: 2.52s\n", "806:\tlearn: 15146533.6706853\ttotal: 10.5s\tremaining: 2.51s\n", "807:\tlearn: 15146408.7773292\ttotal: 10.5s\tremaining: 2.5s\n", "808:\tlearn: 15142359.7678728\ttotal: 10.5s\tremaining: 2.48s\n", "809:\tlearn: 15142322.1248825\ttotal: 10.5s\tremaining: 2.47s\n", "810:\tlearn: 15132770.1153732\ttotal: 10.5s\tremaining: 2.46s\n", "811:\tlearn: 15101480.9924963\ttotal: 10.6s\tremaining: 2.44s\n", "812:\tlearn: 15101445.1875248\ttotal: 10.6s\tremaining: 2.43s\n", "813:\tlearn: 15075376.4419388\ttotal: 10.6s\tremaining: 2.42s\n", "814:\tlearn: 15073160.0820287\ttotal: 10.6s\tremaining: 2.4s\n", "815:\tlearn: 15072725.5140996\ttotal: 10.6s\tremaining: 2.39s\n", "816:\tlearn: 15072585.7018342\ttotal: 10.6s\tremaining: 2.38s\n", "817:\tlearn: 15071522.0001919\ttotal: 10.6s\tremaining: 2.37s\n", "818:\tlearn: 15071382.2097110\ttotal: 10.6s\tremaining: 2.35s\n", "819:\tlearn: 15071301.2886091\ttotal: 10.7s\tremaining: 2.34s\n", "820:\tlearn: 15071025.2992144\ttotal: 10.7s\tremaining: 2.33s\n", "821:\tlearn: 15069498.2268762\ttotal: 10.7s\tremaining: 2.31s\n", "822:\tlearn: 15061575.7065075\ttotal: 10.7s\tremaining: 2.3s\n", "823:\tlearn: 15061416.4068476\ttotal: 10.7s\tremaining: 2.29s\n", "824:\tlearn: 15060945.6687130\ttotal: 10.7s\tremaining: 2.27s\n", "825:\tlearn: 15051099.8538783\ttotal: 10.7s\tremaining: 2.26s\n", "826:\tlearn: 15050450.9663299\ttotal: 10.7s\tremaining: 2.25s\n", "827:\tlearn: 15049722.9751983\ttotal: 10.8s\tremaining: 2.23s\n", "828:\tlearn: 15049467.7452535\ttotal: 10.8s\tremaining: 2.22s\n", "829:\tlearn: 15049412.7697933\ttotal: 10.8s\tremaining: 2.21s\n", "830:\tlearn: 15048891.7740041\ttotal: 10.8s\tremaining: 2.19s\n", "831:\tlearn: 15048043.0994998\ttotal: 10.8s\tremaining: 2.18s\n", "832:\tlearn: 15046697.3368860\ttotal: 10.8s\tremaining: 2.17s\n", "833:\tlearn: 15038272.8803419\ttotal: 10.8s\tremaining: 2.15s\n", "834:\tlearn: 
15034639.9951102\ttotal: 10.8s\tremaining: 2.14s\n", "835:\tlearn: 15030153.8614245\ttotal: 10.9s\tremaining: 2.13s\n", "836:\tlearn: 15027964.0190757\ttotal: 10.9s\tremaining: 2.12s\n", "837:\tlearn: 15023890.1409211\ttotal: 10.9s\tremaining: 2.1s\n", "838:\tlearn: 15022954.0613643\ttotal: 10.9s\tremaining: 2.09s\n", "839:\tlearn: 15022653.7321874\ttotal: 10.9s\tremaining: 2.08s\n", "840:\tlearn: 15021763.5899870\ttotal: 10.9s\tremaining: 2.06s\n", "841:\tlearn: 15021552.9666208\ttotal: 10.9s\tremaining: 2.05s\n", "842:\tlearn: 15017213.3112838\ttotal: 10.9s\tremaining: 2.04s\n", "843:\tlearn: 15006868.9919636\ttotal: 11s\tremaining: 2.02s\n", "844:\tlearn: 15006047.4873296\ttotal: 11s\tremaining: 2.01s\n", "845:\tlearn: 15003167.3995596\ttotal: 11s\tremaining: 2s\n", "846:\tlearn: 15001516.1719277\ttotal: 11s\tremaining: 1.99s\n", "847:\tlearn: 15000023.6971343\ttotal: 11s\tremaining: 1.97s\n", "848:\tlearn: 14996097.9901016\ttotal: 11s\tremaining: 1.96s\n", "849:\tlearn: 14995809.9617414\ttotal: 11s\tremaining: 1.95s\n", "850:\tlearn: 14991694.0680204\ttotal: 11s\tremaining: 1.93s\n", "851:\tlearn: 14990806.5441048\ttotal: 11.1s\tremaining: 1.92s\n", "852:\tlearn: 14990539.4146062\ttotal: 11.1s\tremaining: 1.91s\n", "853:\tlearn: 14990428.7975864\ttotal: 11.1s\tremaining: 1.89s\n", "854:\tlearn: 14989061.7567162\ttotal: 11.1s\tremaining: 1.88s\n", "855:\tlearn: 14983131.0103419\ttotal: 11.1s\tremaining: 1.87s\n", "856:\tlearn: 14982655.3316759\ttotal: 11.1s\tremaining: 1.86s\n", "857:\tlearn: 14977099.0608273\ttotal: 11.1s\tremaining: 1.84s\n", "858:\tlearn: 14976713.9058693\ttotal: 11.2s\tremaining: 1.83s\n", "859:\tlearn: 14976613.3364184\ttotal: 11.2s\tremaining: 1.82s\n", "860:\tlearn: 14964115.6999829\ttotal: 11.2s\tremaining: 1.81s\n", "861:\tlearn: 14961152.7626425\ttotal: 11.2s\tremaining: 1.79s\n", "862:\tlearn: 14960316.8698796\ttotal: 11.2s\tremaining: 1.78s\n", "863:\tlearn: 14960206.5805103\ttotal: 11.2s\tremaining: 1.77s\n", "864:\tlearn: 
14948350.2065232\ttotal: 11.2s\tremaining: 1.75s\n", "865:\tlearn: 14948237.8225238\ttotal: 11.3s\tremaining: 1.74s\n", "866:\tlearn: 14948145.1280412\ttotal: 11.3s\tremaining: 1.73s\n", "867:\tlearn: 14947479.9936319\ttotal: 11.3s\tremaining: 1.72s\n", "868:\tlearn: 14946706.9144290\ttotal: 11.3s\tremaining: 1.7s\n", "869:\tlearn: 14946008.4529886\ttotal: 11.3s\tremaining: 1.69s\n", "870:\tlearn: 14938382.9733130\ttotal: 11.3s\tremaining: 1.68s\n", "871:\tlearn: 14935923.0018589\ttotal: 11.3s\tremaining: 1.66s\n", "872:\tlearn: 14935763.9386719\ttotal: 11.4s\tremaining: 1.65s\n", "873:\tlearn: 14935390.9032799\ttotal: 11.4s\tremaining: 1.64s\n", "874:\tlearn: 14924136.4999000\ttotal: 11.4s\tremaining: 1.63s\n", "875:\tlearn: 14923231.4975181\ttotal: 11.4s\tremaining: 1.61s\n", "876:\tlearn: 14920764.7489123\ttotal: 11.4s\tremaining: 1.6s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "877:\tlearn: 14920619.6869935\ttotal: 11.4s\tremaining: 1.59s\n", "878:\tlearn: 14920259.9887151\ttotal: 11.4s\tremaining: 1.57s\n", "879:\tlearn: 14918671.6063618\ttotal: 11.4s\tremaining: 1.56s\n", "880:\tlearn: 14909484.3446534\ttotal: 11.5s\tremaining: 1.55s\n", "881:\tlearn: 14909331.3722806\ttotal: 11.5s\tremaining: 1.53s\n", "882:\tlearn: 14909000.4744294\ttotal: 11.5s\tremaining: 1.52s\n", "883:\tlearn: 14907810.4215534\ttotal: 11.5s\tremaining: 1.51s\n", "884:\tlearn: 14907739.9399244\ttotal: 11.5s\tremaining: 1.5s\n", "885:\tlearn: 14907643.8234156\ttotal: 11.5s\tremaining: 1.48s\n", "886:\tlearn: 14902234.6414918\ttotal: 11.5s\tremaining: 1.47s\n", "887:\tlearn: 14899639.1808572\ttotal: 11.5s\tremaining: 1.46s\n", "888:\tlearn: 14898572.6302420\ttotal: 11.6s\tremaining: 1.44s\n", "889:\tlearn: 14898481.9011232\ttotal: 11.6s\tremaining: 1.43s\n", "890:\tlearn: 14898396.0569341\ttotal: 11.6s\tremaining: 1.42s\n", "891:\tlearn: 14897825.8306216\ttotal: 11.6s\tremaining: 1.4s\n", "892:\tlearn: 14897667.5383103\ttotal: 11.6s\tremaining: 1.39s\n", "893:\tlearn: 
14894977.2967186\ttotal: 11.6s\tremaining: 1.38s\n", "894:\tlearn: 14894451.1015405\ttotal: 11.6s\tremaining: 1.36s\n", "895:\tlearn: 14894302.6357933\ttotal: 11.6s\tremaining: 1.35s\n", "896:\tlearn: 14892042.1460828\ttotal: 11.7s\tremaining: 1.34s\n", "897:\tlearn: 14891913.2181914\ttotal: 11.7s\tremaining: 1.32s\n", "898:\tlearn: 14891548.8875063\ttotal: 11.7s\tremaining: 1.31s\n", "899:\tlearn: 14891535.2806629\ttotal: 11.7s\tremaining: 1.3s\n", "900:\tlearn: 14885531.4554658\ttotal: 11.7s\tremaining: 1.29s\n", "901:\tlearn: 14885300.5490787\ttotal: 11.7s\tremaining: 1.27s\n", "902:\tlearn: 14882237.8064495\ttotal: 11.7s\tremaining: 1.26s\n", "903:\tlearn: 14871534.0201501\ttotal: 11.7s\tremaining: 1.25s\n", "904:\tlearn: 14870231.5883229\ttotal: 11.8s\tremaining: 1.23s\n", "905:\tlearn: 14870176.1224648\ttotal: 11.8s\tremaining: 1.22s\n", "906:\tlearn: 14869680.0980501\ttotal: 11.8s\tremaining: 1.21s\n", "907:\tlearn: 14869457.0180442\ttotal: 11.8s\tremaining: 1.2s\n", "908:\tlearn: 14868933.3692698\ttotal: 11.8s\tremaining: 1.18s\n", "909:\tlearn: 14863961.6816683\ttotal: 11.8s\tremaining: 1.17s\n", "910:\tlearn: 14863914.5911026\ttotal: 11.8s\tremaining: 1.16s\n", "911:\tlearn: 14855476.3946477\ttotal: 11.8s\tremaining: 1.14s\n", "912:\tlearn: 14855166.2145584\ttotal: 11.9s\tremaining: 1.13s\n", "913:\tlearn: 14850934.1124148\ttotal: 11.9s\tremaining: 1.12s\n", "914:\tlearn: 14846483.1667184\ttotal: 11.9s\tremaining: 1.1s\n", "915:\tlearn: 14837736.3329095\ttotal: 11.9s\tremaining: 1.09s\n", "916:\tlearn: 14837595.6179847\ttotal: 11.9s\tremaining: 1.08s\n", "917:\tlearn: 14830817.0386636\ttotal: 11.9s\tremaining: 1.06s\n", "918:\tlearn: 14830573.0969752\ttotal: 11.9s\tremaining: 1.05s\n", "919:\tlearn: 14830470.0025192\ttotal: 12s\tremaining: 1.04s\n", "920:\tlearn: 14829815.6285131\ttotal: 12s\tremaining: 1.03s\n", "921:\tlearn: 14825434.2189552\ttotal: 12s\tremaining: 1.01s\n", "922:\tlearn: 14819445.0181126\ttotal: 12s\tremaining: 1s\n", "923:\tlearn: 
14819398.6279561\ttotal: 12s\tremaining: 988ms\n", "924:\tlearn: 14818813.5923928\ttotal: 12s\tremaining: 975ms\n", "925:\tlearn: 14815956.9177135\ttotal: 12s\tremaining: 962ms\n", "926:\tlearn: 14815788.2679741\ttotal: 12s\tremaining: 948ms\n", "927:\tlearn: 14815448.9260298\ttotal: 12.1s\tremaining: 935ms\n", "928:\tlearn: 14815074.5372959\ttotal: 12.1s\tremaining: 923ms\n", "929:\tlearn: 14814229.2585638\ttotal: 12.1s\tremaining: 910ms\n", "930:\tlearn: 14804039.0241152\ttotal: 12.1s\tremaining: 897ms\n", "931:\tlearn: 14803929.0738285\ttotal: 12.1s\tremaining: 883ms\n", "932:\tlearn: 14803822.0317935\ttotal: 12.1s\tremaining: 870ms\n", "933:\tlearn: 14802847.7620639\ttotal: 12.1s\tremaining: 857ms\n", "934:\tlearn: 14802644.0143811\ttotal: 12.1s\tremaining: 844ms\n", "935:\tlearn: 14801390.4240818\ttotal: 12.2s\tremaining: 831ms\n", "936:\tlearn: 14801273.4225706\ttotal: 12.2s\tremaining: 818ms\n", "937:\tlearn: 14801021.8599058\ttotal: 12.2s\tremaining: 805ms\n", "938:\tlearn: 14800715.3029627\ttotal: 12.2s\tremaining: 792ms\n", "939:\tlearn: 14799551.6406369\ttotal: 12.2s\tremaining: 779ms\n", "940:\tlearn: 14795598.5613345\ttotal: 12.2s\tremaining: 766ms\n", "941:\tlearn: 14794465.0278834\ttotal: 12.2s\tremaining: 753ms\n", "942:\tlearn: 14794259.7563387\ttotal: 12.2s\tremaining: 740ms\n", "943:\tlearn: 14794156.6214413\ttotal: 12.3s\tremaining: 727ms\n", "944:\tlearn: 14792982.4344262\ttotal: 12.3s\tremaining: 714ms\n", "945:\tlearn: 14792468.8012658\ttotal: 12.3s\tremaining: 701ms\n", "946:\tlearn: 14792139.9923168\ttotal: 12.3s\tremaining: 688ms\n", "947:\tlearn: 14786295.7938911\ttotal: 12.3s\tremaining: 675ms\n", "948:\tlearn: 14783272.8762359\ttotal: 12.3s\tremaining: 662ms\n", "949:\tlearn: 14782689.5513664\ttotal: 12.3s\tremaining: 649ms\n", "950:\tlearn: 14782664.1266181\ttotal: 12.3s\tremaining: 636ms\n", "951:\tlearn: 14774339.7873426\ttotal: 12.4s\tremaining: 623ms\n", "952:\tlearn: 14769038.5403572\ttotal: 12.4s\tremaining: 610ms\n", 
"953:\tlearn: 14761312.5028488\ttotal: 12.4s\tremaining: 597ms\n", "954:\tlearn: 14760414.3496721\ttotal: 12.4s\tremaining: 584ms\n", "955:\tlearn: 14752950.6860631\ttotal: 12.4s\tremaining: 571ms\n", "956:\tlearn: 14752691.2767919\ttotal: 12.4s\tremaining: 558ms\n", "957:\tlearn: 14752196.9268404\ttotal: 12.4s\tremaining: 545ms\n", "958:\tlearn: 14750149.4071752\ttotal: 12.5s\tremaining: 532ms\n", "959:\tlearn: 14749691.8632556\ttotal: 12.5s\tremaining: 519ms\n", "960:\tlearn: 14749496.6164671\ttotal: 12.5s\tremaining: 507ms\n", "961:\tlearn: 14749338.8909588\ttotal: 12.5s\tremaining: 494ms\n", "962:\tlearn: 14749291.1143099\ttotal: 12.5s\tremaining: 481ms\n", "963:\tlearn: 14739582.8672605\ttotal: 12.5s\tremaining: 468ms\n", "964:\tlearn: 14739542.4442574\ttotal: 12.5s\tremaining: 455ms\n", "965:\tlearn: 14739306.4391584\ttotal: 12.5s\tremaining: 442ms\n", "966:\tlearn: 14708630.7344942\ttotal: 12.6s\tremaining: 429ms\n", "967:\tlearn: 14680984.5320050\ttotal: 12.6s\tremaining: 416ms\n", "968:\tlearn: 14677319.7390898\ttotal: 12.6s\tremaining: 403ms\n", "969:\tlearn: 14677109.6650390\ttotal: 12.6s\tremaining: 389ms\n", "970:\tlearn: 14676355.6242387\ttotal: 12.6s\tremaining: 376ms\n", "971:\tlearn: 14667109.8761175\ttotal: 12.6s\tremaining: 363ms\n", "972:\tlearn: 14643934.7657464\ttotal: 12.6s\tremaining: 350ms\n", "973:\tlearn: 14642633.8347823\ttotal: 12.6s\tremaining: 337ms\n", "974:\tlearn: 14642167.8071045\ttotal: 12.7s\tremaining: 324ms\n", "975:\tlearn: 14641845.3365852\ttotal: 12.7s\tremaining: 311ms\n", "976:\tlearn: 14640306.5144587\ttotal: 12.7s\tremaining: 298ms\n", "977:\tlearn: 14640184.9147582\ttotal: 12.7s\tremaining: 286ms\n", "978:\tlearn: 14640008.8483425\ttotal: 12.7s\tremaining: 273ms\n", "979:\tlearn: 14636344.5519428\ttotal: 12.7s\tremaining: 260ms\n", "980:\tlearn: 14633576.2663352\ttotal: 12.7s\tremaining: 247ms\n", "981:\tlearn: 14624796.2134142\ttotal: 12.7s\tremaining: 234ms\n", "982:\tlearn: 14624250.6774943\ttotal: 12.8s\tremaining: 
221ms\n", "983:\tlearn: 14615917.0650799\ttotal: 12.8s\tremaining: 208ms\n", "984:\tlearn: 14615782.8388140\ttotal: 12.8s\tremaining: 195ms\n", "985:\tlearn: 14612724.2754075\ttotal: 12.8s\tremaining: 182ms\n", "986:\tlearn: 14609973.2837772\ttotal: 12.8s\tremaining: 169ms\n", "987:\tlearn: 14605203.8050795\ttotal: 12.8s\tremaining: 156ms\n", "988:\tlearn: 14605011.6874159\ttotal: 12.8s\tremaining: 143ms\n", "989:\tlearn: 14594552.7887146\ttotal: 12.8s\tremaining: 130ms\n", "990:\tlearn: 14591881.9316489\ttotal: 12.9s\tremaining: 117ms\n", "991:\tlearn: 14581962.3358039\ttotal: 12.9s\tremaining: 104ms\n", "992:\tlearn: 14581829.5331587\ttotal: 12.9s\tremaining: 90.8ms\n", "993:\tlearn: 14581669.7347033\ttotal: 12.9s\tremaining: 77.9ms\n", "994:\tlearn: 14577373.3119596\ttotal: 12.9s\tremaining: 64.9ms\n", "995:\tlearn: 14577196.6327960\ttotal: 12.9s\tremaining: 51.9ms\n", "996:\tlearn: 14577122.1536884\ttotal: 12.9s\tremaining: 38.9ms\n", "997:\tlearn: 14574702.1336653\ttotal: 12.9s\tremaining: 25.9ms\n", "998:\tlearn: 14574660.6060510\ttotal: 13s\tremaining: 13ms\n", "999:\tlearn: 14574625.9856659\ttotal: 13s\tremaining: 0us\n" ] }, { "data": { "text/html": [ "
Pipeline(steps=[('preprocessor',\n",
       "                 ColumnTransformer(transformers=[('num', StandardScaler(),\n",
       "                                                  ['geo_lat', 'geo_lon',\n",
       "                                                   'level', 'levels', 'rooms',\n",
       "                                                   'area', 'kitchen_area']),\n",
       "                                                 ('cat',\n",
       "                                                  OrdinalEncoder(handle_unknown='use_encoded_value',\n",
       "                                                                 unknown_value=99999999),\n",
       "                                                  ['region', 'building_type',\n",
       "                                                   'object_type'])])),\n",
       "                ('model',\n",
       "                 <catboost.core.CatBoostRegressor object at 0x7be8319969b0>)])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('preprocessor',\n", " ColumnTransformer(transformers=[('num', StandardScaler(),\n", " ['geo_lat', 'geo_lon',\n", " 'level', 'levels', 'rooms',\n", " 'area', 'kitchen_area']),\n", " ('cat',\n", " OrdinalEncoder(handle_unknown='use_encoded_value',\n", " unknown_value=99999999),\n", " ['region', 'building_type',\n", " 'object_type'])])),\n", " ('model',\n", " )])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "pipeline = Pipeline(steps=[('preprocessor', preprocessor), \n", " ('model', regressor)])\n", "\n", "pipeline.fit(X_train, y_train)\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ " model_params = {\n", " \"geo_lat\":56.327686,\n", " \"geo_lon\":43.928062,\n", " \"region\":2871.000000,\n", " \"building_type\":1.000000,\n", " \"level\":8.000000,\n", " \"levels\":10.000000,\n", " \"rooms\":2.000000,\n", " \"area\":56.000000,\n", " \"kitchen_area\":8.500000,\n", " \"object_type\":1.000000,\n", " \"floor_level\":0,\n", " }\n", " df_pred = pd.DataFrame(model_params, index=[0])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3414175.89042869])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "pipeline.predict(df_pred)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mae': 1447931.3425270966,\n", " 'mape': 1.6294525363466488e+18,\n", " 'mse': 281898017343454.56}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions = pipeline.predict(X_test) \n", "\n", "metrics = {}\n", "metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n", "metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n", "metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n", "\n", "metrics" ] }, { 
"cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "\n", "# Work with a local MLflow tracking server\n", "TRACKING_SERVER_HOST = \"127.0.0.1\"\n", "TRACKING_SERVER_PORT = 5000\n", "\n", "registry_uri = f\"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}\"\n", "tracking_uri = f\"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}\"\n", "\n", "mlflow.set_tracking_uri(tracking_uri) \n", "mlflow.set_registry_uri(registry_uri) \n", "\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Names of the experiment, of the run inside it, and of the model in the registry\n", "EXPERIMENT_NAME = \"estate_project\"\n", "RUN_NAME = \"baseline model\"\n", "REGISTRY_MODEL_NAME = \"estate_model_rf\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Логируем вручную" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Always log the model signature and an input example; prepare both here.\n", "# Predictions are passed too, so the signature records the output schema and not only the inputs.\n", "from mlflow.models import infer_signature\n", "\n", "input_example = X_train.head(5)\n", "signature = infer_signature(model_input=input_example, model_output=pipeline.predict(input_example))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# We will log the requirements file and one artifact (a text file)\n", "req_file = 'requirements.txt'\n", "art = 'comment.txt'" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Parameters to log can be listed by hand or taken wholesale from the model\n", "#params_dict = {'n_estimators': 10, 'max_depth': 10}\n", "params_dict = pipeline.get_params()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Reuse the experiment when it already exists and create it only on the first run,\n", "# 
because create_experiment() raises for a name that is already taken.\n", "experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)\n", "experiment_id = experiment.experiment_id if experiment is not None else mlflow.create_experiment(EXPERIMENT_NAME)\n", "\n", "# Впоследствии.
чтобы добавлять запуски в этот же эксепримент мы должны получить его id:\n", "#experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", "\n", "with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n", " # получаем уникальный идентификатор запуска эксперимента\n", " run_id = run.info.run_id \n", " mlflow.sklearn.log_model(pipeline, \n", " artifact_path=\"models\",\n", " signature=signature,\n", " input_example=input_example,\n", " pip_requirements=req_file\n", " )\n", " mlflow.log_metrics(metrics)\n", " mlflow.log_artifact(art)\n", " mlflow.log_params(params_dict)\n", "\n", "run = mlflow.get_run(run_id) \n", "assert (run.info.status =='FINISHED')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Удаление runs, experiments\n", "\n", "Использовать осторожно" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", "#mlflow.delete_experiment(experiment_id)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mlflow.search_runs(\n", " #experiment_ids=[experiment_id],\n", " experiment_names=[EXPERIMENT_NAME],\n", " # filter_string='status = \"FAILED\"'\n", " #filter_string='metrics.mae > 1'\n", " \n", ")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "\n", "#mlflow.delete_run('74d2a7a40c07413c9cf65df841164356')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Автологирование\n", "После включения будет срабатывать на каждом обучении модели (на методе fit()).\n", "\n", "Есть плюсы, есть и минусы. 
Предлагается сделать прогон и сравнить с результатами вручную " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mlflow.sklearn.autolog()\n", "\n", "with mlflow.start_run(run_name='auto', experiment_id=experiment_id) as run:\n", "    pipeline.fit(X_train, y_train)\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "# Turn autologging off again\n", "mlflow.sklearn.autolog(disable=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Model #2\n", "Обучим вторую \"маленькую\" модель\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Fixed seed (same value as the data sample at the top of the notebook) so the logged metrics are reproducible\n", "regressor2 = RandomForestRegressor(n_estimators=10, max_depth=6, random_state=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pipeline = Pipeline(steps=[('preprocessor', preprocessor), \n", "                           ('model', regressor2)])\n", "\n", "pipeline.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predictions = pipeline.predict(X_test) \n", "metrics = {}\n", "metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n", "metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n", "metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n", "\n", "metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# !!! Make sure the run name and every logged parameter and artifact belong to the second, \"small\" model.
\n", "\n", "\n", "RUN_NAME = 'smaller_model'\n", "\n", "experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", "\n", "with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n", " # получаем уникальный идентификатор запуска эксперимента\n", " run_id = run.info.run_id \n", " mlflow.sklearn.log_model(pipeline, \n", " artifact_path=\"models\",\n", " signature=signature,\n", " input_example=input_example,\n", " pip_requirements=req_file\n", " )\n", " mlflow.log_metrics(metrics)\n", " mlflow.log_artifact(art)\n", " mlflow.log_params(pipeline.get_params())\n", "\n", "run = mlflow.get_run(run_id) \n", "assert (run.info.status =='FINISHED')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# No model\n", "# Логировать можно только артефакты, без модели. Например, залогироавть графики после этапа EDA\n", "\n", "RUN_NAME = 'no_model'\n", "experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", "\n", "with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n", " run_id = run.info.run_id \n", " mlflow.log_artifact(art)\n", "\n", "\n", "run = mlflow.get_run(run_id) \n", "assert (run.info.status =='FINISHED')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "run_id = '06fa7ec1f1b74aedb3509c88dc4ee1c0' # Указываем run id\n", "mlflow.register_model(f\"runs:/{run_id}/models\", REGISTRY_MODEL_NAME)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Можно регистрировать сразу при создании прогона\n", "\n", "experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", "\n", "with mlflow.start_run(run_name='register_at_run', experiment_id=experiment_id) as run:\n", " # получаем уникальный идентификатор запуска эксперимента\n", " run_id = run.info.run_id \n", " 
mlflow.sklearn.log_model(pipeline, \n", " artifact_path=\"models\",\n", " signature=signature,\n", " input_example=input_example,\n", " pip_requirements=req_file,\n", " registered_model_name = REGISTRY_MODEL_NAME # Указываем для какой модели регистрируем\n", " )\n", " mlflow.log_metrics(metrics)\n", " mlflow.log_artifact(art)\n", " mlflow.log_params(pipeline.get_params())\n", "\n", "run = mlflow.get_run(run_id) \n", "assert (run.info.status =='FINISHED')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Можно найти зарегистрированные модели\n", "model_reg = mlflow.search_registered_models()\n", "model_reg[0]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "\n", "model_name = REGISTRY_MODEL_NAME\n", "model_version = 1\n", "\n", "model_loaded = mlflow.sklearn.load_model(model_uri=f\"models:/{model_name}/{model_version}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model_loaded.predict(X_test.iloc[0:1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_test.iloc[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Feature engineering" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sklearn" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import QuantileTransformer, SplineTransformer, PolynomialFeatures, MinMaxScaler" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "X_train_sklearn = X_train.copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### PolynomialFeatures\n", "Создает полином степени `degree` из указанных признаков\n" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], 
"source": [ "pf = PolynomialFeatures(degree=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_train_sklearn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf.fit_transform(X_train_sklearn[['area','kitchen_area']])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### SplineTransformer\n", "Cоздаёт новую матрицу признаков, состоящую из сплайнов порядка degree. Количество сгенерированных сплайнов равно `n_splines=n_knots + degree - 1` для каждого признака, где\n", "\n", "`n_knots` определяет количество узлов (точек, в которых сопрягаются сплайны) для каждого признака. \n", "\n", "`degree` определяет порядок полинома, используемого для построения сплайнов. " ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "sp = SplineTransformer(n_knots=3, degree=3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sp.fit_transform(X_train_sklearn[['area']])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### QuantileTransformer\n", "Этот метод преобразует признаки, чтобы они распределялись равномерно или нормально — так данные меньше подвергаются влиянию выбросов. Преобразование применяется к каждому признаку независимо. Идея метода такова: оценить функцию распределения признака, чтобы преобразовать исходные значения в равномерное или нормальное распределение. \n", "\n", "`output_distribution='uniform'` или\n", "`output_distribution='normal'` соответственно\n", "\n", "\n", "Пример использования: если у вас есть данные о доходах с широким диапазоном значений, квантильное преобразование сделает их более сопоставимыми и устойчивыми к выбросам." 
] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "qt = QuantileTransformer()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "qt.fit_transform(X_train_sklearn[['area']])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Объединяем в ColumnTransformer и создаем Pipeline " ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "pf = PolynomialFeatures(degree=2)\n", "qt = QuantileTransformer()\n", "sp = SplineTransformer(n_knots=3, degree=3)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "# The generated features have to be scaled, so build a two-step pipeline: transform, then scale\n", "pf_pipeline = Pipeline(steps=[\n", "    ('poly', pf),\n", "    ('scale', StandardScaler())\n", "])" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "preprocessor_sklearn = ColumnTransformer(\n", "    transformers=[\n", "        ('num', s_scaler, num_features),  # transformations for numeric features\n", "        ('cat', l_encoder, cat_features),  # transformations for categorical features\n", "        ('quantile', qt,num_features),\n", "        ('poly', pf_pipeline, ['area', 'kitchen_area']),  # plug in the pipeline built above\n", "        ('spline', sp, ['area'])\n", "    ],\n", "    remainder='drop',  # drop the columns that no transformer touches\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Посмотрим что из себя теперь представляет датафрейм" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "## area and kitchen_area are stored as float16, whose largest finite value is 65504, so the squared\n", "## terms of the polynomial transformation can overflow. float64 is wide enough; cast once before fitting.\n", "## NOTE(review): presumably the same cast is needed for any frame passed to a pipeline containing pf_pipeline - confirm.\n", "X_train_sklearn[['area', 'kitchen_area']] = X_train_sklearn[['area', 'kitchen_area']].astype('float64')" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "X_train_sklearn_raw = preprocessor_sklearn.fit_transform(X_train_sklearn)\n", "X_train_sklearn = pd.DataFrame(X_train_sklearn_raw, columns=preprocessor_sklearn.get_feature_names_out())" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
num__geo_latnum__geo_lonnum__levelnum__levelsnum__roomsnum__areanum__kitchen_areacat__regioncat__building_typecat__object_typequantile__geo_latquantile__geo_lonquantile__levelquantile__levelsquantile__roomsquantile__areaquantile__kitchen_areapoly__1poly__areapoly__kitchen_areapoly__area^2poly__area kitchen_areapoly__kitchen_area^2spline__area_sp_0spline__area_sp_1spline__area_sp_2spline__area_sp_3spline__area_sp_4
00.495902-0.4497420.359235-0.2147890.2534130.063735-0.18628520.01.00.00.7662570.5110280.7172170.5365370.6006010.6236240.3748750.00.063735-0.186285-0.010002-0.132188-0.0027920.1558060.6661790.1780130.0000020.0
10.1778061.433673-0.246529-0.3677180.253413-0.114293-0.18628570.01.00.00.2971420.8679990.5220220.3868870.6006010.5415420.3748750.0-0.114293-0.186285-0.017375-0.169370-0.0027920.1569210.6662750.1768030.0000010.0
.......................................................................................
410773-0.748366-0.804077-0.6503710.7027880.2534131.3654411.50183352.03.00.00.1931430.1147530.3098100.7417420.6006010.9613670.9845350.01.3654411.5018330.0684381.5701630.0086160.1478200.6651590.1870110.0000100.0
4107741.257769-1.101815-0.0446080.0910701.1759110.553789-0.14254414.01.00.00.9080360.0757250.6046050.6456460.8673670.8418420.4364360.00.553789-0.1425440.014463-0.002742-0.0026490.1527670.6658600.1813700.0000040.0
\n", "

410775 rows × 28 columns

\n", "
" ], "text/plain": [ " num__geo_lat num__geo_lon num__level num__levels num__rooms \\\n", "0 0.495902 -0.449742 0.359235 -0.214789 0.253413 \n", "1 0.177806 1.433673 -0.246529 -0.367718 0.253413 \n", "... ... ... ... ... ... \n", "410773 -0.748366 -0.804077 -0.650371 0.702788 0.253413 \n", "410774 1.257769 -1.101815 -0.044608 0.091070 1.175911 \n", "\n", " num__area num__kitchen_area cat__region cat__building_type \\\n", "0 0.063735 -0.186285 20.0 1.0 \n", "1 -0.114293 -0.186285 70.0 1.0 \n", "... ... ... ... ... \n", "410773 1.365441 1.501833 52.0 3.0 \n", "410774 0.553789 -0.142544 14.0 1.0 \n", "\n", " cat__object_type quantile__geo_lat quantile__geo_lon \\\n", "0 0.0 0.766257 0.511028 \n", "1 0.0 0.297142 0.867999 \n", "... ... ... ... \n", "410773 0.0 0.193143 0.114753 \n", "410774 0.0 0.908036 0.075725 \n", "\n", " quantile__level quantile__levels quantile__rooms quantile__area \\\n", "0 0.717217 0.536537 0.600601 0.623624 \n", "1 0.522022 0.386887 0.600601 0.541542 \n", "... ... ... ... ... \n", "410773 0.309810 0.741742 0.600601 0.961367 \n", "410774 0.604605 0.645646 0.867367 0.841842 \n", "\n", " quantile__kitchen_area poly__1 poly__area poly__kitchen_area \\\n", "0 0.374875 0.0 0.063735 -0.186285 \n", "1 0.374875 0.0 -0.114293 -0.186285 \n", "... ... ... ... ... \n", "410773 0.984535 0.0 1.365441 1.501833 \n", "410774 0.436436 0.0 0.553789 -0.142544 \n", "\n", " poly__area^2 poly__area kitchen_area poly__kitchen_area^2 \\\n", "0 -0.010002 -0.132188 -0.002792 \n", "1 -0.017375 -0.169370 -0.002792 \n", "... ... ... ... \n", "410773 0.068438 1.570163 0.008616 \n", "410774 0.014463 -0.002742 -0.002649 \n", "\n", " spline__area_sp_0 spline__area_sp_1 spline__area_sp_2 \\\n", "0 0.155806 0.666179 0.178013 \n", "1 0.156921 0.666275 0.176803 \n", "... ... ... ... \n", "410773 0.147820 0.665159 0.187011 \n", "410774 0.152767 0.665860 0.181370 \n", "\n", " spline__area_sp_3 spline__area_sp_4 \n", "0 0.000002 0.0 \n", "1 0.000001 0.0 \n", "... ... ... 
\n", "410773 0.000010 0.0 \n", "410774 0.000004 0.0 \n", "\n", "[410775 rows x 28 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Удобно использовать для отображения всех строк\\столбцов в DataFrame\n", "with pd.option_context('display.max_rows', 5, 'display.max_columns', None):\n", " display (X_train_sklearn)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Создаем пайплайн с препроцессингом и моделью" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pipeline_sklearn = Pipeline(steps=[\n", " ('transform', preprocessor_sklearn),\n", " ('model', regressor)\n", "])\n", "\n", "model_sklearn = pipeline_sklearn.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model_sklearn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predictions = model_sklearn.predict(X_test) \n", "metrics = {}\n", "metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n", "metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n", "metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n", "\n", "metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", "RUN_NAME = 'fe_sklearn'\n", "\n", "with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n", " # получаем уникальный идентификатор запуска эксперимента\n", " run_id = run.info.run_id \n", " mlflow.sklearn.log_model(model_sklearn, \n", " artifact_path=\"models\",\n", " signature=signature,\n", " input_example=input_example,\n", " pip_requirements=req_file\n", " )\n", " mlflow.log_metrics(metrics)\n", " mlflow.log_artifact(art)\n", " mlflow.log_params(model_sklearn.get_params())\n", "\n", "run = mlflow.get_run(run_id) \n", "assert (run.info.status 
=='FINISHED')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Autofeat" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "from autofeat import AutoFeatRegressor\n", "transformations = [\"1/\", \"exp\", \"log\", \"abs\", \"sqrt\", \"^2\", \"^3\", \"1+\", \"1-\", \"sin\", \"cos\", \"exp-\", \"2^\"] " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "afreg = AutoFeatRegressor(verbose=1, feateng_steps=2, max_gb=8, transformations=[\"log\", \"sqrt\"],feateng_cols=num_features)\n", "X_train_arf = afreg.fit_transform(X_train,y_train)\n", "X_train_arf" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "# Wrapper that adds get_feature_names_out() so the autofeat step can report its output feature names\n", "import numpy as np\n", "\n", "class AutoFeatWrapper():\n", "    \"\"\"Thin fit/transform wrapper around AutoFeatRegressor.\n", "\n", "    All constructor arguments are forwarded to AutoFeatRegressor unchanged.\n", "    \"\"\"\n", "\n", "    def __init__(self, feateng_cols, feateng_steps=1, max_gb=16, transformations=(\"1/\", \"exp\", \"log\"), n_jobs=-1, verbose=1):\n", "        self.feateng_cols = feateng_cols\n", "        self.feateng_steps = feateng_steps\n", "        self.max_gb = max_gb\n", "        # The default is an immutable tuple; copy into a fresh list so instances never share one object\n", "        self.transformations = list(transformations)\n", "        self.n_jobs = n_jobs\n", "        # verbose used to be accepted and silently dropped; keep it and pass it on\n", "        self.verbose = verbose\n", "        self.afreg = AutoFeatRegressor(feateng_cols=self.feateng_cols,\n", "                                       feateng_steps=self.feateng_steps,\n", "                                       max_gb=self.max_gb,\n", "                                       transformations=self.transformations,\n", "                                       n_jobs=self.n_jobs,\n", "                                       verbose=self.verbose)\n", "\n", "    def fit(self, X, y=None):\n", "        \"\"\"Fit the wrapped AutoFeatRegressor on X, y and return self.\"\"\"\n", "        self.afreg.fit(X, y)\n", "        return self\n", "\n", "    def transform(self, X):\n", "        \"\"\"Return X transformed by the wrapped AutoFeatRegressor.\"\"\"\n", "        return self.afreg.transform(X)\n", "\n", "    def get_feature_names_out(self, input_features=None):\n", "        \"\"\"Return the output column names as a list; input_features is accepted but not used.\"\"\"\n", "        # Transform a single all-zero row and read the names off the resulting DataFrame\n", "        transformed_X = self.afreg.transform(pd.DataFrame(np.zeros((1, len(self.feateng_cols))), columns=self.feateng_cols))\n", "        return transformed_X.columns.tolist()" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [
"afreg_pipeline = Pipeline(steps=[\n", " ('autofeat', AutoFeatWrapper( feateng_steps=2, max_gb=16, transformations=[\"log\", \"sqrt\"],feateng_cols=num_features)),\n", " ('scaler', StandardScaler()),\n", "])" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "preprocessor_afr = ColumnTransformer(\n", " transformers=[\n", " ('num', s_scaler, num_features), # преобразования для числовых признаков\n", " ('cat', l_encoder, cat_features), # преобразования для категориальных признаков\n", " ('afr', afreg_pipeline, num_features), # преобразования autofeat\n", " ],\n", " remainder='drop', # Удаляем столбцы, которые не затронуты преобразованиями\n", " ) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_train_afr_raw = preprocessor_afr.fit_transform(X_train,y_train)\n", "X_train_afr = pd.DataFrame(X_train_afr_raw, columns=preprocessor_afr.get_feature_names_out())" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
num__geo_latnum__geo_lonnum__levelnum__levelsnum__roomsnum__areanum__kitchen_areacat__regioncat__building_typecat__object_typeafr__geo_latafr__geo_lonafr__levelafr__levelsafr__roomsafr__areaafr__kitchen_areaafr__area*roomsafr__area*geo_lonafr__levels*roomsafr__area*kitchen_areaafr__sqrt(area)*geo_latafr__sqrt(area)*log(level)afr__kitchen_area*log(level)afr__sqrt(area)*kitchen_areaafr__geo_lon*log(kitchen_area)afr__sqrt(area)*sqrt(kitchen_area)afr__sqrt(geo_lon)*sqrt(kitchen_area)afr__log(area)afr__rooms*log(level)afr__kitchen_area*roomsafr__kitchen_area*levelsafr__sqrt(geo_lon)*sqrt(level)afr__area**(3/2)afr__geo_lat*log(kitchen_area)afr__geo_lat*log(geo_lon)
00.495902-0.4497420.359235-0.2147890.2534130.063735-0.18628520.01.00.00.495902-0.4497420.359235-0.2147890.2534130.063735-0.1862850.006208-0.1951290.060916-0.1321880.3731510.6880760.044178-0.211335-0.481294-0.153548-0.4908050.3078350.690329-0.132529-0.3528340.323880-0.008748-0.0315290.068167
10.1778061.433673-0.246529-0.3677180.253413-0.114293-0.18628570.01.00.00.1778061.433673-0.246529-0.3677180.253413-0.114293-0.186285-0.0834020.655053-0.054279-0.1693700.0051140.071369-0.173647-0.2527751.191304-0.2672680.6157980.0319070.282625-0.132529-0.4186430.552794-0.056540-0.1438291.129118
...............................................................................................................
410773-0.748366-0.804077-0.6503710.7027880.2534131.3654411.50183352.03.00.0-0.748366-0.804077-0.6503710.7027880.2534131.3654411.5018330.6614270.3751990.7520881.5701631.274445-0.0025210.7455072.3822580.0715992.8288901.4312721.729715-0.1604911.5814362.432437-0.8431500.4114751.671069-1.052343
4107741.257769-1.101815-0.0446080.0910701.1759110.553789-0.14254414.01.00.01.257769-1.101815-0.0446080.0910701.1759110.553789-0.1425440.807887-0.3300700.982478-0.0027421.3389960.635065-0.040302-0.055435-1.0255880.202136-0.9160540.9406241.2179100.311575-0.174762-0.4153590.1356170.359680-0.246790
\n", "

410775 rows × 36 columns

\n", "
" ], "text/plain": [ " num__geo_lat num__geo_lon num__level num__levels num__rooms \\\n", "0 0.495902 -0.449742 0.359235 -0.214789 0.253413 \n", "1 0.177806 1.433673 -0.246529 -0.367718 0.253413 \n", "... ... ... ... ... ... \n", "410773 -0.748366 -0.804077 -0.650371 0.702788 0.253413 \n", "410774 1.257769 -1.101815 -0.044608 0.091070 1.175911 \n", "\n", " num__area num__kitchen_area cat__region cat__building_type \\\n", "0 0.063735 -0.186285 20.0 1.0 \n", "1 -0.114293 -0.186285 70.0 1.0 \n", "... ... ... ... ... \n", "410773 1.365441 1.501833 52.0 3.0 \n", "410774 0.553789 -0.142544 14.0 1.0 \n", "\n", " cat__object_type afr__geo_lat afr__geo_lon afr__level afr__levels \\\n", "0 0.0 0.495902 -0.449742 0.359235 -0.214789 \n", "1 0.0 0.177806 1.433673 -0.246529 -0.367718 \n", "... ... ... ... ... ... \n", "410773 0.0 -0.748366 -0.804077 -0.650371 0.702788 \n", "410774 0.0 1.257769 -1.101815 -0.044608 0.091070 \n", "\n", " afr__rooms afr__area afr__kitchen_area afr__area*rooms \\\n", "0 0.253413 0.063735 -0.186285 0.006208 \n", "1 0.253413 -0.114293 -0.186285 -0.083402 \n", "... ... ... ... ... \n", "410773 0.253413 1.365441 1.501833 0.661427 \n", "410774 1.175911 0.553789 -0.142544 0.807887 \n", "\n", " afr__area*geo_lon afr__levels*rooms afr__area*kitchen_area \\\n", "0 -0.195129 0.060916 -0.132188 \n", "1 0.655053 -0.054279 -0.169370 \n", "... ... ... ... \n", "410773 0.375199 0.752088 1.570163 \n", "410774 -0.330070 0.982478 -0.002742 \n", "\n", " afr__sqrt(area)*geo_lat afr__sqrt(area)*log(level) \\\n", "0 0.373151 0.688076 \n", "1 0.005114 0.071369 \n", "... ... ... \n", "410773 1.274445 -0.002521 \n", "410774 1.338996 0.635065 \n", "\n", " afr__kitchen_area*log(level) afr__sqrt(area)*kitchen_area \\\n", "0 0.044178 -0.211335 \n", "1 -0.173647 -0.252775 \n", "... ... ... 
\n", "410773 0.745507 2.382258 \n", "410774 -0.040302 -0.055435 \n", "\n", " afr__geo_lon*log(kitchen_area) afr__sqrt(area)*sqrt(kitchen_area) \\\n", "0 -0.481294 -0.153548 \n", "1 1.191304 -0.267268 \n", "... ... ... \n", "410773 0.071599 2.828890 \n", "410774 -1.025588 0.202136 \n", "\n", " afr__sqrt(geo_lon)*sqrt(kitchen_area) afr__log(area) \\\n", "0 -0.490805 0.307835 \n", "1 0.615798 0.031907 \n", "... ... ... \n", "410773 1.431272 1.729715 \n", "410774 -0.916054 0.940624 \n", "\n", " afr__rooms*log(level) afr__kitchen_area*rooms \\\n", "0 0.690329 -0.132529 \n", "1 0.282625 -0.132529 \n", "... ... ... \n", "410773 -0.160491 1.581436 \n", "410774 1.217910 0.311575 \n", "\n", " afr__kitchen_area*levels afr__sqrt(geo_lon)*sqrt(level) \\\n", "0 -0.352834 0.323880 \n", "1 -0.418643 0.552794 \n", "... ... ... \n", "410773 2.432437 -0.843150 \n", "410774 -0.174762 -0.415359 \n", "\n", " afr__area**(3/2) afr__geo_lat*log(kitchen_area) \\\n", "0 -0.008748 -0.031529 \n", "1 -0.056540 -0.143829 \n", "... ... ... \n", "410773 0.411475 1.671069 \n", "410774 0.135617 0.359680 \n", "\n", " afr__geo_lat*log(geo_lon) \n", "0 0.068167 \n", "1 1.129118 \n", "... ... 
\n", "410773 -1.052343 \n", "410774 -0.246790 \n", "\n", "[410775 rows x 36 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "with pd.option_context('display.max_rows', 5, 'display.max_columns', None):\n", " display (X_train_afr)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pipeline_afr = Pipeline(steps=[('preprocessor', preprocessor_afr), \n", " ('model', regressor)])\n", "\n", "pipeline_afr.fit(X_train, y_train)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predictions = pipeline_afr.predict(X_test) \n", "\n", "metrics = {}\n", "metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n", "metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n", "metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n", "\n", "metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", "\n", "with mlflow.start_run(run_name='autofeat', experiment_id=experiment_id) as run:\n", " # получаем уникальный идентификатор запуска эксперимента\n", " run_id = run.info.run_id \n", " mlflow.sklearn.log_model(pipeline_afr, \n", " artifact_path=\"models\",\n", " signature=signature,\n", " input_example=input_example,\n", " pip_requirements=req_file\n", " )\n", " mlflow.log_metrics(metrics)\n", " mlflow.log_artifact(art)\n", " mlflow.log_params(pipeline_afr.get_params())\n", "\n", "run = mlflow.get_run(run_id) \n", "assert (run.info.status =='FINISHED')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# FEATURE SELECTION\n", "## RFE\n", "### Используем autofeat признаки\n", "Поскольку autofeat дает разные совокупности сгенерированных признаков, мы можем добавить выбор информативных только как шаг 
пайплайна " ] }, { "cell_type": "code", "execution_count": 294, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
num__geo_latnum__geo_lonnum__levelnum__levelsnum__roomsnum__areanum__kitchen_areacat__regioncat__building_typecat__object_type...afr__sqrt(area)*sqrt(kitchen_area)afr__sqrt(geo_lon)*sqrt(kitchen_area)afr__log(area)afr__rooms*log(level)afr__kitchen_area*roomsafr__kitchen_area*levelsafr__sqrt(geo_lon)*sqrt(level)afr__area**(3/2)afr__geo_lat*log(kitchen_area)afr__geo_lat*log(geo_lon)
00.495902-0.4497420.359235-0.2147890.2534130.063735-0.18628520.01.00.0...-0.153548-0.4908050.3078350.690329-0.132529-0.3528340.323880-0.008748-0.0315290.068167
10.1778061.433673-0.246529-0.3677180.253413-0.114293-0.18628570.01.00.0...-0.2672680.6157980.0319070.282625-0.132529-0.4186430.552794-0.056540-0.1438291.129118
20.4405480.047222-0.448450-0.367718-0.669085-0.456947-0.14254415.03.01.0...-0.454880-0.067183-0.603122-0.512211-0.487813-0.383803-0.243092-0.1408000.0634640.460495
3-1.588818-0.722477-0.246529-0.9794360.253413-0.181292-0.14254418.01.00.0...-0.254514-0.607607-0.0803040.282625-0.088119-0.662523-0.369355-0.073838-0.672113-1.481033
41.4936621.1258190.1573130.5498580.2534130.615045-0.01132210.02.00.0...0.4386000.8913831.0096120.5744970.0451120.2084780.9459810.1549020.7808551.923382
..................................................................
4107700.5920110.3550140.5611561.0086460.253413-0.079836-0.09265354.02.00.0...-0.1200350.2375800.0877250.792500-0.0374630.3227970.974381-0.0474960.2430180.789871
4107710.2404780.392697-0.650371-0.9794360.253413-0.334434-0.40498945.03.00.0...-0.716150-0.510766-0.357277-0.160491-0.354582-0.778657-0.406361-0.111897-0.8081570.574534
410772-1.936771-0.6888300.3592350.855717-0.669085-0.456947-0.14254418.00.01.0...-0.454880-0.581851-0.603122-0.211576-0.4878130.1736380.170166-0.140800-0.798234-1.663294
410773-0.748366-0.804077-0.6503710.7027880.2534131.3654411.50183352.03.00.0...2.8288901.4312721.729715-0.1604911.5814362.432437-0.8431500.4114751.671069-1.052343
4107741.257769-1.101815-0.0446080.0910701.1759110.553789-0.14254414.01.00.0...0.202136-0.9160540.9406241.2179100.311575-0.174762-0.4153590.1356170.359680-0.246790
\n", "

410775 rows × 36 columns

\n", "
" ], "text/plain": [ " num__geo_lat num__geo_lon num__level num__levels num__rooms \\\n", "0 0.495902 -0.449742 0.359235 -0.214789 0.253413 \n", "1 0.177806 1.433673 -0.246529 -0.367718 0.253413 \n", "2 0.440548 0.047222 -0.448450 -0.367718 -0.669085 \n", "3 -1.588818 -0.722477 -0.246529 -0.979436 0.253413 \n", "4 1.493662 1.125819 0.157313 0.549858 0.253413 \n", "... ... ... ... ... ... \n", "410770 0.592011 0.355014 0.561156 1.008646 0.253413 \n", "410771 0.240478 0.392697 -0.650371 -0.979436 0.253413 \n", "410772 -1.936771 -0.688830 0.359235 0.855717 -0.669085 \n", "410773 -0.748366 -0.804077 -0.650371 0.702788 0.253413 \n", "410774 1.257769 -1.101815 -0.044608 0.091070 1.175911 \n", "\n", " num__area num__kitchen_area cat__region cat__building_type \\\n", "0 0.063735 -0.186285 20.0 1.0 \n", "1 -0.114293 -0.186285 70.0 1.0 \n", "2 -0.456947 -0.142544 15.0 3.0 \n", "3 -0.181292 -0.142544 18.0 1.0 \n", "4 0.615045 -0.011322 10.0 2.0 \n", "... ... ... ... ... \n", "410770 -0.079836 -0.092653 54.0 2.0 \n", "410771 -0.334434 -0.404989 45.0 3.0 \n", "410772 -0.456947 -0.142544 18.0 0.0 \n", "410773 1.365441 1.501833 52.0 3.0 \n", "410774 0.553789 -0.142544 14.0 1.0 \n", "\n", " cat__object_type ... afr__sqrt(area)*sqrt(kitchen_area) \\\n", "0 0.0 ... -0.153548 \n", "1 0.0 ... -0.267268 \n", "2 1.0 ... -0.454880 \n", "3 0.0 ... -0.254514 \n", "4 0.0 ... 0.438600 \n", "... ... ... ... \n", "410770 0.0 ... -0.120035 \n", "410771 0.0 ... -0.716150 \n", "410772 1.0 ... -0.454880 \n", "410773 0.0 ... 2.828890 \n", "410774 0.0 ... 0.202136 \n", "\n", " afr__sqrt(geo_lon)*sqrt(kitchen_area) afr__log(area) \\\n", "0 -0.490805 0.307835 \n", "1 0.615798 0.031907 \n", "2 -0.067183 -0.603122 \n", "3 -0.607607 -0.080304 \n", "4 0.891383 1.009612 \n", "... ... ... 
\n", "410770 0.237580 0.087725 \n", "410771 -0.510766 -0.357277 \n", "410772 -0.581851 -0.603122 \n", "410773 1.431272 1.729715 \n", "410774 -0.916054 0.940624 \n", "\n", " afr__rooms*log(level) afr__kitchen_area*rooms \\\n", "0 0.690329 -0.132529 \n", "1 0.282625 -0.132529 \n", "2 -0.512211 -0.487813 \n", "3 0.282625 -0.088119 \n", "4 0.574497 0.045112 \n", "... ... ... \n", "410770 0.792500 -0.037463 \n", "410771 -0.160491 -0.354582 \n", "410772 -0.211576 -0.487813 \n", "410773 -0.160491 1.581436 \n", "410774 1.217910 0.311575 \n", "\n", " afr__kitchen_area*levels afr__sqrt(geo_lon)*sqrt(level) \\\n", "0 -0.352834 0.323880 \n", "1 -0.418643 0.552794 \n", "2 -0.383803 -0.243092 \n", "3 -0.662523 -0.369355 \n", "4 0.208478 0.945981 \n", "... ... ... \n", "410770 0.322797 0.974381 \n", "410771 -0.778657 -0.406361 \n", "410772 0.173638 0.170166 \n", "410773 2.432437 -0.843150 \n", "410774 -0.174762 -0.415359 \n", "\n", " afr__area**(3/2) afr__geo_lat*log(kitchen_area) \\\n", "0 -0.008748 -0.031529 \n", "1 -0.056540 -0.143829 \n", "2 -0.140800 0.063464 \n", "3 -0.073838 -0.672113 \n", "4 0.154902 0.780855 \n", "... ... ... \n", "410770 -0.047496 0.243018 \n", "410771 -0.111897 -0.808157 \n", "410772 -0.140800 -0.798234 \n", "410773 0.411475 1.671069 \n", "410774 0.135617 0.359680 \n", "\n", " afr__geo_lat*log(geo_lon) \n", "0 0.068167 \n", "1 1.129118 \n", "2 0.460495 \n", "3 -1.481033 \n", "4 1.923382 \n", "... ... 
\n", "410770 0.789871 \n", "410771 0.574534 \n", "410772 -1.663294 \n", "410773 -1.052343 \n", "410774 -0.246790 \n", "\n", "[410775 rows x 36 columns]" ] }, "execution_count": 294, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.feature_selection import RFE\n", "X_train_afr" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "rfe_selector = RFE(estimator=regressor, n_features_to_select=12, step = 0.2) #drop 20% of features each iteration\n", "X_train_rfe = rfe_selector.fit_transform(X_train_afr,y_train)" ] }, { "cell_type": "code", "execution_count": 297, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
num__geo_latnum__geo_lonafr__geo_lonafr__area*kitchen_areaafr__sqrt(area)*geo_latafr__sqrt(area)*log(level)afr__kitchen_area*log(level)afr__sqrt(area)*sqrt(kitchen_area)afr__rooms*log(level)afr__kitchen_area*roomsafr__sqrt(geo_lon)*sqrt(level)afr__geo_lat*log(geo_lon)
00.495902-0.449742-0.449742-0.1321880.3731510.6880760.044178-0.1535480.690329-0.1325290.3238800.068167
10.1778061.4336731.433673-0.1693700.0051140.071369-0.173647-0.2672680.282625-0.1325290.5527941.129118
20.4405480.0472220.047222-0.226261-0.425530-0.335537-0.239271-0.454880-0.512211-0.487813-0.2430920.460495
3-1.588818-0.722477-0.722477-0.165302-0.7232250.034116-0.129771-0.2545140.282625-0.088119-0.369355-1.481033
41.4936621.1258191.1258190.0943421.5222650.8627730.1944900.4386000.5744970.0451120.9459811.923382
.......................................
4107700.5920110.3550140.355014-0.1208410.2069260.7144990.226990-0.1200350.792500-0.0374630.9743810.789871
4107710.2404780.3926970.392697-0.296252-0.297209-0.551021-0.560144-0.716150-0.160491-0.354582-0.4063610.574534
410772-1.936771-0.688830-0.688830-0.226261-1.1927060.3062800.100868-0.454880-0.211576-0.4878130.170166-1.663294
410773-0.748366-0.804077-0.8040771.5701631.274445-0.0025210.7455072.828890-0.1604911.581436-0.843150-1.052343
4107741.257769-1.101815-1.101815-0.0027421.3389960.635065-0.0403020.2021361.2179100.311575-0.415359-0.246790
\n", "

410775 rows × 12 columns

\n", "
" ], "text/plain": [ " num__geo_lat num__geo_lon afr__geo_lon afr__area*kitchen_area \\\n", "0 0.495902 -0.449742 -0.449742 -0.132188 \n", "1 0.177806 1.433673 1.433673 -0.169370 \n", "2 0.440548 0.047222 0.047222 -0.226261 \n", "3 -1.588818 -0.722477 -0.722477 -0.165302 \n", "4 1.493662 1.125819 1.125819 0.094342 \n", "... ... ... ... ... \n", "410770 0.592011 0.355014 0.355014 -0.120841 \n", "410771 0.240478 0.392697 0.392697 -0.296252 \n", "410772 -1.936771 -0.688830 -0.688830 -0.226261 \n", "410773 -0.748366 -0.804077 -0.804077 1.570163 \n", "410774 1.257769 -1.101815 -1.101815 -0.002742 \n", "\n", " afr__sqrt(area)*geo_lat afr__sqrt(area)*log(level) \\\n", "0 0.373151 0.688076 \n", "1 0.005114 0.071369 \n", "2 -0.425530 -0.335537 \n", "3 -0.723225 0.034116 \n", "4 1.522265 0.862773 \n", "... ... ... \n", "410770 0.206926 0.714499 \n", "410771 -0.297209 -0.551021 \n", "410772 -1.192706 0.306280 \n", "410773 1.274445 -0.002521 \n", "410774 1.338996 0.635065 \n", "\n", " afr__kitchen_area*log(level) afr__sqrt(area)*sqrt(kitchen_area) \\\n", "0 0.044178 -0.153548 \n", "1 -0.173647 -0.267268 \n", "2 -0.239271 -0.454880 \n", "3 -0.129771 -0.254514 \n", "4 0.194490 0.438600 \n", "... ... ... \n", "410770 0.226990 -0.120035 \n", "410771 -0.560144 -0.716150 \n", "410772 0.100868 -0.454880 \n", "410773 0.745507 2.828890 \n", "410774 -0.040302 0.202136 \n", "\n", " afr__rooms*log(level) afr__kitchen_area*rooms \\\n", "0 0.690329 -0.132529 \n", "1 0.282625 -0.132529 \n", "2 -0.512211 -0.487813 \n", "3 0.282625 -0.088119 \n", "4 0.574497 0.045112 \n", "... ... ... \n", "410770 0.792500 -0.037463 \n", "410771 -0.160491 -0.354582 \n", "410772 -0.211576 -0.487813 \n", "410773 -0.160491 1.581436 \n", "410774 1.217910 0.311575 \n", "\n", " afr__sqrt(geo_lon)*sqrt(level) afr__geo_lat*log(geo_lon) \n", "0 0.323880 0.068167 \n", "1 0.552794 1.129118 \n", "2 -0.243092 0.460495 \n", "3 -0.369355 -1.481033 \n", "4 0.945981 1.923382 \n", "... ... ... 
\n", "410770 0.974381 0.789871 \n", "410771 -0.406361 0.574534 \n", "410772 0.170166 -1.663294 \n", "410773 -0.843150 -1.052343 \n", "410774 -0.415359 -0.246790 \n", "\n", "[410775 rows x 12 columns]" ] }, "execution_count": 297, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train_afr_rfe = pd.DataFrame(X_train_rfe, columns=rfe_selector.get_feature_names_out())\n", "X_train_afr_rfe" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "rfe_pipeline = Pipeline(steps=[\n", " ('preprocessor', preprocessor_afr), \n", " ('rfe_extractor', RFE(estimator=regressor, n_features_to_select=12, step = 0.2)),\n", " ('model', regressor)\n", "])\n", "\n", "rfe_pipeline.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 301, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mae': 1431925.3203264712,\n", " 'mape': 1.239752923791043e+18,\n", " 'mse': 261947924998018.2}" ] }, "execution_count": 301, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions_rfe = rfe_pipeline.predict(X_test)\n", "\n", "metrics = {}\n", "metrics[\"mae\"] = mean_absolute_error(y_test, predictions_rfe) \n", "metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions_rfe)\n", "metrics[\"mse\"] = mean_squared_error(y_test, predictions_rfe)\n", "\n", "metrics" ] }, { "cell_type": "code", "execution_count": 302, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 40.15it/s]\n", "2024/10/17 14:26:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run rfe_feature_selection at: http://127.0.0.1:5000/#/experiments/1/runs/96f0bbcd6d88466abcf38f3b53f06ff1.\n", "2024/10/17 14:26:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1.\n" ] } ], "source": [ "experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", 
"RUN_NAME = 'rfe_feature_selection'\n", "\n", "with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n", " # получаем уникальный идентификатор запуска эксперимента\n", " run_id = run.info.run_id \n", " mlflow.sklearn.log_model(rfe_pipeline, \n", " artifact_path=\"models\",\n", " signature=signature,\n", " input_example=input_example,\n", " pip_requirements=req_file\n", " )\n", " mlflow.log_metrics(metrics)\n", " mlflow.log_artifact(art)\n", " mlflow.log_params(model_sklearn.get_params())\n", "\n", "run = mlflow.get_run(run_id) \n", "assert (run.info.status =='FINISHED')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Используем sklearn признаки\n", "Тут мы можем отобрать признаки один раз на обучении, а далее в качестве шага пайплайна использовать написанный класс ColumnExtractor для выбора нуных столбцов" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "rfe_skl_selector = RFE(estimator=regressor, n_features_to_select=12, step = 0.2) #drop 20% of features each iteration\n", "X_train_skl_rfe = rfe_skl_selector.fit_transform(X_train_sklearn,y_train)" ] }, { "cell_type": "code", "execution_count": 305, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
num__geo_latnum__geo_lonnum__levelnum__roomsnum__kitchen_areacat__regionquantile__geo_latquantile__geo_lonquantile__levelpoly__area kitchen_areaspline__area_sp_0spline__area_sp_2
00.495902-0.4497420.3592350.253413-0.18628520.00.7662570.5110280.717217-0.1321880.1558060.178013
10.1778061.433673-0.2465290.253413-0.18628570.00.2971420.8679990.522022-0.1693700.1569210.176803
20.4405480.047222-0.448450-0.669085-0.14254415.00.7323300.6299840.417417-0.2262610.1590800.174488
3-1.588818-0.722477-0.2465290.253413-0.14254418.00.1487890.2952620.522022-0.1653020.1573410.176349
41.4936621.1258190.1573130.253413-0.01132210.00.9859370.7583630.6626630.0943420.1523900.181792
.......................................
4107700.5920110.3550140.5611560.253413-0.09265354.00.7883930.6867280.771271-0.1208410.1567050.177037
4107710.2404780.392697-0.6503710.253413-0.40498945.00.4940620.7172400.309810-0.2962520.1583060.175314
410772-1.936771-0.6888300.359235-0.669085-0.14254418.00.1313520.3276130.717217-0.2262610.1590800.174488
410773-0.748366-0.804077-0.6503710.2534131.50183352.00.1931430.1147530.3098101.5701630.1478200.187011
4107741.257769-1.101815-0.0446081.175911-0.14254414.00.9080360.0757250.604605-0.0027420.1527670.181370
\n", "

410775 rows × 12 columns

\n", "
" ], "text/plain": [ " num__geo_lat num__geo_lon num__level num__rooms num__kitchen_area \\\n", "0 0.495902 -0.449742 0.359235 0.253413 -0.186285 \n", "1 0.177806 1.433673 -0.246529 0.253413 -0.186285 \n", "2 0.440548 0.047222 -0.448450 -0.669085 -0.142544 \n", "3 -1.588818 -0.722477 -0.246529 0.253413 -0.142544 \n", "4 1.493662 1.125819 0.157313 0.253413 -0.011322 \n", "... ... ... ... ... ... \n", "410770 0.592011 0.355014 0.561156 0.253413 -0.092653 \n", "410771 0.240478 0.392697 -0.650371 0.253413 -0.404989 \n", "410772 -1.936771 -0.688830 0.359235 -0.669085 -0.142544 \n", "410773 -0.748366 -0.804077 -0.650371 0.253413 1.501833 \n", "410774 1.257769 -1.101815 -0.044608 1.175911 -0.142544 \n", "\n", " cat__region quantile__geo_lat quantile__geo_lon quantile__level \\\n", "0 20.0 0.766257 0.511028 0.717217 \n", "1 70.0 0.297142 0.867999 0.522022 \n", "2 15.0 0.732330 0.629984 0.417417 \n", "3 18.0 0.148789 0.295262 0.522022 \n", "4 10.0 0.985937 0.758363 0.662663 \n", "... ... ... ... ... \n", "410770 54.0 0.788393 0.686728 0.771271 \n", "410771 45.0 0.494062 0.717240 0.309810 \n", "410772 18.0 0.131352 0.327613 0.717217 \n", "410773 52.0 0.193143 0.114753 0.309810 \n", "410774 14.0 0.908036 0.075725 0.604605 \n", "\n", " poly__area kitchen_area spline__area_sp_0 spline__area_sp_2 \n", "0 -0.132188 0.155806 0.178013 \n", "1 -0.169370 0.156921 0.176803 \n", "2 -0.226261 0.159080 0.174488 \n", "3 -0.165302 0.157341 0.176349 \n", "4 0.094342 0.152390 0.181792 \n", "... ... ... ... 
# %%
# Wrap the RFE-reduced training matrix in a DataFrame so that every surviving
# column carries the name reported by the selector.
X_train_skl_rfe = pd.DataFrame(
    X_train_skl_rfe,
    columns=rfe_skl_selector.get_feature_names_out(),
)
X_train_skl_rfe

# %%
rfe_cols = X_train_skl_rfe.columns.tolist()
rfe_cols

# %%
# Boolean mask over the preprocessor output columns (True = column kept).
rfe_idx = rfe_skl_selector.support_
rfe_idx

# %%
# The selected columns must be logged, otherwise the information about which
# features were chosen is lost.
# The mask is written as a plain Python list: str(ndarray) is not
# machine-readable and numpy elides long arrays with "...".
with open('rfe_skl_idx.txt', 'w', encoding='utf-8') as f:
    f.write(str(rfe_idx.tolist()))
with open('rfe_skl_cols.txt', 'w', encoding='utf-8') as f:
    f.write(str(rfe_cols))

# %%
from sklearn.base import BaseEstimator, TransformerMixin


class ColumnExtractor(BaseEstimator, TransformerMixin):
    """Pipeline step that keeps a fixed subset of columns.

    Parameters
    ----------
    cols : boolean mask or sequence of int
        Positional selector applied to the second axis of the input.

    Inheriting from ``BaseEstimator``/``TransformerMixin`` gives the step
    ``get_params``/``set_params`` and ``fit_transform``, so it behaves like
    any other scikit-learn transformer when the pipeline is cloned or tuned.
    """

    def __init__(self, cols):
        self.cols = cols

    def fit(self, X, y=None):
        """Stateless: nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return ``X`` restricted to the configured columns."""
        # DataFrames need positional indexing through .iloc; arrays (and
        # sparse matrices) accept the selector directly.
        if hasattr(X, "iloc"):
            return X.iloc[:, self.cols]
        return X[:, self.cols]


# %%
rfe_skl_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_idx)),
    ('model', regressor),
])

rfe_skl_pipeline.fit(X_train, y_train)

# %%
predictions_rfe_skl = rfe_skl_pipeline.predict(X_test)

metrics = {
    "mae": mean_absolute_error(y_test, predictions_rfe_skl),
    "mape": mean_absolute_percentage_error(y_test, predictions_rfe_skl),
    "mse": mean_squared_error(y_test, predictions_rfe_skl),
}

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
RUN_NAME = 'rfe_skl_feature_selection'

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # unique identifier of this experiment run
    run_id = run.info.run_id
    # Log the pipeline that was fitted and evaluated above, so that the stored
    # model matches the stored metrics.
    mlflow.sklearn.log_model(rfe_skl_pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file
                             )
    mlflow.log_metrics(metrics)
    mlflow.log_artifact('rfe_skl_cols.txt')
    mlflow.log_artifact('rfe_skl_idx.txt')
    # NOTE(review): parameters come from `model_sklearn`, while the pipeline is
    # built around `regressor` - confirm they describe the same model.
    mlflow.log_params(model_sklearn.get_params())

run = mlflow.get_run(run_id)
assert run.info.status == 'FINISHED'

# %% [markdown]
# ## mlxtend
# https://github.com/rasbt/mlxtend/blob/master/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb

# %%
from mlxtend.feature_selection import SequentialFeatureSelector
# Alternative implementation with different constructor arguments:
# from sklearn.feature_selection import SequentialFeatureSelector

# %%
# A small forest keeps the search fast; the fixed seed makes the selected
# feature set reproducible between runs.
sfs = SequentialFeatureSelector(RandomForestRegressor(n_estimators=3, random_state=2),
                                k_features=3,
                                forward=True,
                                floating=False,  # True enables the floating variant, which may also drop already selected features
                                scoring='neg_mean_absolute_error',
                                cv=2)

sfs.fit(X_train_sklearn, y_train)
# %%
# k_feature_names_ is converted to a list so .loc treats it as a collection of
# column labels rather than as a single tuple key.
selected_features_sfs = X_train_sklearn.loc[:, list(sfs.k_feature_names_)]
selected_features_sfs

# %%
rfe_sfs_idx = list(sfs.k_feature_idx_)
rfe_sfs_col = list(sfs.k_feature_names_)
rfe_sfs_col

# %%
# Persist the SFS selection so it can be attached to the MLflow run below.
with open('rfe_sfs_idx.txt', 'w', encoding='utf-8') as f:
    f.write(str(rfe_sfs_idx))
with open('rfe_sfs_cols.txt', 'w', encoding='utf-8') as f:
    f.write(str(rfe_sfs_col))

# %%
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs

fig = plot_sfs(sfs.get_metric_dict(), kind='std_dev')

plt.title('Sequential Forward Selection (w. StdDev)')
plt.grid()
plt.show()

# %%
rfe_sfs_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
    ('model', regressor),
])

rfe_sfs_pipeline.fit(X_train, y_train)

# %%
predictions_sfs = rfe_sfs_pipeline.predict(X_test)

metrics = {
    "mae": mean_absolute_error(y_test, predictions_sfs),
    "mape": mean_absolute_percentage_error(y_test, predictions_sfs),
    "mse": mean_squared_error(y_test, predictions_sfs),
}

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
RUN_NAME = 'rfe_sfs_feature_selection'

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # unique identifier of this experiment run
    run_id = run.info.run_id
    mlflow.sklearn.log_model(rfe_sfs_pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file
                             )
    mlflow.log_metrics(metrics)
    # Attach the features chosen by SFS for this run, not the files written
    # for the earlier sklearn RFE run.
    mlflow.log_artifact('rfe_sfs_cols.txt')
    mlflow.log_artifact('rfe_sfs_idx.txt')
    # NOTE(review): parameters come from `model_sklearn`, while the pipeline is
    # built around `regressor` - confirm they describe the same model.
    mlflow.log_params(model_sklearn.get_params())

run = mlflow.get_run(run_id)
assert run.info.status == 'FINISHED'

# %% [markdown]
# Features selected by SFS and SBS can be combined: take their union or their
# intersection.
# %% [markdown]
# Selections can also be combined with features obtained by other approaches -
# there is a whole field here for experiments.

# %% [markdown]
# # HYPERPARAMS
# ## Gridsearch

# %%
from sklearn.model_selection import GridSearchCV

# %%
param_grid = {
    'model__depth': [1, 3, 5]
}

# %%
gs = GridSearchCV(rfe_sfs_pipeline, param_grid, cv=2, scoring='neg_mean_absolute_error')
gs.fit(X_train, y_train)
print("Лучшие гиперпараметры:", gs.best_params_)

# %%
# Build the final pipeline from the depth the search actually selected instead
# of a value copied by hand from an earlier run.
gs_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
    ('model', CatBoostRegressor(depth=gs.best_params_['model__depth']))
])

# Next step: run the standard check on the test set and log the run.

# %% [markdown]
# RandomSearch can be used instead of GridSearch.

# %% [markdown]
# ## Optuna

# %%
import optuna

# %%
# Validation split carved out of the training data: trials are scored on it so
# that the test set stays untouched until the final evaluation.
X_opt_train, X_opt_val, y_opt_train, y_opt_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=2
)


def objective(trial):
    """Optuna objective: validation MAE of a CatBoost pipeline.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``depth`` and ``learning_rate``.

    Returns
    -------
    float
        Mean absolute error on the validation split (lower is better).
    """
    # suggest hyperparameters
    depth = trial.suggest_int('depth', 1, 10)
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1)

    # build and fit the model
    opt_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor_sklearn),
        ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
        ('model', CatBoostRegressor(depth=depth, learning_rate=learning_rate, verbose=0))
    ])

    opt_pipeline.fit(X_opt_train, y_opt_train)

    # predict and compute MAE on the validation split
    preds = opt_pipeline.predict(X_opt_val)
    mae = mean_absolute_error(y_opt_val, preds)

    return mae


# %%
# A seeded sampler makes the sequence of suggested hyperparameters repeatable.
study = optuna.create_study(direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=2))
study.optimize(objective, n_trials=10)

# print the results
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

# %%
# Reuse the best trial's parameters rather than hard-coded numbers.
opt_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
    ('model', CatBoostRegressor(**study.best_params))
])

# Next step: run the standard check on the test set and log the run.

# %% [markdown]
# Choose the best model.
# Train it on the whole dataset (not only on the train part).
# This is the model that will be deployed.