Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

4789 строк
258 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"import pandas as pd\n",
"import numpy\n",
"\n",
"from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder\n",
"\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.linear_model import LinearRegression\n",
"from catboost import CatBoostRegressor\n",
"\n",
"from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 547701 entries, 313199 to 690900\n",
"Data columns (total 13 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 price 547701 non-null int64 \n",
" 1 date 547701 non-null object \n",
" 2 time 547701 non-null object \n",
" 3 geo_lat 547701 non-null float32 \n",
" 4 geo_lon 547701 non-null float32 \n",
" 5 region 547701 non-null category\n",
" 6 building_type 547701 non-null category\n",
" 7 level 547701 non-null int8 \n",
" 8 levels 547701 non-null int8 \n",
" 9 rooms 547701 non-null int8 \n",
" 10 area 547701 non-null float16 \n",
" 11 kitchen_area 547701 non-null float16 \n",
" 12 object_type 547701 non-null category\n",
"dtypes: category(3), float16(2), float32(2), int64(1), int8(3), object(2)\n",
"memory usage: 26.1+ MB\n"
]
}
],
"source": [
"df = pd.read_pickle('data/clean_data.pkl').sample(frac=0.1, random_state = 2) # Уменьшаем размер выборки, чтобы модель быстрее обучалась на лекции\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"df = df.rename(columns={'price': 'target'})\n",
"df = df.drop(columns=['date', 'time'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target</th>\n",
" <th>geo_lat</th>\n",
" <th>geo_lon</th>\n",
" <th>region</th>\n",
" <th>building_type</th>\n",
" <th>level</th>\n",
" <th>levels</th>\n",
" <th>rooms</th>\n",
" <th>area</th>\n",
" <th>kitchen_area</th>\n",
" <th>object_type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>313199</th>\n",
" <td>4999999</td>\n",
" <td>59.958458</td>\n",
" <td>30.215530</td>\n",
" <td>2661</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>36.00000</td>\n",
" <td>7.199219</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2437764</th>\n",
" <td>2150000</td>\n",
" <td>45.072674</td>\n",
" <td>41.936996</td>\n",
" <td>2900</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>52.00000</td>\n",
" <td>15.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4949072</th>\n",
" <td>8600000</td>\n",
" <td>59.939358</td>\n",
" <td>30.437069</td>\n",
" <td>2661</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>22</td>\n",
" <td>1</td>\n",
" <td>37.09375</td>\n",
" <td>9.796875</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4109465</th>\n",
" <td>5100000</td>\n",
" <td>59.740479</td>\n",
" <td>30.569540</td>\n",
" <td>2661</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>74.50000</td>\n",
" <td>9.500000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2187702</th>\n",
" <td>3470000</td>\n",
" <td>56.324062</td>\n",
" <td>44.005390</td>\n",
" <td>2871</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>26</td>\n",
" <td>2</td>\n",
" <td>54.00000</td>\n",
" <td>8.000000</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5188085</th>\n",
" <td>2300000</td>\n",
" <td>57.750603</td>\n",
" <td>40.866467</td>\n",
" <td>4189</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>38.00000</td>\n",
" <td>11.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4542014</th>\n",
" <td>6700000</td>\n",
" <td>55.911720</td>\n",
" <td>37.737419</td>\n",
" <td>81</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>66.37500</td>\n",
" <td>8.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3306731</th>\n",
" <td>3850000</td>\n",
" <td>51.704510</td>\n",
" <td>39.273037</td>\n",
" <td>2072</td>\n",
" <td>2</td>\n",
" <td>10</td>\n",
" <td>18</td>\n",
" <td>3</td>\n",
" <td>89.50000</td>\n",
" <td>14.203125</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>520293</th>\n",
" <td>1878885</td>\n",
" <td>54.943577</td>\n",
" <td>82.958862</td>\n",
" <td>9654</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>3</td>\n",
" <td>87.75000</td>\n",
" <td>12.921875</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690900</th>\n",
" <td>4097350</td>\n",
" <td>59.882702</td>\n",
" <td>30.451246</td>\n",
" <td>2661</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>23</td>\n",
" <td>1</td>\n",
" <td>36.09375</td>\n",
" <td>16.203125</td>\n",
" <td>11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>547701 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" target geo_lat geo_lon region building_type level levels \\\n",
"313199 4999999 59.958458 30.215530 2661 3 8 13 \n",
"2437764 2150000 45.072674 41.936996 2900 3 5 5 \n",
"4949072 8600000 59.939358 30.437069 2661 2 11 22 \n",
"4109465 5100000 59.740479 30.569540 2661 1 2 9 \n",
"2187702 3470000 56.324062 44.005390 2871 2 11 26 \n",
"... ... ... ... ... ... ... ... \n",
"5188085 2300000 57.750603 40.866467 4189 3 2 3 \n",
"4542014 6700000 55.911720 37.737419 81 3 2 5 \n",
"3306731 3850000 51.704510 39.273037 2072 2 10 18 \n",
"520293 1878885 54.943577 82.958862 9654 1 1 10 \n",
"690900 4097350 59.882702 30.451246 2661 2 6 23 \n",
"\n",
" rooms area kitchen_area object_type \n",
"313199 1 36.00000 7.199219 1 \n",
"2437764 1 52.00000 15.000000 1 \n",
"4949072 1 37.09375 9.796875 1 \n",
"4109465 3 74.50000 9.500000 1 \n",
"2187702 2 54.00000 8.000000 11 \n",
"... ... ... ... ... \n",
"5188085 1 38.00000 11.000000 1 \n",
"4542014 2 66.37500 8.000000 1 \n",
"3306731 3 89.50000 14.203125 1 \n",
"520293 3 87.75000 12.921875 11 \n",
"690900 1 36.09375 16.203125 11 \n",
"\n",
"[547701 rows x 11 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(df.drop('target', axis=1), df['target'], test_size=0.25, random_state=2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"geo_lat 56.327686\n",
"geo_lon 43.928062\n",
"region 2871.000000\n",
"building_type 1.000000\n",
"level 8.000000\n",
"levels 10.000000\n",
"rooms 2.000000\n",
"area 56.000000\n",
"kitchen_area 8.500000\n",
"object_type 1.000000\n",
"Name: 879487, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['region', 'building_type', 'object_type']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_features = X_train.select_dtypes(include=['category','object']).columns.to_list()\n",
"cat_features"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['geo_lat', 'geo_lon', 'level', 'levels', 'rooms', 'area', 'kitchen_area']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"num_features = X_train.select_dtypes(include=['number']).columns.to_list()\n",
"num_features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://scikit-learn.org/stable/api/sklearn.preprocessing.html - разные способы кодирования и масштабирования признаков"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"s_scaler = StandardScaler()\n",
"l_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=99999999) # unknown_value не должен совпадать ни с одним кодом категорий, полученным при fit (коды идут от 0 до n_categories - 1)\n",
"regressor = CatBoostRegressor()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Column transformer"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Для удобной работы со столбцами\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('num', s_scaler, num_features), # преобразования для числовых признаков\n",
" ('cat', l_encoder, cat_features), # преобразования для категориальных признаков\n",
" ],\n",
" remainder='drop' ) # Удаляем столбцы, которые не затронуты преобразованиями"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Learning rate set to 0.105957\n",
"0:\tlearn: 22102085.4544239\ttotal: 67.8ms\tremaining: 1m 7s\n",
"1:\tlearn: 21994630.3403412\ttotal: 87.1ms\tremaining: 43.4s\n",
"2:\tlearn: 21906687.8196027\ttotal: 105ms\tremaining: 34.8s\n",
"3:\tlearn: 21834890.5050552\ttotal: 124ms\tremaining: 30.9s\n",
"4:\tlearn: 21770820.6751194\ttotal: 143ms\tremaining: 28.5s\n",
"5:\tlearn: 21719543.9330108\ttotal: 163ms\tremaining: 27s\n",
"6:\tlearn: 21676510.1666598\ttotal: 183ms\tremaining: 25.9s\n",
"7:\tlearn: 21641355.8079016\ttotal: 202ms\tremaining: 25.1s\n",
"8:\tlearn: 21612289.0494648\ttotal: 223ms\tremaining: 24.5s\n",
"9:\tlearn: 21583808.7061085\ttotal: 242ms\tremaining: 24s\n",
"10:\tlearn: 21559288.9618040\ttotal: 266ms\tremaining: 23.9s\n",
"11:\tlearn: 21537048.9920531\ttotal: 286ms\tremaining: 23.5s\n",
"12:\tlearn: 21444526.1629239\ttotal: 306ms\tremaining: 23.3s\n",
"13:\tlearn: 21426349.3370315\ttotal: 324ms\tremaining: 22.8s\n",
"14:\tlearn: 21411901.2338278\ttotal: 344ms\tremaining: 22.6s\n",
"15:\tlearn: 21399279.8023459\ttotal: 360ms\tremaining: 22.1s\n",
"16:\tlearn: 21299421.1434822\ttotal: 382ms\tremaining: 22.1s\n",
"17:\tlearn: 21288560.2595435\ttotal: 400ms\tremaining: 21.8s\n",
"18:\tlearn: 21277368.8876877\ttotal: 421ms\tremaining: 21.8s\n",
"19:\tlearn: 21229205.2938305\ttotal: 441ms\tremaining: 21.6s\n",
"20:\tlearn: 21220238.4828158\ttotal: 462ms\tremaining: 21.5s\n",
"21:\tlearn: 21212849.7885410\ttotal: 485ms\tremaining: 21.6s\n",
"22:\tlearn: 21205304.4132821\ttotal: 507ms\tremaining: 21.5s\n",
"23:\tlearn: 21198813.8508479\ttotal: 534ms\tremaining: 21.7s\n",
"24:\tlearn: 21184627.2326983\ttotal: 550ms\tremaining: 21.4s\n",
"25:\tlearn: 21172748.3410688\ttotal: 569ms\tremaining: 21.3s\n",
"26:\tlearn: 21103305.4766520\ttotal: 582ms\tremaining: 21s\n",
"27:\tlearn: 21096636.4037750\ttotal: 594ms\tremaining: 20.6s\n",
"28:\tlearn: 21082202.2892557\ttotal: 607ms\tremaining: 20.3s\n",
"29:\tlearn: 21077185.5274954\ttotal: 619ms\tremaining: 20s\n",
"30:\tlearn: 21071613.1691098\ttotal: 632ms\tremaining: 19.7s\n",
"31:\tlearn: 21067654.8502386\ttotal: 644ms\tremaining: 19.5s\n",
"32:\tlearn: 21053425.8947843\ttotal: 659ms\tremaining: 19.3s\n",
"33:\tlearn: 21038024.0563140\ttotal: 670ms\tremaining: 19s\n",
"34:\tlearn: 20961357.9814339\ttotal: 682ms\tremaining: 18.8s\n",
"35:\tlearn: 20946027.4479676\ttotal: 695ms\tremaining: 18.6s\n",
"36:\tlearn: 20866676.4104322\ttotal: 707ms\tremaining: 18.4s\n",
"37:\tlearn: 20863078.3182449\ttotal: 717ms\tremaining: 18.1s\n",
"38:\tlearn: 20859910.3609500\ttotal: 728ms\tremaining: 17.9s\n",
"39:\tlearn: 20853462.2703730\ttotal: 739ms\tremaining: 17.7s\n",
"40:\tlearn: 20851610.3209036\ttotal: 749ms\tremaining: 17.5s\n",
"41:\tlearn: 20847674.0809285\ttotal: 760ms\tremaining: 17.3s\n",
"42:\tlearn: 20845384.9263391\ttotal: 772ms\tremaining: 17.2s\n",
"43:\tlearn: 20843256.7428906\ttotal: 784ms\tremaining: 17s\n",
"44:\tlearn: 20841580.8594834\ttotal: 797ms\tremaining: 16.9s\n",
"45:\tlearn: 20819301.2718345\ttotal: 810ms\tremaining: 16.8s\n",
"46:\tlearn: 20812094.5913582\ttotal: 823ms\tremaining: 16.7s\n",
"47:\tlearn: 20808932.0866915\ttotal: 834ms\tremaining: 16.5s\n",
"48:\tlearn: 20763172.9200413\ttotal: 851ms\tremaining: 16.5s\n",
"49:\tlearn: 20729084.6574594\ttotal: 863ms\tremaining: 16.4s\n",
"50:\tlearn: 20721820.5403996\ttotal: 876ms\tremaining: 16.3s\n",
"51:\tlearn: 20715664.3732084\ttotal: 887ms\tremaining: 16.2s\n",
"52:\tlearn: 20712658.7025295\ttotal: 897ms\tremaining: 16s\n",
"53:\tlearn: 20704254.1704930\ttotal: 908ms\tremaining: 15.9s\n",
"54:\tlearn: 20690967.9220470\ttotal: 919ms\tremaining: 15.8s\n",
"55:\tlearn: 20686546.8978473\ttotal: 929ms\tremaining: 15.7s\n",
"56:\tlearn: 20682362.4255777\ttotal: 941ms\tremaining: 15.6s\n",
"57:\tlearn: 20680744.8113421\ttotal: 951ms\tremaining: 15.4s\n",
"58:\tlearn: 20677926.0871267\ttotal: 962ms\tremaining: 15.3s\n",
"59:\tlearn: 20658478.3098789\ttotal: 974ms\tremaining: 15.3s\n",
"60:\tlearn: 20641964.4472246\ttotal: 987ms\tremaining: 15.2s\n",
"61:\tlearn: 20639551.4216654\ttotal: 999ms\tremaining: 15.1s\n",
"62:\tlearn: 20638344.8919341\ttotal: 1.01s\tremaining: 15s\n",
"63:\tlearn: 20635991.3894815\ttotal: 1.02s\tremaining: 15s\n",
"64:\tlearn: 20595846.8116432\ttotal: 1.03s\tremaining: 14.9s\n",
"65:\tlearn: 20592198.9483046\ttotal: 1.05s\tremaining: 14.8s\n",
"66:\tlearn: 20565316.0060422\ttotal: 1.06s\tremaining: 14.8s\n",
"67:\tlearn: 20563073.6783517\ttotal: 1.07s\tremaining: 14.7s\n",
"68:\tlearn: 20553650.4649650\ttotal: 1.08s\tremaining: 14.6s\n",
"69:\tlearn: 20545510.8230653\ttotal: 1.09s\tremaining: 14.5s\n",
"70:\tlearn: 20544114.9272186\ttotal: 1.1s\tremaining: 14.5s\n",
"71:\tlearn: 20541689.8802451\ttotal: 1.11s\tremaining: 14.4s\n",
"72:\tlearn: 20538792.7074671\ttotal: 1.13s\tremaining: 14.3s\n",
"73:\tlearn: 20517134.0713648\ttotal: 1.14s\tremaining: 14.3s\n",
"74:\tlearn: 20510477.9089445\ttotal: 1.15s\tremaining: 14.2s\n",
"75:\tlearn: 20494649.9067257\ttotal: 1.17s\tremaining: 14.2s\n",
"76:\tlearn: 20490851.9879851\ttotal: 1.18s\tremaining: 14.1s\n",
"77:\tlearn: 20488939.9621874\ttotal: 1.19s\tremaining: 14.1s\n",
"78:\tlearn: 20432532.8171644\ttotal: 1.2s\tremaining: 14s\n",
"79:\tlearn: 20428397.7107150\ttotal: 1.22s\tremaining: 14s\n",
"80:\tlearn: 20421638.7734419\ttotal: 1.23s\tremaining: 13.9s\n",
"81:\tlearn: 20421021.7388457\ttotal: 1.24s\tremaining: 13.9s\n",
"82:\tlearn: 20406404.2376730\ttotal: 1.26s\tremaining: 13.9s\n",
"83:\tlearn: 20021682.5008511\ttotal: 1.28s\tremaining: 13.9s\n",
"84:\tlearn: 20018322.6048631\ttotal: 1.3s\tremaining: 14s\n",
"85:\tlearn: 20004841.3476490\ttotal: 1.31s\tremaining: 14s\n",
"86:\tlearn: 19985666.0092745\ttotal: 1.33s\tremaining: 13.9s\n",
"87:\tlearn: 19983778.1947243\ttotal: 1.34s\tremaining: 13.9s\n",
"88:\tlearn: 19982460.1107908\ttotal: 1.36s\tremaining: 13.9s\n",
"89:\tlearn: 19979128.5494690\ttotal: 1.37s\tremaining: 13.8s\n",
"90:\tlearn: 19974094.9707357\ttotal: 1.38s\tremaining: 13.8s\n",
"91:\tlearn: 19972006.9431031\ttotal: 1.4s\tremaining: 13.8s\n",
"92:\tlearn: 19970846.2845466\ttotal: 1.41s\tremaining: 13.7s\n",
"93:\tlearn: 19968858.0073042\ttotal: 1.42s\tremaining: 13.7s\n",
"94:\tlearn: 19921720.6252972\ttotal: 1.44s\tremaining: 13.7s\n",
"95:\tlearn: 19916568.5707839\ttotal: 1.45s\tremaining: 13.6s\n",
"96:\tlearn: 19913228.5247508\ttotal: 1.46s\tremaining: 13.6s\n",
"97:\tlearn: 19901982.4625895\ttotal: 1.48s\tremaining: 13.6s\n",
"98:\tlearn: 19836107.7247888\ttotal: 1.49s\tremaining: 13.6s\n",
"99:\tlearn: 19834724.7455166\ttotal: 1.5s\tremaining: 13.5s\n",
"100:\tlearn: 19832811.9745741\ttotal: 1.52s\tremaining: 13.5s\n",
"101:\tlearn: 19818491.2851567\ttotal: 1.53s\tremaining: 13.5s\n",
"102:\tlearn: 19815779.3719026\ttotal: 1.55s\tremaining: 13.5s\n",
"103:\tlearn: 19814215.0962787\ttotal: 1.56s\tremaining: 13.5s\n",
"104:\tlearn: 19782274.6892663\ttotal: 1.57s\tremaining: 13.4s\n",
"105:\tlearn: 19777945.6507456\ttotal: 1.59s\tremaining: 13.4s\n",
"106:\tlearn: 19770488.9772154\ttotal: 1.6s\tremaining: 13.4s\n",
"107:\tlearn: 19769758.0023174\ttotal: 1.61s\tremaining: 13.3s\n",
"108:\tlearn: 19767541.9303017\ttotal: 1.63s\tremaining: 13.3s\n",
"109:\tlearn: 19766992.0126300\ttotal: 1.64s\tremaining: 13.2s\n",
"110:\tlearn: 19765032.8837298\ttotal: 1.65s\tremaining: 13.2s\n",
"111:\tlearn: 19705204.6771073\ttotal: 1.66s\tremaining: 13.2s\n",
"112:\tlearn: 19703649.0394020\ttotal: 1.68s\tremaining: 13.2s\n",
"113:\tlearn: 19693038.0415419\ttotal: 1.69s\tremaining: 13.1s\n",
"114:\tlearn: 19690294.4304072\ttotal: 1.7s\tremaining: 13.1s\n",
"115:\tlearn: 19686529.4709294\ttotal: 1.71s\tremaining: 13.1s\n",
"116:\tlearn: 19684887.8267152\ttotal: 1.72s\tremaining: 13s\n",
"117:\tlearn: 19369465.6970761\ttotal: 1.74s\tremaining: 13s\n",
"118:\tlearn: 19368868.0416380\ttotal: 1.75s\tremaining: 13s\n",
"119:\tlearn: 19334590.5868513\ttotal: 1.77s\tremaining: 13s\n",
"120:\tlearn: 19332200.0832597\ttotal: 1.78s\tremaining: 12.9s\n",
"121:\tlearn: 19320130.9244745\ttotal: 1.79s\tremaining: 12.9s\n",
"122:\tlearn: 19318220.9448337\ttotal: 1.81s\tremaining: 12.9s\n",
"123:\tlearn: 18941546.2095714\ttotal: 1.82s\tremaining: 12.9s\n",
"124:\tlearn: 18941056.2836883\ttotal: 1.84s\tremaining: 12.9s\n",
"125:\tlearn: 18939637.9662976\ttotal: 1.85s\tremaining: 12.8s\n",
"126:\tlearn: 18938172.4621610\ttotal: 1.86s\tremaining: 12.8s\n",
"127:\tlearn: 18935889.3619752\ttotal: 1.88s\tremaining: 12.8s\n",
"128:\tlearn: 18928784.7025346\ttotal: 1.9s\tremaining: 12.8s\n",
"129:\tlearn: 18926981.6933453\ttotal: 1.91s\tremaining: 12.8s\n",
"130:\tlearn: 18830178.3173696\ttotal: 1.93s\tremaining: 12.8s\n",
"131:\tlearn: 18828102.3918672\ttotal: 1.94s\tremaining: 12.8s\n",
"132:\tlearn: 18825755.9987015\ttotal: 1.95s\tremaining: 12.7s\n",
"133:\tlearn: 18793049.5462155\ttotal: 1.97s\tremaining: 12.7s\n",
"134:\tlearn: 18791452.8400128\ttotal: 1.98s\tremaining: 12.7s\n",
"135:\tlearn: 18484591.4924421\ttotal: 1.99s\tremaining: 12.7s\n",
"136:\tlearn: 18482373.1605741\ttotal: 2s\tremaining: 12.6s\n",
"137:\tlearn: 18414571.2543321\ttotal: 2.02s\tremaining: 12.6s\n",
"138:\tlearn: 18412913.4160574\ttotal: 2.03s\tremaining: 12.6s\n",
"139:\tlearn: 18409214.1141794\ttotal: 2.04s\tremaining: 12.6s\n",
"140:\tlearn: 18395140.1008086\ttotal: 2.06s\tremaining: 12.5s\n",
"141:\tlearn: 18390939.2248151\ttotal: 2.07s\tremaining: 12.5s\n",
"142:\tlearn: 18377925.8298573\ttotal: 2.08s\tremaining: 12.5s\n",
"143:\tlearn: 18371775.1291009\ttotal: 2.09s\tremaining: 12.4s\n",
"144:\tlearn: 18370251.1042623\ttotal: 2.1s\tremaining: 12.4s\n",
"145:\tlearn: 18332707.1499911\ttotal: 2.12s\tremaining: 12.4s\n",
"146:\tlearn: 18330693.2665230\ttotal: 2.13s\tremaining: 12.3s\n",
"147:\tlearn: 18329408.2952767\ttotal: 2.14s\tremaining: 12.3s\n",
"148:\tlearn: 18321783.9892793\ttotal: 2.15s\tremaining: 12.3s\n",
"149:\tlearn: 18321270.4958267\ttotal: 2.16s\tremaining: 12.2s\n",
"150:\tlearn: 18310325.1681801\ttotal: 2.17s\tremaining: 12.2s\n",
"151:\tlearn: 18299986.9413893\ttotal: 2.18s\tremaining: 12.2s\n",
"152:\tlearn: 18290217.7479708\ttotal: 2.2s\tremaining: 12.2s\n",
"153:\tlearn: 18280975.8537910\ttotal: 2.21s\tremaining: 12.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"154:\tlearn: 18272215.6509019\ttotal: 2.22s\tremaining: 12.1s\n",
"155:\tlearn: 18263878.2178516\ttotal: 2.23s\tremaining: 12.1s\n",
"156:\tlearn: 18256009.4859248\ttotal: 2.25s\tremaining: 12.1s\n",
"157:\tlearn: 18248529.7799856\ttotal: 2.26s\tremaining: 12.1s\n",
"158:\tlearn: 18241388.0845094\ttotal: 2.28s\tremaining: 12s\n",
"159:\tlearn: 18234700.5127085\ttotal: 2.29s\tremaining: 12s\n",
"160:\tlearn: 18228095.5839778\ttotal: 2.3s\tremaining: 12s\n",
"161:\tlearn: 18222087.5153066\ttotal: 2.31s\tremaining: 12s\n",
"162:\tlearn: 18215963.2971261\ttotal: 2.33s\tremaining: 12s\n",
"163:\tlearn: 18210272.5545163\ttotal: 2.34s\tremaining: 11.9s\n",
"164:\tlearn: 18208920.7703569\ttotal: 2.35s\tremaining: 11.9s\n",
"165:\tlearn: 18204704.7145239\ttotal: 2.37s\tremaining: 11.9s\n",
"166:\tlearn: 18187135.8260335\ttotal: 2.38s\tremaining: 11.9s\n",
"167:\tlearn: 18183064.7135734\ttotal: 2.39s\tremaining: 11.9s\n",
"168:\tlearn: 18177887.1670860\ttotal: 2.41s\tremaining: 11.8s\n",
"169:\tlearn: 18173022.2110313\ttotal: 2.42s\tremaining: 11.8s\n",
"170:\tlearn: 18168573.4167384\ttotal: 2.44s\tremaining: 11.8s\n",
"171:\tlearn: 18165036.1971623\ttotal: 2.45s\tremaining: 11.8s\n",
"172:\tlearn: 18161841.9822954\ttotal: 2.46s\tremaining: 11.8s\n",
"173:\tlearn: 18129860.2061383\ttotal: 2.48s\tremaining: 11.8s\n",
"174:\tlearn: 18127931.5161091\ttotal: 2.49s\tremaining: 11.7s\n",
"175:\tlearn: 18124997.7778403\ttotal: 2.5s\tremaining: 11.7s\n",
"176:\tlearn: 18122975.2084322\ttotal: 2.51s\tremaining: 11.7s\n",
"177:\tlearn: 18120855.5325733\ttotal: 2.53s\tremaining: 11.7s\n",
"178:\tlearn: 18117907.6019994\ttotal: 2.54s\tremaining: 11.7s\n",
"179:\tlearn: 18116674.0864027\ttotal: 2.56s\tremaining: 11.7s\n",
"180:\tlearn: 18114086.9287957\ttotal: 2.57s\tremaining: 11.6s\n",
"181:\tlearn: 18087100.0827926\ttotal: 2.59s\tremaining: 11.6s\n",
"182:\tlearn: 18071944.2213105\ttotal: 2.6s\tremaining: 11.6s\n",
"183:\tlearn: 17952691.4261792\ttotal: 2.61s\tremaining: 11.6s\n",
"184:\tlearn: 17950298.6715866\ttotal: 2.63s\tremaining: 11.6s\n",
"185:\tlearn: 17949031.8169417\ttotal: 2.64s\tremaining: 11.6s\n",
"186:\tlearn: 17937943.5186847\ttotal: 2.66s\tremaining: 11.5s\n",
"187:\tlearn: 17937014.8027177\ttotal: 2.67s\tremaining: 11.5s\n",
"188:\tlearn: 17936493.5945773\ttotal: 2.68s\tremaining: 11.5s\n",
"189:\tlearn: 17935386.0093649\ttotal: 2.69s\tremaining: 11.5s\n",
"190:\tlearn: 17934203.8644718\ttotal: 2.7s\tremaining: 11.5s\n",
"191:\tlearn: 17928336.5184065\ttotal: 2.72s\tremaining: 11.4s\n",
"192:\tlearn: 17925443.1940046\ttotal: 2.73s\tremaining: 11.4s\n",
"193:\tlearn: 17924535.5533845\ttotal: 2.75s\tremaining: 11.4s\n",
"194:\tlearn: 17917225.8802206\ttotal: 2.76s\tremaining: 11.4s\n",
"195:\tlearn: 17904437.4148190\ttotal: 2.77s\tremaining: 11.4s\n",
"196:\tlearn: 17902915.3467923\ttotal: 2.79s\tremaining: 11.4s\n",
"197:\tlearn: 17900924.7512305\ttotal: 2.8s\tremaining: 11.3s\n",
"198:\tlearn: 17899976.2262471\ttotal: 2.81s\tremaining: 11.3s\n",
"199:\tlearn: 17896573.5977064\ttotal: 2.83s\tremaining: 11.3s\n",
"200:\tlearn: 17894480.1301072\ttotal: 2.84s\tremaining: 11.3s\n",
"201:\tlearn: 17891369.5414483\ttotal: 2.85s\tremaining: 11.3s\n",
"202:\tlearn: 17853776.3679239\ttotal: 2.86s\tremaining: 11.2s\n",
"203:\tlearn: 17851457.0828592\ttotal: 2.88s\tremaining: 11.2s\n",
"204:\tlearn: 17849621.6767992\ttotal: 2.89s\tremaining: 11.2s\n",
"205:\tlearn: 17848392.5509482\ttotal: 2.9s\tremaining: 11.2s\n",
"206:\tlearn: 17845597.2428619\ttotal: 2.91s\tremaining: 11.1s\n",
"207:\tlearn: 17841951.2763157\ttotal: 2.92s\tremaining: 11.1s\n",
"208:\tlearn: 17829332.8912371\ttotal: 2.93s\tremaining: 11.1s\n",
"209:\tlearn: 17825984.1152963\ttotal: 2.95s\tremaining: 11.1s\n",
"210:\tlearn: 17821360.2498463\ttotal: 2.96s\tremaining: 11.1s\n",
"211:\tlearn: 17816041.9633158\ttotal: 2.97s\tremaining: 11s\n",
"212:\tlearn: 17815089.0154101\ttotal: 2.98s\tremaining: 11s\n",
"213:\tlearn: 17812260.4222221\ttotal: 2.99s\tremaining: 11s\n",
"214:\tlearn: 17811642.1796060\ttotal: 3s\tremaining: 11s\n",
"215:\tlearn: 17811104.8656724\ttotal: 3.01s\tremaining: 10.9s\n",
"216:\tlearn: 17810456.2984828\ttotal: 3.02s\tremaining: 10.9s\n",
"217:\tlearn: 17809982.4909707\ttotal: 3.04s\tremaining: 10.9s\n",
"218:\tlearn: 17809543.7803178\ttotal: 3.05s\tremaining: 10.9s\n",
"219:\tlearn: 17809136.8325569\ttotal: 3.06s\tremaining: 10.9s\n",
"220:\tlearn: 17808758.7315278\ttotal: 3.07s\tremaining: 10.8s\n",
"221:\tlearn: 17808406.9145618\ttotal: 3.09s\tremaining: 10.8s\n",
"222:\tlearn: 17806754.0179687\ttotal: 3.1s\tremaining: 10.8s\n",
"223:\tlearn: 17806262.4885592\ttotal: 3.11s\tremaining: 10.8s\n",
"224:\tlearn: 17805319.3776209\ttotal: 3.13s\tremaining: 10.8s\n",
"225:\tlearn: 17805011.6013482\ttotal: 3.14s\tremaining: 10.7s\n",
"226:\tlearn: 17804724.0362310\ttotal: 3.15s\tremaining: 10.7s\n",
"227:\tlearn: 17793961.7547867\ttotal: 3.16s\tremaining: 10.7s\n",
"228:\tlearn: 17793044.3976904\ttotal: 3.18s\tremaining: 10.7s\n",
"229:\tlearn: 17791876.3449986\ttotal: 3.19s\tremaining: 10.7s\n",
"230:\tlearn: 17770039.2877531\ttotal: 3.21s\tremaining: 10.7s\n",
"231:\tlearn: 17769759.3423197\ttotal: 3.22s\tremaining: 10.7s\n",
"232:\tlearn: 17769498.1846872\ttotal: 3.23s\tremaining: 10.6s\n",
"233:\tlearn: 17769106.6516586\ttotal: 3.24s\tremaining: 10.6s\n",
"234:\tlearn: 17765866.7512613\ttotal: 3.25s\tremaining: 10.6s\n",
"235:\tlearn: 17763818.0836765\ttotal: 3.27s\tremaining: 10.6s\n",
"236:\tlearn: 17761637.5687877\ttotal: 3.29s\tremaining: 10.6s\n",
"237:\tlearn: 17755293.6166299\ttotal: 3.3s\tremaining: 10.6s\n",
"238:\tlearn: 17749597.6285121\ttotal: 3.32s\tremaining: 10.6s\n",
"239:\tlearn: 17731193.4780969\ttotal: 3.33s\tremaining: 10.6s\n",
"240:\tlearn: 17730941.1840209\ttotal: 3.35s\tremaining: 10.5s\n",
"241:\tlearn: 17730651.4109866\ttotal: 3.36s\tremaining: 10.5s\n",
"242:\tlearn: 17729951.1772204\ttotal: 3.38s\tremaining: 10.5s\n",
"243:\tlearn: 17725674.6169533\ttotal: 3.39s\tremaining: 10.5s\n",
"244:\tlearn: 17724397.3837970\ttotal: 3.41s\tremaining: 10.5s\n",
"245:\tlearn: 17723085.9667878\ttotal: 3.42s\tremaining: 10.5s\n",
"246:\tlearn: 17716068.0643361\ttotal: 3.44s\tremaining: 10.5s\n",
"247:\tlearn: 17685621.7941613\ttotal: 3.45s\tremaining: 10.5s\n",
"248:\tlearn: 17684272.6716694\ttotal: 3.46s\tremaining: 10.4s\n",
"249:\tlearn: 17683390.0888279\ttotal: 3.48s\tremaining: 10.4s\n",
"250:\tlearn: 17683052.4845925\ttotal: 3.49s\tremaining: 10.4s\n",
"251:\tlearn: 17678624.0868252\ttotal: 3.51s\tremaining: 10.4s\n",
"252:\tlearn: 17665657.9640584\ttotal: 3.52s\tremaining: 10.4s\n",
"253:\tlearn: 17664624.5487132\ttotal: 3.54s\tremaining: 10.4s\n",
"254:\tlearn: 17663925.0646167\ttotal: 3.55s\tremaining: 10.4s\n",
"255:\tlearn: 17653813.6196925\ttotal: 3.56s\tremaining: 10.4s\n",
"256:\tlearn: 17636698.5157040\ttotal: 3.58s\tremaining: 10.3s\n",
"257:\tlearn: 17634671.9750893\ttotal: 3.59s\tremaining: 10.3s\n",
"258:\tlearn: 17633930.6422340\ttotal: 3.61s\tremaining: 10.3s\n",
"259:\tlearn: 17633026.0861171\ttotal: 3.62s\tremaining: 10.3s\n",
"260:\tlearn: 17632489.1254856\ttotal: 3.63s\tremaining: 10.3s\n",
"261:\tlearn: 17628474.9187765\ttotal: 3.65s\tremaining: 10.3s\n",
"262:\tlearn: 17627320.9817928\ttotal: 3.66s\tremaining: 10.3s\n",
"263:\tlearn: 17626116.4772868\ttotal: 3.67s\tremaining: 10.2s\n",
"264:\tlearn: 17623329.0754817\ttotal: 3.69s\tremaining: 10.2s\n",
"265:\tlearn: 17622243.1901613\ttotal: 3.7s\tremaining: 10.2s\n",
"266:\tlearn: 17550321.8250878\ttotal: 3.71s\tremaining: 10.2s\n",
"267:\tlearn: 17549755.3651767\ttotal: 3.73s\tremaining: 10.2s\n",
"268:\tlearn: 17545607.1212430\ttotal: 3.74s\tremaining: 10.2s\n",
"269:\tlearn: 17541242.2629221\ttotal: 3.75s\tremaining: 10.1s\n",
"270:\tlearn: 17499407.7313592\ttotal: 3.76s\tremaining: 10.1s\n",
"271:\tlearn: 17499145.8282321\ttotal: 3.77s\tremaining: 10.1s\n",
"272:\tlearn: 17498934.5535116\ttotal: 3.78s\tremaining: 10.1s\n",
"273:\tlearn: 17498347.2546318\ttotal: 3.79s\tremaining: 10.1s\n",
"274:\tlearn: 17498149.7061684\ttotal: 3.8s\tremaining: 10s\n",
"275:\tlearn: 17497860.3337909\ttotal: 3.81s\tremaining: 10s\n",
"276:\tlearn: 17497134.2565818\ttotal: 3.83s\tremaining: 9.98s\n",
"277:\tlearn: 17496943.1446578\ttotal: 3.84s\tremaining: 9.96s\n",
"278:\tlearn: 17495461.7397646\ttotal: 3.85s\tremaining: 9.95s\n",
"279:\tlearn: 17492860.8467310\ttotal: 3.86s\tremaining: 9.94s\n",
"280:\tlearn: 17492256.7750564\ttotal: 3.88s\tremaining: 9.92s\n",
"281:\tlearn: 17491315.8920024\ttotal: 3.89s\tremaining: 9.91s\n",
"282:\tlearn: 17488802.8492737\ttotal: 3.9s\tremaining: 9.89s\n",
"283:\tlearn: 17479802.6541152\ttotal: 3.92s\tremaining: 9.87s\n",
"284:\tlearn: 17477169.5331720\ttotal: 3.93s\tremaining: 9.86s\n",
"285:\tlearn: 17474743.6190942\ttotal: 3.94s\tremaining: 9.84s\n",
"286:\tlearn: 17468342.7955232\ttotal: 3.96s\tremaining: 9.83s\n",
"287:\tlearn: 17467579.9985437\ttotal: 3.97s\tremaining: 9.82s\n",
"288:\tlearn: 17467009.9684055\ttotal: 3.98s\tremaining: 9.8s\n",
"289:\tlearn: 17464125.0260113\ttotal: 4s\tremaining: 9.79s\n",
"290:\tlearn: 17463508.0564477\ttotal: 4.01s\tremaining: 9.77s\n",
"291:\tlearn: 17453183.2620432\ttotal: 4.02s\tremaining: 9.75s\n",
"292:\tlearn: 17452971.0671546\ttotal: 4.03s\tremaining: 9.73s\n",
"293:\tlearn: 17452198.5884342\ttotal: 4.05s\tremaining: 9.72s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"294:\tlearn: 17450925.6159031\ttotal: 4.06s\tremaining: 9.7s\n",
"295:\tlearn: 17450685.1155343\ttotal: 4.07s\tremaining: 9.68s\n",
"296:\tlearn: 17447975.7379237\ttotal: 4.08s\tremaining: 9.66s\n",
"297:\tlearn: 17446417.7251561\ttotal: 4.1s\tremaining: 9.65s\n",
"298:\tlearn: 17446166.7629704\ttotal: 4.11s\tremaining: 9.63s\n",
"299:\tlearn: 17445963.1442260\ttotal: 4.12s\tremaining: 9.61s\n",
"300:\tlearn: 17445745.7958927\ttotal: 4.13s\tremaining: 9.58s\n",
"301:\tlearn: 17444963.9290154\ttotal: 4.14s\tremaining: 9.57s\n",
"302:\tlearn: 17432650.1591210\ttotal: 4.15s\tremaining: 9.55s\n",
"303:\tlearn: 17430525.1210288\ttotal: 4.17s\tremaining: 9.54s\n",
"304:\tlearn: 17418414.4601453\ttotal: 4.18s\tremaining: 9.52s\n",
"305:\tlearn: 17417977.4735651\ttotal: 4.19s\tremaining: 9.5s\n",
"306:\tlearn: 17335624.2943914\ttotal: 4.2s\tremaining: 9.49s\n",
"307:\tlearn: 17323558.9233681\ttotal: 4.21s\tremaining: 9.47s\n",
"308:\tlearn: 17323047.3527617\ttotal: 4.22s\tremaining: 9.45s\n",
"309:\tlearn: 17322403.3488620\ttotal: 4.24s\tremaining: 9.43s\n",
"310:\tlearn: 17322187.6973801\ttotal: 4.25s\tremaining: 9.41s\n",
"311:\tlearn: 17320898.8497406\ttotal: 4.26s\tremaining: 9.4s\n",
"312:\tlearn: 17312668.7000429\ttotal: 4.27s\tremaining: 9.38s\n",
"313:\tlearn: 17299277.5985403\ttotal: 4.29s\tremaining: 9.36s\n",
"314:\tlearn: 17298175.9786240\ttotal: 4.3s\tremaining: 9.35s\n",
"315:\tlearn: 17296005.0430765\ttotal: 4.31s\tremaining: 9.33s\n",
"316:\tlearn: 17295834.3986842\ttotal: 4.33s\tremaining: 9.32s\n",
"317:\tlearn: 17295646.8271436\ttotal: 4.33s\tremaining: 9.3s\n",
"318:\tlearn: 17295412.2240763\ttotal: 4.35s\tremaining: 9.28s\n",
"319:\tlearn: 17295269.3891063\ttotal: 4.36s\tremaining: 9.26s\n",
"320:\tlearn: 17294720.1427139\ttotal: 4.37s\tremaining: 9.24s\n",
"321:\tlearn: 17280405.8179874\ttotal: 4.38s\tremaining: 9.22s\n",
"322:\tlearn: 17279788.6705542\ttotal: 4.39s\tremaining: 9.2s\n",
"323:\tlearn: 17259578.2219214\ttotal: 4.4s\tremaining: 9.19s\n",
"324:\tlearn: 17258995.8851109\ttotal: 4.41s\tremaining: 9.16s\n",
"325:\tlearn: 17256802.0040208\ttotal: 4.42s\tremaining: 9.15s\n",
"326:\tlearn: 17245667.9352932\ttotal: 4.44s\tremaining: 9.13s\n",
"327:\tlearn: 17245157.2383849\ttotal: 4.45s\tremaining: 9.12s\n",
"328:\tlearn: 17244420.0505767\ttotal: 4.46s\tremaining: 9.11s\n",
"329:\tlearn: 17240620.9311856\ttotal: 4.48s\tremaining: 9.09s\n",
"330:\tlearn: 17240126.6382259\ttotal: 4.49s\tremaining: 9.07s\n",
"331:\tlearn: 17239554.3263042\ttotal: 4.5s\tremaining: 9.06s\n",
"332:\tlearn: 17239249.4122676\ttotal: 4.52s\tremaining: 9.05s\n",
"333:\tlearn: 17237315.5959603\ttotal: 4.53s\tremaining: 9.04s\n",
"334:\tlearn: 17237170.4183008\ttotal: 4.54s\tremaining: 9.02s\n",
"335:\tlearn: 17235498.1709182\ttotal: 4.55s\tremaining: 9s\n",
"336:\tlearn: 17154286.9322136\ttotal: 4.57s\tremaining: 8.98s\n",
"337:\tlearn: 17152860.5403583\ttotal: 4.58s\tremaining: 8.96s\n",
"338:\tlearn: 17139897.5803445\ttotal: 4.59s\tremaining: 8.95s\n",
"339:\tlearn: 17139685.6194353\ttotal: 4.6s\tremaining: 8.93s\n",
"340:\tlearn: 17129406.8909698\ttotal: 4.61s\tremaining: 8.92s\n",
"341:\tlearn: 17126386.5318429\ttotal: 4.63s\tremaining: 8.9s\n",
"342:\tlearn: 17125338.5826429\ttotal: 4.64s\tremaining: 8.89s\n",
"343:\tlearn: 17124937.1764028\ttotal: 4.65s\tremaining: 8.87s\n",
"344:\tlearn: 17124773.5128614\ttotal: 4.66s\tremaining: 8.85s\n",
"345:\tlearn: 17123822.0085471\ttotal: 4.67s\tremaining: 8.84s\n",
"346:\tlearn: 17122604.8415169\ttotal: 4.68s\tremaining: 8.82s\n",
"347:\tlearn: 17121767.5370013\ttotal: 4.7s\tremaining: 8.8s\n",
"348:\tlearn: 17109471.1428348\ttotal: 4.71s\tremaining: 8.79s\n",
"349:\tlearn: 17092688.7777393\ttotal: 4.73s\tremaining: 8.78s\n",
"350:\tlearn: 17081854.5539987\ttotal: 4.74s\tremaining: 8.76s\n",
"351:\tlearn: 17081117.2220910\ttotal: 4.75s\tremaining: 8.75s\n",
"352:\tlearn: 17079431.1991192\ttotal: 4.76s\tremaining: 8.73s\n",
"353:\tlearn: 17065749.4676464\ttotal: 4.78s\tremaining: 8.72s\n",
"354:\tlearn: 17050839.2238400\ttotal: 4.79s\tremaining: 8.7s\n",
"355:\tlearn: 17050106.8831270\ttotal: 4.8s\tremaining: 8.69s\n",
"356:\tlearn: 17046033.2332065\ttotal: 4.82s\tremaining: 8.67s\n",
"357:\tlearn: 17043704.2415802\ttotal: 4.83s\tremaining: 8.66s\n",
"358:\tlearn: 17034226.2631681\ttotal: 4.84s\tremaining: 8.64s\n",
"359:\tlearn: 17019515.6806659\ttotal: 4.85s\tremaining: 8.63s\n",
"360:\tlearn: 17018472.9763746\ttotal: 4.87s\tremaining: 8.61s\n",
"361:\tlearn: 17017909.7121151\ttotal: 4.88s\tremaining: 8.6s\n",
"362:\tlearn: 17017463.3942640\ttotal: 4.89s\tremaining: 8.58s\n",
"363:\tlearn: 17016467.4317116\ttotal: 4.9s\tremaining: 8.56s\n",
"364:\tlearn: 17016320.3746025\ttotal: 4.91s\tremaining: 8.55s\n",
"365:\tlearn: 17014043.0108512\ttotal: 4.93s\tremaining: 8.53s\n",
"366:\tlearn: 17013536.3710672\ttotal: 4.94s\tremaining: 8.51s\n",
"367:\tlearn: 17011993.2014165\ttotal: 4.95s\tremaining: 8.5s\n",
"368:\tlearn: 17011849.5641841\ttotal: 4.96s\tremaining: 8.48s\n",
"369:\tlearn: 17011403.7126883\ttotal: 4.97s\tremaining: 8.46s\n",
"370:\tlearn: 17009763.5741945\ttotal: 4.98s\tremaining: 8.45s\n",
"371:\tlearn: 17009382.7519630\ttotal: 4.99s\tremaining: 8.43s\n",
"372:\tlearn: 17008464.7915054\ttotal: 5s\tremaining: 8.41s\n",
"373:\tlearn: 17008143.8161261\ttotal: 5.01s\tremaining: 8.39s\n",
"374:\tlearn: 16996814.2215431\ttotal: 5.03s\tremaining: 8.38s\n",
"375:\tlearn: 16996377.3351825\ttotal: 5.04s\tremaining: 8.36s\n",
"376:\tlearn: 16996037.5806770\ttotal: 5.05s\tremaining: 8.34s\n",
"377:\tlearn: 16991953.6478199\ttotal: 5.06s\tremaining: 8.33s\n",
"378:\tlearn: 16961328.6727692\ttotal: 5.08s\tremaining: 8.32s\n",
"379:\tlearn: 16957664.4831621\ttotal: 5.09s\tremaining: 8.3s\n",
"380:\tlearn: 16956856.4526881\ttotal: 5.1s\tremaining: 8.29s\n",
"381:\tlearn: 16947754.5891887\ttotal: 5.12s\tremaining: 8.28s\n",
"382:\tlearn: 16937471.3061729\ttotal: 5.13s\tremaining: 8.26s\n",
"383:\tlearn: 16910717.2697228\ttotal: 5.14s\tremaining: 8.25s\n",
"384:\tlearn: 16883021.8749316\ttotal: 5.15s\tremaining: 8.23s\n",
"385:\tlearn: 16874077.6620256\ttotal: 5.17s\tremaining: 8.22s\n",
"386:\tlearn: 16859663.0508862\ttotal: 5.18s\tremaining: 8.2s\n",
"387:\tlearn: 16843794.6984628\ttotal: 5.19s\tremaining: 8.19s\n",
"388:\tlearn: 16843670.2191430\ttotal: 5.2s\tremaining: 8.17s\n",
"389:\tlearn: 16833049.2556840\ttotal: 5.21s\tremaining: 8.15s\n",
"390:\tlearn: 16821522.4443567\ttotal: 5.23s\tremaining: 8.14s\n",
"391:\tlearn: 16818181.1766856\ttotal: 5.24s\tremaining: 8.13s\n",
"392:\tlearn: 16817749.5049150\ttotal: 5.25s\tremaining: 8.12s\n",
"393:\tlearn: 16817402.3614282\ttotal: 5.27s\tremaining: 8.1s\n",
"394:\tlearn: 16815679.7151727\ttotal: 5.28s\tremaining: 8.09s\n",
"395:\tlearn: 16810641.8717564\ttotal: 5.29s\tremaining: 8.07s\n",
"396:\tlearn: 16810291.1871768\ttotal: 5.3s\tremaining: 8.06s\n",
"397:\tlearn: 16808056.2422004\ttotal: 5.32s\tremaining: 8.04s\n",
"398:\tlearn: 16807804.2454334\ttotal: 5.33s\tremaining: 8.03s\n",
"399:\tlearn: 16799998.1957230\ttotal: 5.34s\tremaining: 8.02s\n",
"400:\tlearn: 16799220.2656080\ttotal: 5.36s\tremaining: 8s\n",
"401:\tlearn: 16798913.0252067\ttotal: 5.37s\tremaining: 7.98s\n",
"402:\tlearn: 16798319.2545577\ttotal: 5.38s\tremaining: 7.97s\n",
"403:\tlearn: 16796848.5752647\ttotal: 5.39s\tremaining: 7.95s\n",
"404:\tlearn: 16757656.8985529\ttotal: 5.4s\tremaining: 7.93s\n",
"405:\tlearn: 16745513.4381725\ttotal: 5.41s\tremaining: 7.92s\n",
"406:\tlearn: 16735416.8114581\ttotal: 5.43s\tremaining: 7.91s\n",
"407:\tlearn: 16734295.1424370\ttotal: 5.44s\tremaining: 7.89s\n",
"408:\tlearn: 16733140.3781664\ttotal: 5.45s\tremaining: 7.88s\n",
"409:\tlearn: 16723800.8980695\ttotal: 5.46s\tremaining: 7.86s\n",
"410:\tlearn: 16721200.9625357\ttotal: 5.48s\tremaining: 7.85s\n",
"411:\tlearn: 16720027.8472987\ttotal: 5.49s\tremaining: 7.83s\n",
"412:\tlearn: 16717199.5760035\ttotal: 5.5s\tremaining: 7.82s\n",
"413:\tlearn: 16713362.4492616\ttotal: 5.52s\tremaining: 7.81s\n",
"414:\tlearn: 16712806.0473182\ttotal: 5.53s\tremaining: 7.8s\n",
"415:\tlearn: 16711241.9902750\ttotal: 5.55s\tremaining: 7.79s\n",
"416:\tlearn: 16710626.7325455\ttotal: 5.56s\tremaining: 7.77s\n",
"417:\tlearn: 16644768.4542531\ttotal: 5.57s\tremaining: 7.75s\n",
"418:\tlearn: 16644403.8081224\ttotal: 5.58s\tremaining: 7.74s\n",
"419:\tlearn: 16644106.9601552\ttotal: 5.59s\tremaining: 7.72s\n",
"420:\tlearn: 16643628.6346956\ttotal: 5.6s\tremaining: 7.71s\n",
"421:\tlearn: 16640073.3813320\ttotal: 5.62s\tremaining: 7.69s\n",
"422:\tlearn: 16639549.7950808\ttotal: 5.63s\tremaining: 7.68s\n",
"423:\tlearn: 16639069.1006878\ttotal: 5.64s\tremaining: 7.66s\n",
"424:\tlearn: 16638481.2382327\ttotal: 5.65s\tremaining: 7.65s\n",
"425:\tlearn: 16638208.9073863\ttotal: 5.66s\tremaining: 7.63s\n",
"426:\tlearn: 16609090.9227109\ttotal: 5.67s\tremaining: 7.61s\n",
"427:\tlearn: 16607897.8537223\ttotal: 5.69s\tremaining: 7.6s\n",
"428:\tlearn: 16607613.0069443\ttotal: 5.7s\tremaining: 7.58s\n",
"429:\tlearn: 16603866.7848843\ttotal: 5.71s\tremaining: 7.57s\n",
"430:\tlearn: 16566652.4020620\ttotal: 5.72s\tremaining: 7.55s\n",
"431:\tlearn: 16566149.6048169\ttotal: 5.74s\tremaining: 7.54s\n",
"432:\tlearn: 16564672.1011733\ttotal: 5.75s\tremaining: 7.53s\n",
"433:\tlearn: 16564610.7741058\ttotal: 5.76s\tremaining: 7.51s\n",
"434:\tlearn: 16564198.8911273\ttotal: 5.77s\tremaining: 7.49s\n",
"435:\tlearn: 16559675.2968062\ttotal: 5.78s\tremaining: 7.48s\n",
"436:\tlearn: 16558753.2346339\ttotal: 5.79s\tremaining: 7.46s\n",
"437:\tlearn: 16558452.1907641\ttotal: 5.8s\tremaining: 7.45s\n",
"438:\tlearn: 16546587.2383006\ttotal: 5.82s\tremaining: 7.43s\n",
"439:\tlearn: 16543823.0847287\ttotal: 5.83s\tremaining: 7.42s\n",
"440:\tlearn: 16542126.8424469\ttotal: 5.84s\tremaining: 7.4s\n",
"441:\tlearn: 16541624.1632076\ttotal: 5.85s\tremaining: 7.39s\n",
"442:\tlearn: 16540326.5322872\ttotal: 5.86s\tremaining: 7.37s\n",
"443:\tlearn: 16530336.2084291\ttotal: 5.88s\tremaining: 7.36s\n",
"444:\tlearn: 16530167.9665629\ttotal: 5.89s\tremaining: 7.34s\n",
"445:\tlearn: 16528821.2477933\ttotal: 5.9s\tremaining: 7.33s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"446:\tlearn: 16528766.2012617\ttotal: 5.91s\tremaining: 7.31s\n",
"447:\tlearn: 16518018.7193100\ttotal: 5.92s\tremaining: 7.3s\n",
"448:\tlearn: 16508723.6897544\ttotal: 5.94s\tremaining: 7.29s\n",
"449:\tlearn: 16508487.2637814\ttotal: 5.95s\tremaining: 7.27s\n",
"450:\tlearn: 16473955.6540161\ttotal: 5.96s\tremaining: 7.26s\n",
"451:\tlearn: 16453172.0203944\ttotal: 5.98s\tremaining: 7.25s\n",
"452:\tlearn: 16451483.6324413\ttotal: 5.99s\tremaining: 7.23s\n",
"453:\tlearn: 16451257.8036014\ttotal: 6s\tremaining: 7.22s\n",
"454:\tlearn: 16448369.9508352\ttotal: 6.02s\tremaining: 7.21s\n",
"455:\tlearn: 16446719.1385193\ttotal: 6.03s\tremaining: 7.19s\n",
"456:\tlearn: 16420736.1369659\ttotal: 6.04s\tremaining: 7.18s\n",
"457:\tlearn: 16420629.2824606\ttotal: 6.05s\tremaining: 7.16s\n",
"458:\tlearn: 16420336.6729748\ttotal: 6.07s\tremaining: 7.15s\n",
"459:\tlearn: 16420155.4584530\ttotal: 6.08s\tremaining: 7.13s\n",
"460:\tlearn: 16419734.8233202\ttotal: 6.09s\tremaining: 7.12s\n",
"461:\tlearn: 16419517.6225944\ttotal: 6.1s\tremaining: 7.1s\n",
"462:\tlearn: 16406145.7183320\ttotal: 6.12s\tremaining: 7.09s\n",
"463:\tlearn: 16404609.0651931\ttotal: 6.13s\tremaining: 7.08s\n",
"464:\tlearn: 16404332.0732862\ttotal: 6.14s\tremaining: 7.07s\n",
"465:\tlearn: 16404019.7507952\ttotal: 6.16s\tremaining: 7.05s\n",
"466:\tlearn: 16403507.0137349\ttotal: 6.17s\tremaining: 7.04s\n",
"467:\tlearn: 16402993.5886996\ttotal: 6.18s\tremaining: 7.03s\n",
"468:\tlearn: 16385955.8460101\ttotal: 6.19s\tremaining: 7.01s\n",
"469:\tlearn: 16373237.2004642\ttotal: 6.2s\tremaining: 7s\n",
"470:\tlearn: 16373038.3665164\ttotal: 6.21s\tremaining: 6.98s\n",
"471:\tlearn: 16372801.5860356\ttotal: 6.23s\tremaining: 6.97s\n",
"472:\tlearn: 16360759.6605520\ttotal: 6.24s\tremaining: 6.95s\n",
"473:\tlearn: 16360169.9657388\ttotal: 6.25s\tremaining: 6.94s\n",
"474:\tlearn: 16351841.0373273\ttotal: 6.26s\tremaining: 6.92s\n",
"475:\tlearn: 16349809.4004009\ttotal: 6.28s\tremaining: 6.91s\n",
"476:\tlearn: 16344483.1074475\ttotal: 6.3s\tremaining: 6.9s\n",
"477:\tlearn: 16340922.7262468\ttotal: 6.31s\tremaining: 6.89s\n",
"478:\tlearn: 16334736.4373107\ttotal: 6.32s\tremaining: 6.88s\n",
"479:\tlearn: 16334043.7402281\ttotal: 6.33s\tremaining: 6.86s\n",
"480:\tlearn: 16333745.0129155\ttotal: 6.35s\tremaining: 6.85s\n",
"481:\tlearn: 16332170.0024156\ttotal: 6.36s\tremaining: 6.83s\n",
"482:\tlearn: 16331680.4256261\ttotal: 6.37s\tremaining: 6.82s\n",
"483:\tlearn: 16321943.5880137\ttotal: 6.38s\tremaining: 6.81s\n",
"484:\tlearn: 16313566.1128530\ttotal: 6.4s\tremaining: 6.79s\n",
"485:\tlearn: 16312784.3783495\ttotal: 6.41s\tremaining: 6.78s\n",
"486:\tlearn: 16304256.8971602\ttotal: 6.42s\tremaining: 6.76s\n",
"487:\tlearn: 16299338.9360929\ttotal: 6.43s\tremaining: 6.75s\n",
"488:\tlearn: 16298399.2768748\ttotal: 6.45s\tremaining: 6.74s\n",
"489:\tlearn: 16282861.5959599\ttotal: 6.46s\tremaining: 6.72s\n",
"490:\tlearn: 16278027.7798172\ttotal: 6.47s\tremaining: 6.71s\n",
"491:\tlearn: 16262455.7433251\ttotal: 6.49s\tremaining: 6.7s\n",
"492:\tlearn: 16254609.6670435\ttotal: 6.5s\tremaining: 6.68s\n",
"493:\tlearn: 16250306.9197526\ttotal: 6.51s\tremaining: 6.67s\n",
"494:\tlearn: 16249855.9315045\ttotal: 6.53s\tremaining: 6.66s\n",
"495:\tlearn: 16248555.7562997\ttotal: 6.54s\tremaining: 6.64s\n",
"496:\tlearn: 16247555.1566330\ttotal: 6.55s\tremaining: 6.63s\n",
"497:\tlearn: 16247235.5993966\ttotal: 6.56s\tremaining: 6.62s\n",
"498:\tlearn: 16246264.7483105\ttotal: 6.57s\tremaining: 6.6s\n",
"499:\tlearn: 16246007.7491962\ttotal: 6.58s\tremaining: 6.58s\n",
"500:\tlearn: 16222867.6954421\ttotal: 6.59s\tremaining: 6.57s\n",
"501:\tlearn: 16222688.8853061\ttotal: 6.61s\tremaining: 6.55s\n",
"502:\tlearn: 16217885.3385915\ttotal: 6.62s\tremaining: 6.54s\n",
"503:\tlearn: 16217409.1580145\ttotal: 6.63s\tremaining: 6.52s\n",
"504:\tlearn: 16216838.3191240\ttotal: 6.64s\tremaining: 6.51s\n",
"505:\tlearn: 16216329.9777509\ttotal: 6.65s\tremaining: 6.5s\n",
"506:\tlearn: 16201534.4156055\ttotal: 6.67s\tremaining: 6.48s\n",
"507:\tlearn: 16198138.1904772\ttotal: 6.68s\tremaining: 6.47s\n",
"508:\tlearn: 16197904.2583705\ttotal: 6.69s\tremaining: 6.45s\n",
"509:\tlearn: 16193656.6407621\ttotal: 6.7s\tremaining: 6.44s\n",
"510:\tlearn: 16180805.8618897\ttotal: 6.71s\tremaining: 6.42s\n",
"511:\tlearn: 16176908.1769610\ttotal: 6.73s\tremaining: 6.41s\n",
"512:\tlearn: 16168261.0438871\ttotal: 6.74s\tremaining: 6.4s\n",
"513:\tlearn: 16167754.4165306\ttotal: 6.75s\tremaining: 6.38s\n",
"514:\tlearn: 16166295.0362243\ttotal: 6.77s\tremaining: 6.37s\n",
"515:\tlearn: 16166058.4053693\ttotal: 6.78s\tremaining: 6.36s\n",
"516:\tlearn: 16155412.7707338\ttotal: 6.79s\tremaining: 6.34s\n",
"517:\tlearn: 16152266.1742558\ttotal: 6.8s\tremaining: 6.33s\n",
"518:\tlearn: 16151552.8907870\ttotal: 6.82s\tremaining: 6.32s\n",
"519:\tlearn: 16140281.4351978\ttotal: 6.83s\tremaining: 6.3s\n",
"520:\tlearn: 16133450.4403783\ttotal: 6.84s\tremaining: 6.29s\n",
"521:\tlearn: 16132209.1334220\ttotal: 6.85s\tremaining: 6.27s\n",
"522:\tlearn: 16118104.6552795\ttotal: 6.86s\tremaining: 6.26s\n",
"523:\tlearn: 16108764.2393062\ttotal: 6.87s\tremaining: 6.25s\n",
"524:\tlearn: 16108234.0634605\ttotal: 6.89s\tremaining: 6.23s\n",
"525:\tlearn: 16107619.6760099\ttotal: 6.9s\tremaining: 6.22s\n",
"526:\tlearn: 16104870.7442280\ttotal: 6.91s\tremaining: 6.2s\n",
"527:\tlearn: 16102428.3934069\ttotal: 6.92s\tremaining: 6.19s\n",
"528:\tlearn: 16102157.2857565\ttotal: 6.93s\tremaining: 6.17s\n",
"529:\tlearn: 16101584.7403855\ttotal: 6.95s\tremaining: 6.16s\n",
"530:\tlearn: 16101480.3344969\ttotal: 6.96s\tremaining: 6.15s\n",
"531:\tlearn: 16100595.6548675\ttotal: 6.97s\tremaining: 6.13s\n",
"532:\tlearn: 16097511.0825233\ttotal: 6.99s\tremaining: 6.12s\n",
"533:\tlearn: 16096615.9743637\ttotal: 7s\tremaining: 6.11s\n",
"534:\tlearn: 16096369.6922988\ttotal: 7.01s\tremaining: 6.09s\n",
"535:\tlearn: 16095946.1647864\ttotal: 7.02s\tremaining: 6.08s\n",
"536:\tlearn: 16095637.6185090\ttotal: 7.03s\tremaining: 6.06s\n",
"537:\tlearn: 16094682.0243853\ttotal: 7.04s\tremaining: 6.05s\n",
"538:\tlearn: 16094291.9050311\ttotal: 7.05s\tremaining: 6.03s\n",
"539:\tlearn: 16093984.5280001\ttotal: 7.07s\tremaining: 6.02s\n",
"540:\tlearn: 16090374.6401334\ttotal: 7.08s\tremaining: 6.01s\n",
"541:\tlearn: 16090226.8772271\ttotal: 7.09s\tremaining: 5.99s\n",
"542:\tlearn: 16090050.1805201\ttotal: 7.1s\tremaining: 5.98s\n",
"543:\tlearn: 16069181.1048944\ttotal: 7.12s\tremaining: 5.96s\n",
"544:\tlearn: 16068504.9399291\ttotal: 7.13s\tremaining: 5.95s\n",
"545:\tlearn: 16068245.3744393\ttotal: 7.14s\tremaining: 5.94s\n",
"546:\tlearn: 16065773.4114093\ttotal: 7.15s\tremaining: 5.92s\n",
"547:\tlearn: 16051662.5046318\ttotal: 7.17s\tremaining: 5.91s\n",
"548:\tlearn: 16035327.2446945\ttotal: 7.18s\tremaining: 5.9s\n",
"549:\tlearn: 16035199.2858857\ttotal: 7.2s\tremaining: 5.89s\n",
"550:\tlearn: 16033842.9666151\ttotal: 7.21s\tremaining: 5.88s\n",
"551:\tlearn: 15995073.4381976\ttotal: 7.22s\tremaining: 5.86s\n",
"552:\tlearn: 15994812.5505379\ttotal: 7.23s\tremaining: 5.85s\n",
"553:\tlearn: 15994595.9921031\ttotal: 7.24s\tremaining: 5.83s\n",
"554:\tlearn: 15992248.3834318\ttotal: 7.26s\tremaining: 5.82s\n",
"555:\tlearn: 15992027.4484601\ttotal: 7.27s\tremaining: 5.8s\n",
"556:\tlearn: 15990566.0719983\ttotal: 7.28s\tremaining: 5.79s\n",
"557:\tlearn: 15985609.0920187\ttotal: 7.29s\tremaining: 5.78s\n",
"558:\tlearn: 15984517.8156083\ttotal: 7.3s\tremaining: 5.76s\n",
"559:\tlearn: 15958775.9803743\ttotal: 7.32s\tremaining: 5.75s\n",
"560:\tlearn: 15958166.8639855\ttotal: 7.33s\tremaining: 5.73s\n",
"561:\tlearn: 15949224.5334582\ttotal: 7.34s\tremaining: 5.72s\n",
"562:\tlearn: 15948769.9101270\ttotal: 7.35s\tremaining: 5.71s\n",
"563:\tlearn: 15930009.9576761\ttotal: 7.36s\tremaining: 5.69s\n",
"564:\tlearn: 15917439.6202170\ttotal: 7.38s\tremaining: 5.68s\n",
"565:\tlearn: 15908669.4567536\ttotal: 7.39s\tremaining: 5.67s\n",
"566:\tlearn: 15908084.2939630\ttotal: 7.4s\tremaining: 5.65s\n",
"567:\tlearn: 15906697.1590494\ttotal: 7.41s\tremaining: 5.64s\n",
"568:\tlearn: 15906522.4609846\ttotal: 7.43s\tremaining: 5.63s\n",
"569:\tlearn: 15906139.9138507\ttotal: 7.44s\tremaining: 5.61s\n",
"570:\tlearn: 15905855.0642382\ttotal: 7.45s\tremaining: 5.6s\n",
"571:\tlearn: 15897372.3501416\ttotal: 7.46s\tremaining: 5.58s\n",
"572:\tlearn: 15893536.4661240\ttotal: 7.47s\tremaining: 5.57s\n",
"573:\tlearn: 15893206.2810918\ttotal: 7.49s\tremaining: 5.56s\n",
"574:\tlearn: 15892918.2703602\ttotal: 7.5s\tremaining: 5.54s\n",
"575:\tlearn: 15892752.8029869\ttotal: 7.51s\tremaining: 5.53s\n",
"576:\tlearn: 15885169.7413434\ttotal: 7.52s\tremaining: 5.51s\n",
"577:\tlearn: 15884936.8745209\ttotal: 7.53s\tremaining: 5.5s\n",
"578:\tlearn: 15876877.1991641\ttotal: 7.54s\tremaining: 5.49s\n",
"579:\tlearn: 15865774.4061534\ttotal: 7.56s\tremaining: 5.47s\n",
"580:\tlearn: 15859212.9207966\ttotal: 7.57s\tremaining: 5.46s\n",
"581:\tlearn: 15858807.6511813\ttotal: 7.58s\tremaining: 5.45s\n",
"582:\tlearn: 15850129.9468116\ttotal: 7.6s\tremaining: 5.43s\n",
"583:\tlearn: 15845554.5689368\ttotal: 7.61s\tremaining: 5.42s\n",
"584:\tlearn: 15844986.6765475\ttotal: 7.62s\tremaining: 5.41s\n",
"585:\tlearn: 15844796.1180439\ttotal: 7.63s\tremaining: 5.39s\n",
"586:\tlearn: 15844586.1630771\ttotal: 7.65s\tremaining: 5.38s\n",
"587:\tlearn: 15827685.4584540\ttotal: 7.66s\tremaining: 5.37s\n",
"588:\tlearn: 15826910.6044821\ttotal: 7.68s\tremaining: 5.36s\n",
"589:\tlearn: 15824060.9875073\ttotal: 7.69s\tremaining: 5.34s\n",
"590:\tlearn: 15818523.6912985\ttotal: 7.7s\tremaining: 5.33s\n",
"591:\tlearn: 15810640.6921394\ttotal: 7.72s\tremaining: 5.32s\n",
"592:\tlearn: 15795481.4197185\ttotal: 7.73s\tremaining: 5.3s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"593:\tlearn: 15795256.4491006\ttotal: 7.74s\tremaining: 5.29s\n",
"594:\tlearn: 15784420.7363473\ttotal: 7.76s\tremaining: 5.28s\n",
"595:\tlearn: 15784290.1819258\ttotal: 7.78s\tremaining: 5.27s\n",
"596:\tlearn: 15783955.0773924\ttotal: 7.79s\tremaining: 5.26s\n",
"597:\tlearn: 15781518.5372107\ttotal: 7.8s\tremaining: 5.25s\n",
"598:\tlearn: 15779547.4210947\ttotal: 7.82s\tremaining: 5.24s\n",
"599:\tlearn: 15777334.3663340\ttotal: 7.83s\tremaining: 5.22s\n",
"600:\tlearn: 15774774.7721883\ttotal: 7.85s\tremaining: 5.21s\n",
"601:\tlearn: 15774672.2356339\ttotal: 7.86s\tremaining: 5.2s\n",
"602:\tlearn: 15773528.0736833\ttotal: 7.87s\tremaining: 5.18s\n",
"603:\tlearn: 15768721.3649454\ttotal: 7.89s\tremaining: 5.17s\n",
"604:\tlearn: 15768502.0877019\ttotal: 7.9s\tremaining: 5.16s\n",
"605:\tlearn: 15768057.4929247\ttotal: 7.92s\tremaining: 5.15s\n",
"606:\tlearn: 15767950.4285043\ttotal: 7.93s\tremaining: 5.14s\n",
"607:\tlearn: 15767445.1324607\ttotal: 7.95s\tremaining: 5.12s\n",
"608:\tlearn: 15767269.0628064\ttotal: 7.96s\tremaining: 5.11s\n",
"609:\tlearn: 15767020.8174624\ttotal: 7.97s\tremaining: 5.1s\n",
"610:\tlearn: 15762309.5160245\ttotal: 7.99s\tremaining: 5.09s\n",
"611:\tlearn: 15757527.7718093\ttotal: 8s\tremaining: 5.07s\n",
"612:\tlearn: 15757150.7731734\ttotal: 8.02s\tremaining: 5.06s\n",
"613:\tlearn: 15756885.6252756\ttotal: 8.03s\tremaining: 5.05s\n",
"614:\tlearn: 15755584.6816303\ttotal: 8.05s\tremaining: 5.04s\n",
"615:\tlearn: 15755485.6737331\ttotal: 8.06s\tremaining: 5.02s\n",
"616:\tlearn: 15754432.0517599\ttotal: 8.07s\tremaining: 5.01s\n",
"617:\tlearn: 15744535.8203508\ttotal: 8.09s\tremaining: 5s\n",
"618:\tlearn: 15740683.5538600\ttotal: 8.1s\tremaining: 4.99s\n",
"619:\tlearn: 15736903.5667213\ttotal: 8.12s\tremaining: 4.97s\n",
"620:\tlearn: 15736355.4210963\ttotal: 8.13s\tremaining: 4.96s\n",
"621:\tlearn: 15729940.4032081\ttotal: 8.15s\tremaining: 4.95s\n",
"622:\tlearn: 15729775.7542976\ttotal: 8.17s\tremaining: 4.94s\n",
"623:\tlearn: 15726578.4125003\ttotal: 8.18s\tremaining: 4.93s\n",
"624:\tlearn: 15713451.5317183\ttotal: 8.2s\tremaining: 4.92s\n",
"625:\tlearn: 15712116.7478338\ttotal: 8.22s\tremaining: 4.91s\n",
"626:\tlearn: 15712039.0336448\ttotal: 8.23s\tremaining: 4.9s\n",
"627:\tlearn: 15711687.4136682\ttotal: 8.25s\tremaining: 4.89s\n",
"628:\tlearn: 15711298.6681597\ttotal: 8.27s\tremaining: 4.88s\n",
"629:\tlearn: 15705228.0021081\ttotal: 8.28s\tremaining: 4.86s\n",
"630:\tlearn: 15705060.0247650\ttotal: 8.29s\tremaining: 4.85s\n",
"631:\tlearn: 15702513.1910574\ttotal: 8.31s\tremaining: 4.84s\n",
"632:\tlearn: 15702203.6145508\ttotal: 8.32s\tremaining: 4.82s\n",
"633:\tlearn: 15698975.2951288\ttotal: 8.33s\tremaining: 4.81s\n",
"634:\tlearn: 15694674.2341421\ttotal: 8.34s\tremaining: 4.8s\n",
"635:\tlearn: 15686913.9104937\ttotal: 8.36s\tremaining: 4.78s\n",
"636:\tlearn: 15686809.9586513\ttotal: 8.37s\tremaining: 4.77s\n",
"637:\tlearn: 15685604.3787689\ttotal: 8.38s\tremaining: 4.76s\n",
"638:\tlearn: 15685081.4917552\ttotal: 8.4s\tremaining: 4.74s\n",
"639:\tlearn: 15676541.2826685\ttotal: 8.41s\tremaining: 4.73s\n",
"640:\tlearn: 15672855.0180760\ttotal: 8.42s\tremaining: 4.72s\n",
"641:\tlearn: 15666780.7593096\ttotal: 8.44s\tremaining: 4.7s\n",
"642:\tlearn: 15659438.3508408\ttotal: 8.45s\tremaining: 4.69s\n",
"643:\tlearn: 15653755.4598701\ttotal: 8.46s\tremaining: 4.68s\n",
"644:\tlearn: 15652034.4638985\ttotal: 8.48s\tremaining: 4.67s\n",
"645:\tlearn: 15645095.6489597\ttotal: 8.49s\tremaining: 4.65s\n",
"646:\tlearn: 15641226.9420905\ttotal: 8.5s\tremaining: 4.64s\n",
"647:\tlearn: 15639833.9184524\ttotal: 8.52s\tremaining: 4.63s\n",
"648:\tlearn: 15639581.1651510\ttotal: 8.53s\tremaining: 4.61s\n",
"649:\tlearn: 15635923.3848062\ttotal: 8.54s\tremaining: 4.6s\n",
"650:\tlearn: 15635813.1152459\ttotal: 8.55s\tremaining: 4.58s\n",
"651:\tlearn: 15635469.3555938\ttotal: 8.56s\tremaining: 4.57s\n",
"652:\tlearn: 15635363.9174910\ttotal: 8.57s\tremaining: 4.56s\n",
"653:\tlearn: 15633936.7433448\ttotal: 8.59s\tremaining: 4.54s\n",
"654:\tlearn: 15633839.2271448\ttotal: 8.6s\tremaining: 4.53s\n",
"655:\tlearn: 15633735.8610291\ttotal: 8.61s\tremaining: 4.52s\n",
"656:\tlearn: 15633309.8063070\ttotal: 8.63s\tremaining: 4.5s\n",
"657:\tlearn: 15632683.8986677\ttotal: 8.64s\tremaining: 4.49s\n",
"658:\tlearn: 15632461.2639014\ttotal: 8.65s\tremaining: 4.48s\n",
"659:\tlearn: 15627123.1765533\ttotal: 8.66s\tremaining: 4.46s\n",
"660:\tlearn: 15626996.0787558\ttotal: 8.68s\tremaining: 4.45s\n",
"661:\tlearn: 15624291.0204091\ttotal: 8.69s\tremaining: 4.44s\n",
"662:\tlearn: 15617684.3098363\ttotal: 8.71s\tremaining: 4.42s\n",
"663:\tlearn: 15611967.2176796\ttotal: 8.72s\tremaining: 4.41s\n",
"664:\tlearn: 15598472.2546786\ttotal: 8.73s\tremaining: 4.4s\n",
"665:\tlearn: 15597526.0470563\ttotal: 8.74s\tremaining: 4.38s\n",
"666:\tlearn: 15597430.3920481\ttotal: 8.75s\tremaining: 4.37s\n",
"667:\tlearn: 15596422.7059295\ttotal: 8.77s\tremaining: 4.36s\n",
"668:\tlearn: 15591400.2242411\ttotal: 8.78s\tremaining: 4.34s\n",
"669:\tlearn: 15585199.5277811\ttotal: 8.79s\tremaining: 4.33s\n",
"670:\tlearn: 15585003.5063693\ttotal: 8.8s\tremaining: 4.32s\n",
"671:\tlearn: 15578765.7193891\ttotal: 8.81s\tremaining: 4.3s\n",
"672:\tlearn: 15577252.0151364\ttotal: 8.83s\tremaining: 4.29s\n",
"673:\tlearn: 15576511.8797514\ttotal: 8.84s\tremaining: 4.28s\n",
"674:\tlearn: 15576120.7606092\ttotal: 8.85s\tremaining: 4.26s\n",
"675:\tlearn: 15574398.5273782\ttotal: 8.86s\tremaining: 4.25s\n",
"676:\tlearn: 15565660.7493905\ttotal: 8.88s\tremaining: 4.24s\n",
"677:\tlearn: 15561009.3437211\ttotal: 8.89s\tremaining: 4.22s\n",
"678:\tlearn: 15548878.4770401\ttotal: 8.9s\tremaining: 4.21s\n",
"679:\tlearn: 15527713.9632219\ttotal: 8.91s\tremaining: 4.19s\n",
"680:\tlearn: 15519745.2151864\ttotal: 8.93s\tremaining: 4.18s\n",
"681:\tlearn: 15519391.2760902\ttotal: 8.94s\tremaining: 4.17s\n",
"682:\tlearn: 15514461.8611265\ttotal: 8.95s\tremaining: 4.15s\n",
"683:\tlearn: 15514296.1001141\ttotal: 8.96s\tremaining: 4.14s\n",
"684:\tlearn: 15514204.8658979\ttotal: 8.97s\tremaining: 4.13s\n",
"685:\tlearn: 15513977.3554214\ttotal: 8.98s\tremaining: 4.11s\n",
"686:\tlearn: 15513906.5046745\ttotal: 9s\tremaining: 4.1s\n",
"687:\tlearn: 15513701.8112778\ttotal: 9.01s\tremaining: 4.09s\n",
"688:\tlearn: 15513602.4959013\ttotal: 9.02s\tremaining: 4.07s\n",
"689:\tlearn: 15513510.7910896\ttotal: 9.03s\tremaining: 4.06s\n",
"690:\tlearn: 15513352.2070048\ttotal: 9.04s\tremaining: 4.04s\n",
"691:\tlearn: 15513238.8204588\ttotal: 9.05s\tremaining: 4.03s\n",
"692:\tlearn: 15513154.0618557\ttotal: 9.06s\tremaining: 4.01s\n",
"693:\tlearn: 15512878.9114412\ttotal: 9.07s\tremaining: 4s\n",
"694:\tlearn: 15509248.2055515\ttotal: 9.09s\tremaining: 3.99s\n",
"695:\tlearn: 15508734.7327170\ttotal: 9.1s\tremaining: 3.97s\n",
"696:\tlearn: 15508495.7881550\ttotal: 9.11s\tremaining: 3.96s\n",
"697:\tlearn: 15508349.6872134\ttotal: 9.12s\tremaining: 3.95s\n",
"698:\tlearn: 15508190.6588965\ttotal: 9.13s\tremaining: 3.93s\n",
"699:\tlearn: 15508018.8419773\ttotal: 9.14s\tremaining: 3.92s\n",
"700:\tlearn: 15507826.3791202\ttotal: 9.16s\tremaining: 3.9s\n",
"701:\tlearn: 15507705.8100928\ttotal: 9.17s\tremaining: 3.89s\n",
"702:\tlearn: 15507533.3512682\ttotal: 9.18s\tremaining: 3.88s\n",
"703:\tlearn: 15501571.2913355\ttotal: 9.19s\tremaining: 3.87s\n",
"704:\tlearn: 15495921.0773672\ttotal: 9.21s\tremaining: 3.85s\n",
"705:\tlearn: 15495385.0875416\ttotal: 9.22s\tremaining: 3.84s\n",
"706:\tlearn: 15495191.8032918\ttotal: 9.23s\tremaining: 3.83s\n",
"707:\tlearn: 15494128.9589635\ttotal: 9.24s\tremaining: 3.81s\n",
"708:\tlearn: 15493806.9566177\ttotal: 9.26s\tremaining: 3.8s\n",
"709:\tlearn: 15493694.0465547\ttotal: 9.27s\tremaining: 3.78s\n",
"710:\tlearn: 15493305.1729869\ttotal: 9.28s\tremaining: 3.77s\n",
"711:\tlearn: 15487948.0399475\ttotal: 9.3s\tremaining: 3.76s\n",
"712:\tlearn: 15487843.0916850\ttotal: 9.31s\tremaining: 3.75s\n",
"713:\tlearn: 15482765.7669785\ttotal: 9.32s\tremaining: 3.73s\n",
"714:\tlearn: 15474767.3580796\ttotal: 9.33s\tremaining: 3.72s\n",
"715:\tlearn: 15472407.8166003\ttotal: 9.35s\tremaining: 3.71s\n",
"716:\tlearn: 15467592.3874842\ttotal: 9.36s\tremaining: 3.69s\n",
"717:\tlearn: 15467435.4901525\ttotal: 9.37s\tremaining: 3.68s\n",
"718:\tlearn: 15462871.2869120\ttotal: 9.38s\tremaining: 3.67s\n",
"719:\tlearn: 15462771.0380185\ttotal: 9.39s\tremaining: 3.65s\n",
"720:\tlearn: 15462475.2715024\ttotal: 9.4s\tremaining: 3.64s\n",
"721:\tlearn: 15454885.7938423\ttotal: 9.42s\tremaining: 3.63s\n",
"722:\tlearn: 15450557.5824215\ttotal: 9.43s\tremaining: 3.61s\n",
"723:\tlearn: 15446455.2317749\ttotal: 9.44s\tremaining: 3.6s\n",
"724:\tlearn: 15445711.0004476\ttotal: 9.46s\tremaining: 3.59s\n",
"725:\tlearn: 15441822.5331613\ttotal: 9.47s\tremaining: 3.57s\n",
"726:\tlearn: 15441182.6843715\ttotal: 9.48s\tremaining: 3.56s\n",
"727:\tlearn: 15441088.8915881\ttotal: 9.49s\tremaining: 3.55s\n",
"728:\tlearn: 15441002.5272406\ttotal: 9.51s\tremaining: 3.53s\n",
"729:\tlearn: 15440884.2830869\ttotal: 9.52s\tremaining: 3.52s\n",
"730:\tlearn: 15440734.2579995\ttotal: 9.53s\tremaining: 3.51s\n",
"731:\tlearn: 15440611.8887909\ttotal: 9.54s\tremaining: 3.49s\n",
"732:\tlearn: 15440249.4221271\ttotal: 9.55s\tremaining: 3.48s\n",
"733:\tlearn: 15440158.8154476\ttotal: 9.56s\tremaining: 3.46s\n",
"734:\tlearn: 15436472.7845071\ttotal: 9.57s\tremaining: 3.45s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"735:\tlearn: 15433672.4484876\ttotal: 9.59s\tremaining: 3.44s\n",
"736:\tlearn: 15433490.1840146\ttotal: 9.6s\tremaining: 3.43s\n",
"737:\tlearn: 15433308.6881010\ttotal: 9.61s\tremaining: 3.41s\n",
"738:\tlearn: 15433042.7409848\ttotal: 9.63s\tremaining: 3.4s\n",
"739:\tlearn: 15432541.8769518\ttotal: 9.64s\tremaining: 3.39s\n",
"740:\tlearn: 15431856.6761047\ttotal: 9.66s\tremaining: 3.38s\n",
"741:\tlearn: 15431804.7359345\ttotal: 9.67s\tremaining: 3.36s\n",
"742:\tlearn: 15427355.2392047\ttotal: 9.68s\tremaining: 3.35s\n",
"743:\tlearn: 15427218.1028185\ttotal: 9.69s\tremaining: 3.33s\n",
"744:\tlearn: 15424332.1093472\ttotal: 9.7s\tremaining: 3.32s\n",
"745:\tlearn: 15388321.8033125\ttotal: 9.71s\tremaining: 3.31s\n",
"746:\tlearn: 15377267.9048803\ttotal: 9.73s\tremaining: 3.29s\n",
"747:\tlearn: 15374625.6198420\ttotal: 9.74s\tremaining: 3.28s\n",
"748:\tlearn: 15370386.0426691\ttotal: 9.75s\tremaining: 3.27s\n",
"749:\tlearn: 15359901.4299089\ttotal: 9.77s\tremaining: 3.25s\n",
"750:\tlearn: 15358774.7332579\ttotal: 9.78s\tremaining: 3.24s\n",
"751:\tlearn: 15358651.8711020\ttotal: 9.79s\tremaining: 3.23s\n",
"752:\tlearn: 15358300.8764559\ttotal: 9.8s\tremaining: 3.21s\n",
"753:\tlearn: 15357884.8170886\ttotal: 9.81s\tremaining: 3.2s\n",
"754:\tlearn: 15357643.0994172\ttotal: 9.83s\tremaining: 3.19s\n",
"755:\tlearn: 15357565.0887636\ttotal: 9.84s\tremaining: 3.17s\n",
"756:\tlearn: 15351820.0777339\ttotal: 9.85s\tremaining: 3.16s\n",
"757:\tlearn: 15351414.2517094\ttotal: 9.86s\tremaining: 3.15s\n",
"758:\tlearn: 15349501.7532204\ttotal: 9.88s\tremaining: 3.14s\n",
"759:\tlearn: 15348526.7586048\ttotal: 9.89s\tremaining: 3.12s\n",
"760:\tlearn: 15348352.7244253\ttotal: 9.9s\tremaining: 3.11s\n",
"761:\tlearn: 15347292.4488773\ttotal: 9.91s\tremaining: 3.1s\n",
"762:\tlearn: 15347207.8865499\ttotal: 9.93s\tremaining: 3.08s\n",
"763:\tlearn: 15342959.4790246\ttotal: 9.94s\tremaining: 3.07s\n",
"764:\tlearn: 15342697.6483068\ttotal: 9.95s\tremaining: 3.06s\n",
"765:\tlearn: 15313028.1511576\ttotal: 9.96s\tremaining: 3.04s\n",
"766:\tlearn: 15310947.6382018\ttotal: 9.98s\tremaining: 3.03s\n",
"767:\tlearn: 15263340.9642127\ttotal: 9.99s\tremaining: 3.02s\n",
"768:\tlearn: 15259807.2026083\ttotal: 10s\tremaining: 3s\n",
"769:\tlearn: 15259504.1148296\ttotal: 10s\tremaining: 2.99s\n",
"770:\tlearn: 15259395.2637694\ttotal: 10s\tremaining: 2.98s\n",
"771:\tlearn: 15256045.7141942\ttotal: 10s\tremaining: 2.96s\n",
"772:\tlearn: 15252870.9021417\ttotal: 10.1s\tremaining: 2.95s\n",
"773:\tlearn: 15248430.3201074\ttotal: 10.1s\tremaining: 2.94s\n",
"774:\tlearn: 15246752.4177458\ttotal: 10.1s\tremaining: 2.93s\n",
"775:\tlearn: 15245960.1417687\ttotal: 10.1s\tremaining: 2.91s\n",
"776:\tlearn: 15245917.6645107\ttotal: 10.1s\tremaining: 2.9s\n",
"777:\tlearn: 15245817.5185452\ttotal: 10.1s\tremaining: 2.89s\n",
"778:\tlearn: 15245619.8351855\ttotal: 10.1s\tremaining: 2.87s\n",
"779:\tlearn: 15244869.5667520\ttotal: 10.1s\tremaining: 2.86s\n",
"780:\tlearn: 15244818.8943236\ttotal: 10.2s\tremaining: 2.85s\n",
"781:\tlearn: 15244254.3637038\ttotal: 10.2s\tremaining: 2.83s\n",
"782:\tlearn: 15243818.2939855\ttotal: 10.2s\tremaining: 2.82s\n",
"783:\tlearn: 15243668.1645179\ttotal: 10.2s\tremaining: 2.81s\n",
"784:\tlearn: 15240656.8617467\ttotal: 10.2s\tremaining: 2.8s\n",
"785:\tlearn: 15237802.6637690\ttotal: 10.2s\tremaining: 2.78s\n",
"786:\tlearn: 15235097.3769887\ttotal: 10.2s\tremaining: 2.77s\n",
"787:\tlearn: 15231063.4576018\ttotal: 10.3s\tremaining: 2.76s\n",
"788:\tlearn: 15224406.4239600\ttotal: 10.3s\tremaining: 2.74s\n",
"789:\tlearn: 15220791.2846445\ttotal: 10.3s\tremaining: 2.73s\n",
"790:\tlearn: 15220221.4180094\ttotal: 10.3s\tremaining: 2.72s\n",
"791:\tlearn: 15220121.7499013\ttotal: 10.3s\tremaining: 2.71s\n",
"792:\tlearn: 15218396.9325757\ttotal: 10.3s\tremaining: 2.69s\n",
"793:\tlearn: 15213830.8844557\ttotal: 10.3s\tremaining: 2.68s\n",
"794:\tlearn: 15212644.4009126\ttotal: 10.3s\tremaining: 2.67s\n",
"795:\tlearn: 15212570.5272286\ttotal: 10.4s\tremaining: 2.65s\n",
"796:\tlearn: 15172874.1552397\ttotal: 10.4s\tremaining: 2.64s\n",
"797:\tlearn: 15164671.3501787\ttotal: 10.4s\tremaining: 2.63s\n",
"798:\tlearn: 15162711.8871221\ttotal: 10.4s\tremaining: 2.62s\n",
"799:\tlearn: 15162618.0050229\ttotal: 10.4s\tremaining: 2.6s\n",
"800:\tlearn: 15161186.8924011\ttotal: 10.4s\tremaining: 2.59s\n",
"801:\tlearn: 15160994.4738412\ttotal: 10.4s\tremaining: 2.58s\n",
"802:\tlearn: 15159385.3831268\ttotal: 10.4s\tremaining: 2.56s\n",
"803:\tlearn: 15159166.1576231\ttotal: 10.5s\tremaining: 2.55s\n",
"804:\tlearn: 15156764.1801770\ttotal: 10.5s\tremaining: 2.54s\n",
"805:\tlearn: 15146691.8394282\ttotal: 10.5s\tremaining: 2.52s\n",
"806:\tlearn: 15146533.6706853\ttotal: 10.5s\tremaining: 2.51s\n",
"807:\tlearn: 15146408.7773292\ttotal: 10.5s\tremaining: 2.5s\n",
"808:\tlearn: 15142359.7678728\ttotal: 10.5s\tremaining: 2.48s\n",
"809:\tlearn: 15142322.1248825\ttotal: 10.5s\tremaining: 2.47s\n",
"810:\tlearn: 15132770.1153732\ttotal: 10.5s\tremaining: 2.46s\n",
"811:\tlearn: 15101480.9924963\ttotal: 10.6s\tremaining: 2.44s\n",
"812:\tlearn: 15101445.1875248\ttotal: 10.6s\tremaining: 2.43s\n",
"813:\tlearn: 15075376.4419388\ttotal: 10.6s\tremaining: 2.42s\n",
"814:\tlearn: 15073160.0820287\ttotal: 10.6s\tremaining: 2.4s\n",
"815:\tlearn: 15072725.5140996\ttotal: 10.6s\tremaining: 2.39s\n",
"816:\tlearn: 15072585.7018342\ttotal: 10.6s\tremaining: 2.38s\n",
"817:\tlearn: 15071522.0001919\ttotal: 10.6s\tremaining: 2.37s\n",
"818:\tlearn: 15071382.2097110\ttotal: 10.6s\tremaining: 2.35s\n",
"819:\tlearn: 15071301.2886091\ttotal: 10.7s\tremaining: 2.34s\n",
"820:\tlearn: 15071025.2992144\ttotal: 10.7s\tremaining: 2.33s\n",
"821:\tlearn: 15069498.2268762\ttotal: 10.7s\tremaining: 2.31s\n",
"822:\tlearn: 15061575.7065075\ttotal: 10.7s\tremaining: 2.3s\n",
"823:\tlearn: 15061416.4068476\ttotal: 10.7s\tremaining: 2.29s\n",
"824:\tlearn: 15060945.6687130\ttotal: 10.7s\tremaining: 2.27s\n",
"825:\tlearn: 15051099.8538783\ttotal: 10.7s\tremaining: 2.26s\n",
"826:\tlearn: 15050450.9663299\ttotal: 10.7s\tremaining: 2.25s\n",
"827:\tlearn: 15049722.9751983\ttotal: 10.8s\tremaining: 2.23s\n",
"828:\tlearn: 15049467.7452535\ttotal: 10.8s\tremaining: 2.22s\n",
"829:\tlearn: 15049412.7697933\ttotal: 10.8s\tremaining: 2.21s\n",
"830:\tlearn: 15048891.7740041\ttotal: 10.8s\tremaining: 2.19s\n",
"831:\tlearn: 15048043.0994998\ttotal: 10.8s\tremaining: 2.18s\n",
"832:\tlearn: 15046697.3368860\ttotal: 10.8s\tremaining: 2.17s\n",
"833:\tlearn: 15038272.8803419\ttotal: 10.8s\tremaining: 2.15s\n",
"834:\tlearn: 15034639.9951102\ttotal: 10.8s\tremaining: 2.14s\n",
"835:\tlearn: 15030153.8614245\ttotal: 10.9s\tremaining: 2.13s\n",
"836:\tlearn: 15027964.0190757\ttotal: 10.9s\tremaining: 2.12s\n",
"837:\tlearn: 15023890.1409211\ttotal: 10.9s\tremaining: 2.1s\n",
"838:\tlearn: 15022954.0613643\ttotal: 10.9s\tremaining: 2.09s\n",
"839:\tlearn: 15022653.7321874\ttotal: 10.9s\tremaining: 2.08s\n",
"840:\tlearn: 15021763.5899870\ttotal: 10.9s\tremaining: 2.06s\n",
"841:\tlearn: 15021552.9666208\ttotal: 10.9s\tremaining: 2.05s\n",
"842:\tlearn: 15017213.3112838\ttotal: 10.9s\tremaining: 2.04s\n",
"843:\tlearn: 15006868.9919636\ttotal: 11s\tremaining: 2.02s\n",
"844:\tlearn: 15006047.4873296\ttotal: 11s\tremaining: 2.01s\n",
"845:\tlearn: 15003167.3995596\ttotal: 11s\tremaining: 2s\n",
"846:\tlearn: 15001516.1719277\ttotal: 11s\tremaining: 1.99s\n",
"847:\tlearn: 15000023.6971343\ttotal: 11s\tremaining: 1.97s\n",
"848:\tlearn: 14996097.9901016\ttotal: 11s\tremaining: 1.96s\n",
"849:\tlearn: 14995809.9617414\ttotal: 11s\tremaining: 1.95s\n",
"850:\tlearn: 14991694.0680204\ttotal: 11s\tremaining: 1.93s\n",
"851:\tlearn: 14990806.5441048\ttotal: 11.1s\tremaining: 1.92s\n",
"852:\tlearn: 14990539.4146062\ttotal: 11.1s\tremaining: 1.91s\n",
"853:\tlearn: 14990428.7975864\ttotal: 11.1s\tremaining: 1.89s\n",
"854:\tlearn: 14989061.7567162\ttotal: 11.1s\tremaining: 1.88s\n",
"855:\tlearn: 14983131.0103419\ttotal: 11.1s\tremaining: 1.87s\n",
"856:\tlearn: 14982655.3316759\ttotal: 11.1s\tremaining: 1.86s\n",
"857:\tlearn: 14977099.0608273\ttotal: 11.1s\tremaining: 1.84s\n",
"858:\tlearn: 14976713.9058693\ttotal: 11.2s\tremaining: 1.83s\n",
"859:\tlearn: 14976613.3364184\ttotal: 11.2s\tremaining: 1.82s\n",
"860:\tlearn: 14964115.6999829\ttotal: 11.2s\tremaining: 1.81s\n",
"861:\tlearn: 14961152.7626425\ttotal: 11.2s\tremaining: 1.79s\n",
"862:\tlearn: 14960316.8698796\ttotal: 11.2s\tremaining: 1.78s\n",
"863:\tlearn: 14960206.5805103\ttotal: 11.2s\tremaining: 1.77s\n",
"864:\tlearn: 14948350.2065232\ttotal: 11.2s\tremaining: 1.75s\n",
"865:\tlearn: 14948237.8225238\ttotal: 11.3s\tremaining: 1.74s\n",
"866:\tlearn: 14948145.1280412\ttotal: 11.3s\tremaining: 1.73s\n",
"867:\tlearn: 14947479.9936319\ttotal: 11.3s\tremaining: 1.72s\n",
"868:\tlearn: 14946706.9144290\ttotal: 11.3s\tremaining: 1.7s\n",
"869:\tlearn: 14946008.4529886\ttotal: 11.3s\tremaining: 1.69s\n",
"870:\tlearn: 14938382.9733130\ttotal: 11.3s\tremaining: 1.68s\n",
"871:\tlearn: 14935923.0018589\ttotal: 11.3s\tremaining: 1.66s\n",
"872:\tlearn: 14935763.9386719\ttotal: 11.4s\tremaining: 1.65s\n",
"873:\tlearn: 14935390.9032799\ttotal: 11.4s\tremaining: 1.64s\n",
"874:\tlearn: 14924136.4999000\ttotal: 11.4s\tremaining: 1.63s\n",
"875:\tlearn: 14923231.4975181\ttotal: 11.4s\tremaining: 1.61s\n",
"876:\tlearn: 14920764.7489123\ttotal: 11.4s\tremaining: 1.6s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"877:\tlearn: 14920619.6869935\ttotal: 11.4s\tremaining: 1.59s\n",
"878:\tlearn: 14920259.9887151\ttotal: 11.4s\tremaining: 1.57s\n",
"879:\tlearn: 14918671.6063618\ttotal: 11.4s\tremaining: 1.56s\n",
"880:\tlearn: 14909484.3446534\ttotal: 11.5s\tremaining: 1.55s\n",
"881:\tlearn: 14909331.3722806\ttotal: 11.5s\tremaining: 1.53s\n",
"882:\tlearn: 14909000.4744294\ttotal: 11.5s\tremaining: 1.52s\n",
"883:\tlearn: 14907810.4215534\ttotal: 11.5s\tremaining: 1.51s\n",
"884:\tlearn: 14907739.9399244\ttotal: 11.5s\tremaining: 1.5s\n",
"885:\tlearn: 14907643.8234156\ttotal: 11.5s\tremaining: 1.48s\n",
"886:\tlearn: 14902234.6414918\ttotal: 11.5s\tremaining: 1.47s\n",
"887:\tlearn: 14899639.1808572\ttotal: 11.5s\tremaining: 1.46s\n",
"888:\tlearn: 14898572.6302420\ttotal: 11.6s\tremaining: 1.44s\n",
"889:\tlearn: 14898481.9011232\ttotal: 11.6s\tremaining: 1.43s\n",
"890:\tlearn: 14898396.0569341\ttotal: 11.6s\tremaining: 1.42s\n",
"891:\tlearn: 14897825.8306216\ttotal: 11.6s\tremaining: 1.4s\n",
"892:\tlearn: 14897667.5383103\ttotal: 11.6s\tremaining: 1.39s\n",
"893:\tlearn: 14894977.2967186\ttotal: 11.6s\tremaining: 1.38s\n",
"894:\tlearn: 14894451.1015405\ttotal: 11.6s\tremaining: 1.36s\n",
"895:\tlearn: 14894302.6357933\ttotal: 11.6s\tremaining: 1.35s\n",
"896:\tlearn: 14892042.1460828\ttotal: 11.7s\tremaining: 1.34s\n",
"897:\tlearn: 14891913.2181914\ttotal: 11.7s\tremaining: 1.32s\n",
"898:\tlearn: 14891548.8875063\ttotal: 11.7s\tremaining: 1.31s\n",
"899:\tlearn: 14891535.2806629\ttotal: 11.7s\tremaining: 1.3s\n",
"900:\tlearn: 14885531.4554658\ttotal: 11.7s\tremaining: 1.29s\n",
"901:\tlearn: 14885300.5490787\ttotal: 11.7s\tremaining: 1.27s\n",
"902:\tlearn: 14882237.8064495\ttotal: 11.7s\tremaining: 1.26s\n",
"903:\tlearn: 14871534.0201501\ttotal: 11.7s\tremaining: 1.25s\n",
"904:\tlearn: 14870231.5883229\ttotal: 11.8s\tremaining: 1.23s\n",
"905:\tlearn: 14870176.1224648\ttotal: 11.8s\tremaining: 1.22s\n",
"906:\tlearn: 14869680.0980501\ttotal: 11.8s\tremaining: 1.21s\n",
"907:\tlearn: 14869457.0180442\ttotal: 11.8s\tremaining: 1.2s\n",
"908:\tlearn: 14868933.3692698\ttotal: 11.8s\tremaining: 1.18s\n",
"909:\tlearn: 14863961.6816683\ttotal: 11.8s\tremaining: 1.17s\n",
"910:\tlearn: 14863914.5911026\ttotal: 11.8s\tremaining: 1.16s\n",
"911:\tlearn: 14855476.3946477\ttotal: 11.8s\tremaining: 1.14s\n",
"912:\tlearn: 14855166.2145584\ttotal: 11.9s\tremaining: 1.13s\n",
"913:\tlearn: 14850934.1124148\ttotal: 11.9s\tremaining: 1.12s\n",
"914:\tlearn: 14846483.1667184\ttotal: 11.9s\tremaining: 1.1s\n",
"915:\tlearn: 14837736.3329095\ttotal: 11.9s\tremaining: 1.09s\n",
"916:\tlearn: 14837595.6179847\ttotal: 11.9s\tremaining: 1.08s\n",
"917:\tlearn: 14830817.0386636\ttotal: 11.9s\tremaining: 1.06s\n",
"918:\tlearn: 14830573.0969752\ttotal: 11.9s\tremaining: 1.05s\n",
"919:\tlearn: 14830470.0025192\ttotal: 12s\tremaining: 1.04s\n",
"920:\tlearn: 14829815.6285131\ttotal: 12s\tremaining: 1.03s\n",
"921:\tlearn: 14825434.2189552\ttotal: 12s\tremaining: 1.01s\n",
"922:\tlearn: 14819445.0181126\ttotal: 12s\tremaining: 1s\n",
"923:\tlearn: 14819398.6279561\ttotal: 12s\tremaining: 988ms\n",
"924:\tlearn: 14818813.5923928\ttotal: 12s\tremaining: 975ms\n",
"925:\tlearn: 14815956.9177135\ttotal: 12s\tremaining: 962ms\n",
"926:\tlearn: 14815788.2679741\ttotal: 12s\tremaining: 948ms\n",
"927:\tlearn: 14815448.9260298\ttotal: 12.1s\tremaining: 935ms\n",
"928:\tlearn: 14815074.5372959\ttotal: 12.1s\tremaining: 923ms\n",
"929:\tlearn: 14814229.2585638\ttotal: 12.1s\tremaining: 910ms\n",
"930:\tlearn: 14804039.0241152\ttotal: 12.1s\tremaining: 897ms\n",
"931:\tlearn: 14803929.0738285\ttotal: 12.1s\tremaining: 883ms\n",
"932:\tlearn: 14803822.0317935\ttotal: 12.1s\tremaining: 870ms\n",
"933:\tlearn: 14802847.7620639\ttotal: 12.1s\tremaining: 857ms\n",
"934:\tlearn: 14802644.0143811\ttotal: 12.1s\tremaining: 844ms\n",
"935:\tlearn: 14801390.4240818\ttotal: 12.2s\tremaining: 831ms\n",
"936:\tlearn: 14801273.4225706\ttotal: 12.2s\tremaining: 818ms\n",
"937:\tlearn: 14801021.8599058\ttotal: 12.2s\tremaining: 805ms\n",
"938:\tlearn: 14800715.3029627\ttotal: 12.2s\tremaining: 792ms\n",
"939:\tlearn: 14799551.6406369\ttotal: 12.2s\tremaining: 779ms\n",
"940:\tlearn: 14795598.5613345\ttotal: 12.2s\tremaining: 766ms\n",
"941:\tlearn: 14794465.0278834\ttotal: 12.2s\tremaining: 753ms\n",
"942:\tlearn: 14794259.7563387\ttotal: 12.2s\tremaining: 740ms\n",
"943:\tlearn: 14794156.6214413\ttotal: 12.3s\tremaining: 727ms\n",
"944:\tlearn: 14792982.4344262\ttotal: 12.3s\tremaining: 714ms\n",
"945:\tlearn: 14792468.8012658\ttotal: 12.3s\tremaining: 701ms\n",
"946:\tlearn: 14792139.9923168\ttotal: 12.3s\tremaining: 688ms\n",
"947:\tlearn: 14786295.7938911\ttotal: 12.3s\tremaining: 675ms\n",
"948:\tlearn: 14783272.8762359\ttotal: 12.3s\tremaining: 662ms\n",
"949:\tlearn: 14782689.5513664\ttotal: 12.3s\tremaining: 649ms\n",
"950:\tlearn: 14782664.1266181\ttotal: 12.3s\tremaining: 636ms\n",
"951:\tlearn: 14774339.7873426\ttotal: 12.4s\tremaining: 623ms\n",
"952:\tlearn: 14769038.5403572\ttotal: 12.4s\tremaining: 610ms\n",
"953:\tlearn: 14761312.5028488\ttotal: 12.4s\tremaining: 597ms\n",
"954:\tlearn: 14760414.3496721\ttotal: 12.4s\tremaining: 584ms\n",
"955:\tlearn: 14752950.6860631\ttotal: 12.4s\tremaining: 571ms\n",
"956:\tlearn: 14752691.2767919\ttotal: 12.4s\tremaining: 558ms\n",
"957:\tlearn: 14752196.9268404\ttotal: 12.4s\tremaining: 545ms\n",
"958:\tlearn: 14750149.4071752\ttotal: 12.5s\tremaining: 532ms\n",
"959:\tlearn: 14749691.8632556\ttotal: 12.5s\tremaining: 519ms\n",
"960:\tlearn: 14749496.6164671\ttotal: 12.5s\tremaining: 507ms\n",
"961:\tlearn: 14749338.8909588\ttotal: 12.5s\tremaining: 494ms\n",
"962:\tlearn: 14749291.1143099\ttotal: 12.5s\tremaining: 481ms\n",
"963:\tlearn: 14739582.8672605\ttotal: 12.5s\tremaining: 468ms\n",
"964:\tlearn: 14739542.4442574\ttotal: 12.5s\tremaining: 455ms\n",
"965:\tlearn: 14739306.4391584\ttotal: 12.5s\tremaining: 442ms\n",
"966:\tlearn: 14708630.7344942\ttotal: 12.6s\tremaining: 429ms\n",
"967:\tlearn: 14680984.5320050\ttotal: 12.6s\tremaining: 416ms\n",
"968:\tlearn: 14677319.7390898\ttotal: 12.6s\tremaining: 403ms\n",
"969:\tlearn: 14677109.6650390\ttotal: 12.6s\tremaining: 389ms\n",
"970:\tlearn: 14676355.6242387\ttotal: 12.6s\tremaining: 376ms\n",
"971:\tlearn: 14667109.8761175\ttotal: 12.6s\tremaining: 363ms\n",
"972:\tlearn: 14643934.7657464\ttotal: 12.6s\tremaining: 350ms\n",
"973:\tlearn: 14642633.8347823\ttotal: 12.6s\tremaining: 337ms\n",
"974:\tlearn: 14642167.8071045\ttotal: 12.7s\tremaining: 324ms\n",
"975:\tlearn: 14641845.3365852\ttotal: 12.7s\tremaining: 311ms\n",
"976:\tlearn: 14640306.5144587\ttotal: 12.7s\tremaining: 298ms\n",
"977:\tlearn: 14640184.9147582\ttotal: 12.7s\tremaining: 286ms\n",
"978:\tlearn: 14640008.8483425\ttotal: 12.7s\tremaining: 273ms\n",
"979:\tlearn: 14636344.5519428\ttotal: 12.7s\tremaining: 260ms\n",
"980:\tlearn: 14633576.2663352\ttotal: 12.7s\tremaining: 247ms\n",
"981:\tlearn: 14624796.2134142\ttotal: 12.7s\tremaining: 234ms\n",
"982:\tlearn: 14624250.6774943\ttotal: 12.8s\tremaining: 221ms\n",
"983:\tlearn: 14615917.0650799\ttotal: 12.8s\tremaining: 208ms\n",
"984:\tlearn: 14615782.8388140\ttotal: 12.8s\tremaining: 195ms\n",
"985:\tlearn: 14612724.2754075\ttotal: 12.8s\tremaining: 182ms\n",
"986:\tlearn: 14609973.2837772\ttotal: 12.8s\tremaining: 169ms\n",
"987:\tlearn: 14605203.8050795\ttotal: 12.8s\tremaining: 156ms\n",
"988:\tlearn: 14605011.6874159\ttotal: 12.8s\tremaining: 143ms\n",
"989:\tlearn: 14594552.7887146\ttotal: 12.8s\tremaining: 130ms\n",
"990:\tlearn: 14591881.9316489\ttotal: 12.9s\tremaining: 117ms\n",
"991:\tlearn: 14581962.3358039\ttotal: 12.9s\tremaining: 104ms\n",
"992:\tlearn: 14581829.5331587\ttotal: 12.9s\tremaining: 90.8ms\n",
"993:\tlearn: 14581669.7347033\ttotal: 12.9s\tremaining: 77.9ms\n",
"994:\tlearn: 14577373.3119596\ttotal: 12.9s\tremaining: 64.9ms\n",
"995:\tlearn: 14577196.6327960\ttotal: 12.9s\tremaining: 51.9ms\n",
"996:\tlearn: 14577122.1536884\ttotal: 12.9s\tremaining: 38.9ms\n",
"997:\tlearn: 14574702.1336653\ttotal: 12.9s\tremaining: 25.9ms\n",
"998:\tlearn: 14574660.6060510\ttotal: 13s\tremaining: 13ms\n",
"999:\tlearn: 14574625.9856659\ttotal: 13s\tremaining: 0us\n"
]
},
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;, StandardScaler(),\n",
" [&#x27;geo_lat&#x27;, &#x27;geo_lon&#x27;,\n",
" &#x27;level&#x27;, &#x27;levels&#x27;, &#x27;rooms&#x27;,\n",
" &#x27;area&#x27;, &#x27;kitchen_area&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" OrdinalEncoder(handle_unknown=&#x27;use_encoded_value&#x27;,\n",
" unknown_value=99999999),\n",
" [&#x27;region&#x27;, &#x27;building_type&#x27;,\n",
" &#x27;object_type&#x27;])])),\n",
" (&#x27;model&#x27;,\n",
" &lt;catboost.core.CatBoostRegressor object at 0x7be8319969b0&gt;)])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;, StandardScaler(),\n",
" [&#x27;geo_lat&#x27;, &#x27;geo_lon&#x27;,\n",
" &#x27;level&#x27;, &#x27;levels&#x27;, &#x27;rooms&#x27;,\n",
" &#x27;area&#x27;, &#x27;kitchen_area&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" OrdinalEncoder(handle_unknown=&#x27;use_encoded_value&#x27;,\n",
" unknown_value=99999999),\n",
" [&#x27;region&#x27;, &#x27;building_type&#x27;,\n",
" &#x27;object_type&#x27;])])),\n",
" (&#x27;model&#x27;,\n",
" &lt;catboost.core.CatBoostRegressor object at 0x7be8319969b0&gt;)])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">preprocessor: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;, StandardScaler(),\n",
" [&#x27;geo_lat&#x27;, &#x27;geo_lon&#x27;, &#x27;level&#x27;, &#x27;levels&#x27;,\n",
" &#x27;rooms&#x27;, &#x27;area&#x27;, &#x27;kitchen_area&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" OrdinalEncoder(handle_unknown=&#x27;use_encoded_value&#x27;,\n",
" unknown_value=99999999),\n",
" [&#x27;region&#x27;, &#x27;building_type&#x27;, &#x27;object_type&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">num</label><div class=\"sk-toggleable__content\"><pre>[&#x27;geo_lat&#x27;, &#x27;geo_lon&#x27;, &#x27;level&#x27;, &#x27;levels&#x27;, &#x27;rooms&#x27;, &#x27;area&#x27;, &#x27;kitchen_area&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">StandardScaler</label><div class=\"sk-toggleable__content\"><pre>StandardScaler()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">cat</label><div class=\"sk-toggleable__content\"><pre>[&#x27;region&#x27;, &#x27;building_type&#x27;, &#x27;object_type&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(handle_unknown=&#x27;use_encoded_value&#x27;, 
unknown_value=99999999)</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CatBoostRegressor</label><div class=\"sk-toggleable__content\"><pre>&lt;catboost.core.CatBoostRegressor object at 0x7be8319969b0&gt;</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num', StandardScaler(),\n",
" ['geo_lat', 'geo_lon',\n",
" 'level', 'levels', 'rooms',\n",
" 'area', 'kitchen_area']),\n",
" ('cat',\n",
" OrdinalEncoder(handle_unknown='use_encoded_value',\n",
" unknown_value=99999999),\n",
" ['region', 'building_type',\n",
" 'object_type'])])),\n",
" ('model',\n",
" <catboost.core.CatBoostRegressor object at 0x7be8319969b0>)])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"pipeline = Pipeline(steps=[('preprocessor', preprocessor), \n",
" ('model', regressor)])\n",
"\n",
"pipeline.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
" model_params = {\n",
" \"geo_lat\":56.327686,\n",
" \"geo_lon\":43.928062,\n",
" \"region\":2871.000000,\n",
" \"building_type\":1.000000,\n",
" \"level\":8.000000,\n",
" \"levels\":10.000000,\n",
" \"rooms\":2.000000,\n",
" \"area\":56.000000,\n",
" \"kitchen_area\":8.500000,\n",
" \"object_type\":1.000000,\n",
" \"floor_level\":0,\n",
" }\n",
" df_pred = pd.DataFrame(model_params, index=[0])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([3414175.89042869])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"pipeline.predict(df_pred)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'mae': 1447931.3425270966,\n",
" 'mape': 1.6294525363466488e+18,\n",
" 'mse': 281898017343454.56}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predictions = pipeline.predict(X_test) \n",
"\n",
"metrics = {}\n",
"metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n",
"metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n",
"metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n",
"\n",
"metrics"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Работаем с MLflow локально\n",
"TRACKING_SERVER_HOST = \"127.0.0.1\"\n",
"TRACKING_SERVER_PORT = 5000\n",
"\n",
"registry_uri = f\"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}\"\n",
"tracking_uri = f\"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}\"\n",
"\n",
"mlflow.set_tracking_uri(tracking_uri) \n",
"mlflow.set_registry_uri(registry_uri) \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Названия тестового эксперимента и запуска (run) внутри него, а также имя, под которым модель будет регистрироваться\n",
"EXPERIMENT_NAME = \"estate_project\"\n",
"RUN_NAME = \"baseline model\"\n",
"REGISTRY_MODEL_NAME = \"estate_model_rf\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Логируем вручную"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Обязательно логируем сигнатуру модели и пример входных данных. Подготовим их\n",
"from mlflow.models import infer_signature\n",
"\n",
"signature = infer_signature(model_input = X_train.head(5))\n",
"input_example = X_train.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Будем логировать requirements и артефакт - текстовый файл\n",
"req_file = 'requirements.txt'\n",
"art = 'comment.txt'"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Параметры, которые будут залогированы, можем задавать вручную или полностью взять из модели\n",
"#params_dict = {'n_estimators': 10, 'max_depth': 10}\n",
"params_dict = pipeline.get_params()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Когда создаем новый эксперимент, то: \n",
"experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)\n",
"\n",
"# Впоследствии, чтобы добавлять запуски в этот же эксперимент, мы должны получить его id:\n",
"#experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"\n",
"with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n",
" # получаем уникальный идентификатор запуска эксперимента\n",
" run_id = run.info.run_id \n",
" mlflow.sklearn.log_model(pipeline, \n",
" artifact_path=\"models\",\n",
" signature=signature,\n",
" input_example=input_example,\n",
" pip_requirements=req_file\n",
" )\n",
" mlflow.log_metrics(metrics)\n",
" mlflow.log_artifact(art)\n",
" mlflow.log_params(params_dict)\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Удаление runs, experiments\n",
"\n",
"Использовать осторожно"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"#mlflow.delete_experiment(experiment_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mlflow.search_runs(\n",
" #experiment_ids=[experiment_id],\n",
" experiment_names=[EXPERIMENT_NAME],\n",
" # filter_string='status = \"FAILED\"'\n",
" #filter_string='metrics.mae > 1'\n",
" \n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"\n",
"#mlflow.delete_run('74d2a7a40c07413c9cf65df841164356')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Автологирование\n",
"После включения будет срабатывать на каждом обучении модели (на методе fit()).\n",
"\n",
"Есть плюсы, есть и минусы. Предлагается сделать прогон и сравнить с результатами ручного логирования."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mlflow.sklearn.autolog()\n",
"\n",
"with mlflow.start_run(run_name='auto', experiment_id=experiment_id) as run:\n",
" pipeline.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Отключаем автологирование\n",
"mlflow.sklearn.autolog(disable=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Model #2\n",
"Обучим вторую \"маленькую\" модель\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"regressor2 = RandomForestRegressor(n_estimators=10, max_depth=6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline = Pipeline(steps=[('preprocessor', preprocessor), \n",
" ('model', regressor2)])\n",
"\n",
"pipeline.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predictions = pipeline.predict(X_test) \n",
"metrics = {}\n",
"metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n",
"metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n",
"metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n",
"\n",
"metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !!! Проверить название прогона, а также все логируемые параметры и артефакты: они должны соответствовать второй \"маленькой\" модели.\n",
"\n",
"\n",
"RUN_NAME = 'smaller_model'\n",
"\n",
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"\n",
"with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n",
" # получаем уникальный идентификатор запуска эксперимента\n",
" run_id = run.info.run_id \n",
" mlflow.sklearn.log_model(pipeline, \n",
" artifact_path=\"models\",\n",
" signature=signature,\n",
" input_example=input_example,\n",
" pip_requirements=req_file\n",
" )\n",
" mlflow.log_metrics(metrics)\n",
" mlflow.log_artifact(art)\n",
" mlflow.log_params(pipeline.get_params())\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# No model\n",
"# Логировать можно только артефакты, без модели. Например, залогировать графики после этапа EDA\n",
"\n",
"RUN_NAME = 'no_model'\n",
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"\n",
"with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n",
" run_id = run.info.run_id \n",
" mlflow.log_artifact(art)\n",
"\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_id = '06fa7ec1f1b74aedb3509c88dc4ee1c0' # Указываем run id\n",
"mlflow.register_model(f\"runs:/{run_id}/models\", REGISTRY_MODEL_NAME)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Можно регистрировать сразу при создании прогона\n",
"\n",
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"\n",
"with mlflow.start_run(run_name='register_at_run', experiment_id=experiment_id) as run:\n",
" # получаем уникальный идентификатор запуска эксперимента\n",
" run_id = run.info.run_id \n",
" mlflow.sklearn.log_model(pipeline, \n",
" artifact_path=\"models\",\n",
" signature=signature,\n",
" input_example=input_example,\n",
" pip_requirements=req_file,\n",
" registered_model_name = REGISTRY_MODEL_NAME # Указываем для какой модели регистрируем\n",
" )\n",
" mlflow.log_metrics(metrics)\n",
" mlflow.log_artifact(art)\n",
" mlflow.log_params(pipeline.get_params())\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Можно найти зарегистрированные модели\n",
"model_reg = mlflow.search_registered_models()\n",
"model_reg[0]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"\n",
"model_name = REGISTRY_MODEL_NAME\n",
"model_version = 1\n",
"\n",
"model_loaded = mlflow.sklearn.load_model(model_uri=f\"models:/{model_name}/{model_version}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_loaded.predict(X_test.iloc[0:1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_test.iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Feature engineering"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sklearn"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import QuantileTransformer, SplineTransformer, PolynomialFeatures, MinMaxScaler"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"X_train_sklearn = X_train.copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### PolynomialFeatures\n",
"Создаёт из указанных признаков все полиномиальные комбинации степени не выше `degree`, включая свободный член и произведения признаков между собой\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"pf = PolynomialFeatures(degree=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X_train_sklearn"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.fit_transform(X_train_sklearn[['area','kitchen_area']])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### SplineTransformer\n",
"Создаёт новую матрицу признаков, состоящую из сплайнов порядка `degree`. Количество сгенерированных сплайнов равно `n_splines=n_knots + degree - 1` для каждого признака, где\n",
"\n",
"`n_knots` определяет количество узлов (точек, в которых сопрягаются сплайны) для каждого признака. \n",
"\n",
"`degree` определяет порядок полинома, используемого для построения сплайнов. "
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"sp = SplineTransformer(n_knots=3, degree=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sp.fit_transform(X_train_sklearn[['area']])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### QuantileTransformer\n",
"Этот метод преобразует признаки, чтобы они распределялись равномерно или нормально — так данные меньше подвергаются влиянию выбросов. Преобразование применяется к каждому признаку независимо. Идея метода такова: оценить функцию распределения признака, чтобы преобразовать исходные значения в равномерное или нормальное распределение. \n",
"\n",
"`output_distribution='uniform'` или\n",
"`output_distribution='normal'` соответственно\n",
"\n",
"\n",
"Пример использования: если у вас есть данные о доходах с широким диапазоном значений, квантильное преобразование сделает их более сопоставимыми и устойчивыми к выбросам."
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"qt = QuantileTransformer()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"qt.fit_transform(X_train_sklearn[['area']])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Объединяем в ColumnTransformer и создаем Pipeline "
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"pf = PolynomialFeatures(degree=2)\n",
"qt = QuantileTransformer()\n",
"sp = SplineTransformer(n_knots=3, degree=3)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# Значения преобразованных признаков нужно отскейлить, поэтому создаем pipeline из двух шагов - преобразование и скейлинг\n",
"pf_pipeline = Pipeline(steps=[\n",
" ('poly', pf),\n",
" ('scale', StandardScaler())\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"preprocessor_sklearn = ColumnTransformer(\n",
" transformers=[\n",
" ('num', s_scaler, num_features), # преобразования для числовых признаков\n",
" ('cat', l_encoder, cat_features), # преобразования для категориальных признаков\n",
" ('quantile', qt,num_features),\n",
" ('poly', pf_pipeline, ['area', 'kitchen_area']), # В преобразования добавляем созданный ранее pipeline\n",
" ('spline', sp, ['area'])\n",
" ],\n",
" remainder='drop',\n",
" ) # Удаляем столбцы, которые не затронуты преобразования"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Посмотрим, что из себя теперь представляет датафрейм"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"## Не влезаем в float64 в полиномиальном преобразовании, поэтому приводим признаки к float128. Использовать его нужно с умом!\n",
"X_train_sklearn[['area', 'kitchen_area']] = X_train_sklearn[['area', 'kitchen_area']].astype('float128')\n",
"X_train_sklearn[['area', 'kitchen_area']] = X_train_sklearn[['area', 'kitchen_area']].astype('float128')"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"X_train_sklearn_raw = preprocessor_sklearn.fit_transform(X_train_sklearn)\n",
"X_train_sklearn = pd.DataFrame(X_train_sklearn_raw, columns=preprocessor_sklearn.get_feature_names_out())"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>num__geo_lat</th>\n",
" <th>num__geo_lon</th>\n",
" <th>num__level</th>\n",
" <th>num__levels</th>\n",
" <th>num__rooms</th>\n",
" <th>num__area</th>\n",
" <th>num__kitchen_area</th>\n",
" <th>cat__region</th>\n",
" <th>cat__building_type</th>\n",
" <th>cat__object_type</th>\n",
" <th>quantile__geo_lat</th>\n",
" <th>quantile__geo_lon</th>\n",
" <th>quantile__level</th>\n",
" <th>quantile__levels</th>\n",
" <th>quantile__rooms</th>\n",
" <th>quantile__area</th>\n",
" <th>quantile__kitchen_area</th>\n",
" <th>poly__1</th>\n",
" <th>poly__area</th>\n",
" <th>poly__kitchen_area</th>\n",
" <th>poly__area^2</th>\n",
" <th>poly__area kitchen_area</th>\n",
" <th>poly__kitchen_area^2</th>\n",
" <th>spline__area_sp_0</th>\n",
" <th>spline__area_sp_1</th>\n",
" <th>spline__area_sp_2</th>\n",
" <th>spline__area_sp_3</th>\n",
" <th>spline__area_sp_4</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.495902</td>\n",
" <td>-0.449742</td>\n",
" <td>0.359235</td>\n",
" <td>-0.214789</td>\n",
" <td>0.253413</td>\n",
" <td>0.063735</td>\n",
" <td>-0.186285</td>\n",
" <td>20.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.766257</td>\n",
" <td>0.511028</td>\n",
" <td>0.717217</td>\n",
" <td>0.536537</td>\n",
" <td>0.600601</td>\n",
" <td>0.623624</td>\n",
" <td>0.374875</td>\n",
" <td>0.0</td>\n",
" <td>0.063735</td>\n",
" <td>-0.186285</td>\n",
" <td>-0.010002</td>\n",
" <td>-0.132188</td>\n",
" <td>-0.002792</td>\n",
" <td>0.155806</td>\n",
" <td>0.666179</td>\n",
" <td>0.178013</td>\n",
" <td>0.000002</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.177806</td>\n",
" <td>1.433673</td>\n",
" <td>-0.246529</td>\n",
" <td>-0.367718</td>\n",
" <td>0.253413</td>\n",
" <td>-0.114293</td>\n",
" <td>-0.186285</td>\n",
" <td>70.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.297142</td>\n",
" <td>0.867999</td>\n",
" <td>0.522022</td>\n",
" <td>0.386887</td>\n",
" <td>0.600601</td>\n",
" <td>0.541542</td>\n",
" <td>0.374875</td>\n",
" <td>0.0</td>\n",
" <td>-0.114293</td>\n",
" <td>-0.186285</td>\n",
" <td>-0.017375</td>\n",
" <td>-0.169370</td>\n",
" <td>-0.002792</td>\n",
" <td>0.156921</td>\n",
" <td>0.666275</td>\n",
" <td>0.176803</td>\n",
" <td>0.000001</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410773</th>\n",
" <td>-0.748366</td>\n",
" <td>-0.804077</td>\n",
" <td>-0.650371</td>\n",
" <td>0.702788</td>\n",
" <td>0.253413</td>\n",
" <td>1.365441</td>\n",
" <td>1.501833</td>\n",
" <td>52.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.193143</td>\n",
" <td>0.114753</td>\n",
" <td>0.309810</td>\n",
" <td>0.741742</td>\n",
" <td>0.600601</td>\n",
" <td>0.961367</td>\n",
" <td>0.984535</td>\n",
" <td>0.0</td>\n",
" <td>1.365441</td>\n",
" <td>1.501833</td>\n",
" <td>0.068438</td>\n",
" <td>1.570163</td>\n",
" <td>0.008616</td>\n",
" <td>0.147820</td>\n",
" <td>0.665159</td>\n",
" <td>0.187011</td>\n",
" <td>0.000010</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410774</th>\n",
" <td>1.257769</td>\n",
" <td>-1.101815</td>\n",
" <td>-0.044608</td>\n",
" <td>0.091070</td>\n",
" <td>1.175911</td>\n",
" <td>0.553789</td>\n",
" <td>-0.142544</td>\n",
" <td>14.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.908036</td>\n",
" <td>0.075725</td>\n",
" <td>0.604605</td>\n",
" <td>0.645646</td>\n",
" <td>0.867367</td>\n",
" <td>0.841842</td>\n",
" <td>0.436436</td>\n",
" <td>0.0</td>\n",
" <td>0.553789</td>\n",
" <td>-0.142544</td>\n",
" <td>0.014463</td>\n",
" <td>-0.002742</td>\n",
" <td>-0.002649</td>\n",
" <td>0.152767</td>\n",
" <td>0.665860</td>\n",
" <td>0.181370</td>\n",
" <td>0.000004</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>410775 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" num__geo_lat num__geo_lon num__level num__levels num__rooms \\\n",
"0 0.495902 -0.449742 0.359235 -0.214789 0.253413 \n",
"1 0.177806 1.433673 -0.246529 -0.367718 0.253413 \n",
"... ... ... ... ... ... \n",
"410773 -0.748366 -0.804077 -0.650371 0.702788 0.253413 \n",
"410774 1.257769 -1.101815 -0.044608 0.091070 1.175911 \n",
"\n",
" num__area num__kitchen_area cat__region cat__building_type \\\n",
"0 0.063735 -0.186285 20.0 1.0 \n",
"1 -0.114293 -0.186285 70.0 1.0 \n",
"... ... ... ... ... \n",
"410773 1.365441 1.501833 52.0 3.0 \n",
"410774 0.553789 -0.142544 14.0 1.0 \n",
"\n",
" cat__object_type quantile__geo_lat quantile__geo_lon \\\n",
"0 0.0 0.766257 0.511028 \n",
"1 0.0 0.297142 0.867999 \n",
"... ... ... ... \n",
"410773 0.0 0.193143 0.114753 \n",
"410774 0.0 0.908036 0.075725 \n",
"\n",
" quantile__level quantile__levels quantile__rooms quantile__area \\\n",
"0 0.717217 0.536537 0.600601 0.623624 \n",
"1 0.522022 0.386887 0.600601 0.541542 \n",
"... ... ... ... ... \n",
"410773 0.309810 0.741742 0.600601 0.961367 \n",
"410774 0.604605 0.645646 0.867367 0.841842 \n",
"\n",
" quantile__kitchen_area poly__1 poly__area poly__kitchen_area \\\n",
"0 0.374875 0.0 0.063735 -0.186285 \n",
"1 0.374875 0.0 -0.114293 -0.186285 \n",
"... ... ... ... ... \n",
"410773 0.984535 0.0 1.365441 1.501833 \n",
"410774 0.436436 0.0 0.553789 -0.142544 \n",
"\n",
" poly__area^2 poly__area kitchen_area poly__kitchen_area^2 \\\n",
"0 -0.010002 -0.132188 -0.002792 \n",
"1 -0.017375 -0.169370 -0.002792 \n",
"... ... ... ... \n",
"410773 0.068438 1.570163 0.008616 \n",
"410774 0.014463 -0.002742 -0.002649 \n",
"\n",
" spline__area_sp_0 spline__area_sp_1 spline__area_sp_2 \\\n",
"0 0.155806 0.666179 0.178013 \n",
"1 0.156921 0.666275 0.176803 \n",
"... ... ... ... \n",
"410773 0.147820 0.665159 0.187011 \n",
"410774 0.152767 0.665860 0.181370 \n",
"\n",
" spline__area_sp_3 spline__area_sp_4 \n",
"0 0.000002 0.0 \n",
"1 0.000001 0.0 \n",
"... ... ... \n",
"410773 0.000010 0.0 \n",
"410774 0.000004 0.0 \n",
"\n",
"[410775 rows x 28 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# option_context позволяет временно задать, сколько строк и столбцов DataFrame отображать (здесь: не более 5 строк и все столбцы)\n",
"with pd.option_context('display.max_rows', 5, 'display.max_columns', None):\n",
" display (X_train_sklearn)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Создаем пайплайн с препроцессингом и моделью"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline_sklearn = Pipeline(steps=[\n",
" ('transform', preprocessor_sklearn),\n",
" ('model', regressor)\n",
"])\n",
"\n",
"model_sklearn = pipeline_sklearn.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_sklearn"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predictions = model_sklearn.predict(X_test) \n",
"metrics = {}\n",
"metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n",
"metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n",
"metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n",
"\n",
"metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"RUN_NAME = 'fe_sklearn'\n",
"\n",
"with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n",
" # получаем уникальный идентификатор запуска эксперимента\n",
" run_id = run.info.run_id \n",
" mlflow.sklearn.log_model(model_sklearn, \n",
" artifact_path=\"models\",\n",
" signature=signature,\n",
" input_example=input_example,\n",
" pip_requirements=req_file\n",
" )\n",
" mlflow.log_metrics(metrics)\n",
" mlflow.log_artifact(art)\n",
" mlflow.log_params(model_sklearn.get_params())\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Autofeat"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"from autofeat import AutoFeatRegressor\n",
"transformations = [\"1/\", \"exp\", \"log\", \"abs\", \"sqrt\", \"^2\", \"^3\", \"1+\", \"1-\", \"sin\", \"cos\", \"exp-\", \"2^\"] "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"afreg = AutoFeatRegressor(verbose=1, feateng_steps=2, max_gb=8, transformations=[\"log\", \"sqrt\"],feateng_cols=num_features)\n",
"X_train_arf = afreg.fit_transform(X_train,y_train)\n",
"X_train_arf"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"# Создаем обертку, в которой добавляем метод get_feature_names_out() для получения названий признаков\n",
"import numpy as np\n",
"\n",
"class AutoFeatWrapper():\n",
" def __init__(self, feateng_cols, feateng_steps=1, max_gb=16, transformations=[\"1/\", \"exp\", \"log\"], n_jobs=-1, verbose=1):\n",
" self.feateng_cols = feateng_cols\n",
" self.feateng_steps = feateng_steps\n",
" self.max_gb = max_gb\n",
" self.transformations = transformations\n",
" self.n_jobs = n_jobs\n",
" self.afreg = AutoFeatRegressor(feateng_cols=self.feateng_cols,\n",
" feateng_steps=self.feateng_steps,\n",
" max_gb=self.max_gb,\n",
" transformations=self.transformations,\n",
" n_jobs=self.n_jobs)\n",
" \n",
" def fit(self, X, y=None):\n",
" self.afreg.fit(X, y)\n",
" return self\n",
" \n",
" def transform(self, X):\n",
" return self.afreg.transform(X)\n",
" \n",
" def get_feature_names_out(self, input_features=None):\n",
" # Преобразуем данные и возвращаем имена фичей из DataFrame\n",
" transformed_X = self.afreg.transform(pd.DataFrame(np.zeros((1, len(self.feateng_cols))), columns=self.feateng_cols))\n",
" return transformed_X.columns.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"afreg_pipeline = Pipeline(steps=[\n",
" ('autofeat', AutoFeatWrapper( feateng_steps=2, max_gb=16, transformations=[\"log\", \"sqrt\"],feateng_cols=num_features)),\n",
" ('scaler', StandardScaler()),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"preprocessor_afr = ColumnTransformer(\n",
" transformers=[\n",
" ('num', s_scaler, num_features), # преобразования для числовых признаков\n",
" ('cat', l_encoder, cat_features), # преобразования для категориальных признаков\n",
" ('afr', afreg_pipeline, num_features), # преобразования autofeat\n",
" ],\n",
" remainder='drop', # Удаляем столбцы, которые не затронуты преобразованиями\n",
" ) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X_train_afr_raw = preprocessor_afr.fit_transform(X_train,y_train)\n",
"X_train_afr = pd.DataFrame(X_train_afr_raw, columns=preprocessor_afr.get_feature_names_out())"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>num__geo_lat</th>\n",
" <th>num__geo_lon</th>\n",
" <th>num__level</th>\n",
" <th>num__levels</th>\n",
" <th>num__rooms</th>\n",
" <th>num__area</th>\n",
" <th>num__kitchen_area</th>\n",
" <th>cat__region</th>\n",
" <th>cat__building_type</th>\n",
" <th>cat__object_type</th>\n",
" <th>afr__geo_lat</th>\n",
" <th>afr__geo_lon</th>\n",
" <th>afr__level</th>\n",
" <th>afr__levels</th>\n",
" <th>afr__rooms</th>\n",
" <th>afr__area</th>\n",
" <th>afr__kitchen_area</th>\n",
" <th>afr__area*rooms</th>\n",
" <th>afr__area*geo_lon</th>\n",
" <th>afr__levels*rooms</th>\n",
" <th>afr__area*kitchen_area</th>\n",
" <th>afr__sqrt(area)*geo_lat</th>\n",
" <th>afr__sqrt(area)*log(level)</th>\n",
" <th>afr__kitchen_area*log(level)</th>\n",
" <th>afr__sqrt(area)*kitchen_area</th>\n",
" <th>afr__geo_lon*log(kitchen_area)</th>\n",
" <th>afr__sqrt(area)*sqrt(kitchen_area)</th>\n",
" <th>afr__sqrt(geo_lon)*sqrt(kitchen_area)</th>\n",
" <th>afr__log(area)</th>\n",
" <th>afr__rooms*log(level)</th>\n",
" <th>afr__kitchen_area*rooms</th>\n",
" <th>afr__kitchen_area*levels</th>\n",
" <th>afr__sqrt(geo_lon)*sqrt(level)</th>\n",
" <th>afr__area**(3/2)</th>\n",
" <th>afr__geo_lat*log(kitchen_area)</th>\n",
" <th>afr__geo_lat*log(geo_lon)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.495902</td>\n",
" <td>-0.449742</td>\n",
" <td>0.359235</td>\n",
" <td>-0.214789</td>\n",
" <td>0.253413</td>\n",
" <td>0.063735</td>\n",
" <td>-0.186285</td>\n",
" <td>20.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.495902</td>\n",
" <td>-0.449742</td>\n",
" <td>0.359235</td>\n",
" <td>-0.214789</td>\n",
" <td>0.253413</td>\n",
" <td>0.063735</td>\n",
" <td>-0.186285</td>\n",
" <td>0.006208</td>\n",
" <td>-0.195129</td>\n",
" <td>0.060916</td>\n",
" <td>-0.132188</td>\n",
" <td>0.373151</td>\n",
" <td>0.688076</td>\n",
" <td>0.044178</td>\n",
" <td>-0.211335</td>\n",
" <td>-0.481294</td>\n",
" <td>-0.153548</td>\n",
" <td>-0.490805</td>\n",
" <td>0.307835</td>\n",
" <td>0.690329</td>\n",
" <td>-0.132529</td>\n",
" <td>-0.352834</td>\n",
" <td>0.323880</td>\n",
" <td>-0.008748</td>\n",
" <td>-0.031529</td>\n",
" <td>0.068167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.177806</td>\n",
" <td>1.433673</td>\n",
" <td>-0.246529</td>\n",
" <td>-0.367718</td>\n",
" <td>0.253413</td>\n",
" <td>-0.114293</td>\n",
" <td>-0.186285</td>\n",
" <td>70.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.177806</td>\n",
" <td>1.433673</td>\n",
" <td>-0.246529</td>\n",
" <td>-0.367718</td>\n",
" <td>0.253413</td>\n",
" <td>-0.114293</td>\n",
" <td>-0.186285</td>\n",
" <td>-0.083402</td>\n",
" <td>0.655053</td>\n",
" <td>-0.054279</td>\n",
" <td>-0.169370</td>\n",
" <td>0.005114</td>\n",
" <td>0.071369</td>\n",
" <td>-0.173647</td>\n",
" <td>-0.252775</td>\n",
" <td>1.191304</td>\n",
" <td>-0.267268</td>\n",
" <td>0.615798</td>\n",
" <td>0.031907</td>\n",
" <td>0.282625</td>\n",
" <td>-0.132529</td>\n",
" <td>-0.418643</td>\n",
" <td>0.552794</td>\n",
" <td>-0.056540</td>\n",
" <td>-0.143829</td>\n",
" <td>1.129118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410773</th>\n",
" <td>-0.748366</td>\n",
" <td>-0.804077</td>\n",
" <td>-0.650371</td>\n",
" <td>0.702788</td>\n",
" <td>0.253413</td>\n",
" <td>1.365441</td>\n",
" <td>1.501833</td>\n",
" <td>52.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.748366</td>\n",
" <td>-0.804077</td>\n",
" <td>-0.650371</td>\n",
" <td>0.702788</td>\n",
" <td>0.253413</td>\n",
" <td>1.365441</td>\n",
" <td>1.501833</td>\n",
" <td>0.661427</td>\n",
" <td>0.375199</td>\n",
" <td>0.752088</td>\n",
" <td>1.570163</td>\n",
" <td>1.274445</td>\n",
" <td>-0.002521</td>\n",
" <td>0.745507</td>\n",
" <td>2.382258</td>\n",
" <td>0.071599</td>\n",
" <td>2.828890</td>\n",
" <td>1.431272</td>\n",
" <td>1.729715</td>\n",
" <td>-0.160491</td>\n",
" <td>1.581436</td>\n",
" <td>2.432437</td>\n",
" <td>-0.843150</td>\n",
" <td>0.411475</td>\n",
" <td>1.671069</td>\n",
" <td>-1.052343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410774</th>\n",
" <td>1.257769</td>\n",
" <td>-1.101815</td>\n",
" <td>-0.044608</td>\n",
" <td>0.091070</td>\n",
" <td>1.175911</td>\n",
" <td>0.553789</td>\n",
" <td>-0.142544</td>\n",
" <td>14.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.257769</td>\n",
" <td>-1.101815</td>\n",
" <td>-0.044608</td>\n",
" <td>0.091070</td>\n",
" <td>1.175911</td>\n",
" <td>0.553789</td>\n",
" <td>-0.142544</td>\n",
" <td>0.807887</td>\n",
" <td>-0.330070</td>\n",
" <td>0.982478</td>\n",
" <td>-0.002742</td>\n",
" <td>1.338996</td>\n",
" <td>0.635065</td>\n",
" <td>-0.040302</td>\n",
" <td>-0.055435</td>\n",
" <td>-1.025588</td>\n",
" <td>0.202136</td>\n",
" <td>-0.916054</td>\n",
" <td>0.940624</td>\n",
" <td>1.217910</td>\n",
" <td>0.311575</td>\n",
" <td>-0.174762</td>\n",
" <td>-0.415359</td>\n",
" <td>0.135617</td>\n",
" <td>0.359680</td>\n",
" <td>-0.246790</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>410775 rows × 36 columns</p>\n",
"</div>"
],
"text/plain": [
" num__geo_lat num__geo_lon num__level num__levels num__rooms \\\n",
"0 0.495902 -0.449742 0.359235 -0.214789 0.253413 \n",
"1 0.177806 1.433673 -0.246529 -0.367718 0.253413 \n",
"... ... ... ... ... ... \n",
"410773 -0.748366 -0.804077 -0.650371 0.702788 0.253413 \n",
"410774 1.257769 -1.101815 -0.044608 0.091070 1.175911 \n",
"\n",
" num__area num__kitchen_area cat__region cat__building_type \\\n",
"0 0.063735 -0.186285 20.0 1.0 \n",
"1 -0.114293 -0.186285 70.0 1.0 \n",
"... ... ... ... ... \n",
"410773 1.365441 1.501833 52.0 3.0 \n",
"410774 0.553789 -0.142544 14.0 1.0 \n",
"\n",
" cat__object_type afr__geo_lat afr__geo_lon afr__level afr__levels \\\n",
"0 0.0 0.495902 -0.449742 0.359235 -0.214789 \n",
"1 0.0 0.177806 1.433673 -0.246529 -0.367718 \n",
"... ... ... ... ... ... \n",
"410773 0.0 -0.748366 -0.804077 -0.650371 0.702788 \n",
"410774 0.0 1.257769 -1.101815 -0.044608 0.091070 \n",
"\n",
" afr__rooms afr__area afr__kitchen_area afr__area*rooms \\\n",
"0 0.253413 0.063735 -0.186285 0.006208 \n",
"1 0.253413 -0.114293 -0.186285 -0.083402 \n",
"... ... ... ... ... \n",
"410773 0.253413 1.365441 1.501833 0.661427 \n",
"410774 1.175911 0.553789 -0.142544 0.807887 \n",
"\n",
" afr__area*geo_lon afr__levels*rooms afr__area*kitchen_area \\\n",
"0 -0.195129 0.060916 -0.132188 \n",
"1 0.655053 -0.054279 -0.169370 \n",
"... ... ... ... \n",
"410773 0.375199 0.752088 1.570163 \n",
"410774 -0.330070 0.982478 -0.002742 \n",
"\n",
" afr__sqrt(area)*geo_lat afr__sqrt(area)*log(level) \\\n",
"0 0.373151 0.688076 \n",
"1 0.005114 0.071369 \n",
"... ... ... \n",
"410773 1.274445 -0.002521 \n",
"410774 1.338996 0.635065 \n",
"\n",
" afr__kitchen_area*log(level) afr__sqrt(area)*kitchen_area \\\n",
"0 0.044178 -0.211335 \n",
"1 -0.173647 -0.252775 \n",
"... ... ... \n",
"410773 0.745507 2.382258 \n",
"410774 -0.040302 -0.055435 \n",
"\n",
" afr__geo_lon*log(kitchen_area) afr__sqrt(area)*sqrt(kitchen_area) \\\n",
"0 -0.481294 -0.153548 \n",
"1 1.191304 -0.267268 \n",
"... ... ... \n",
"410773 0.071599 2.828890 \n",
"410774 -1.025588 0.202136 \n",
"\n",
" afr__sqrt(geo_lon)*sqrt(kitchen_area) afr__log(area) \\\n",
"0 -0.490805 0.307835 \n",
"1 0.615798 0.031907 \n",
"... ... ... \n",
"410773 1.431272 1.729715 \n",
"410774 -0.916054 0.940624 \n",
"\n",
" afr__rooms*log(level) afr__kitchen_area*rooms \\\n",
"0 0.690329 -0.132529 \n",
"1 0.282625 -0.132529 \n",
"... ... ... \n",
"410773 -0.160491 1.581436 \n",
"410774 1.217910 0.311575 \n",
"\n",
" afr__kitchen_area*levels afr__sqrt(geo_lon)*sqrt(level) \\\n",
"0 -0.352834 0.323880 \n",
"1 -0.418643 0.552794 \n",
"... ... ... \n",
"410773 2.432437 -0.843150 \n",
"410774 -0.174762 -0.415359 \n",
"\n",
" afr__area**(3/2) afr__geo_lat*log(kitchen_area) \\\n",
"0 -0.008748 -0.031529 \n",
"1 -0.056540 -0.143829 \n",
"... ... ... \n",
"410773 0.411475 1.671069 \n",
"410774 0.135617 0.359680 \n",
"\n",
" afr__geo_lat*log(geo_lon) \n",
"0 0.068167 \n",
"1 1.129118 \n",
"... ... \n",
"410773 -1.052343 \n",
"410774 -0.246790 \n",
"\n",
"[410775 rows x 36 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"with pd.option_context('display.max_rows', 5, 'display.max_columns', None):\n",
" display (X_train_afr)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline_afr = Pipeline(steps=[('preprocessor', preprocessor_afr), \n",
" ('model', regressor)])\n",
"\n",
"pipeline_afr.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predictions = pipeline_afr.predict(X_test) \n",
"\n",
"metrics = {}\n",
"metrics[\"mae\"] = mean_absolute_error(y_test, predictions) \n",
"metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions)\n",
"metrics[\"mse\"] = mean_squared_error(y_test, predictions)\n",
"\n",
"metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"\n",
"with mlflow.start_run(run_name='autofeat', experiment_id=experiment_id) as run:\n",
" # получаем уникальный идентификатор запуска эксперимента\n",
" run_id = run.info.run_id \n",
" mlflow.sklearn.log_model(pipeline_afr, \n",
" artifact_path=\"models\",\n",
" signature=signature,\n",
" input_example=input_example,\n",
" pip_requirements=req_file\n",
" )\n",
" mlflow.log_metrics(metrics)\n",
" mlflow.log_artifact(art)\n",
" mlflow.log_params(pipeline_afr.get_params())\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# FEATURE SELECTION\n",
"## RFE\n",
"### Используем признаки autofeat\n",
"Поскольку autofeat дает разные совокупности сгенерированных признаков, отбор информативных признаков мы можем добавить только как шаг пайплайна"
]
},
{
"cell_type": "code",
"execution_count": 294,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>num__geo_lat</th>\n",
" <th>num__geo_lon</th>\n",
" <th>num__level</th>\n",
" <th>num__levels</th>\n",
" <th>num__rooms</th>\n",
" <th>num__area</th>\n",
" <th>num__kitchen_area</th>\n",
" <th>cat__region</th>\n",
" <th>cat__building_type</th>\n",
" <th>cat__object_type</th>\n",
" <th>...</th>\n",
" <th>afr__sqrt(area)*sqrt(kitchen_area)</th>\n",
" <th>afr__sqrt(geo_lon)*sqrt(kitchen_area)</th>\n",
" <th>afr__log(area)</th>\n",
" <th>afr__rooms*log(level)</th>\n",
" <th>afr__kitchen_area*rooms</th>\n",
" <th>afr__kitchen_area*levels</th>\n",
" <th>afr__sqrt(geo_lon)*sqrt(level)</th>\n",
" <th>afr__area**(3/2)</th>\n",
" <th>afr__geo_lat*log(kitchen_area)</th>\n",
" <th>afr__geo_lat*log(geo_lon)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.495902</td>\n",
" <td>-0.449742</td>\n",
" <td>0.359235</td>\n",
" <td>-0.214789</td>\n",
" <td>0.253413</td>\n",
" <td>0.063735</td>\n",
" <td>-0.186285</td>\n",
" <td>20.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>-0.153548</td>\n",
" <td>-0.490805</td>\n",
" <td>0.307835</td>\n",
" <td>0.690329</td>\n",
" <td>-0.132529</td>\n",
" <td>-0.352834</td>\n",
" <td>0.323880</td>\n",
" <td>-0.008748</td>\n",
" <td>-0.031529</td>\n",
" <td>0.068167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.177806</td>\n",
" <td>1.433673</td>\n",
" <td>-0.246529</td>\n",
" <td>-0.367718</td>\n",
" <td>0.253413</td>\n",
" <td>-0.114293</td>\n",
" <td>-0.186285</td>\n",
" <td>70.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>-0.267268</td>\n",
" <td>0.615798</td>\n",
" <td>0.031907</td>\n",
" <td>0.282625</td>\n",
" <td>-0.132529</td>\n",
" <td>-0.418643</td>\n",
" <td>0.552794</td>\n",
" <td>-0.056540</td>\n",
" <td>-0.143829</td>\n",
" <td>1.129118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.440548</td>\n",
" <td>0.047222</td>\n",
" <td>-0.448450</td>\n",
" <td>-0.367718</td>\n",
" <td>-0.669085</td>\n",
" <td>-0.456947</td>\n",
" <td>-0.142544</td>\n",
" <td>15.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>-0.454880</td>\n",
" <td>-0.067183</td>\n",
" <td>-0.603122</td>\n",
" <td>-0.512211</td>\n",
" <td>-0.487813</td>\n",
" <td>-0.383803</td>\n",
" <td>-0.243092</td>\n",
" <td>-0.140800</td>\n",
" <td>0.063464</td>\n",
" <td>0.460495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-1.588818</td>\n",
" <td>-0.722477</td>\n",
" <td>-0.246529</td>\n",
" <td>-0.979436</td>\n",
" <td>0.253413</td>\n",
" <td>-0.181292</td>\n",
" <td>-0.142544</td>\n",
" <td>18.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>-0.254514</td>\n",
" <td>-0.607607</td>\n",
" <td>-0.080304</td>\n",
" <td>0.282625</td>\n",
" <td>-0.088119</td>\n",
" <td>-0.662523</td>\n",
" <td>-0.369355</td>\n",
" <td>-0.073838</td>\n",
" <td>-0.672113</td>\n",
" <td>-1.481033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.493662</td>\n",
" <td>1.125819</td>\n",
" <td>0.157313</td>\n",
" <td>0.549858</td>\n",
" <td>0.253413</td>\n",
" <td>0.615045</td>\n",
" <td>-0.011322</td>\n",
" <td>10.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.438600</td>\n",
" <td>0.891383</td>\n",
" <td>1.009612</td>\n",
" <td>0.574497</td>\n",
" <td>0.045112</td>\n",
" <td>0.208478</td>\n",
" <td>0.945981</td>\n",
" <td>0.154902</td>\n",
" <td>0.780855</td>\n",
" <td>1.923382</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410770</th>\n",
" <td>0.592011</td>\n",
" <td>0.355014</td>\n",
" <td>0.561156</td>\n",
" <td>1.008646</td>\n",
" <td>0.253413</td>\n",
" <td>-0.079836</td>\n",
" <td>-0.092653</td>\n",
" <td>54.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>-0.120035</td>\n",
" <td>0.237580</td>\n",
" <td>0.087725</td>\n",
" <td>0.792500</td>\n",
" <td>-0.037463</td>\n",
" <td>0.322797</td>\n",
" <td>0.974381</td>\n",
" <td>-0.047496</td>\n",
" <td>0.243018</td>\n",
" <td>0.789871</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410771</th>\n",
" <td>0.240478</td>\n",
" <td>0.392697</td>\n",
" <td>-0.650371</td>\n",
" <td>-0.979436</td>\n",
" <td>0.253413</td>\n",
" <td>-0.334434</td>\n",
" <td>-0.404989</td>\n",
" <td>45.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>-0.716150</td>\n",
" <td>-0.510766</td>\n",
" <td>-0.357277</td>\n",
" <td>-0.160491</td>\n",
" <td>-0.354582</td>\n",
" <td>-0.778657</td>\n",
" <td>-0.406361</td>\n",
" <td>-0.111897</td>\n",
" <td>-0.808157</td>\n",
" <td>0.574534</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410772</th>\n",
" <td>-1.936771</td>\n",
" <td>-0.688830</td>\n",
" <td>0.359235</td>\n",
" <td>0.855717</td>\n",
" <td>-0.669085</td>\n",
" <td>-0.456947</td>\n",
" <td>-0.142544</td>\n",
" <td>18.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>-0.454880</td>\n",
" <td>-0.581851</td>\n",
" <td>-0.603122</td>\n",
" <td>-0.211576</td>\n",
" <td>-0.487813</td>\n",
" <td>0.173638</td>\n",
" <td>0.170166</td>\n",
" <td>-0.140800</td>\n",
" <td>-0.798234</td>\n",
" <td>-1.663294</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410773</th>\n",
" <td>-0.748366</td>\n",
" <td>-0.804077</td>\n",
" <td>-0.650371</td>\n",
" <td>0.702788</td>\n",
" <td>0.253413</td>\n",
" <td>1.365441</td>\n",
" <td>1.501833</td>\n",
" <td>52.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>2.828890</td>\n",
" <td>1.431272</td>\n",
" <td>1.729715</td>\n",
" <td>-0.160491</td>\n",
" <td>1.581436</td>\n",
" <td>2.432437</td>\n",
" <td>-0.843150</td>\n",
" <td>0.411475</td>\n",
" <td>1.671069</td>\n",
" <td>-1.052343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410774</th>\n",
" <td>1.257769</td>\n",
" <td>-1.101815</td>\n",
" <td>-0.044608</td>\n",
" <td>0.091070</td>\n",
" <td>1.175911</td>\n",
" <td>0.553789</td>\n",
" <td>-0.142544</td>\n",
" <td>14.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.202136</td>\n",
" <td>-0.916054</td>\n",
" <td>0.940624</td>\n",
" <td>1.217910</td>\n",
" <td>0.311575</td>\n",
" <td>-0.174762</td>\n",
" <td>-0.415359</td>\n",
" <td>0.135617</td>\n",
" <td>0.359680</td>\n",
" <td>-0.246790</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>410775 rows × 36 columns</p>\n",
"</div>"
],
"text/plain": [
" num__geo_lat num__geo_lon num__level num__levels num__rooms \\\n",
"0 0.495902 -0.449742 0.359235 -0.214789 0.253413 \n",
"1 0.177806 1.433673 -0.246529 -0.367718 0.253413 \n",
"2 0.440548 0.047222 -0.448450 -0.367718 -0.669085 \n",
"3 -1.588818 -0.722477 -0.246529 -0.979436 0.253413 \n",
"4 1.493662 1.125819 0.157313 0.549858 0.253413 \n",
"... ... ... ... ... ... \n",
"410770 0.592011 0.355014 0.561156 1.008646 0.253413 \n",
"410771 0.240478 0.392697 -0.650371 -0.979436 0.253413 \n",
"410772 -1.936771 -0.688830 0.359235 0.855717 -0.669085 \n",
"410773 -0.748366 -0.804077 -0.650371 0.702788 0.253413 \n",
"410774 1.257769 -1.101815 -0.044608 0.091070 1.175911 \n",
"\n",
" num__area num__kitchen_area cat__region cat__building_type \\\n",
"0 0.063735 -0.186285 20.0 1.0 \n",
"1 -0.114293 -0.186285 70.0 1.0 \n",
"2 -0.456947 -0.142544 15.0 3.0 \n",
"3 -0.181292 -0.142544 18.0 1.0 \n",
"4 0.615045 -0.011322 10.0 2.0 \n",
"... ... ... ... ... \n",
"410770 -0.079836 -0.092653 54.0 2.0 \n",
"410771 -0.334434 -0.404989 45.0 3.0 \n",
"410772 -0.456947 -0.142544 18.0 0.0 \n",
"410773 1.365441 1.501833 52.0 3.0 \n",
"410774 0.553789 -0.142544 14.0 1.0 \n",
"\n",
" cat__object_type ... afr__sqrt(area)*sqrt(kitchen_area) \\\n",
"0 0.0 ... -0.153548 \n",
"1 0.0 ... -0.267268 \n",
"2 1.0 ... -0.454880 \n",
"3 0.0 ... -0.254514 \n",
"4 0.0 ... 0.438600 \n",
"... ... ... ... \n",
"410770 0.0 ... -0.120035 \n",
"410771 0.0 ... -0.716150 \n",
"410772 1.0 ... -0.454880 \n",
"410773 0.0 ... 2.828890 \n",
"410774 0.0 ... 0.202136 \n",
"\n",
" afr__sqrt(geo_lon)*sqrt(kitchen_area) afr__log(area) \\\n",
"0 -0.490805 0.307835 \n",
"1 0.615798 0.031907 \n",
"2 -0.067183 -0.603122 \n",
"3 -0.607607 -0.080304 \n",
"4 0.891383 1.009612 \n",
"... ... ... \n",
"410770 0.237580 0.087725 \n",
"410771 -0.510766 -0.357277 \n",
"410772 -0.581851 -0.603122 \n",
"410773 1.431272 1.729715 \n",
"410774 -0.916054 0.940624 \n",
"\n",
" afr__rooms*log(level) afr__kitchen_area*rooms \\\n",
"0 0.690329 -0.132529 \n",
"1 0.282625 -0.132529 \n",
"2 -0.512211 -0.487813 \n",
"3 0.282625 -0.088119 \n",
"4 0.574497 0.045112 \n",
"... ... ... \n",
"410770 0.792500 -0.037463 \n",
"410771 -0.160491 -0.354582 \n",
"410772 -0.211576 -0.487813 \n",
"410773 -0.160491 1.581436 \n",
"410774 1.217910 0.311575 \n",
"\n",
" afr__kitchen_area*levels afr__sqrt(geo_lon)*sqrt(level) \\\n",
"0 -0.352834 0.323880 \n",
"1 -0.418643 0.552794 \n",
"2 -0.383803 -0.243092 \n",
"3 -0.662523 -0.369355 \n",
"4 0.208478 0.945981 \n",
"... ... ... \n",
"410770 0.322797 0.974381 \n",
"410771 -0.778657 -0.406361 \n",
"410772 0.173638 0.170166 \n",
"410773 2.432437 -0.843150 \n",
"410774 -0.174762 -0.415359 \n",
"\n",
" afr__area**(3/2) afr__geo_lat*log(kitchen_area) \\\n",
"0 -0.008748 -0.031529 \n",
"1 -0.056540 -0.143829 \n",
"2 -0.140800 0.063464 \n",
"3 -0.073838 -0.672113 \n",
"4 0.154902 0.780855 \n",
"... ... ... \n",
"410770 -0.047496 0.243018 \n",
"410771 -0.111897 -0.808157 \n",
"410772 -0.140800 -0.798234 \n",
"410773 0.411475 1.671069 \n",
"410774 0.135617 0.359680 \n",
"\n",
" afr__geo_lat*log(geo_lon) \n",
"0 0.068167 \n",
"1 1.129118 \n",
"2 0.460495 \n",
"3 -1.481033 \n",
"4 1.923382 \n",
"... ... \n",
"410770 0.789871 \n",
"410771 0.574534 \n",
"410772 -1.663294 \n",
"410773 -1.052343 \n",
"410774 -0.246790 \n",
"\n",
"[410775 rows x 36 columns]"
]
},
"execution_count": 294,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.feature_selection import RFE\n",
"X_train_afr"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"rfe_selector = RFE(estimator=regressor, n_features_to_select=12, step = 0.2) #drop 20% of features each iteration\n",
"X_train_rfe = rfe_selector.fit_transform(X_train_afr,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 297,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>num__geo_lat</th>\n",
" <th>num__geo_lon</th>\n",
" <th>afr__geo_lon</th>\n",
" <th>afr__area*kitchen_area</th>\n",
" <th>afr__sqrt(area)*geo_lat</th>\n",
" <th>afr__sqrt(area)*log(level)</th>\n",
" <th>afr__kitchen_area*log(level)</th>\n",
" <th>afr__sqrt(area)*sqrt(kitchen_area)</th>\n",
" <th>afr__rooms*log(level)</th>\n",
" <th>afr__kitchen_area*rooms</th>\n",
" <th>afr__sqrt(geo_lon)*sqrt(level)</th>\n",
" <th>afr__geo_lat*log(geo_lon)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.495902</td>\n",
" <td>-0.449742</td>\n",
" <td>-0.449742</td>\n",
" <td>-0.132188</td>\n",
" <td>0.373151</td>\n",
" <td>0.688076</td>\n",
" <td>0.044178</td>\n",
" <td>-0.153548</td>\n",
" <td>0.690329</td>\n",
" <td>-0.132529</td>\n",
" <td>0.323880</td>\n",
" <td>0.068167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.177806</td>\n",
" <td>1.433673</td>\n",
" <td>1.433673</td>\n",
" <td>-0.169370</td>\n",
" <td>0.005114</td>\n",
" <td>0.071369</td>\n",
" <td>-0.173647</td>\n",
" <td>-0.267268</td>\n",
" <td>0.282625</td>\n",
" <td>-0.132529</td>\n",
" <td>0.552794</td>\n",
" <td>1.129118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.440548</td>\n",
" <td>0.047222</td>\n",
" <td>0.047222</td>\n",
" <td>-0.226261</td>\n",
" <td>-0.425530</td>\n",
" <td>-0.335537</td>\n",
" <td>-0.239271</td>\n",
" <td>-0.454880</td>\n",
" <td>-0.512211</td>\n",
" <td>-0.487813</td>\n",
" <td>-0.243092</td>\n",
" <td>0.460495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-1.588818</td>\n",
" <td>-0.722477</td>\n",
" <td>-0.722477</td>\n",
" <td>-0.165302</td>\n",
" <td>-0.723225</td>\n",
" <td>0.034116</td>\n",
" <td>-0.129771</td>\n",
" <td>-0.254514</td>\n",
" <td>0.282625</td>\n",
" <td>-0.088119</td>\n",
" <td>-0.369355</td>\n",
" <td>-1.481033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.493662</td>\n",
" <td>1.125819</td>\n",
" <td>1.125819</td>\n",
" <td>0.094342</td>\n",
" <td>1.522265</td>\n",
" <td>0.862773</td>\n",
" <td>0.194490</td>\n",
" <td>0.438600</td>\n",
" <td>0.574497</td>\n",
" <td>0.045112</td>\n",
" <td>0.945981</td>\n",
" <td>1.923382</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410770</th>\n",
" <td>0.592011</td>\n",
" <td>0.355014</td>\n",
" <td>0.355014</td>\n",
" <td>-0.120841</td>\n",
" <td>0.206926</td>\n",
" <td>0.714499</td>\n",
" <td>0.226990</td>\n",
" <td>-0.120035</td>\n",
" <td>0.792500</td>\n",
" <td>-0.037463</td>\n",
" <td>0.974381</td>\n",
" <td>0.789871</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410771</th>\n",
" <td>0.240478</td>\n",
" <td>0.392697</td>\n",
" <td>0.392697</td>\n",
" <td>-0.296252</td>\n",
" <td>-0.297209</td>\n",
" <td>-0.551021</td>\n",
" <td>-0.560144</td>\n",
" <td>-0.716150</td>\n",
" <td>-0.160491</td>\n",
" <td>-0.354582</td>\n",
" <td>-0.406361</td>\n",
" <td>0.574534</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410772</th>\n",
" <td>-1.936771</td>\n",
" <td>-0.688830</td>\n",
" <td>-0.688830</td>\n",
" <td>-0.226261</td>\n",
" <td>-1.192706</td>\n",
" <td>0.306280</td>\n",
" <td>0.100868</td>\n",
" <td>-0.454880</td>\n",
" <td>-0.211576</td>\n",
" <td>-0.487813</td>\n",
" <td>0.170166</td>\n",
" <td>-1.663294</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410773</th>\n",
" <td>-0.748366</td>\n",
" <td>-0.804077</td>\n",
" <td>-0.804077</td>\n",
" <td>1.570163</td>\n",
" <td>1.274445</td>\n",
" <td>-0.002521</td>\n",
" <td>0.745507</td>\n",
" <td>2.828890</td>\n",
" <td>-0.160491</td>\n",
" <td>1.581436</td>\n",
" <td>-0.843150</td>\n",
" <td>-1.052343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410774</th>\n",
" <td>1.257769</td>\n",
" <td>-1.101815</td>\n",
" <td>-1.101815</td>\n",
" <td>-0.002742</td>\n",
" <td>1.338996</td>\n",
" <td>0.635065</td>\n",
" <td>-0.040302</td>\n",
" <td>0.202136</td>\n",
" <td>1.217910</td>\n",
" <td>0.311575</td>\n",
" <td>-0.415359</td>\n",
" <td>-0.246790</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>410775 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" num__geo_lat num__geo_lon afr__geo_lon afr__area*kitchen_area \\\n",
"0 0.495902 -0.449742 -0.449742 -0.132188 \n",
"1 0.177806 1.433673 1.433673 -0.169370 \n",
"2 0.440548 0.047222 0.047222 -0.226261 \n",
"3 -1.588818 -0.722477 -0.722477 -0.165302 \n",
"4 1.493662 1.125819 1.125819 0.094342 \n",
"... ... ... ... ... \n",
"410770 0.592011 0.355014 0.355014 -0.120841 \n",
"410771 0.240478 0.392697 0.392697 -0.296252 \n",
"410772 -1.936771 -0.688830 -0.688830 -0.226261 \n",
"410773 -0.748366 -0.804077 -0.804077 1.570163 \n",
"410774 1.257769 -1.101815 -1.101815 -0.002742 \n",
"\n",
" afr__sqrt(area)*geo_lat afr__sqrt(area)*log(level) \\\n",
"0 0.373151 0.688076 \n",
"1 0.005114 0.071369 \n",
"2 -0.425530 -0.335537 \n",
"3 -0.723225 0.034116 \n",
"4 1.522265 0.862773 \n",
"... ... ... \n",
"410770 0.206926 0.714499 \n",
"410771 -0.297209 -0.551021 \n",
"410772 -1.192706 0.306280 \n",
"410773 1.274445 -0.002521 \n",
"410774 1.338996 0.635065 \n",
"\n",
" afr__kitchen_area*log(level) afr__sqrt(area)*sqrt(kitchen_area) \\\n",
"0 0.044178 -0.153548 \n",
"1 -0.173647 -0.267268 \n",
"2 -0.239271 -0.454880 \n",
"3 -0.129771 -0.254514 \n",
"4 0.194490 0.438600 \n",
"... ... ... \n",
"410770 0.226990 -0.120035 \n",
"410771 -0.560144 -0.716150 \n",
"410772 0.100868 -0.454880 \n",
"410773 0.745507 2.828890 \n",
"410774 -0.040302 0.202136 \n",
"\n",
" afr__rooms*log(level) afr__kitchen_area*rooms \\\n",
"0 0.690329 -0.132529 \n",
"1 0.282625 -0.132529 \n",
"2 -0.512211 -0.487813 \n",
"3 0.282625 -0.088119 \n",
"4 0.574497 0.045112 \n",
"... ... ... \n",
"410770 0.792500 -0.037463 \n",
"410771 -0.160491 -0.354582 \n",
"410772 -0.211576 -0.487813 \n",
"410773 -0.160491 1.581436 \n",
"410774 1.217910 0.311575 \n",
"\n",
" afr__sqrt(geo_lon)*sqrt(level) afr__geo_lat*log(geo_lon) \n",
"0 0.323880 0.068167 \n",
"1 0.552794 1.129118 \n",
"2 -0.243092 0.460495 \n",
"3 -0.369355 -1.481033 \n",
"4 0.945981 1.923382 \n",
"... ... ... \n",
"410770 0.974381 0.789871 \n",
"410771 -0.406361 0.574534 \n",
"410772 0.170166 -1.663294 \n",
"410773 -0.843150 -1.052343 \n",
"410774 -0.415359 -0.246790 \n",
"\n",
"[410775 rows x 12 columns]"
]
},
"execution_count": 297,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train_afr_rfe = pd.DataFrame(X_train_rfe, columns=rfe_selector.get_feature_names_out())\n",
"X_train_afr_rfe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rfe_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_afr), \n",
" ('rfe_extractor', RFE(estimator=regressor, n_features_to_select=12, step = 0.2)),\n",
" ('model', regressor)\n",
"])\n",
"\n",
"rfe_pipeline.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 301,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'mae': 1431925.3203264712,\n",
" 'mape': 1.239752923791043e+18,\n",
" 'mse': 261947924998018.2}"
]
},
"execution_count": 301,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predictions_rfe = rfe_pipeline.predict(X_test)\n",
"\n",
"metrics = {}\n",
"metrics[\"mae\"] = mean_absolute_error(y_test, predictions_rfe) \n",
"metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions_rfe)\n",
"metrics[\"mse\"] = mean_squared_error(y_test, predictions_rfe)\n",
"\n",
"metrics"
]
},
{
"cell_type": "code",
"execution_count": 302,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 40.15it/s]\n",
"2024/10/17 14:26:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run rfe_feature_selection at: http://127.0.0.1:5000/#/experiments/1/runs/96f0bbcd6d88466abcf38f3b53f06ff1.\n",
"2024/10/17 14:26:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1.\n"
]
}
],
"source": [
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"RUN_NAME = 'rfe_feature_selection'\n",
"\n",
"with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n",
" # получаем уникальный идентификатор запуска эксперимента\n",
" run_id = run.info.run_id \n",
" mlflow.sklearn.log_model(rfe_pipeline, \n",
" artifact_path=\"models\",\n",
" signature=signature,\n",
" input_example=input_example,\n",
" pip_requirements=req_file\n",
" )\n",
" mlflow.log_metrics(metrics)\n",
" mlflow.log_artifact(art)\n",
" mlflow.log_params(model_sklearn.get_params())\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Используем sklearn признаки\n",
"Тут мы можем отобрать признаки один раз на обучении, а далее в качестве шага пайплайна использовать написанный класс ColumnExtractor для выбора нужных столбцов"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"rfe_skl_selector = RFE(estimator=regressor, n_features_to_select=12, step = 0.2) #drop 20% of features each iteration\n",
"X_train_skl_rfe = rfe_skl_selector.fit_transform(X_train_sklearn,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 305,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>num__geo_lat</th>\n",
" <th>num__geo_lon</th>\n",
" <th>num__level</th>\n",
" <th>num__rooms</th>\n",
" <th>num__kitchen_area</th>\n",
" <th>cat__region</th>\n",
" <th>quantile__geo_lat</th>\n",
" <th>quantile__geo_lon</th>\n",
" <th>quantile__level</th>\n",
" <th>poly__area kitchen_area</th>\n",
" <th>spline__area_sp_0</th>\n",
" <th>spline__area_sp_2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.495902</td>\n",
" <td>-0.449742</td>\n",
" <td>0.359235</td>\n",
" <td>0.253413</td>\n",
" <td>-0.186285</td>\n",
" <td>20.0</td>\n",
" <td>0.766257</td>\n",
" <td>0.511028</td>\n",
" <td>0.717217</td>\n",
" <td>-0.132188</td>\n",
" <td>0.155806</td>\n",
" <td>0.178013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.177806</td>\n",
" <td>1.433673</td>\n",
" <td>-0.246529</td>\n",
" <td>0.253413</td>\n",
" <td>-0.186285</td>\n",
" <td>70.0</td>\n",
" <td>0.297142</td>\n",
" <td>0.867999</td>\n",
" <td>0.522022</td>\n",
" <td>-0.169370</td>\n",
" <td>0.156921</td>\n",
" <td>0.176803</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.440548</td>\n",
" <td>0.047222</td>\n",
" <td>-0.448450</td>\n",
" <td>-0.669085</td>\n",
" <td>-0.142544</td>\n",
" <td>15.0</td>\n",
" <td>0.732330</td>\n",
" <td>0.629984</td>\n",
" <td>0.417417</td>\n",
" <td>-0.226261</td>\n",
" <td>0.159080</td>\n",
" <td>0.174488</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-1.588818</td>\n",
" <td>-0.722477</td>\n",
" <td>-0.246529</td>\n",
" <td>0.253413</td>\n",
" <td>-0.142544</td>\n",
" <td>18.0</td>\n",
" <td>0.148789</td>\n",
" <td>0.295262</td>\n",
" <td>0.522022</td>\n",
" <td>-0.165302</td>\n",
" <td>0.157341</td>\n",
" <td>0.176349</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.493662</td>\n",
" <td>1.125819</td>\n",
" <td>0.157313</td>\n",
" <td>0.253413</td>\n",
" <td>-0.011322</td>\n",
" <td>10.0</td>\n",
" <td>0.985937</td>\n",
" <td>0.758363</td>\n",
" <td>0.662663</td>\n",
" <td>0.094342</td>\n",
" <td>0.152390</td>\n",
" <td>0.181792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410770</th>\n",
" <td>0.592011</td>\n",
" <td>0.355014</td>\n",
" <td>0.561156</td>\n",
" <td>0.253413</td>\n",
" <td>-0.092653</td>\n",
" <td>54.0</td>\n",
" <td>0.788393</td>\n",
" <td>0.686728</td>\n",
" <td>0.771271</td>\n",
" <td>-0.120841</td>\n",
" <td>0.156705</td>\n",
" <td>0.177037</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410771</th>\n",
" <td>0.240478</td>\n",
" <td>0.392697</td>\n",
" <td>-0.650371</td>\n",
" <td>0.253413</td>\n",
" <td>-0.404989</td>\n",
" <td>45.0</td>\n",
" <td>0.494062</td>\n",
" <td>0.717240</td>\n",
" <td>0.309810</td>\n",
" <td>-0.296252</td>\n",
" <td>0.158306</td>\n",
" <td>0.175314</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410772</th>\n",
" <td>-1.936771</td>\n",
" <td>-0.688830</td>\n",
" <td>0.359235</td>\n",
" <td>-0.669085</td>\n",
" <td>-0.142544</td>\n",
" <td>18.0</td>\n",
" <td>0.131352</td>\n",
" <td>0.327613</td>\n",
" <td>0.717217</td>\n",
" <td>-0.226261</td>\n",
" <td>0.159080</td>\n",
" <td>0.174488</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410773</th>\n",
" <td>-0.748366</td>\n",
" <td>-0.804077</td>\n",
" <td>-0.650371</td>\n",
" <td>0.253413</td>\n",
" <td>1.501833</td>\n",
" <td>52.0</td>\n",
" <td>0.193143</td>\n",
" <td>0.114753</td>\n",
" <td>0.309810</td>\n",
" <td>1.570163</td>\n",
" <td>0.147820</td>\n",
" <td>0.187011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410774</th>\n",
" <td>1.257769</td>\n",
" <td>-1.101815</td>\n",
" <td>-0.044608</td>\n",
" <td>1.175911</td>\n",
" <td>-0.142544</td>\n",
" <td>14.0</td>\n",
" <td>0.908036</td>\n",
" <td>0.075725</td>\n",
" <td>0.604605</td>\n",
" <td>-0.002742</td>\n",
" <td>0.152767</td>\n",
" <td>0.181370</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>410775 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" num__geo_lat num__geo_lon num__level num__rooms num__kitchen_area \\\n",
"0 0.495902 -0.449742 0.359235 0.253413 -0.186285 \n",
"1 0.177806 1.433673 -0.246529 0.253413 -0.186285 \n",
"2 0.440548 0.047222 -0.448450 -0.669085 -0.142544 \n",
"3 -1.588818 -0.722477 -0.246529 0.253413 -0.142544 \n",
"4 1.493662 1.125819 0.157313 0.253413 -0.011322 \n",
"... ... ... ... ... ... \n",
"410770 0.592011 0.355014 0.561156 0.253413 -0.092653 \n",
"410771 0.240478 0.392697 -0.650371 0.253413 -0.404989 \n",
"410772 -1.936771 -0.688830 0.359235 -0.669085 -0.142544 \n",
"410773 -0.748366 -0.804077 -0.650371 0.253413 1.501833 \n",
"410774 1.257769 -1.101815 -0.044608 1.175911 -0.142544 \n",
"\n",
" cat__region quantile__geo_lat quantile__geo_lon quantile__level \\\n",
"0 20.0 0.766257 0.511028 0.717217 \n",
"1 70.0 0.297142 0.867999 0.522022 \n",
"2 15.0 0.732330 0.629984 0.417417 \n",
"3 18.0 0.148789 0.295262 0.522022 \n",
"4 10.0 0.985937 0.758363 0.662663 \n",
"... ... ... ... ... \n",
"410770 54.0 0.788393 0.686728 0.771271 \n",
"410771 45.0 0.494062 0.717240 0.309810 \n",
"410772 18.0 0.131352 0.327613 0.717217 \n",
"410773 52.0 0.193143 0.114753 0.309810 \n",
"410774 14.0 0.908036 0.075725 0.604605 \n",
"\n",
" poly__area kitchen_area spline__area_sp_0 spline__area_sp_2 \n",
"0 -0.132188 0.155806 0.178013 \n",
"1 -0.169370 0.156921 0.176803 \n",
"2 -0.226261 0.159080 0.174488 \n",
"3 -0.165302 0.157341 0.176349 \n",
"4 0.094342 0.152390 0.181792 \n",
"... ... ... ... \n",
"410770 -0.120841 0.156705 0.177037 \n",
"410771 -0.296252 0.158306 0.175314 \n",
"410772 -0.226261 0.159080 0.174488 \n",
"410773 1.570163 0.147820 0.187011 \n",
"410774 -0.002742 0.152767 0.181370 \n",
"\n",
"[410775 rows x 12 columns]"
]
},
"execution_count": 305,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train_skl_rfe = pd.DataFrame(X_train_skl_rfe, columns=rfe_skl_selector.get_feature_names_out())\n",
"X_train_skl_rfe"
]
},
{
"cell_type": "code",
"execution_count": 306,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['num__geo_lat',\n",
" 'num__geo_lon',\n",
" 'num__level',\n",
" 'num__rooms',\n",
" 'num__kitchen_area',\n",
" 'cat__region',\n",
" 'quantile__geo_lat',\n",
" 'quantile__geo_lon',\n",
" 'quantile__level',\n",
" 'poly__area kitchen_area',\n",
" 'spline__area_sp_0',\n",
" 'spline__area_sp_2']"
]
},
"execution_count": 306,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rfe_cols = X_train_skl_rfe.columns.tolist()\n",
"rfe_cols"
]
},
{
"cell_type": "code",
"execution_count": 307,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ True, True, True, False, True, False, True, True, False,\n",
" False, True, True, True, False, False, False, False, False,\n",
" False, False, False, True, False, True, False, True, False,\n",
" False])"
]
},
"execution_count": 307,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rfe_idx = rfe_skl_selector.support_\n",
"rfe_idx"
]
},
{
"cell_type": "code",
"execution_count": 316,
"metadata": {},
"outputs": [],
"source": [
"# Отбираемые столбцы нужно залогировать, иначе мы потеряем информацию о том, какие признаки выбраны\n",
"with open('rfe_skl_idx.txt', 'w+') as f:\n",
" f.write(str(rfe_idx))\n",
"with open('rfe_skl_cols.txt', 'w+') as f:\n",
" f.write(str(rfe_cols))"
]
},
{
"cell_type": "code",
"execution_count": 309,
"metadata": {},
"outputs": [],
"source": [
"class ColumnExtractor(object):\n",
"\n",
" def __init__(self, cols):\n",
" self.cols = cols\n",
"\n",
" def transform(self, X):\n",
" return X[:,self.cols]\n",
" \n",
" def fit(self, X, y=None):\n",
" return self\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rfe_skl_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_sklearn), \n",
" ('rfe_extractor', ColumnExtractor(rfe_idx)),\n",
" ('model', regressor)\n",
"])\n",
"\n",
"rfe_skl_pipeline.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 311,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 193.34it/s]\n",
"2024/10/17 14:32:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run rfe_skl_feature_selection at: http://127.0.0.1:5000/#/experiments/1/runs/e55206caeb1549e4aa0d98343d5c1d4d.\n",
"2024/10/17 14:32:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1.\n"
]
}
],
"source": [
"# Evaluate the pipeline that keeps only the RFE-selected sklearn features.\n",
"predictions_rfe_skl = rfe_skl_pipeline.predict(X_test)\n",
"\n",
"metrics = {\n",
"    \"mae\": mean_absolute_error(y_test, predictions_rfe_skl),\n",
"    \"mape\": mean_absolute_percentage_error(y_test, predictions_rfe_skl),\n",
"    \"mse\": mean_squared_error(y_test, predictions_rfe_skl),\n",
"}\n",
"\n",
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"RUN_NAME = 'rfe_skl_feature_selection'\n",
"\n",
"with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n",
"    # keep the run id so the run status can be checked after the context exits\n",
"    run_id = run.info.run_id\n",
"    # log the pipeline that produced the metrics above (rfe_skl_pipeline),\n",
"    # not the earlier rfe_pipeline from the AFR experiment\n",
"    mlflow.sklearn.log_model(rfe_skl_pipeline,\n",
"                             artifact_path=\"models\",\n",
"                             signature=signature,\n",
"                             input_example=input_example,\n",
"                             pip_requirements=req_file\n",
"                             )\n",
"    mlflow.log_metrics(metrics)\n",
"    # attach the selected column names and the selection mask written earlier\n",
"    mlflow.log_artifact('rfe_skl_cols.txt')\n",
"    mlflow.log_artifact('rfe_skl_idx.txt')\n",
"    mlflow.log_params(model_sklearn.get_params())\n",
"\n",
"run = mlflow.get_run(run_id)\n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## mlxtend\n",
"https://github.com/rasbt/mlxtend/blob/master/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb "
]
},
{
"cell_type": "code",
"execution_count": 312,
"metadata": {},
"outputs": [],
"source": [
"from mlxtend.feature_selection import SequentialFeatureSelector \n",
"#from sklearn.feature_selection import SequentialFeatureSelector"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sfs = SequentialFeatureSelector(RandomForestRegressor(n_estimators=3), \n",
" k_features=3,\n",
" forward=True,\n",
" floating=False, # True to drop selected features\n",
" scoring='neg_mean_absolute_error',\n",
" cv=2)\n",
"\n",
"sfs.fit(X_train_sklearn,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 314,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>num__geo_lon</th>\n",
" <th>quantile__geo_lat</th>\n",
" <th>spline__area_sp_3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.449742</td>\n",
" <td>0.766257</td>\n",
" <td>1.826008e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.433673</td>\n",
" <td>0.297142</td>\n",
" <td>1.310449e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.047222</td>\n",
" <td>0.732330</td>\n",
" <td>6.098363e-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-0.722477</td>\n",
" <td>0.148789</td>\n",
" <td>1.144942e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.125819</td>\n",
" <td>0.985937</td>\n",
" <td>4.240047e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410770</th>\n",
" <td>0.355014</td>\n",
" <td>0.788393</td>\n",
" <td>1.401454e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410771</th>\n",
" <td>0.392697</td>\n",
" <td>0.494062</td>\n",
" <td>8.202272e-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410772</th>\n",
" <td>-0.688830</td>\n",
" <td>0.131352</td>\n",
" <td>6.098363e-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410773</th>\n",
" <td>-0.804077</td>\n",
" <td>0.193143</td>\n",
" <td>1.004843e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410774</th>\n",
" <td>-1.101815</td>\n",
" <td>0.908036</td>\n",
" <td>3.903343e-06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>410775 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" num__geo_lon quantile__geo_lat spline__area_sp_3\n",
"0 -0.449742 0.766257 1.826008e-06\n",
"1 1.433673 0.297142 1.310449e-06\n",
"2 0.047222 0.732330 6.098363e-07\n",
"3 -0.722477 0.148789 1.144942e-06\n",
"4 1.125819 0.985937 4.240047e-06\n",
"... ... ... ...\n",
"410770 0.355014 0.788393 1.401454e-06\n",
"410771 0.392697 0.494062 8.202272e-07\n",
"410772 -0.688830 0.131352 6.098363e-07\n",
"410773 -0.804077 0.193143 1.004843e-05\n",
"410774 -1.101815 0.908036 3.903343e-06\n",
"\n",
"[410775 rows x 3 columns]"
]
},
"execution_count": 314,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"selected_features_sfs = X_train_sklearn.loc[:, sfs.k_feature_names_]\n",
"selected_features_sfs"
]
},
{
"cell_type": "code",
"execution_count": 315,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['num__geo_lon', 'quantile__geo_lat', 'spline__area_sp_3']"
]
},
"execution_count": 315,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rfe_sfs_idx = list(sfs.k_feature_idx_)\n",
"rfe_sfs_idx\n",
"rfe_sfs_col = list(sfs.k_feature_names_)\n",
"rfe_sfs_col"
]
},
{
"cell_type": "code",
"execution_count": 317,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs\n",
"\n",
"fig = plot_sfs(sfs.get_metric_dict(), kind='std_dev')\n",
"\n",
"plt.title('Sequential Forward Selection (w. StdDev)')\n",
"plt.grid()\n",
"plt.show()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rfe_sfs_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_sklearn), \n",
" ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),\n",
" ('model', regressor)\n",
"])\n",
"\n",
"rfe_sfs_pipeline.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predictions_sfs = rfe_sfs_pipeline.predict(X_test)\n",
"\n",
"metrics = {}\n",
"metrics[\"mae\"] = mean_absolute_error(y_test, predictions_sfs) \n",
"metrics[\"mape\"] = mean_absolute_percentage_error(y_test, predictions_sfs)\n",
"metrics[\"mse\"] = mean_squared_error(y_test, predictions_sfs)\n",
"\n",
"metrics\n",
"experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n",
"RUN_NAME = 'rfe_sfs_feature_selection'\n",
"\n",
"with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n",
" # получаем уникальный идентификатор запуска эксперимента\n",
" run_id = run.info.run_id \n",
" mlflow.sklearn.log_model(rfe_sfs_pipeline, \n",
" artifact_path=\"models\",\n",
" signature=signature,\n",
" input_example=input_example,\n",
" pip_requirements=req_file\n",
" )\n",
" mlflow.log_metrics(metrics)\n",
" mlflow.log_artifact('rfe_skl_cols.txt')\n",
" mlflow.log_artifact('rfe_skl_idx.txt')\n",
" mlflow.log_params(model_sklearn.get_params())\n",
"\n",
"run = mlflow.get_run(run_id) \n",
"assert (run.info.status =='FINISHED')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"Можно совмещать признаки, отобранные методами SFS и SBS: брать их объединение или пересечение. Можно также комбинировать их с признаками, отобранными другими подходами, — это целое поле для исследований."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# HYPERPARAMS\n",
"## Gridsearch"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import GridSearchCV"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"param_grid = {\n",
" 'model__depth': [1,3,5]\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gs = GridSearchCV(rfe_sfs_pipeline, param_grid, cv=2, scoring='neg_mean_absolute_error')\n",
"gs.fit(X_train, y_train)\n",
"print(\"Лучшие гиперпараметры:\", gs.best_params_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gs_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_sklearn), \n",
" ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),\n",
" ('model', CatBoostRegressor(depth=5))\n",
"])\n",
"\n",
"# Проведем стандартную проверку на тестовом множестве и залогируем run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вместо GridSearchCV можно использовать RandomizedSearchCV"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Optuna"
]
},
{
"cell_type": "code",
"execution_count": 292,
"metadata": {},
"outputs": [],
"source": [
"import optuna"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def objective(trial):\n",
" # предлагаем гиперпараметры\n",
" depth = trial.suggest_int('depth', 1, 10)\n",
" learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1)\n",
"\n",
" # создаём и обучаем модель\n",
" opt_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_sklearn), \n",
" ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),\n",
" ('model', CatBoostRegressor(depth=depth, learning_rate=learning_rate, verbose=0))\n",
" ])\n",
"\n",
" opt_pipeline.fit(X_train, y_train)\n",
"\n",
"    # предсказываем и вычисляем MAE\n",
" preds = opt_pipeline.predict(X_test)\n",
" mae = mean_absolute_error(y_test, preds) \n",
"\n",
" return mae"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"study = optuna.create_study(direction='minimize')\n",
"study.optimize(objective, n_trials=10)\n",
"\n",
"# выводим результаты\n",
"print('Number of finished trials:', len(study.trials))\n",
"print('Best trial:', study.best_trial.params) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"opt_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_sklearn), \n",
" ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),\n",
" ('model', CatBoostRegressor(depth=3, learning_rate=0.02789))\n",
"])\n",
"\n",
"# Проведем стандартную проверку на тестовом множестве и залогируем run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Выбираем лучшую модель.\n",
"Обучаем её на всей выборке (а не только на train-части).\n",
"Далее будем деплоить именно её."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}