258 KiB
import os
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error
# Load the pre-cleaned dataset and downsample to 10% of rows so the model
# trains quickly during the lecture (fixed random_state for reproducibility).
df = pd.read_pickle('data/clean_data.pkl').sample(frac=0.1, random_state=2)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 547701 entries, 313199 to 690900
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 547701 non-null int64
1 date 547701 non-null object
2 time 547701 non-null object
3 geo_lat 547701 non-null float32
4 geo_lon 547701 non-null float32
5 region 547701 non-null category
6 building_type 547701 non-null category
7 level 547701 non-null int8
8 levels 547701 non-null int8
9 rooms 547701 non-null int8
10 area 547701 non-null float16
11 kitchen_area 547701 non-null float16
12 object_type 547701 non-null category
dtypes: category(3), float16(2), float32(2), int64(1), int8(3), object(2)
memory usage: 26.1+ MB
# Rename the column we predict to the conventional 'target' name, and drop
# the raw date/time columns — they are not used as model features here.
df = df.rename(columns={'price': 'target'})
df = df.drop(columns=['date', 'time'])
df
target | geo_lat | geo_lon | region | building_type | level | levels | rooms | area | kitchen_area | object_type | |
---|---|---|---|---|---|---|---|---|---|---|---|
313199 | 4999999 | 59.958458 | 30.215530 | 2661 | 3 | 8 | 13 | 1 | 36.00000 | 7.199219 | 1 |
2437764 | 2150000 | 45.072674 | 41.936996 | 2900 | 3 | 5 | 5 | 1 | 52.00000 | 15.000000 | 1 |
4949072 | 8600000 | 59.939358 | 30.437069 | 2661 | 2 | 11 | 22 | 1 | 37.09375 | 9.796875 | 1 |
4109465 | 5100000 | 59.740479 | 30.569540 | 2661 | 1 | 2 | 9 | 3 | 74.50000 | 9.500000 | 1 |
2187702 | 3470000 | 56.324062 | 44.005390 | 2871 | 2 | 11 | 26 | 2 | 54.00000 | 8.000000 | 11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5188085 | 2300000 | 57.750603 | 40.866467 | 4189 | 3 | 2 | 3 | 1 | 38.00000 | 11.000000 | 1 |
4542014 | 6700000 | 55.911720 | 37.737419 | 81 | 3 | 2 | 5 | 2 | 66.37500 | 8.000000 | 1 |
3306731 | 3850000 | 51.704510 | 39.273037 | 2072 | 2 | 10 | 18 | 3 | 89.50000 | 14.203125 | 1 |
520293 | 1878885 | 54.943577 | 82.958862 | 9654 | 1 | 1 | 10 | 3 | 87.75000 | 12.921875 | 11 |
690900 | 4097350 | 59.882702 | 30.451246 | 2661 | 2 | 6 | 23 | 1 | 36.09375 | 16.203125 | 11 |
547701 rows × 11 columns
# Hold out 25% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop('target', axis=1), df['target'], test_size=0.25, random_state=2)
# Peek at a single training example.
X_train.iloc[0]
geo_lat 56.327686
geo_lon 43.928062
region 2871.000000
building_type 1.000000
level 8.000000
levels 10.000000
rooms 2.000000
area 56.000000
kitchen_area 8.500000
object_type 1.000000
Name: 879487, dtype: float64
# Names of the categorical columns (pandas 'category' and 'object' dtypes).
cat_features = X_train.select_dtypes(include=['category', 'object']).columns.to_list()
cat_features
['region', 'building_type', 'object_type']
# Names of the numeric columns (everything with a numeric dtype).
num_features = X_train.select_dtypes(include=['number']).columns.to_list()
num_features
['geo_lat', 'geo_lon', 'level', 'levels', 'rooms', 'area', 'kitchen_area']
https://scikit-learn.org/stable/api/sklearn.preprocessing.html - разные способы кодирования и скалирования
# Preprocessing steps and the model itself.
s_scaler = StandardScaler()
# unknown_value must be chosen carefully: it is the integer code assigned to
# any category not seen during fit, so it must not collide with real codes.
l_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=99999999)
regressor = CatBoostRegressor()
Column transformer
# ColumnTransformer routes each group of columns to its own transformer,
# which makes per-column preprocessing convenient.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', s_scaler, num_features),   # transforms for numeric features
        ('cat', l_encoder, cat_features),  # transforms for categorical features
    ],
    remainder='drop')  # drop any column not covered by a transformer above

# Chain preprocessing and the model so fit/predict run as a single estimator.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('model', regressor)])

pipeline.fit(X_train, y_train)
Learning rate set to 0.105957
0: learn: 22102085.4544239 total: 67.8ms remaining: 1m 7s
1: learn: 21994630.3403412 total: 87.1ms remaining: 43.4s
2: learn: 21906687.8196027 total: 105ms remaining: 34.8s
3: learn: 21834890.5050552 total: 124ms remaining: 30.9s
4: learn: 21770820.6751194 total: 143ms remaining: 28.5s
5: learn: 21719543.9330108 total: 163ms remaining: 27s
6: learn: 21676510.1666598 total: 183ms remaining: 25.9s
7: learn: 21641355.8079016 total: 202ms remaining: 25.1s
8: learn: 21612289.0494648 total: 223ms remaining: 24.5s
9: learn: 21583808.7061085 total: 242ms remaining: 24s
10: learn: 21559288.9618040 total: 266ms remaining: 23.9s
11: learn: 21537048.9920531 total: 286ms remaining: 23.5s
12: learn: 21444526.1629239 total: 306ms remaining: 23.3s
13: learn: 21426349.3370315 total: 324ms remaining: 22.8s
14: learn: 21411901.2338278 total: 344ms remaining: 22.6s
15: learn: 21399279.8023459 total: 360ms remaining: 22.1s
16: learn: 21299421.1434822 total: 382ms remaining: 22.1s
17: learn: 21288560.2595435 total: 400ms remaining: 21.8s
18: learn: 21277368.8876877 total: 421ms remaining: 21.8s
19: learn: 21229205.2938305 total: 441ms remaining: 21.6s
20: learn: 21220238.4828158 total: 462ms remaining: 21.5s
21: learn: 21212849.7885410 total: 485ms remaining: 21.6s
22: learn: 21205304.4132821 total: 507ms remaining: 21.5s
23: learn: 21198813.8508479 total: 534ms remaining: 21.7s
24: learn: 21184627.2326983 total: 550ms remaining: 21.4s
25: learn: 21172748.3410688 total: 569ms remaining: 21.3s
26: learn: 21103305.4766520 total: 582ms remaining: 21s
27: learn: 21096636.4037750 total: 594ms remaining: 20.6s
28: learn: 21082202.2892557 total: 607ms remaining: 20.3s
29: learn: 21077185.5274954 total: 619ms remaining: 20s
30: learn: 21071613.1691098 total: 632ms remaining: 19.7s
31: learn: 21067654.8502386 total: 644ms remaining: 19.5s
32: learn: 21053425.8947843 total: 659ms remaining: 19.3s
33: learn: 21038024.0563140 total: 670ms remaining: 19s
34: learn: 20961357.9814339 total: 682ms remaining: 18.8s
35: learn: 20946027.4479676 total: 695ms remaining: 18.6s
36: learn: 20866676.4104322 total: 707ms remaining: 18.4s
37: learn: 20863078.3182449 total: 717ms remaining: 18.1s
38: learn: 20859910.3609500 total: 728ms remaining: 17.9s
39: learn: 20853462.2703730 total: 739ms remaining: 17.7s
40: learn: 20851610.3209036 total: 749ms remaining: 17.5s
41: learn: 20847674.0809285 total: 760ms remaining: 17.3s
42: learn: 20845384.9263391 total: 772ms remaining: 17.2s
43: learn: 20843256.7428906 total: 784ms remaining: 17s
44: learn: 20841580.8594834 total: 797ms remaining: 16.9s
45: learn: 20819301.2718345 total: 810ms remaining: 16.8s
46: learn: 20812094.5913582 total: 823ms remaining: 16.7s
47: learn: 20808932.0866915 total: 834ms remaining: 16.5s
48: learn: 20763172.9200413 total: 851ms remaining: 16.5s
49: learn: 20729084.6574594 total: 863ms remaining: 16.4s
50: learn: 20721820.5403996 total: 876ms remaining: 16.3s
51: learn: 20715664.3732084 total: 887ms remaining: 16.2s
52: learn: 20712658.7025295 total: 897ms remaining: 16s
53: learn: 20704254.1704930 total: 908ms remaining: 15.9s
54: learn: 20690967.9220470 total: 919ms remaining: 15.8s
55: learn: 20686546.8978473 total: 929ms remaining: 15.7s
56: learn: 20682362.4255777 total: 941ms remaining: 15.6s
57: learn: 20680744.8113421 total: 951ms remaining: 15.4s
58: learn: 20677926.0871267 total: 962ms remaining: 15.3s
59: learn: 20658478.3098789 total: 974ms remaining: 15.3s
60: learn: 20641964.4472246 total: 987ms remaining: 15.2s
61: learn: 20639551.4216654 total: 999ms remaining: 15.1s
62: learn: 20638344.8919341 total: 1.01s remaining: 15s
63: learn: 20635991.3894815 total: 1.02s remaining: 15s
64: learn: 20595846.8116432 total: 1.03s remaining: 14.9s
65: learn: 20592198.9483046 total: 1.05s remaining: 14.8s
66: learn: 20565316.0060422 total: 1.06s remaining: 14.8s
67: learn: 20563073.6783517 total: 1.07s remaining: 14.7s
68: learn: 20553650.4649650 total: 1.08s remaining: 14.6s
69: learn: 20545510.8230653 total: 1.09s remaining: 14.5s
70: learn: 20544114.9272186 total: 1.1s remaining: 14.5s
71: learn: 20541689.8802451 total: 1.11s remaining: 14.4s
72: learn: 20538792.7074671 total: 1.13s remaining: 14.3s
73: learn: 20517134.0713648 total: 1.14s remaining: 14.3s
74: learn: 20510477.9089445 total: 1.15s remaining: 14.2s
75: learn: 20494649.9067257 total: 1.17s remaining: 14.2s
76: learn: 20490851.9879851 total: 1.18s remaining: 14.1s
77: learn: 20488939.9621874 total: 1.19s remaining: 14.1s
78: learn: 20432532.8171644 total: 1.2s remaining: 14s
79: learn: 20428397.7107150 total: 1.22s remaining: 14s
80: learn: 20421638.7734419 total: 1.23s remaining: 13.9s
81: learn: 20421021.7388457 total: 1.24s remaining: 13.9s
82: learn: 20406404.2376730 total: 1.26s remaining: 13.9s
83: learn: 20021682.5008511 total: 1.28s remaining: 13.9s
84: learn: 20018322.6048631 total: 1.3s remaining: 14s
85: learn: 20004841.3476490 total: 1.31s remaining: 14s
86: learn: 19985666.0092745 total: 1.33s remaining: 13.9s
87: learn: 19983778.1947243 total: 1.34s remaining: 13.9s
88: learn: 19982460.1107908 total: 1.36s remaining: 13.9s
89: learn: 19979128.5494690 total: 1.37s remaining: 13.8s
90: learn: 19974094.9707357 total: 1.38s remaining: 13.8s
91: learn: 19972006.9431031 total: 1.4s remaining: 13.8s
92: learn: 19970846.2845466 total: 1.41s remaining: 13.7s
93: learn: 19968858.0073042 total: 1.42s remaining: 13.7s
94: learn: 19921720.6252972 total: 1.44s remaining: 13.7s
95: learn: 19916568.5707839 total: 1.45s remaining: 13.6s
96: learn: 19913228.5247508 total: 1.46s remaining: 13.6s
97: learn: 19901982.4625895 total: 1.48s remaining: 13.6s
98: learn: 19836107.7247888 total: 1.49s remaining: 13.6s
99: learn: 19834724.7455166 total: 1.5s remaining: 13.5s
100: learn: 19832811.9745741 total: 1.52s remaining: 13.5s
101: learn: 19818491.2851567 total: 1.53s remaining: 13.5s
102: learn: 19815779.3719026 total: 1.55s remaining: 13.5s
103: learn: 19814215.0962787 total: 1.56s remaining: 13.5s
104: learn: 19782274.6892663 total: 1.57s remaining: 13.4s
105: learn: 19777945.6507456 total: 1.59s remaining: 13.4s
106: learn: 19770488.9772154 total: 1.6s remaining: 13.4s
107: learn: 19769758.0023174 total: 1.61s remaining: 13.3s
108: learn: 19767541.9303017 total: 1.63s remaining: 13.3s
109: learn: 19766992.0126300 total: 1.64s remaining: 13.2s
110: learn: 19765032.8837298 total: 1.65s remaining: 13.2s
111: learn: 19705204.6771073 total: 1.66s remaining: 13.2s
112: learn: 19703649.0394020 total: 1.68s remaining: 13.2s
113: learn: 19693038.0415419 total: 1.69s remaining: 13.1s
114: learn: 19690294.4304072 total: 1.7s remaining: 13.1s
115: learn: 19686529.4709294 total: 1.71s remaining: 13.1s
116: learn: 19684887.8267152 total: 1.72s remaining: 13s
117: learn: 19369465.6970761 total: 1.74s remaining: 13s
118: learn: 19368868.0416380 total: 1.75s remaining: 13s
119: learn: 19334590.5868513 total: 1.77s remaining: 13s
120: learn: 19332200.0832597 total: 1.78s remaining: 12.9s
121: learn: 19320130.9244745 total: 1.79s remaining: 12.9s
122: learn: 19318220.9448337 total: 1.81s remaining: 12.9s
123: learn: 18941546.2095714 total: 1.82s remaining: 12.9s
124: learn: 18941056.2836883 total: 1.84s remaining: 12.9s
125: learn: 18939637.9662976 total: 1.85s remaining: 12.8s
126: learn: 18938172.4621610 total: 1.86s remaining: 12.8s
127: learn: 18935889.3619752 total: 1.88s remaining: 12.8s
128: learn: 18928784.7025346 total: 1.9s remaining: 12.8s
129: learn: 18926981.6933453 total: 1.91s remaining: 12.8s
130: learn: 18830178.3173696 total: 1.93s remaining: 12.8s
131: learn: 18828102.3918672 total: 1.94s remaining: 12.8s
132: learn: 18825755.9987015 total: 1.95s remaining: 12.7s
133: learn: 18793049.5462155 total: 1.97s remaining: 12.7s
134: learn: 18791452.8400128 total: 1.98s remaining: 12.7s
135: learn: 18484591.4924421 total: 1.99s remaining: 12.7s
136: learn: 18482373.1605741 total: 2s remaining: 12.6s
137: learn: 18414571.2543321 total: 2.02s remaining: 12.6s
138: learn: 18412913.4160574 total: 2.03s remaining: 12.6s
139: learn: 18409214.1141794 total: 2.04s remaining: 12.6s
140: learn: 18395140.1008086 total: 2.06s remaining: 12.5s
141: learn: 18390939.2248151 total: 2.07s remaining: 12.5s
142: learn: 18377925.8298573 total: 2.08s remaining: 12.5s
143: learn: 18371775.1291009 total: 2.09s remaining: 12.4s
144: learn: 18370251.1042623 total: 2.1s remaining: 12.4s
145: learn: 18332707.1499911 total: 2.12s remaining: 12.4s
146: learn: 18330693.2665230 total: 2.13s remaining: 12.3s
147: learn: 18329408.2952767 total: 2.14s remaining: 12.3s
148: learn: 18321783.9892793 total: 2.15s remaining: 12.3s
149: learn: 18321270.4958267 total: 2.16s remaining: 12.2s
150: learn: 18310325.1681801 total: 2.17s remaining: 12.2s
151: learn: 18299986.9413893 total: 2.18s remaining: 12.2s
152: learn: 18290217.7479708 total: 2.2s remaining: 12.2s
153: learn: 18280975.8537910 total: 2.21s remaining: 12.1s
154: learn: 18272215.6509019 total: 2.22s remaining: 12.1s
155: learn: 18263878.2178516 total: 2.23s remaining: 12.1s
156: learn: 18256009.4859248 total: 2.25s remaining: 12.1s
157: learn: 18248529.7799856 total: 2.26s remaining: 12.1s
158: learn: 18241388.0845094 total: 2.28s remaining: 12s
159: learn: 18234700.5127085 total: 2.29s remaining: 12s
160: learn: 18228095.5839778 total: 2.3s remaining: 12s
161: learn: 18222087.5153066 total: 2.31s remaining: 12s
162: learn: 18215963.2971261 total: 2.33s remaining: 12s
163: learn: 18210272.5545163 total: 2.34s remaining: 11.9s
164: learn: 18208920.7703569 total: 2.35s remaining: 11.9s
165: learn: 18204704.7145239 total: 2.37s remaining: 11.9s
166: learn: 18187135.8260335 total: 2.38s remaining: 11.9s
167: learn: 18183064.7135734 total: 2.39s remaining: 11.9s
168: learn: 18177887.1670860 total: 2.41s remaining: 11.8s
169: learn: 18173022.2110313 total: 2.42s remaining: 11.8s
170: learn: 18168573.4167384 total: 2.44s remaining: 11.8s
171: learn: 18165036.1971623 total: 2.45s remaining: 11.8s
172: learn: 18161841.9822954 total: 2.46s remaining: 11.8s
173: learn: 18129860.2061383 total: 2.48s remaining: 11.8s
174: learn: 18127931.5161091 total: 2.49s remaining: 11.7s
175: learn: 18124997.7778403 total: 2.5s remaining: 11.7s
176: learn: 18122975.2084322 total: 2.51s remaining: 11.7s
177: learn: 18120855.5325733 total: 2.53s remaining: 11.7s
178: learn: 18117907.6019994 total: 2.54s remaining: 11.7s
179: learn: 18116674.0864027 total: 2.56s remaining: 11.7s
180: learn: 18114086.9287957 total: 2.57s remaining: 11.6s
181: learn: 18087100.0827926 total: 2.59s remaining: 11.6s
182: learn: 18071944.2213105 total: 2.6s remaining: 11.6s
183: learn: 17952691.4261792 total: 2.61s remaining: 11.6s
184: learn: 17950298.6715866 total: 2.63s remaining: 11.6s
185: learn: 17949031.8169417 total: 2.64s remaining: 11.6s
186: learn: 17937943.5186847 total: 2.66s remaining: 11.5s
187: learn: 17937014.8027177 total: 2.67s remaining: 11.5s
188: learn: 17936493.5945773 total: 2.68s remaining: 11.5s
189: learn: 17935386.0093649 total: 2.69s remaining: 11.5s
190: learn: 17934203.8644718 total: 2.7s remaining: 11.5s
191: learn: 17928336.5184065 total: 2.72s remaining: 11.4s
192: learn: 17925443.1940046 total: 2.73s remaining: 11.4s
193: learn: 17924535.5533845 total: 2.75s remaining: 11.4s
194: learn: 17917225.8802206 total: 2.76s remaining: 11.4s
195: learn: 17904437.4148190 total: 2.77s remaining: 11.4s
196: learn: 17902915.3467923 total: 2.79s remaining: 11.4s
197: learn: 17900924.7512305 total: 2.8s remaining: 11.3s
198: learn: 17899976.2262471 total: 2.81s remaining: 11.3s
199: learn: 17896573.5977064 total: 2.83s remaining: 11.3s
200: learn: 17894480.1301072 total: 2.84s remaining: 11.3s
201: learn: 17891369.5414483 total: 2.85s remaining: 11.3s
202: learn: 17853776.3679239 total: 2.86s remaining: 11.2s
203: learn: 17851457.0828592 total: 2.88s remaining: 11.2s
204: learn: 17849621.6767992 total: 2.89s remaining: 11.2s
205: learn: 17848392.5509482 total: 2.9s remaining: 11.2s
206: learn: 17845597.2428619 total: 2.91s remaining: 11.1s
207: learn: 17841951.2763157 total: 2.92s remaining: 11.1s
208: learn: 17829332.8912371 total: 2.93s remaining: 11.1s
209: learn: 17825984.1152963 total: 2.95s remaining: 11.1s
210: learn: 17821360.2498463 total: 2.96s remaining: 11.1s
211: learn: 17816041.9633158 total: 2.97s remaining: 11s
212: learn: 17815089.0154101 total: 2.98s remaining: 11s
213: learn: 17812260.4222221 total: 2.99s remaining: 11s
214: learn: 17811642.1796060 total: 3s remaining: 11s
215: learn: 17811104.8656724 total: 3.01s remaining: 10.9s
216: learn: 17810456.2984828 total: 3.02s remaining: 10.9s
217: learn: 17809982.4909707 total: 3.04s remaining: 10.9s
218: learn: 17809543.7803178 total: 3.05s remaining: 10.9s
219: learn: 17809136.8325569 total: 3.06s remaining: 10.9s
220: learn: 17808758.7315278 total: 3.07s remaining: 10.8s
221: learn: 17808406.9145618 total: 3.09s remaining: 10.8s
222: learn: 17806754.0179687 total: 3.1s remaining: 10.8s
223: learn: 17806262.4885592 total: 3.11s remaining: 10.8s
224: learn: 17805319.3776209 total: 3.13s remaining: 10.8s
225: learn: 17805011.6013482 total: 3.14s remaining: 10.7s
226: learn: 17804724.0362310 total: 3.15s remaining: 10.7s
227: learn: 17793961.7547867 total: 3.16s remaining: 10.7s
228: learn: 17793044.3976904 total: 3.18s remaining: 10.7s
229: learn: 17791876.3449986 total: 3.19s remaining: 10.7s
230: learn: 17770039.2877531 total: 3.21s remaining: 10.7s
231: learn: 17769759.3423197 total: 3.22s remaining: 10.7s
232: learn: 17769498.1846872 total: 3.23s remaining: 10.6s
233: learn: 17769106.6516586 total: 3.24s remaining: 10.6s
234: learn: 17765866.7512613 total: 3.25s remaining: 10.6s
235: learn: 17763818.0836765 total: 3.27s remaining: 10.6s
236: learn: 17761637.5687877 total: 3.29s remaining: 10.6s
237: learn: 17755293.6166299 total: 3.3s remaining: 10.6s
238: learn: 17749597.6285121 total: 3.32s remaining: 10.6s
239: learn: 17731193.4780969 total: 3.33s remaining: 10.6s
240: learn: 17730941.1840209 total: 3.35s remaining: 10.5s
241: learn: 17730651.4109866 total: 3.36s remaining: 10.5s
242: learn: 17729951.1772204 total: 3.38s remaining: 10.5s
243: learn: 17725674.6169533 total: 3.39s remaining: 10.5s
244: learn: 17724397.3837970 total: 3.41s remaining: 10.5s
245: learn: 17723085.9667878 total: 3.42s remaining: 10.5s
246: learn: 17716068.0643361 total: 3.44s remaining: 10.5s
247: learn: 17685621.7941613 total: 3.45s remaining: 10.5s
248: learn: 17684272.6716694 total: 3.46s remaining: 10.4s
249: learn: 17683390.0888279 total: 3.48s remaining: 10.4s
250: learn: 17683052.4845925 total: 3.49s remaining: 10.4s
251: learn: 17678624.0868252 total: 3.51s remaining: 10.4s
252: learn: 17665657.9640584 total: 3.52s remaining: 10.4s
253: learn: 17664624.5487132 total: 3.54s remaining: 10.4s
254: learn: 17663925.0646167 total: 3.55s remaining: 10.4s
255: learn: 17653813.6196925 total: 3.56s remaining: 10.4s
256: learn: 17636698.5157040 total: 3.58s remaining: 10.3s
257: learn: 17634671.9750893 total: 3.59s remaining: 10.3s
258: learn: 17633930.6422340 total: 3.61s remaining: 10.3s
259: learn: 17633026.0861171 total: 3.62s remaining: 10.3s
260: learn: 17632489.1254856 total: 3.63s remaining: 10.3s
261: learn: 17628474.9187765 total: 3.65s remaining: 10.3s
262: learn: 17627320.9817928 total: 3.66s remaining: 10.3s
263: learn: 17626116.4772868 total: 3.67s remaining: 10.2s
264: learn: 17623329.0754817 total: 3.69s remaining: 10.2s
265: learn: 17622243.1901613 total: 3.7s remaining: 10.2s
266: learn: 17550321.8250878 total: 3.71s remaining: 10.2s
267: learn: 17549755.3651767 total: 3.73s remaining: 10.2s
268: learn: 17545607.1212430 total: 3.74s remaining: 10.2s
269: learn: 17541242.2629221 total: 3.75s remaining: 10.1s
270: learn: 17499407.7313592 total: 3.76s remaining: 10.1s
271: learn: 17499145.8282321 total: 3.77s remaining: 10.1s
272: learn: 17498934.5535116 total: 3.78s remaining: 10.1s
273: learn: 17498347.2546318 total: 3.79s remaining: 10.1s
274: learn: 17498149.7061684 total: 3.8s remaining: 10s
275: learn: 17497860.3337909 total: 3.81s remaining: 10s
276: learn: 17497134.2565818 total: 3.83s remaining: 9.98s
277: learn: 17496943.1446578 total: 3.84s remaining: 9.96s
278: learn: 17495461.7397646 total: 3.85s remaining: 9.95s
279: learn: 17492860.8467310 total: 3.86s remaining: 9.94s
280: learn: 17492256.7750564 total: 3.88s remaining: 9.92s
281: learn: 17491315.8920024 total: 3.89s remaining: 9.91s
282: learn: 17488802.8492737 total: 3.9s remaining: 9.89s
283: learn: 17479802.6541152 total: 3.92s remaining: 9.87s
284: learn: 17477169.5331720 total: 3.93s remaining: 9.86s
285: learn: 17474743.6190942 total: 3.94s remaining: 9.84s
286: learn: 17468342.7955232 total: 3.96s remaining: 9.83s
287: learn: 17467579.9985437 total: 3.97s remaining: 9.82s
288: learn: 17467009.9684055 total: 3.98s remaining: 9.8s
289: learn: 17464125.0260113 total: 4s remaining: 9.79s
290: learn: 17463508.0564477 total: 4.01s remaining: 9.77s
291: learn: 17453183.2620432 total: 4.02s remaining: 9.75s
292: learn: 17452971.0671546 total: 4.03s remaining: 9.73s
293: learn: 17452198.5884342 total: 4.05s remaining: 9.72s
294: learn: 17450925.6159031 total: 4.06s remaining: 9.7s
295: learn: 17450685.1155343 total: 4.07s remaining: 9.68s
296: learn: 17447975.7379237 total: 4.08s remaining: 9.66s
297: learn: 17446417.7251561 total: 4.1s remaining: 9.65s
298: learn: 17446166.7629704 total: 4.11s remaining: 9.63s
299: learn: 17445963.1442260 total: 4.12s remaining: 9.61s
300: learn: 17445745.7958927 total: 4.13s remaining: 9.58s
301: learn: 17444963.9290154 total: 4.14s remaining: 9.57s
302: learn: 17432650.1591210 total: 4.15s remaining: 9.55s
303: learn: 17430525.1210288 total: 4.17s remaining: 9.54s
304: learn: 17418414.4601453 total: 4.18s remaining: 9.52s
305: learn: 17417977.4735651 total: 4.19s remaining: 9.5s
306: learn: 17335624.2943914 total: 4.2s remaining: 9.49s
307: learn: 17323558.9233681 total: 4.21s remaining: 9.47s
308: learn: 17323047.3527617 total: 4.22s remaining: 9.45s
309: learn: 17322403.3488620 total: 4.24s remaining: 9.43s
310: learn: 17322187.6973801 total: 4.25s remaining: 9.41s
311: learn: 17320898.8497406 total: 4.26s remaining: 9.4s
312: learn: 17312668.7000429 total: 4.27s remaining: 9.38s
313: learn: 17299277.5985403 total: 4.29s remaining: 9.36s
314: learn: 17298175.9786240 total: 4.3s remaining: 9.35s
315: learn: 17296005.0430765 total: 4.31s remaining: 9.33s
316: learn: 17295834.3986842 total: 4.33s remaining: 9.32s
317: learn: 17295646.8271436 total: 4.33s remaining: 9.3s
318: learn: 17295412.2240763 total: 4.35s remaining: 9.28s
319: learn: 17295269.3891063 total: 4.36s remaining: 9.26s
320: learn: 17294720.1427139 total: 4.37s remaining: 9.24s
321: learn: 17280405.8179874 total: 4.38s remaining: 9.22s
322: learn: 17279788.6705542 total: 4.39s remaining: 9.2s
323: learn: 17259578.2219214 total: 4.4s remaining: 9.19s
324: learn: 17258995.8851109 total: 4.41s remaining: 9.16s
325: learn: 17256802.0040208 total: 4.42s remaining: 9.15s
326: learn: 17245667.9352932 total: 4.44s remaining: 9.13s
327: learn: 17245157.2383849 total: 4.45s remaining: 9.12s
328: learn: 17244420.0505767 total: 4.46s remaining: 9.11s
329: learn: 17240620.9311856 total: 4.48s remaining: 9.09s
330: learn: 17240126.6382259 total: 4.49s remaining: 9.07s
331: learn: 17239554.3263042 total: 4.5s remaining: 9.06s
332: learn: 17239249.4122676 total: 4.52s remaining: 9.05s
333: learn: 17237315.5959603 total: 4.53s remaining: 9.04s
334: learn: 17237170.4183008 total: 4.54s remaining: 9.02s
335: learn: 17235498.1709182 total: 4.55s remaining: 9s
336: learn: 17154286.9322136 total: 4.57s remaining: 8.98s
337: learn: 17152860.5403583 total: 4.58s remaining: 8.96s
338: learn: 17139897.5803445 total: 4.59s remaining: 8.95s
339: learn: 17139685.6194353 total: 4.6s remaining: 8.93s
340: learn: 17129406.8909698 total: 4.61s remaining: 8.92s
341: learn: 17126386.5318429 total: 4.63s remaining: 8.9s
342: learn: 17125338.5826429 total: 4.64s remaining: 8.89s
343: learn: 17124937.1764028 total: 4.65s remaining: 8.87s
344: learn: 17124773.5128614 total: 4.66s remaining: 8.85s
345: learn: 17123822.0085471 total: 4.67s remaining: 8.84s
346: learn: 17122604.8415169 total: 4.68s remaining: 8.82s
347: learn: 17121767.5370013 total: 4.7s remaining: 8.8s
348: learn: 17109471.1428348 total: 4.71s remaining: 8.79s
349: learn: 17092688.7777393 total: 4.73s remaining: 8.78s
350: learn: 17081854.5539987 total: 4.74s remaining: 8.76s
351: learn: 17081117.2220910 total: 4.75s remaining: 8.75s
352: learn: 17079431.1991192 total: 4.76s remaining: 8.73s
353: learn: 17065749.4676464 total: 4.78s remaining: 8.72s
354: learn: 17050839.2238400 total: 4.79s remaining: 8.7s
355: learn: 17050106.8831270 total: 4.8s remaining: 8.69s
356: learn: 17046033.2332065 total: 4.82s remaining: 8.67s
357: learn: 17043704.2415802 total: 4.83s remaining: 8.66s
358: learn: 17034226.2631681 total: 4.84s remaining: 8.64s
359: learn: 17019515.6806659 total: 4.85s remaining: 8.63s
360: learn: 17018472.9763746 total: 4.87s remaining: 8.61s
361: learn: 17017909.7121151 total: 4.88s remaining: 8.6s
362: learn: 17017463.3942640 total: 4.89s remaining: 8.58s
363: learn: 17016467.4317116 total: 4.9s remaining: 8.56s
364: learn: 17016320.3746025 total: 4.91s remaining: 8.55s
365: learn: 17014043.0108512 total: 4.93s remaining: 8.53s
366: learn: 17013536.3710672 total: 4.94s remaining: 8.51s
367: learn: 17011993.2014165 total: 4.95s remaining: 8.5s
368: learn: 17011849.5641841 total: 4.96s remaining: 8.48s
369: learn: 17011403.7126883 total: 4.97s remaining: 8.46s
370: learn: 17009763.5741945 total: 4.98s remaining: 8.45s
371: learn: 17009382.7519630 total: 4.99s remaining: 8.43s
372: learn: 17008464.7915054 total: 5s remaining: 8.41s
373: learn: 17008143.8161261 total: 5.01s remaining: 8.39s
374: learn: 16996814.2215431 total: 5.03s remaining: 8.38s
375: learn: 16996377.3351825 total: 5.04s remaining: 8.36s
376: learn: 16996037.5806770 total: 5.05s remaining: 8.34s
377: learn: 16991953.6478199 total: 5.06s remaining: 8.33s
378: learn: 16961328.6727692 total: 5.08s remaining: 8.32s
379: learn: 16957664.4831621 total: 5.09s remaining: 8.3s
380: learn: 16956856.4526881 total: 5.1s remaining: 8.29s
381: learn: 16947754.5891887 total: 5.12s remaining: 8.28s
382: learn: 16937471.3061729 total: 5.13s remaining: 8.26s
383: learn: 16910717.2697228 total: 5.14s remaining: 8.25s
384: learn: 16883021.8749316 total: 5.15s remaining: 8.23s
385: learn: 16874077.6620256 total: 5.17s remaining: 8.22s
386: learn: 16859663.0508862 total: 5.18s remaining: 8.2s
387: learn: 16843794.6984628 total: 5.19s remaining: 8.19s
388: learn: 16843670.2191430 total: 5.2s remaining: 8.17s
389: learn: 16833049.2556840 total: 5.21s remaining: 8.15s
390: learn: 16821522.4443567 total: 5.23s remaining: 8.14s
391: learn: 16818181.1766856 total: 5.24s remaining: 8.13s
392: learn: 16817749.5049150 total: 5.25s remaining: 8.12s
393: learn: 16817402.3614282 total: 5.27s remaining: 8.1s
394: learn: 16815679.7151727 total: 5.28s remaining: 8.09s
395: learn: 16810641.8717564 total: 5.29s remaining: 8.07s
396: learn: 16810291.1871768 total: 5.3s remaining: 8.06s
397: learn: 16808056.2422004 total: 5.32s remaining: 8.04s
398: learn: 16807804.2454334 total: 5.33s remaining: 8.03s
399: learn: 16799998.1957230 total: 5.34s remaining: 8.02s
400: learn: 16799220.2656080 total: 5.36s remaining: 8s
401: learn: 16798913.0252067 total: 5.37s remaining: 7.98s
402: learn: 16798319.2545577 total: 5.38s remaining: 7.97s
403: learn: 16796848.5752647 total: 5.39s remaining: 7.95s
404: learn: 16757656.8985529 total: 5.4s remaining: 7.93s
405: learn: 16745513.4381725 total: 5.41s remaining: 7.92s
406: learn: 16735416.8114581 total: 5.43s remaining: 7.91s
407: learn: 16734295.1424370 total: 5.44s remaining: 7.89s
408: learn: 16733140.3781664 total: 5.45s remaining: 7.88s
409: learn: 16723800.8980695 total: 5.46s remaining: 7.86s
410: learn: 16721200.9625357 total: 5.48s remaining: 7.85s
411: learn: 16720027.8472987 total: 5.49s remaining: 7.83s
412: learn: 16717199.5760035 total: 5.5s remaining: 7.82s
413: learn: 16713362.4492616 total: 5.52s remaining: 7.81s
414: learn: 16712806.0473182 total: 5.53s remaining: 7.8s
415: learn: 16711241.9902750 total: 5.55s remaining: 7.79s
416: learn: 16710626.7325455 total: 5.56s remaining: 7.77s
417: learn: 16644768.4542531 total: 5.57s remaining: 7.75s
418: learn: 16644403.8081224 total: 5.58s remaining: 7.74s
419: learn: 16644106.9601552 total: 5.59s remaining: 7.72s
420: learn: 16643628.6346956 total: 5.6s remaining: 7.71s
421: learn: 16640073.3813320 total: 5.62s remaining: 7.69s
422: learn: 16639549.7950808 total: 5.63s remaining: 7.68s
423: learn: 16639069.1006878 total: 5.64s remaining: 7.66s
424: learn: 16638481.2382327 total: 5.65s remaining: 7.65s
425: learn: 16638208.9073863 total: 5.66s remaining: 7.63s
426: learn: 16609090.9227109 total: 5.67s remaining: 7.61s
427: learn: 16607897.8537223 total: 5.69s remaining: 7.6s
428: learn: 16607613.0069443 total: 5.7s remaining: 7.58s
429: learn: 16603866.7848843 total: 5.71s remaining: 7.57s
430: learn: 16566652.4020620 total: 5.72s remaining: 7.55s
431: learn: 16566149.6048169 total: 5.74s remaining: 7.54s
432: learn: 16564672.1011733 total: 5.75s remaining: 7.53s
433: learn: 16564610.7741058 total: 5.76s remaining: 7.51s
434: learn: 16564198.8911273 total: 5.77s remaining: 7.49s
435: learn: 16559675.2968062 total: 5.78s remaining: 7.48s
436: learn: 16558753.2346339 total: 5.79s remaining: 7.46s
437: learn: 16558452.1907641 total: 5.8s remaining: 7.45s
438: learn: 16546587.2383006 total: 5.82s remaining: 7.43s
439: learn: 16543823.0847287 total: 5.83s remaining: 7.42s
440: learn: 16542126.8424469 total: 5.84s remaining: 7.4s
441: learn: 16541624.1632076 total: 5.85s remaining: 7.39s
442: learn: 16540326.5322872 total: 5.86s remaining: 7.37s
443: learn: 16530336.2084291 total: 5.88s remaining: 7.36s
444: learn: 16530167.9665629 total: 5.89s remaining: 7.34s
445: learn: 16528821.2477933 total: 5.9s remaining: 7.33s
446: learn: 16528766.2012617 total: 5.91s remaining: 7.31s
447: learn: 16518018.7193100 total: 5.92s remaining: 7.3s
448: learn: 16508723.6897544 total: 5.94s remaining: 7.29s
449: learn: 16508487.2637814 total: 5.95s remaining: 7.27s
450: learn: 16473955.6540161 total: 5.96s remaining: 7.26s
451: learn: 16453172.0203944 total: 5.98s remaining: 7.25s
452: learn: 16451483.6324413 total: 5.99s remaining: 7.23s
453: learn: 16451257.8036014 total: 6s remaining: 7.22s
454: learn: 16448369.9508352 total: 6.02s remaining: 7.21s
455: learn: 16446719.1385193 total: 6.03s remaining: 7.19s
456: learn: 16420736.1369659 total: 6.04s remaining: 7.18s
457: learn: 16420629.2824606 total: 6.05s remaining: 7.16s
458: learn: 16420336.6729748 total: 6.07s remaining: 7.15s
459: learn: 16420155.4584530 total: 6.08s remaining: 7.13s
460: learn: 16419734.8233202 total: 6.09s remaining: 7.12s
461: learn: 16419517.6225944 total: 6.1s remaining: 7.1s
462: learn: 16406145.7183320 total: 6.12s remaining: 7.09s
463: learn: 16404609.0651931 total: 6.13s remaining: 7.08s
464: learn: 16404332.0732862 total: 6.14s remaining: 7.07s
465: learn: 16404019.7507952 total: 6.16s remaining: 7.05s
466: learn: 16403507.0137349 total: 6.17s remaining: 7.04s
467: learn: 16402993.5886996 total: 6.18s remaining: 7.03s
468: learn: 16385955.8460101 total: 6.19s remaining: 7.01s
469: learn: 16373237.2004642 total: 6.2s remaining: 7s
470: learn: 16373038.3665164 total: 6.21s remaining: 6.98s
471: learn: 16372801.5860356 total: 6.23s remaining: 6.97s
472: learn: 16360759.6605520 total: 6.24s remaining: 6.95s
473: learn: 16360169.9657388 total: 6.25s remaining: 6.94s
474: learn: 16351841.0373273 total: 6.26s remaining: 6.92s
475: learn: 16349809.4004009 total: 6.28s remaining: 6.91s
476: learn: 16344483.1074475 total: 6.3s remaining: 6.9s
477: learn: 16340922.7262468 total: 6.31s remaining: 6.89s
478: learn: 16334736.4373107 total: 6.32s remaining: 6.88s
479: learn: 16334043.7402281 total: 6.33s remaining: 6.86s
480: learn: 16333745.0129155 total: 6.35s remaining: 6.85s
481: learn: 16332170.0024156 total: 6.36s remaining: 6.83s
482: learn: 16331680.4256261 total: 6.37s remaining: 6.82s
483: learn: 16321943.5880137 total: 6.38s remaining: 6.81s
484: learn: 16313566.1128530 total: 6.4s remaining: 6.79s
485: learn: 16312784.3783495 total: 6.41s remaining: 6.78s
486: learn: 16304256.8971602 total: 6.42s remaining: 6.76s
487: learn: 16299338.9360929 total: 6.43s remaining: 6.75s
488: learn: 16298399.2768748 total: 6.45s remaining: 6.74s
489: learn: 16282861.5959599 total: 6.46s remaining: 6.72s
490: learn: 16278027.7798172 total: 6.47s remaining: 6.71s
491: learn: 16262455.7433251 total: 6.49s remaining: 6.7s
492: learn: 16254609.6670435 total: 6.5s remaining: 6.68s
493: learn: 16250306.9197526 total: 6.51s remaining: 6.67s
494: learn: 16249855.9315045 total: 6.53s remaining: 6.66s
495: learn: 16248555.7562997 total: 6.54s remaining: 6.64s
496: learn: 16247555.1566330 total: 6.55s remaining: 6.63s
497: learn: 16247235.5993966 total: 6.56s remaining: 6.62s
498: learn: 16246264.7483105 total: 6.57s remaining: 6.6s
499: learn: 16246007.7491962 total: 6.58s remaining: 6.58s
500: learn: 16222867.6954421 total: 6.59s remaining: 6.57s
501: learn: 16222688.8853061 total: 6.61s remaining: 6.55s
502: learn: 16217885.3385915 total: 6.62s remaining: 6.54s
503: learn: 16217409.1580145 total: 6.63s remaining: 6.52s
504: learn: 16216838.3191240 total: 6.64s remaining: 6.51s
505: learn: 16216329.9777509 total: 6.65s remaining: 6.5s
506: learn: 16201534.4156055 total: 6.67s remaining: 6.48s
507: learn: 16198138.1904772 total: 6.68s remaining: 6.47s
508: learn: 16197904.2583705 total: 6.69s remaining: 6.45s
509: learn: 16193656.6407621 total: 6.7s remaining: 6.44s
510: learn: 16180805.8618897 total: 6.71s remaining: 6.42s
511: learn: 16176908.1769610 total: 6.73s remaining: 6.41s
512: learn: 16168261.0438871 total: 6.74s remaining: 6.4s
513: learn: 16167754.4165306 total: 6.75s remaining: 6.38s
514: learn: 16166295.0362243 total: 6.77s remaining: 6.37s
515: learn: 16166058.4053693 total: 6.78s remaining: 6.36s
516: learn: 16155412.7707338 total: 6.79s remaining: 6.34s
517: learn: 16152266.1742558 total: 6.8s remaining: 6.33s
518: learn: 16151552.8907870 total: 6.82s remaining: 6.32s
519: learn: 16140281.4351978 total: 6.83s remaining: 6.3s
520: learn: 16133450.4403783 total: 6.84s remaining: 6.29s
521: learn: 16132209.1334220 total: 6.85s remaining: 6.27s
522: learn: 16118104.6552795 total: 6.86s remaining: 6.26s
523: learn: 16108764.2393062 total: 6.87s remaining: 6.25s
524: learn: 16108234.0634605 total: 6.89s remaining: 6.23s
525: learn: 16107619.6760099 total: 6.9s remaining: 6.22s
526: learn: 16104870.7442280 total: 6.91s remaining: 6.2s
527: learn: 16102428.3934069 total: 6.92s remaining: 6.19s
528: learn: 16102157.2857565 total: 6.93s remaining: 6.17s
529: learn: 16101584.7403855 total: 6.95s remaining: 6.16s
530: learn: 16101480.3344969 total: 6.96s remaining: 6.15s
531: learn: 16100595.6548675 total: 6.97s remaining: 6.13s
532: learn: 16097511.0825233 total: 6.99s remaining: 6.12s
533: learn: 16096615.9743637 total: 7s remaining: 6.11s
534: learn: 16096369.6922988 total: 7.01s remaining: 6.09s
535: learn: 16095946.1647864 total: 7.02s remaining: 6.08s
536: learn: 16095637.6185090 total: 7.03s remaining: 6.06s
537: learn: 16094682.0243853 total: 7.04s remaining: 6.05s
538: learn: 16094291.9050311 total: 7.05s remaining: 6.03s
539: learn: 16093984.5280001 total: 7.07s remaining: 6.02s
540: learn: 16090374.6401334 total: 7.08s remaining: 6.01s
541: learn: 16090226.8772271 total: 7.09s remaining: 5.99s
542: learn: 16090050.1805201 total: 7.1s remaining: 5.98s
543: learn: 16069181.1048944 total: 7.12s remaining: 5.96s
544: learn: 16068504.9399291 total: 7.13s remaining: 5.95s
545: learn: 16068245.3744393 total: 7.14s remaining: 5.94s
546: learn: 16065773.4114093 total: 7.15s remaining: 5.92s
547: learn: 16051662.5046318 total: 7.17s remaining: 5.91s
548: learn: 16035327.2446945 total: 7.18s remaining: 5.9s
549: learn: 16035199.2858857 total: 7.2s remaining: 5.89s
550: learn: 16033842.9666151 total: 7.21s remaining: 5.88s
551: learn: 15995073.4381976 total: 7.22s remaining: 5.86s
552: learn: 15994812.5505379 total: 7.23s remaining: 5.85s
553: learn: 15994595.9921031 total: 7.24s remaining: 5.83s
554: learn: 15992248.3834318 total: 7.26s remaining: 5.82s
555: learn: 15992027.4484601 total: 7.27s remaining: 5.8s
556: learn: 15990566.0719983 total: 7.28s remaining: 5.79s
557: learn: 15985609.0920187 total: 7.29s remaining: 5.78s
558: learn: 15984517.8156083 total: 7.3s remaining: 5.76s
559: learn: 15958775.9803743 total: 7.32s remaining: 5.75s
560: learn: 15958166.8639855 total: 7.33s remaining: 5.73s
561: learn: 15949224.5334582 total: 7.34s remaining: 5.72s
562: learn: 15948769.9101270 total: 7.35s remaining: 5.71s
563: learn: 15930009.9576761 total: 7.36s remaining: 5.69s
564: learn: 15917439.6202170 total: 7.38s remaining: 5.68s
565: learn: 15908669.4567536 total: 7.39s remaining: 5.67s
566: learn: 15908084.2939630 total: 7.4s remaining: 5.65s
567: learn: 15906697.1590494 total: 7.41s remaining: 5.64s
568: learn: 15906522.4609846 total: 7.43s remaining: 5.63s
569: learn: 15906139.9138507 total: 7.44s remaining: 5.61s
570: learn: 15905855.0642382 total: 7.45s remaining: 5.6s
571: learn: 15897372.3501416 total: 7.46s remaining: 5.58s
572: learn: 15893536.4661240 total: 7.47s remaining: 5.57s
573: learn: 15893206.2810918 total: 7.49s remaining: 5.56s
574: learn: 15892918.2703602 total: 7.5s remaining: 5.54s
575: learn: 15892752.8029869 total: 7.51s remaining: 5.53s
576: learn: 15885169.7413434 total: 7.52s remaining: 5.51s
577: learn: 15884936.8745209 total: 7.53s remaining: 5.5s
578: learn: 15876877.1991641 total: 7.54s remaining: 5.49s
579: learn: 15865774.4061534 total: 7.56s remaining: 5.47s
580: learn: 15859212.9207966 total: 7.57s remaining: 5.46s
581: learn: 15858807.6511813 total: 7.58s remaining: 5.45s
582: learn: 15850129.9468116 total: 7.6s remaining: 5.43s
583: learn: 15845554.5689368 total: 7.61s remaining: 5.42s
584: learn: 15844986.6765475 total: 7.62s remaining: 5.41s
585: learn: 15844796.1180439 total: 7.63s remaining: 5.39s
586: learn: 15844586.1630771 total: 7.65s remaining: 5.38s
587: learn: 15827685.4584540 total: 7.66s remaining: 5.37s
588: learn: 15826910.6044821 total: 7.68s remaining: 5.36s
589: learn: 15824060.9875073 total: 7.69s remaining: 5.34s
590: learn: 15818523.6912985 total: 7.7s remaining: 5.33s
591: learn: 15810640.6921394 total: 7.72s remaining: 5.32s
592: learn: 15795481.4197185 total: 7.73s remaining: 5.3s
593: learn: 15795256.4491006 total: 7.74s remaining: 5.29s
594: learn: 15784420.7363473 total: 7.76s remaining: 5.28s
595: learn: 15784290.1819258 total: 7.78s remaining: 5.27s
596: learn: 15783955.0773924 total: 7.79s remaining: 5.26s
597: learn: 15781518.5372107 total: 7.8s remaining: 5.25s
598: learn: 15779547.4210947 total: 7.82s remaining: 5.24s
599: learn: 15777334.3663340 total: 7.83s remaining: 5.22s
600: learn: 15774774.7721883 total: 7.85s remaining: 5.21s
601: learn: 15774672.2356339 total: 7.86s remaining: 5.2s
602: learn: 15773528.0736833 total: 7.87s remaining: 5.18s
603: learn: 15768721.3649454 total: 7.89s remaining: 5.17s
604: learn: 15768502.0877019 total: 7.9s remaining: 5.16s
605: learn: 15768057.4929247 total: 7.92s remaining: 5.15s
606: learn: 15767950.4285043 total: 7.93s remaining: 5.14s
607: learn: 15767445.1324607 total: 7.95s remaining: 5.12s
608: learn: 15767269.0628064 total: 7.96s remaining: 5.11s
609: learn: 15767020.8174624 total: 7.97s remaining: 5.1s
610: learn: 15762309.5160245 total: 7.99s remaining: 5.09s
611: learn: 15757527.7718093 total: 8s remaining: 5.07s
612: learn: 15757150.7731734 total: 8.02s remaining: 5.06s
613: learn: 15756885.6252756 total: 8.03s remaining: 5.05s
614: learn: 15755584.6816303 total: 8.05s remaining: 5.04s
615: learn: 15755485.6737331 total: 8.06s remaining: 5.02s
616: learn: 15754432.0517599 total: 8.07s remaining: 5.01s
617: learn: 15744535.8203508 total: 8.09s remaining: 5s
618: learn: 15740683.5538600 total: 8.1s remaining: 4.99s
619: learn: 15736903.5667213 total: 8.12s remaining: 4.97s
620: learn: 15736355.4210963 total: 8.13s remaining: 4.96s
621: learn: 15729940.4032081 total: 8.15s remaining: 4.95s
622: learn: 15729775.7542976 total: 8.17s remaining: 4.94s
623: learn: 15726578.4125003 total: 8.18s remaining: 4.93s
624: learn: 15713451.5317183 total: 8.2s remaining: 4.92s
625: learn: 15712116.7478338 total: 8.22s remaining: 4.91s
626: learn: 15712039.0336448 total: 8.23s remaining: 4.9s
627: learn: 15711687.4136682 total: 8.25s remaining: 4.89s
628: learn: 15711298.6681597 total: 8.27s remaining: 4.88s
629: learn: 15705228.0021081 total: 8.28s remaining: 4.86s
630: learn: 15705060.0247650 total: 8.29s remaining: 4.85s
631: learn: 15702513.1910574 total: 8.31s remaining: 4.84s
632: learn: 15702203.6145508 total: 8.32s remaining: 4.82s
633: learn: 15698975.2951288 total: 8.33s remaining: 4.81s
634: learn: 15694674.2341421 total: 8.34s remaining: 4.8s
635: learn: 15686913.9104937 total: 8.36s remaining: 4.78s
636: learn: 15686809.9586513 total: 8.37s remaining: 4.77s
637: learn: 15685604.3787689 total: 8.38s remaining: 4.76s
638: learn: 15685081.4917552 total: 8.4s remaining: 4.74s
639: learn: 15676541.2826685 total: 8.41s remaining: 4.73s
640: learn: 15672855.0180760 total: 8.42s remaining: 4.72s
641: learn: 15666780.7593096 total: 8.44s remaining: 4.7s
642: learn: 15659438.3508408 total: 8.45s remaining: 4.69s
643: learn: 15653755.4598701 total: 8.46s remaining: 4.68s
644: learn: 15652034.4638985 total: 8.48s remaining: 4.67s
645: learn: 15645095.6489597 total: 8.49s remaining: 4.65s
646: learn: 15641226.9420905 total: 8.5s remaining: 4.64s
647: learn: 15639833.9184524 total: 8.52s remaining: 4.63s
648: learn: 15639581.1651510 total: 8.53s remaining: 4.61s
649: learn: 15635923.3848062 total: 8.54s remaining: 4.6s
650: learn: 15635813.1152459 total: 8.55s remaining: 4.58s
651: learn: 15635469.3555938 total: 8.56s remaining: 4.57s
652: learn: 15635363.9174910 total: 8.57s remaining: 4.56s
653: learn: 15633936.7433448 total: 8.59s remaining: 4.54s
654: learn: 15633839.2271448 total: 8.6s remaining: 4.53s
655: learn: 15633735.8610291 total: 8.61s remaining: 4.52s
656: learn: 15633309.8063070 total: 8.63s remaining: 4.5s
657: learn: 15632683.8986677 total: 8.64s remaining: 4.49s
658: learn: 15632461.2639014 total: 8.65s remaining: 4.48s
659: learn: 15627123.1765533 total: 8.66s remaining: 4.46s
660: learn: 15626996.0787558 total: 8.68s remaining: 4.45s
661: learn: 15624291.0204091 total: 8.69s remaining: 4.44s
662: learn: 15617684.3098363 total: 8.71s remaining: 4.42s
663: learn: 15611967.2176796 total: 8.72s remaining: 4.41s
664: learn: 15598472.2546786 total: 8.73s remaining: 4.4s
665: learn: 15597526.0470563 total: 8.74s remaining: 4.38s
666: learn: 15597430.3920481 total: 8.75s remaining: 4.37s
667: learn: 15596422.7059295 total: 8.77s remaining: 4.36s
668: learn: 15591400.2242411 total: 8.78s remaining: 4.34s
669: learn: 15585199.5277811 total: 8.79s remaining: 4.33s
670: learn: 15585003.5063693 total: 8.8s remaining: 4.32s
671: learn: 15578765.7193891 total: 8.81s remaining: 4.3s
672: learn: 15577252.0151364 total: 8.83s remaining: 4.29s
673: learn: 15576511.8797514 total: 8.84s remaining: 4.28s
674: learn: 15576120.7606092 total: 8.85s remaining: 4.26s
675: learn: 15574398.5273782 total: 8.86s remaining: 4.25s
676: learn: 15565660.7493905 total: 8.88s remaining: 4.24s
677: learn: 15561009.3437211 total: 8.89s remaining: 4.22s
678: learn: 15548878.4770401 total: 8.9s remaining: 4.21s
679: learn: 15527713.9632219 total: 8.91s remaining: 4.19s
680: learn: 15519745.2151864 total: 8.93s remaining: 4.18s
681: learn: 15519391.2760902 total: 8.94s remaining: 4.17s
682: learn: 15514461.8611265 total: 8.95s remaining: 4.15s
683: learn: 15514296.1001141 total: 8.96s remaining: 4.14s
684: learn: 15514204.8658979 total: 8.97s remaining: 4.13s
685: learn: 15513977.3554214 total: 8.98s remaining: 4.11s
686: learn: 15513906.5046745 total: 9s remaining: 4.1s
687: learn: 15513701.8112778 total: 9.01s remaining: 4.09s
688: learn: 15513602.4959013 total: 9.02s remaining: 4.07s
689: learn: 15513510.7910896 total: 9.03s remaining: 4.06s
690: learn: 15513352.2070048 total: 9.04s remaining: 4.04s
691: learn: 15513238.8204588 total: 9.05s remaining: 4.03s
692: learn: 15513154.0618557 total: 9.06s remaining: 4.01s
693: learn: 15512878.9114412 total: 9.07s remaining: 4s
694: learn: 15509248.2055515 total: 9.09s remaining: 3.99s
695: learn: 15508734.7327170 total: 9.1s remaining: 3.97s
696: learn: 15508495.7881550 total: 9.11s remaining: 3.96s
697: learn: 15508349.6872134 total: 9.12s remaining: 3.95s
698: learn: 15508190.6588965 total: 9.13s remaining: 3.93s
699: learn: 15508018.8419773 total: 9.14s remaining: 3.92s
700: learn: 15507826.3791202 total: 9.16s remaining: 3.9s
701: learn: 15507705.8100928 total: 9.17s remaining: 3.89s
702: learn: 15507533.3512682 total: 9.18s remaining: 3.88s
703: learn: 15501571.2913355 total: 9.19s remaining: 3.87s
704: learn: 15495921.0773672 total: 9.21s remaining: 3.85s
705: learn: 15495385.0875416 total: 9.22s remaining: 3.84s
706: learn: 15495191.8032918 total: 9.23s remaining: 3.83s
707: learn: 15494128.9589635 total: 9.24s remaining: 3.81s
708: learn: 15493806.9566177 total: 9.26s remaining: 3.8s
709: learn: 15493694.0465547 total: 9.27s remaining: 3.78s
710: learn: 15493305.1729869 total: 9.28s remaining: 3.77s
711: learn: 15487948.0399475 total: 9.3s remaining: 3.76s
712: learn: 15487843.0916850 total: 9.31s remaining: 3.75s
713: learn: 15482765.7669785 total: 9.32s remaining: 3.73s
714: learn: 15474767.3580796 total: 9.33s remaining: 3.72s
715: learn: 15472407.8166003 total: 9.35s remaining: 3.71s
716: learn: 15467592.3874842 total: 9.36s remaining: 3.69s
717: learn: 15467435.4901525 total: 9.37s remaining: 3.68s
718: learn: 15462871.2869120 total: 9.38s remaining: 3.67s
719: learn: 15462771.0380185 total: 9.39s remaining: 3.65s
720: learn: 15462475.2715024 total: 9.4s remaining: 3.64s
721: learn: 15454885.7938423 total: 9.42s remaining: 3.63s
722: learn: 15450557.5824215 total: 9.43s remaining: 3.61s
723: learn: 15446455.2317749 total: 9.44s remaining: 3.6s
724: learn: 15445711.0004476 total: 9.46s remaining: 3.59s
725: learn: 15441822.5331613 total: 9.47s remaining: 3.57s
726: learn: 15441182.6843715 total: 9.48s remaining: 3.56s
727: learn: 15441088.8915881 total: 9.49s remaining: 3.55s
728: learn: 15441002.5272406 total: 9.51s remaining: 3.53s
729: learn: 15440884.2830869 total: 9.52s remaining: 3.52s
730: learn: 15440734.2579995 total: 9.53s remaining: 3.51s
731: learn: 15440611.8887909 total: 9.54s remaining: 3.49s
732: learn: 15440249.4221271 total: 9.55s remaining: 3.48s
733: learn: 15440158.8154476 total: 9.56s remaining: 3.46s
734: learn: 15436472.7845071 total: 9.57s remaining: 3.45s
735: learn: 15433672.4484876 total: 9.59s remaining: 3.44s
736: learn: 15433490.1840146 total: 9.6s remaining: 3.43s
737: learn: 15433308.6881010 total: 9.61s remaining: 3.41s
738: learn: 15433042.7409848 total: 9.63s remaining: 3.4s
739: learn: 15432541.8769518 total: 9.64s remaining: 3.39s
740: learn: 15431856.6761047 total: 9.66s remaining: 3.38s
741: learn: 15431804.7359345 total: 9.67s remaining: 3.36s
742: learn: 15427355.2392047 total: 9.68s remaining: 3.35s
743: learn: 15427218.1028185 total: 9.69s remaining: 3.33s
744: learn: 15424332.1093472 total: 9.7s remaining: 3.32s
745: learn: 15388321.8033125 total: 9.71s remaining: 3.31s
746: learn: 15377267.9048803 total: 9.73s remaining: 3.29s
747: learn: 15374625.6198420 total: 9.74s remaining: 3.28s
748: learn: 15370386.0426691 total: 9.75s remaining: 3.27s
749: learn: 15359901.4299089 total: 9.77s remaining: 3.25s
750: learn: 15358774.7332579 total: 9.78s remaining: 3.24s
751: learn: 15358651.8711020 total: 9.79s remaining: 3.23s
752: learn: 15358300.8764559 total: 9.8s remaining: 3.21s
753: learn: 15357884.8170886 total: 9.81s remaining: 3.2s
754: learn: 15357643.0994172 total: 9.83s remaining: 3.19s
755: learn: 15357565.0887636 total: 9.84s remaining: 3.17s
756: learn: 15351820.0777339 total: 9.85s remaining: 3.16s
757: learn: 15351414.2517094 total: 9.86s remaining: 3.15s
758: learn: 15349501.7532204 total: 9.88s remaining: 3.14s
759: learn: 15348526.7586048 total: 9.89s remaining: 3.12s
760: learn: 15348352.7244253 total: 9.9s remaining: 3.11s
761: learn: 15347292.4488773 total: 9.91s remaining: 3.1s
762: learn: 15347207.8865499 total: 9.93s remaining: 3.08s
763: learn: 15342959.4790246 total: 9.94s remaining: 3.07s
764: learn: 15342697.6483068 total: 9.95s remaining: 3.06s
765: learn: 15313028.1511576 total: 9.96s remaining: 3.04s
766: learn: 15310947.6382018 total: 9.98s remaining: 3.03s
767: learn: 15263340.9642127 total: 9.99s remaining: 3.02s
768: learn: 15259807.2026083 total: 10s remaining: 3s
769: learn: 15259504.1148296 total: 10s remaining: 2.99s
770: learn: 15259395.2637694 total: 10s remaining: 2.98s
771: learn: 15256045.7141942 total: 10s remaining: 2.96s
772: learn: 15252870.9021417 total: 10.1s remaining: 2.95s
773: learn: 15248430.3201074 total: 10.1s remaining: 2.94s
774: learn: 15246752.4177458 total: 10.1s remaining: 2.93s
775: learn: 15245960.1417687 total: 10.1s remaining: 2.91s
776: learn: 15245917.6645107 total: 10.1s remaining: 2.9s
777: learn: 15245817.5185452 total: 10.1s remaining: 2.89s
778: learn: 15245619.8351855 total: 10.1s remaining: 2.87s
779: learn: 15244869.5667520 total: 10.1s remaining: 2.86s
780: learn: 15244818.8943236 total: 10.2s remaining: 2.85s
781: learn: 15244254.3637038 total: 10.2s remaining: 2.83s
782: learn: 15243818.2939855 total: 10.2s remaining: 2.82s
783: learn: 15243668.1645179 total: 10.2s remaining: 2.81s
784: learn: 15240656.8617467 total: 10.2s remaining: 2.8s
785: learn: 15237802.6637690 total: 10.2s remaining: 2.78s
786: learn: 15235097.3769887 total: 10.2s remaining: 2.77s
787: learn: 15231063.4576018 total: 10.3s remaining: 2.76s
788: learn: 15224406.4239600 total: 10.3s remaining: 2.74s
789: learn: 15220791.2846445 total: 10.3s remaining: 2.73s
790: learn: 15220221.4180094 total: 10.3s remaining: 2.72s
791: learn: 15220121.7499013 total: 10.3s remaining: 2.71s
792: learn: 15218396.9325757 total: 10.3s remaining: 2.69s
793: learn: 15213830.8844557 total: 10.3s remaining: 2.68s
794: learn: 15212644.4009126 total: 10.3s remaining: 2.67s
795: learn: 15212570.5272286 total: 10.4s remaining: 2.65s
796: learn: 15172874.1552397 total: 10.4s remaining: 2.64s
797: learn: 15164671.3501787 total: 10.4s remaining: 2.63s
798: learn: 15162711.8871221 total: 10.4s remaining: 2.62s
799: learn: 15162618.0050229 total: 10.4s remaining: 2.6s
800: learn: 15161186.8924011 total: 10.4s remaining: 2.59s
801: learn: 15160994.4738412 total: 10.4s remaining: 2.58s
802: learn: 15159385.3831268 total: 10.4s remaining: 2.56s
803: learn: 15159166.1576231 total: 10.5s remaining: 2.55s
804: learn: 15156764.1801770 total: 10.5s remaining: 2.54s
805: learn: 15146691.8394282 total: 10.5s remaining: 2.52s
806: learn: 15146533.6706853 total: 10.5s remaining: 2.51s
807: learn: 15146408.7773292 total: 10.5s remaining: 2.5s
808: learn: 15142359.7678728 total: 10.5s remaining: 2.48s
809: learn: 15142322.1248825 total: 10.5s remaining: 2.47s
810: learn: 15132770.1153732 total: 10.5s remaining: 2.46s
811: learn: 15101480.9924963 total: 10.6s remaining: 2.44s
812: learn: 15101445.1875248 total: 10.6s remaining: 2.43s
813: learn: 15075376.4419388 total: 10.6s remaining: 2.42s
814: learn: 15073160.0820287 total: 10.6s remaining: 2.4s
815: learn: 15072725.5140996 total: 10.6s remaining: 2.39s
816: learn: 15072585.7018342 total: 10.6s remaining: 2.38s
817: learn: 15071522.0001919 total: 10.6s remaining: 2.37s
818: learn: 15071382.2097110 total: 10.6s remaining: 2.35s
819: learn: 15071301.2886091 total: 10.7s remaining: 2.34s
820: learn: 15071025.2992144 total: 10.7s remaining: 2.33s
821: learn: 15069498.2268762 total: 10.7s remaining: 2.31s
822: learn: 15061575.7065075 total: 10.7s remaining: 2.3s
823: learn: 15061416.4068476 total: 10.7s remaining: 2.29s
824: learn: 15060945.6687130 total: 10.7s remaining: 2.27s
825: learn: 15051099.8538783 total: 10.7s remaining: 2.26s
826: learn: 15050450.9663299 total: 10.7s remaining: 2.25s
827: learn: 15049722.9751983 total: 10.8s remaining: 2.23s
828: learn: 15049467.7452535 total: 10.8s remaining: 2.22s
829: learn: 15049412.7697933 total: 10.8s remaining: 2.21s
830: learn: 15048891.7740041 total: 10.8s remaining: 2.19s
831: learn: 15048043.0994998 total: 10.8s remaining: 2.18s
832: learn: 15046697.3368860 total: 10.8s remaining: 2.17s
833: learn: 15038272.8803419 total: 10.8s remaining: 2.15s
834: learn: 15034639.9951102 total: 10.8s remaining: 2.14s
835: learn: 15030153.8614245 total: 10.9s remaining: 2.13s
836: learn: 15027964.0190757 total: 10.9s remaining: 2.12s
837: learn: 15023890.1409211 total: 10.9s remaining: 2.1s
838: learn: 15022954.0613643 total: 10.9s remaining: 2.09s
839: learn: 15022653.7321874 total: 10.9s remaining: 2.08s
840: learn: 15021763.5899870 total: 10.9s remaining: 2.06s
841: learn: 15021552.9666208 total: 10.9s remaining: 2.05s
842: learn: 15017213.3112838 total: 10.9s remaining: 2.04s
843: learn: 15006868.9919636 total: 11s remaining: 2.02s
844: learn: 15006047.4873296 total: 11s remaining: 2.01s
845: learn: 15003167.3995596 total: 11s remaining: 2s
846: learn: 15001516.1719277 total: 11s remaining: 1.99s
847: learn: 15000023.6971343 total: 11s remaining: 1.97s
848: learn: 14996097.9901016 total: 11s remaining: 1.96s
849: learn: 14995809.9617414 total: 11s remaining: 1.95s
850: learn: 14991694.0680204 total: 11s remaining: 1.93s
851: learn: 14990806.5441048 total: 11.1s remaining: 1.92s
852: learn: 14990539.4146062 total: 11.1s remaining: 1.91s
853: learn: 14990428.7975864 total: 11.1s remaining: 1.89s
854: learn: 14989061.7567162 total: 11.1s remaining: 1.88s
855: learn: 14983131.0103419 total: 11.1s remaining: 1.87s
856: learn: 14982655.3316759 total: 11.1s remaining: 1.86s
857: learn: 14977099.0608273 total: 11.1s remaining: 1.84s
858: learn: 14976713.9058693 total: 11.2s remaining: 1.83s
859: learn: 14976613.3364184 total: 11.2s remaining: 1.82s
860: learn: 14964115.6999829 total: 11.2s remaining: 1.81s
861: learn: 14961152.7626425 total: 11.2s remaining: 1.79s
862: learn: 14960316.8698796 total: 11.2s remaining: 1.78s
863: learn: 14960206.5805103 total: 11.2s remaining: 1.77s
864: learn: 14948350.2065232 total: 11.2s remaining: 1.75s
865: learn: 14948237.8225238 total: 11.3s remaining: 1.74s
866: learn: 14948145.1280412 total: 11.3s remaining: 1.73s
867: learn: 14947479.9936319 total: 11.3s remaining: 1.72s
868: learn: 14946706.9144290 total: 11.3s remaining: 1.7s
869: learn: 14946008.4529886 total: 11.3s remaining: 1.69s
870: learn: 14938382.9733130 total: 11.3s remaining: 1.68s
871: learn: 14935923.0018589 total: 11.3s remaining: 1.66s
872: learn: 14935763.9386719 total: 11.4s remaining: 1.65s
873: learn: 14935390.9032799 total: 11.4s remaining: 1.64s
874: learn: 14924136.4999000 total: 11.4s remaining: 1.63s
875: learn: 14923231.4975181 total: 11.4s remaining: 1.61s
876: learn: 14920764.7489123 total: 11.4s remaining: 1.6s
877: learn: 14920619.6869935 total: 11.4s remaining: 1.59s
878: learn: 14920259.9887151 total: 11.4s remaining: 1.57s
879: learn: 14918671.6063618 total: 11.4s remaining: 1.56s
880: learn: 14909484.3446534 total: 11.5s remaining: 1.55s
881: learn: 14909331.3722806 total: 11.5s remaining: 1.53s
882: learn: 14909000.4744294 total: 11.5s remaining: 1.52s
883: learn: 14907810.4215534 total: 11.5s remaining: 1.51s
884: learn: 14907739.9399244 total: 11.5s remaining: 1.5s
885: learn: 14907643.8234156 total: 11.5s remaining: 1.48s
886: learn: 14902234.6414918 total: 11.5s remaining: 1.47s
887: learn: 14899639.1808572 total: 11.5s remaining: 1.46s
888: learn: 14898572.6302420 total: 11.6s remaining: 1.44s
889: learn: 14898481.9011232 total: 11.6s remaining: 1.43s
890: learn: 14898396.0569341 total: 11.6s remaining: 1.42s
891: learn: 14897825.8306216 total: 11.6s remaining: 1.4s
892: learn: 14897667.5383103 total: 11.6s remaining: 1.39s
893: learn: 14894977.2967186 total: 11.6s remaining: 1.38s
894: learn: 14894451.1015405 total: 11.6s remaining: 1.36s
895: learn: 14894302.6357933 total: 11.6s remaining: 1.35s
896: learn: 14892042.1460828 total: 11.7s remaining: 1.34s
897: learn: 14891913.2181914 total: 11.7s remaining: 1.32s
898: learn: 14891548.8875063 total: 11.7s remaining: 1.31s
899: learn: 14891535.2806629 total: 11.7s remaining: 1.3s
900: learn: 14885531.4554658 total: 11.7s remaining: 1.29s
901: learn: 14885300.5490787 total: 11.7s remaining: 1.27s
902: learn: 14882237.8064495 total: 11.7s remaining: 1.26s
903: learn: 14871534.0201501 total: 11.7s remaining: 1.25s
904: learn: 14870231.5883229 total: 11.8s remaining: 1.23s
905: learn: 14870176.1224648 total: 11.8s remaining: 1.22s
906: learn: 14869680.0980501 total: 11.8s remaining: 1.21s
907: learn: 14869457.0180442 total: 11.8s remaining: 1.2s
908: learn: 14868933.3692698 total: 11.8s remaining: 1.18s
909: learn: 14863961.6816683 total: 11.8s remaining: 1.17s
910: learn: 14863914.5911026 total: 11.8s remaining: 1.16s
911: learn: 14855476.3946477 total: 11.8s remaining: 1.14s
912: learn: 14855166.2145584 total: 11.9s remaining: 1.13s
913: learn: 14850934.1124148 total: 11.9s remaining: 1.12s
914: learn: 14846483.1667184 total: 11.9s remaining: 1.1s
915: learn: 14837736.3329095 total: 11.9s remaining: 1.09s
916: learn: 14837595.6179847 total: 11.9s remaining: 1.08s
917: learn: 14830817.0386636 total: 11.9s remaining: 1.06s
918: learn: 14830573.0969752 total: 11.9s remaining: 1.05s
919: learn: 14830470.0025192 total: 12s remaining: 1.04s
920: learn: 14829815.6285131 total: 12s remaining: 1.03s
921: learn: 14825434.2189552 total: 12s remaining: 1.01s
922: learn: 14819445.0181126 total: 12s remaining: 1s
923: learn: 14819398.6279561 total: 12s remaining: 988ms
924: learn: 14818813.5923928 total: 12s remaining: 975ms
925: learn: 14815956.9177135 total: 12s remaining: 962ms
926: learn: 14815788.2679741 total: 12s remaining: 948ms
927: learn: 14815448.9260298 total: 12.1s remaining: 935ms
928: learn: 14815074.5372959 total: 12.1s remaining: 923ms
929: learn: 14814229.2585638 total: 12.1s remaining: 910ms
930: learn: 14804039.0241152 total: 12.1s remaining: 897ms
931: learn: 14803929.0738285 total: 12.1s remaining: 883ms
932: learn: 14803822.0317935 total: 12.1s remaining: 870ms
933: learn: 14802847.7620639 total: 12.1s remaining: 857ms
934: learn: 14802644.0143811 total: 12.1s remaining: 844ms
935: learn: 14801390.4240818 total: 12.2s remaining: 831ms
936: learn: 14801273.4225706 total: 12.2s remaining: 818ms
937: learn: 14801021.8599058 total: 12.2s remaining: 805ms
938: learn: 14800715.3029627 total: 12.2s remaining: 792ms
939: learn: 14799551.6406369 total: 12.2s remaining: 779ms
940: learn: 14795598.5613345 total: 12.2s remaining: 766ms
941: learn: 14794465.0278834 total: 12.2s remaining: 753ms
942: learn: 14794259.7563387 total: 12.2s remaining: 740ms
943: learn: 14794156.6214413 total: 12.3s remaining: 727ms
944: learn: 14792982.4344262 total: 12.3s remaining: 714ms
945: learn: 14792468.8012658 total: 12.3s remaining: 701ms
946: learn: 14792139.9923168 total: 12.3s remaining: 688ms
947: learn: 14786295.7938911 total: 12.3s remaining: 675ms
948: learn: 14783272.8762359 total: 12.3s remaining: 662ms
949: learn: 14782689.5513664 total: 12.3s remaining: 649ms
950: learn: 14782664.1266181 total: 12.3s remaining: 636ms
951: learn: 14774339.7873426 total: 12.4s remaining: 623ms
952: learn: 14769038.5403572 total: 12.4s remaining: 610ms
953: learn: 14761312.5028488 total: 12.4s remaining: 597ms
954: learn: 14760414.3496721 total: 12.4s remaining: 584ms
955: learn: 14752950.6860631 total: 12.4s remaining: 571ms
956: learn: 14752691.2767919 total: 12.4s remaining: 558ms
957: learn: 14752196.9268404 total: 12.4s remaining: 545ms
958: learn: 14750149.4071752 total: 12.5s remaining: 532ms
959: learn: 14749691.8632556 total: 12.5s remaining: 519ms
960: learn: 14749496.6164671 total: 12.5s remaining: 507ms
961: learn: 14749338.8909588 total: 12.5s remaining: 494ms
962: learn: 14749291.1143099 total: 12.5s remaining: 481ms
963: learn: 14739582.8672605 total: 12.5s remaining: 468ms
964: learn: 14739542.4442574 total: 12.5s remaining: 455ms
965: learn: 14739306.4391584 total: 12.5s remaining: 442ms
966: learn: 14708630.7344942 total: 12.6s remaining: 429ms
967: learn: 14680984.5320050 total: 12.6s remaining: 416ms
968: learn: 14677319.7390898 total: 12.6s remaining: 403ms
969: learn: 14677109.6650390 total: 12.6s remaining: 389ms
970: learn: 14676355.6242387 total: 12.6s remaining: 376ms
971: learn: 14667109.8761175 total: 12.6s remaining: 363ms
972: learn: 14643934.7657464 total: 12.6s remaining: 350ms
973: learn: 14642633.8347823 total: 12.6s remaining: 337ms
974: learn: 14642167.8071045 total: 12.7s remaining: 324ms
975: learn: 14641845.3365852 total: 12.7s remaining: 311ms
976: learn: 14640306.5144587 total: 12.7s remaining: 298ms
977: learn: 14640184.9147582 total: 12.7s remaining: 286ms
978: learn: 14640008.8483425 total: 12.7s remaining: 273ms
979: learn: 14636344.5519428 total: 12.7s remaining: 260ms
980: learn: 14633576.2663352 total: 12.7s remaining: 247ms
981: learn: 14624796.2134142 total: 12.7s remaining: 234ms
982: learn: 14624250.6774943 total: 12.8s remaining: 221ms
983: learn: 14615917.0650799 total: 12.8s remaining: 208ms
984: learn: 14615782.8388140 total: 12.8s remaining: 195ms
985: learn: 14612724.2754075 total: 12.8s remaining: 182ms
986: learn: 14609973.2837772 total: 12.8s remaining: 169ms
987: learn: 14605203.8050795 total: 12.8s remaining: 156ms
988: learn: 14605011.6874159 total: 12.8s remaining: 143ms
989: learn: 14594552.7887146 total: 12.8s remaining: 130ms
990: learn: 14591881.9316489 total: 12.9s remaining: 117ms
991: learn: 14581962.3358039 total: 12.9s remaining: 104ms
992: learn: 14581829.5331587 total: 12.9s remaining: 90.8ms
993: learn: 14581669.7347033 total: 12.9s remaining: 77.9ms
994: learn: 14577373.3119596 total: 12.9s remaining: 64.9ms
995: learn: 14577196.6327960 total: 12.9s remaining: 51.9ms
996: learn: 14577122.1536884 total: 12.9s remaining: 38.9ms
997: learn: 14574702.1336653 total: 12.9s remaining: 25.9ms
998: learn: 14574660.6060510 total: 13s remaining: 13ms
999: learn: 14574625.9856659 total: 13s remaining: 0us
Pipeline(steps=[('preprocessor', ColumnTransformer(transformers=[('num', StandardScaler(), ['geo_lat', 'geo_lon', 'level', 'levels', 'rooms', 'area', 'kitchen_area']), ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=99999999), ['region', 'building_type', 'object_type'])])), ('model', <catboost.core.CatBoostRegressor object at 0x7be8319969b0>)])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('preprocessor', ColumnTransformer(transformers=[('num', StandardScaler(), ['geo_lat', 'geo_lon', 'level', 'levels', 'rooms', 'area', 'kitchen_area']), ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=99999999), ['region', 'building_type', 'object_type'])])), ('model', <catboost.core.CatBoostRegressor object at 0x7be8319969b0>)])
ColumnTransformer(transformers=[('num', StandardScaler(), ['geo_lat', 'geo_lon', 'level', 'levels', 'rooms', 'area', 'kitchen_area']), ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=99999999), ['region', 'building_type', 'object_type'])])
['geo_lat', 'geo_lon', 'level', 'levels', 'rooms', 'area', 'kitchen_area']
StandardScaler()
['region', 'building_type', 'object_type']
OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=99999999)
<catboost.core.CatBoostRegressor object at 0x7be8319969b0>
# A single synthetic listing used to sanity-check the fitted pipeline.
# (Reconstructed: the notebook export had detached the assignment targets.)
model_params = {
    "geo_lat": 56.327686,
    "geo_lon": 43.928062,
    "region": 2871.000000,
    "building_type": 1.000000,
    "level": 8.000000,
    "levels": 10.000000,
    "rooms": 2.000000,
    "area": 56.000000,
    "kitchen_area": 8.500000,
    "object_type": 1.000000,
    # NOTE(review): 'floor_level' was not among the training columns shown in
    # df.info() above — confirm the fitted ColumnTransformer tolerates the
    # extra feature at predict time.
    "floor_level": 0,
}
# One-row DataFrame in the shape pipeline.predict() expects.
df_pred = pd.DataFrame(model_params, index=[0])
# Sanity-check: predict the price of the manually constructed listing.
pipeline.predict(df_pred)
# Output from the original run: array([3414175.89042869])
# Evaluate the fitted pipeline on the hold-out set.
predictions = pipeline.predict(X_test)

metrics = {}
metrics["mae"] = mean_absolute_error(y_test, predictions)
metrics["mape"] = mean_absolute_percentage_error(y_test, predictions)
metrics["mse"] = mean_squared_error(y_test, predictions)

metrics
# Output from the original run:
# {'mae': 1447931.3425270966,
#  'mape': 1.6294525363466488e+18,
#  'mse': 281898017343454.56}
# NOTE(review): MAPE ~1.6e18 is a red flag — sklearn's MAPE explodes when
# y_test contains values near zero; check the target for zero/near-zero prices.
# Work with MLflow running locally.
# NOTE(review): `import mlflow` is not visible in the file header shown —
# confirm it is imported at the top of the notebook.
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000

# Tracking and registry live on the same local server here.
tracking_uri = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"
registry_uri = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_registry_uri(registry_uri)
# Names for the test experiment, the run inside it, and the name under which
# the model will be registered.
EXPERIMENT_NAME = "estate_project"
RUN_NAME = "baseline model"
REGISTRY_MODEL_NAME = "estate_model_rf"
Логируем вручную
# Always log the model signature and an input example; prepare them here.
from mlflow.models import infer_signature

# Signature inferred from a small sample of the training features.
signature = infer_signature(model_input=X_train.head(5))
input_example = X_train.head(5)
# Будем логировать requirements и артефакт - текстовый файл
= 'requirements.txt'
req_file = 'comment.txt' art
# Параметры, котороые будут залогированы, можем задавать вручную или полностью взять из модели
#params_dict = {'n_estimators': 10, 'max_depth': 10}
= pipeline.get_params() params_dict
# When creating a brand-new experiment:
experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)

# Later, to append runs to the same experiment, fetch its id instead:
# experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file)
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(art)
    mlflow.log_params(params_dict)

# Sanity check: the run completed successfully.
run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
Удаление runs, experiments
Использовать осторожно
# Deleting runs/experiments — use with care.
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
# mlflow.delete_experiment(experiment_id)

mlflow.search_runs(
    # experiment_ids=[experiment_id],
    experiment_names=[EXPERIMENT_NAME],
    # filter_string='status = "FAILED"'
    # filter_string='metrics.mae > 1'
)
# mlflow.delete_run('74d2a7a40c07413c9cf65df841164356')
Автологирование
После включения будет срабатывать на каждом обучении модели (на методе fit()).
Есть плюсы, есть и минусы. Предлагается сделать прогон и сравнить с результатами вручную
# Autologging: once enabled, it fires on every fit() call.
mlflow.sklearn.autolog()

with mlflow.start_run(run_name='auto', experiment_id=experiment_id) as run:
    pipeline.fit(X_train, y_train)

# Disable autologging again so later runs are logged manually.
mlflow.sklearn.autolog(disable=True)
Model #2
Обучим вторую "маленькую" модель
# Train a second, "small" model on the same preprocessor.
regressor2 = RandomForestRegressor(n_estimators=10, max_depth=6)

pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('model', regressor2)])
pipeline.fit(X_train, y_train)

predictions = pipeline.predict(X_test)
metrics = {}
metrics["mae"] = mean_absolute_error(y_test, predictions)
metrics["mape"] = mean_absolute_percentage_error(y_test, predictions)
metrics["mse"] = mean_squared_error(y_test, predictions)

metrics
# !!! Verify the run name and everything logged (params, artifacts)
# actually correspond to the second "small" model.
RUN_NAME = 'smaller_model'

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file)
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(art)
    mlflow.log_params(pipeline.get_params())

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
# No model
# Логировать можно только артефакты, без модели. Например, залогироавть графики после этапа EDA
# A run may log artifacts only, without any model —
# e.g. plots produced during EDA.
RUN_NAME = 'no_model'
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    run_id = run.info.run_id
    mlflow.log_artifact(art)

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
# Register an already-logged model by its run id.
run_id = '06fa7ec1f1b74aedb3509c88dc4ee1c0'  # specify the run id
mlflow.register_model(f"runs:/{run_id}/models", REGISTRY_MODEL_NAME)
# The model can also be registered right when the run is created.
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name='register_at_run', experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file,
                             # Registry name to register this model under.
                             registered_model_name=REGISTRY_MODEL_NAME)
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(art)
    mlflow.log_params(pipeline.get_params())

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
# Find registered models in the registry.
model_reg = mlflow.search_registered_models()
model_reg[0]

model_name = REGISTRY_MODEL_NAME
model_version = 1

# Load a specific registered version and sanity-check one prediction.
model_loaded = mlflow.sklearn.load_model(model_uri=f"models:/{model_name}/{model_version}")

model_loaded.predict(X_test.iloc[0:1])
y_test.iloc[0]
Feature engineering
Sklearn
from sklearn.preprocessing import QuantileTransformer, SplineTransformer, PolynomialFeatures, MinMaxScaler

# Work on a copy so feature-engineering experiments don't mutate X_train.
X_train_sklearn = X_train.copy()
PolynomialFeatures
Создает полином степени degree
из указанных признаков
# Degree-2 polynomial expansion of the selected features.
pf = PolynomialFeatures(degree=2)

X_train_sklearn

pf.fit_transform(X_train_sklearn[['area', 'kitchen_area']])
SplineTransformer
Создаёт новую матрицу признаков, состоящую из сплайнов порядка degree. Количество сгенерированных сплайнов равно n_splines = n_knots + degree - 1
для каждого признака, где
n_knots
определяет количество узлов (точек, в которых сопрягаются сплайны) для каждого признака.
degree
определяет порядок полинома, используемого для построения сплайнов.
# B-spline basis expansion: n_knots + degree - 1 splines per feature.
sp = SplineTransformer(n_knots=3, degree=3)
sp.fit_transform(X_train_sklearn[['area']])
QuantileTransformer
Этот метод преобразует признаки, чтобы они распределялись равномерно или нормально — так данные меньше подвергаются влиянию выбросов. Преобразование применяется к каждому признаку независимо. Идея метода такова: оценить функцию распределения признака, чтобы преобразовать исходные значения в равномерное или нормальное распределение.
output_distribution='uniform'
или output_distribution='normal'
соответственно
Пример использования: если у вас есть данные о доходах с широким диапазоном значений, квантильное преобразование сделает их более сопоставимыми и устойчивыми к выбросам.
# Quantile transform: maps each feature to a uniform distribution,
# reducing the influence of outliers.
qt = QuantileTransformer()
qt.fit_transform(X_train_sklearn[['area']])
Объединяем в ColumnTransformer и создаем Pipeline
pf = PolynomialFeatures(degree=2)
qt = QuantileTransformer()
sp = SplineTransformer(n_knots=3, degree=3)

# Transformed feature values must be rescaled, so build a two-step
# pipeline: polynomial expansion followed by standard scaling.
pf_pipeline = Pipeline(steps=[
    ('poly', pf),
    ('scale', StandardScaler())
])

preprocessor_sklearn = ColumnTransformer(
    transformers=[
        ('num', s_scaler, num_features),        # numeric features
        ('cat', l_encoder, cat_features),       # categorical features
        ('quantile', qt, num_features),
        ('poly', pf_pipeline, ['area', 'kitchen_area']),  # the pipeline built above
        ('spline', sp, ['area'])
    ],
    remainder='drop',  # drop columns not covered by any transformer
)
Посмотрим что из себя теперь представляет датафрейм
## The polynomial expansion overflows the narrow float dtypes —
## cast up first; use this transform with care!
# NOTE(review): 'float128' is platform-dependent (numpy extended precision) — confirm availability.
X_train_sklearn[['area', 'kitchen_area']] = X_train_sklearn[['area', 'kitchen_area']].astype('float128')

X_train_sklearn_raw = preprocessor_sklearn.fit_transform(X_train_sklearn)
X_train_sklearn = pd.DataFrame(X_train_sklearn_raw, columns=preprocessor_sklearn.get_feature_names_out())

# Handy context manager to show all rows/columns of the DataFrame.
with pd.option_context('display.max_rows', 5, 'display.max_columns', None):
    display(X_train_sklearn)
num__geo_lat | num__geo_lon | num__level | num__levels | num__rooms | num__area | num__kitchen_area | cat__region | cat__building_type | cat__object_type | quantile__geo_lat | quantile__geo_lon | quantile__level | quantile__levels | quantile__rooms | quantile__area | quantile__kitchen_area | poly__1 | poly__area | poly__kitchen_area | poly__area^2 | poly__area kitchen_area | poly__kitchen_area^2 | spline__area_sp_0 | spline__area_sp_1 | spline__area_sp_2 | spline__area_sp_3 | spline__area_sp_4 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.495902 | -0.449742 | 0.359235 | -0.214789 | 0.253413 | 0.063735 | -0.186285 | 20.0 | 1.0 | 0.0 | 0.766257 | 0.511028 | 0.717217 | 0.536537 | 0.600601 | 0.623624 | 0.374875 | 0.0 | 0.063735 | -0.186285 | -0.010002 | -0.132188 | -0.002792 | 0.155806 | 0.666179 | 0.178013 | 0.000002 | 0.0 |
1 | 0.177806 | 1.433673 | -0.246529 | -0.367718 | 0.253413 | -0.114293 | -0.186285 | 70.0 | 1.0 | 0.0 | 0.297142 | 0.867999 | 0.522022 | 0.386887 | 0.600601 | 0.541542 | 0.374875 | 0.0 | -0.114293 | -0.186285 | -0.017375 | -0.169370 | -0.002792 | 0.156921 | 0.666275 | 0.176803 | 0.000001 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
410773 | -0.748366 | -0.804077 | -0.650371 | 0.702788 | 0.253413 | 1.365441 | 1.501833 | 52.0 | 3.0 | 0.0 | 0.193143 | 0.114753 | 0.309810 | 0.741742 | 0.600601 | 0.961367 | 0.984535 | 0.0 | 1.365441 | 1.501833 | 0.068438 | 1.570163 | 0.008616 | 0.147820 | 0.665159 | 0.187011 | 0.000010 | 0.0 |
410774 | 1.257769 | -1.101815 | -0.044608 | 0.091070 | 1.175911 | 0.553789 | -0.142544 | 14.0 | 1.0 | 0.0 | 0.908036 | 0.075725 | 0.604605 | 0.645646 | 0.867367 | 0.841842 | 0.436436 | 0.0 | 0.553789 | -0.142544 | 0.014463 | -0.002742 | -0.002649 | 0.152767 | 0.665860 | 0.181370 | 0.000004 | 0.0 |
410775 rows × 28 columns
Создаем пайплайн с препроцессингом и моделью
# Pipeline with the sklearn feature-engineering preprocessor and the model.
pipeline_sklearn = Pipeline(steps=[
    ('transform', preprocessor_sklearn),
    ('model', regressor)
])

model_sklearn = pipeline_sklearn.fit(X_train, y_train)
model_sklearn

predictions = model_sklearn.predict(X_test)
metrics = {}
metrics["mae"] = mean_absolute_error(y_test, predictions)
metrics["mape"] = mean_absolute_percentage_error(y_test, predictions)
metrics["mse"] = mean_squared_error(y_test, predictions)

metrics
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
RUN_NAME = 'fe_sklearn'

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(model_sklearn,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file)
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(art)
    mlflow.log_params(model_sklearn.get_params())

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
Autofeat
from autofeat import AutoFeatRegressor

# Full menu of transformations autofeat supports (for reference).
transformations = ["1/", "exp", "log", "abs", "sqrt", "^2", "^3", "1+", "1-", "sin", "cos", "exp-", "2^"]

# Restrict to log/sqrt over the numeric features to keep runtime/memory sane.
afreg = AutoFeatRegressor(verbose=1, feateng_steps=2, max_gb=8,
                          transformations=["log", "sqrt"], feateng_cols=num_features)
X_train_arf = afreg.fit_transform(X_train, y_train)
X_train_arf
# Wrapper adding get_feature_names_out() so AutoFeatRegressor can be used
# inside a ColumnTransformer (which queries transformers for feature names).
import numpy as np


class AutoFeatWrapper():
    """Thin sklearn-style adapter around autofeat.AutoFeatRegressor."""

    def __init__(self, feateng_cols, feateng_steps=1, max_gb=16,
                 transformations=["1/", "exp", "log"], n_jobs=-1, verbose=1):
        # NOTE: the mutable default list is only ever read, never mutated.
        self.feateng_cols = feateng_cols
        self.feateng_steps = feateng_steps
        self.max_gb = max_gb
        self.transformations = transformations
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.afreg = AutoFeatRegressor(feateng_cols=self.feateng_cols,
                                       feateng_steps=self.feateng_steps,
                                       max_gb=self.max_gb,
                                       transformations=self.transformations,
                                       n_jobs=self.n_jobs,
                                       # Bug fix: verbose was accepted but never forwarded.
                                       verbose=self.verbose)

    def fit(self, X, y=None):
        self.afreg.fit(X, y)
        return self

    def transform(self, X):
        return self.afreg.transform(X)

    def get_feature_names_out(self, input_features=None):
        # Transform a dummy all-zeros row and read the generated column names.
        transformed_X = self.afreg.transform(
            pd.DataFrame(np.zeros((1, len(self.feateng_cols))), columns=self.feateng_cols))
        return transformed_X.columns.tolist()
# Autofeat features must also be scaled, hence the two-step pipeline.
afreg_pipeline = Pipeline(steps=[
    ('autofeat', AutoFeatWrapper(feateng_steps=2, max_gb=16,
                                 transformations=["log", "sqrt"],
                                 feateng_cols=num_features)),
    ('scaler', StandardScaler()),
])

preprocessor_afr = ColumnTransformer(
    transformers=[
        ('num', s_scaler, num_features),    # numeric features
        ('cat', l_encoder, cat_features),   # categorical features
        ('afr', afreg_pipeline, num_features),  # autofeat-generated features
    ],
    remainder='drop',  # drop columns not covered by any transformer
)

X_train_afr_raw = preprocessor_afr.fit_transform(X_train, y_train)
X_train_afr = pd.DataFrame(X_train_afr_raw, columns=preprocessor_afr.get_feature_names_out())

with pd.option_context('display.max_rows', 5, 'display.max_columns', None):
    display(X_train_afr)
num__geo_lat | num__geo_lon | num__level | num__levels | num__rooms | num__area | num__kitchen_area | cat__region | cat__building_type | cat__object_type | afr__geo_lat | afr__geo_lon | afr__level | afr__levels | afr__rooms | afr__area | afr__kitchen_area | afr__area*rooms | afr__area*geo_lon | afr__levels*rooms | afr__area*kitchen_area | afr__sqrt(area)*geo_lat | afr__sqrt(area)*log(level) | afr__kitchen_area*log(level) | afr__sqrt(area)*kitchen_area | afr__geo_lon*log(kitchen_area) | afr__sqrt(area)*sqrt(kitchen_area) | afr__sqrt(geo_lon)*sqrt(kitchen_area) | afr__log(area) | afr__rooms*log(level) | afr__kitchen_area*rooms | afr__kitchen_area*levels | afr__sqrt(geo_lon)*sqrt(level) | afr__area**(3/2) | afr__geo_lat*log(kitchen_area) | afr__geo_lat*log(geo_lon) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.495902 | -0.449742 | 0.359235 | -0.214789 | 0.253413 | 0.063735 | -0.186285 | 20.0 | 1.0 | 0.0 | 0.495902 | -0.449742 | 0.359235 | -0.214789 | 0.253413 | 0.063735 | -0.186285 | 0.006208 | -0.195129 | 0.060916 | -0.132188 | 0.373151 | 0.688076 | 0.044178 | -0.211335 | -0.481294 | -0.153548 | -0.490805 | 0.307835 | 0.690329 | -0.132529 | -0.352834 | 0.323880 | -0.008748 | -0.031529 | 0.068167 |
1 | 0.177806 | 1.433673 | -0.246529 | -0.367718 | 0.253413 | -0.114293 | -0.186285 | 70.0 | 1.0 | 0.0 | 0.177806 | 1.433673 | -0.246529 | -0.367718 | 0.253413 | -0.114293 | -0.186285 | -0.083402 | 0.655053 | -0.054279 | -0.169370 | 0.005114 | 0.071369 | -0.173647 | -0.252775 | 1.191304 | -0.267268 | 0.615798 | 0.031907 | 0.282625 | -0.132529 | -0.418643 | 0.552794 | -0.056540 | -0.143829 | 1.129118 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
410773 | -0.748366 | -0.804077 | -0.650371 | 0.702788 | 0.253413 | 1.365441 | 1.501833 | 52.0 | 3.0 | 0.0 | -0.748366 | -0.804077 | -0.650371 | 0.702788 | 0.253413 | 1.365441 | 1.501833 | 0.661427 | 0.375199 | 0.752088 | 1.570163 | 1.274445 | -0.002521 | 0.745507 | 2.382258 | 0.071599 | 2.828890 | 1.431272 | 1.729715 | -0.160491 | 1.581436 | 2.432437 | -0.843150 | 0.411475 | 1.671069 | -1.052343 |
410774 | 1.257769 | -1.101815 | -0.044608 | 0.091070 | 1.175911 | 0.553789 | -0.142544 | 14.0 | 1.0 | 0.0 | 1.257769 | -1.101815 | -0.044608 | 0.091070 | 1.175911 | 0.553789 | -0.142544 | 0.807887 | -0.330070 | 0.982478 | -0.002742 | 1.338996 | 0.635065 | -0.040302 | -0.055435 | -1.025588 | 0.202136 | -0.916054 | 0.940624 | 1.217910 | 0.311575 | -0.174762 | -0.415359 | 0.135617 | 0.359680 | -0.246790 |
410775 rows × 36 columns
pipeline_afr = Pipeline(steps=[('preprocessor', preprocessor_afr),
                               ('model', regressor)])
pipeline_afr.fit(X_train, y_train)
predictions = pipeline_afr.predict(X_test)

metrics = {}
metrics["mae"] = mean_absolute_error(y_test, predictions)
metrics["mape"] = mean_absolute_percentage_error(y_test, predictions)
metrics["mse"] = mean_squared_error(y_test, predictions)

metrics
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name='autofeat', experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(pipeline_afr,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file)
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(art)
    mlflow.log_params(pipeline_afr.get_params())

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
FEATURE SELECTION
RFE
Используем autofeat признаки
Поскольку autofeat дает разные совокупности сгенерированных признаков, мы можем добавить выбор информативных только как шаг пайплайна
from sklearn.feature_selection import RFE

X_train_afr
num__geo_lat | num__geo_lon | num__level | num__levels | num__rooms | num__area | num__kitchen_area | cat__region | cat__building_type | cat__object_type | ... | afr__sqrt(area)*sqrt(kitchen_area) | afr__sqrt(geo_lon)*sqrt(kitchen_area) | afr__log(area) | afr__rooms*log(level) | afr__kitchen_area*rooms | afr__kitchen_area*levels | afr__sqrt(geo_lon)*sqrt(level) | afr__area**(3/2) | afr__geo_lat*log(kitchen_area) | afr__geo_lat*log(geo_lon) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.495902 | -0.449742 | 0.359235 | -0.214789 | 0.253413 | 0.063735 | -0.186285 | 20.0 | 1.0 | 0.0 | ... | -0.153548 | -0.490805 | 0.307835 | 0.690329 | -0.132529 | -0.352834 | 0.323880 | -0.008748 | -0.031529 | 0.068167 |
1 | 0.177806 | 1.433673 | -0.246529 | -0.367718 | 0.253413 | -0.114293 | -0.186285 | 70.0 | 1.0 | 0.0 | ... | -0.267268 | 0.615798 | 0.031907 | 0.282625 | -0.132529 | -0.418643 | 0.552794 | -0.056540 | -0.143829 | 1.129118 |
2 | 0.440548 | 0.047222 | -0.448450 | -0.367718 | -0.669085 | -0.456947 | -0.142544 | 15.0 | 3.0 | 1.0 | ... | -0.454880 | -0.067183 | -0.603122 | -0.512211 | -0.487813 | -0.383803 | -0.243092 | -0.140800 | 0.063464 | 0.460495 |
3 | -1.588818 | -0.722477 | -0.246529 | -0.979436 | 0.253413 | -0.181292 | -0.142544 | 18.0 | 1.0 | 0.0 | ... | -0.254514 | -0.607607 | -0.080304 | 0.282625 | -0.088119 | -0.662523 | -0.369355 | -0.073838 | -0.672113 | -1.481033 |
4 | 1.493662 | 1.125819 | 0.157313 | 0.549858 | 0.253413 | 0.615045 | -0.011322 | 10.0 | 2.0 | 0.0 | ... | 0.438600 | 0.891383 | 1.009612 | 0.574497 | 0.045112 | 0.208478 | 0.945981 | 0.154902 | 0.780855 | 1.923382 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
410770 | 0.592011 | 0.355014 | 0.561156 | 1.008646 | 0.253413 | -0.079836 | -0.092653 | 54.0 | 2.0 | 0.0 | ... | -0.120035 | 0.237580 | 0.087725 | 0.792500 | -0.037463 | 0.322797 | 0.974381 | -0.047496 | 0.243018 | 0.789871 |
410771 | 0.240478 | 0.392697 | -0.650371 | -0.979436 | 0.253413 | -0.334434 | -0.404989 | 45.0 | 3.0 | 0.0 | ... | -0.716150 | -0.510766 | -0.357277 | -0.160491 | -0.354582 | -0.778657 | -0.406361 | -0.111897 | -0.808157 | 0.574534 |
410772 | -1.936771 | -0.688830 | 0.359235 | 0.855717 | -0.669085 | -0.456947 | -0.142544 | 18.0 | 0.0 | 1.0 | ... | -0.454880 | -0.581851 | -0.603122 | -0.211576 | -0.487813 | 0.173638 | 0.170166 | -0.140800 | -0.798234 | -1.663294 |
410773 | -0.748366 | -0.804077 | -0.650371 | 0.702788 | 0.253413 | 1.365441 | 1.501833 | 52.0 | 3.0 | 0.0 | ... | 2.828890 | 1.431272 | 1.729715 | -0.160491 | 1.581436 | 2.432437 | -0.843150 | 0.411475 | 1.671069 | -1.052343 |
410774 | 1.257769 | -1.101815 | -0.044608 | 0.091070 | 1.175911 | 0.553789 | -0.142544 | 14.0 | 1.0 | 0.0 | ... | 0.202136 | -0.916054 | 0.940624 | 1.217910 | 0.311575 | -0.174762 | -0.415359 | 0.135617 | 0.359680 | -0.246790 |
410775 rows × 36 columns
# Recursive feature elimination: drop 20% of features each iteration.
rfe_selector = RFE(estimator=regressor, n_features_to_select=12, step=0.2)
X_train_rfe = rfe_selector.fit_transform(X_train_afr, y_train)

X_train_afr_rfe = pd.DataFrame(X_train_rfe, columns=rfe_selector.get_feature_names_out())
X_train_afr_rfe
num__geo_lat | num__geo_lon | afr__geo_lon | afr__area*kitchen_area | afr__sqrt(area)*geo_lat | afr__sqrt(area)*log(level) | afr__kitchen_area*log(level) | afr__sqrt(area)*sqrt(kitchen_area) | afr__rooms*log(level) | afr__kitchen_area*rooms | afr__sqrt(geo_lon)*sqrt(level) | afr__geo_lat*log(geo_lon) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.495902 | -0.449742 | -0.449742 | -0.132188 | 0.373151 | 0.688076 | 0.044178 | -0.153548 | 0.690329 | -0.132529 | 0.323880 | 0.068167 |
1 | 0.177806 | 1.433673 | 1.433673 | -0.169370 | 0.005114 | 0.071369 | -0.173647 | -0.267268 | 0.282625 | -0.132529 | 0.552794 | 1.129118 |
2 | 0.440548 | 0.047222 | 0.047222 | -0.226261 | -0.425530 | -0.335537 | -0.239271 | -0.454880 | -0.512211 | -0.487813 | -0.243092 | 0.460495 |
3 | -1.588818 | -0.722477 | -0.722477 | -0.165302 | -0.723225 | 0.034116 | -0.129771 | -0.254514 | 0.282625 | -0.088119 | -0.369355 | -1.481033 |
4 | 1.493662 | 1.125819 | 1.125819 | 0.094342 | 1.522265 | 0.862773 | 0.194490 | 0.438600 | 0.574497 | 0.045112 | 0.945981 | 1.923382 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
410770 | 0.592011 | 0.355014 | 0.355014 | -0.120841 | 0.206926 | 0.714499 | 0.226990 | -0.120035 | 0.792500 | -0.037463 | 0.974381 | 0.789871 |
410771 | 0.240478 | 0.392697 | 0.392697 | -0.296252 | -0.297209 | -0.551021 | -0.560144 | -0.716150 | -0.160491 | -0.354582 | -0.406361 | 0.574534 |
410772 | -1.936771 | -0.688830 | -0.688830 | -0.226261 | -1.192706 | 0.306280 | 0.100868 | -0.454880 | -0.211576 | -0.487813 | 0.170166 | -1.663294 |
410773 | -0.748366 | -0.804077 | -0.804077 | 1.570163 | 1.274445 | -0.002521 | 0.745507 | 2.828890 | -0.160491 | 1.581436 | -0.843150 | -1.052343 |
410774 | 1.257769 | -1.101815 | -1.101815 | -0.002742 | 1.338996 | 0.635065 | -0.040302 | 0.202136 | 1.217910 | 0.311575 | -0.415359 | -0.246790 |
410775 rows × 12 columns
# Since autofeat can generate different feature sets per fit, the selection
# of informative features must be a pipeline step, not a one-off.
rfe_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_afr),
    ('rfe_extractor', RFE(estimator=regressor, n_features_to_select=12, step=0.2)),
    ('model', regressor)
])

rfe_pipeline.fit(X_train, y_train)
predictions_rfe = rfe_pipeline.predict(X_test)

metrics = {}
metrics["mae"] = mean_absolute_error(y_test, predictions_rfe)
metrics["mape"] = mean_absolute_percentage_error(y_test, predictions_rfe)
metrics["mse"] = mean_squared_error(y_test, predictions_rfe)

metrics
{'mae': 1431925.3203264712,
'mape': 1.239752923791043e+18,
'mse': 261947924998018.2}
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
RUN_NAME = 'rfe_feature_selection'

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(rfe_pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file)
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(art)
    # Bug fix: was logging model_sklearn's params for the RFE pipeline run.
    mlflow.log_params(rfe_pipeline.get_params())

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 40.15it/s]
2024/10/17 14:26:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run rfe_feature_selection at: http://127.0.0.1:5000/#/experiments/1/runs/96f0bbcd6d88466abcf38f3b53f06ff1.
2024/10/17 14:26:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1.
Используем sklearn признаки
Тут мы можем отобрать признаки один раз на обучении, а далее в качестве шага пайплайна использовать написанный класс ColumnExtractor для выбора нужных столбцов
# RFE over the sklearn-engineered features: drop 20% of features per iteration.
rfe_skl_selector = RFE(estimator=regressor, n_features_to_select=12, step=0.2)
X_train_skl_rfe = rfe_skl_selector.fit_transform(X_train_sklearn, y_train)

X_train_skl_rfe = pd.DataFrame(X_train_skl_rfe, columns=rfe_skl_selector.get_feature_names_out())
X_train_skl_rfe
num__geo_lat | num__geo_lon | num__level | num__rooms | num__kitchen_area | cat__region | quantile__geo_lat | quantile__geo_lon | quantile__level | poly__area kitchen_area | spline__area_sp_0 | spline__area_sp_2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.495902 | -0.449742 | 0.359235 | 0.253413 | -0.186285 | 20.0 | 0.766257 | 0.511028 | 0.717217 | -0.132188 | 0.155806 | 0.178013 |
1 | 0.177806 | 1.433673 | -0.246529 | 0.253413 | -0.186285 | 70.0 | 0.297142 | 0.867999 | 0.522022 | -0.169370 | 0.156921 | 0.176803 |
2 | 0.440548 | 0.047222 | -0.448450 | -0.669085 | -0.142544 | 15.0 | 0.732330 | 0.629984 | 0.417417 | -0.226261 | 0.159080 | 0.174488 |
3 | -1.588818 | -0.722477 | -0.246529 | 0.253413 | -0.142544 | 18.0 | 0.148789 | 0.295262 | 0.522022 | -0.165302 | 0.157341 | 0.176349 |
4 | 1.493662 | 1.125819 | 0.157313 | 0.253413 | -0.011322 | 10.0 | 0.985937 | 0.758363 | 0.662663 | 0.094342 | 0.152390 | 0.181792 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
410770 | 0.592011 | 0.355014 | 0.561156 | 0.253413 | -0.092653 | 54.0 | 0.788393 | 0.686728 | 0.771271 | -0.120841 | 0.156705 | 0.177037 |
410771 | 0.240478 | 0.392697 | -0.650371 | 0.253413 | -0.404989 | 45.0 | 0.494062 | 0.717240 | 0.309810 | -0.296252 | 0.158306 | 0.175314 |
410772 | -1.936771 | -0.688830 | 0.359235 | -0.669085 | -0.142544 | 18.0 | 0.131352 | 0.327613 | 0.717217 | -0.226261 | 0.159080 | 0.174488 |
410773 | -0.748366 | -0.804077 | -0.650371 | 0.253413 | 1.501833 | 52.0 | 0.193143 | 0.114753 | 0.309810 | 1.570163 | 0.147820 | 0.187011 |
410774 | 1.257769 | -1.101815 | -0.044608 | 1.175911 | -0.142544 | 14.0 | 0.908036 | 0.075725 | 0.604605 | -0.002742 | 0.152767 | 0.181370 |
410775 rows × 12 columns
# Names of the columns RFE kept.
rfe_cols = X_train_skl_rfe.columns.tolist()
rfe_cols
['num__geo_lat',
'num__geo_lon',
'num__level',
'num__rooms',
'num__kitchen_area',
'cat__region',
'quantile__geo_lat',
'quantile__geo_lon',
'quantile__level',
'poly__area kitchen_area',
'spline__area_sp_0',
'spline__area_sp_2']
# Boolean support mask over the preprocessed feature columns.
rfe_idx = rfe_skl_selector.support_
rfe_idx
array([ True, True, True, False, True, False, True, True, False,
False, True, True, True, False, False, False, False, False,
False, False, False, True, False, True, False, True, False,
False])
# The selected columns must be logged, otherwise we lose track
# of which features were chosen.
with open('rfe_skl_idx.txt', 'w+') as f:
    f.write(str(rfe_idx))
with open('rfe_skl_cols.txt', 'w+') as f:
    f.write(str(rfe_cols))
class ColumnExtractor:
    """Pipeline step selecting array columns by index list or boolean mask.

    Re-applies a feature selection (e.g. an RFE support_ mask) computed once
    on the training data to any 2-D array coming out of the preprocessor.
    """

    def __init__(self, cols):
        # cols: sequence of column indices or a boolean mask over columns.
        self.cols = cols

    def fit(self, X, y=None):
        # Stateless transformer: nothing to learn.
        return self

    def transform(self, X):
        return X[:, self.cols]
# The selection here was computed once, so a ColumnExtractor step suffices.
rfe_skl_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_idx)),
    ('model', regressor)
])

rfe_skl_pipeline.fit(X_train, y_train)
predictions_rfe_skl = rfe_skl_pipeline.predict(X_test)

metrics = {}
metrics["mae"] = mean_absolute_error(y_test, predictions_rfe_skl)
metrics["mape"] = mean_absolute_percentage_error(y_test, predictions_rfe_skl)
metrics["mse"] = mean_squared_error(y_test, predictions_rfe_skl)
metrics

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
RUN_NAME = 'rfe_skl_feature_selection'

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    # Bug fix: this run evaluated rfe_skl_pipeline but logged rfe_pipeline.
    mlflow.sklearn.log_model(rfe_skl_pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file)
    mlflow.log_metrics(metrics)
    mlflow.log_artifact('rfe_skl_cols.txt')
    mlflow.log_artifact('rfe_skl_idx.txt')
    # Bug fix: params must come from the pipeline being logged.
    mlflow.log_params(rfe_skl_pipeline.get_params())

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 193.34it/s]
2024/10/17 14:32:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run rfe_skl_feature_selection at: http://127.0.0.1:5000/#/experiments/1/runs/e55206caeb1549e4aa0d98343d5c1d4d.
2024/10/17 14:32:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1.
mlextend
from mlxtend.feature_selection import SequentialFeatureSelector
# from sklearn.feature_selection import SequentialFeatureSelector

sfs = SequentialFeatureSelector(RandomForestRegressor(n_estimators=3),
                                k_features=3,
                                forward=True,
                                floating=False,  # True to drop selected features
                                scoring='neg_mean_absolute_error',
                                cv=2)

sfs.fit(X_train_sklearn, y_train)

selected_features_sfs = X_train_sklearn.loc[:, sfs.k_feature_names_]
selected_features_sfs
num__geo_lon | quantile__geo_lat | spline__area_sp_3 | |
---|---|---|---|
0 | -0.449742 | 0.766257 | 1.826008e-06 |
1 | 1.433673 | 0.297142 | 1.310449e-06 |
2 | 0.047222 | 0.732330 | 6.098363e-07 |
3 | -0.722477 | 0.148789 | 1.144942e-06 |
4 | 1.125819 | 0.985937 | 4.240047e-06 |
... | ... | ... | ... |
410770 | 0.355014 | 0.788393 | 1.401454e-06 |
410771 | 0.392697 | 0.494062 | 8.202272e-07 |
410772 | -0.688830 | 0.131352 | 6.098363e-07 |
410773 | -0.804077 | 0.193143 | 1.004843e-05 |
410774 | -1.101815 | 0.908036 | 3.903343e-06 |
410775 rows × 3 columns
# Indices and names of the features SFS selected.
rfe_sfs_idx = list(sfs.k_feature_idx_)
rfe_sfs_idx
rfe_sfs_col = list(sfs.k_feature_names_)
rfe_sfs_col
['num__geo_lon', 'quantile__geo_lat', 'spline__area_sp_3']
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs

# Plot CV score vs. number of selected features, with std-dev bands.
fig = plot_sfs(sfs.get_metric_dict(), kind='std_dev')

plt.title('Sequential Forward Selection (w. StdDev)')
plt.grid()
plt.show()
rfe_sfs_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
    ('model', regressor)
])

rfe_sfs_pipeline.fit(X_train, y_train)
predictions_sfs = rfe_sfs_pipeline.predict(X_test)

metrics = {}
metrics["mae"] = mean_absolute_error(y_test, predictions_sfs)
metrics["mape"] = mean_absolute_percentage_error(y_test, predictions_sfs)
metrics["mse"] = mean_squared_error(y_test, predictions_sfs)
metrics

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
RUN_NAME = 'rfe_sfs_feature_selection'

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Unique identifier of this experiment run.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(rfe_sfs_pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file)
    mlflow.log_metrics(metrics)
    # NOTE(review): these artifacts describe the earlier sklearn-RFE selection,
    # not the SFS selection — confirm or replace with the SFS columns.
    mlflow.log_artifact('rfe_skl_cols.txt')
    mlflow.log_artifact('rfe_skl_idx.txt')
    # Bug fix: was logging model_sklearn's params for the SFS pipeline run.
    mlflow.log_params(rfe_sfs_pipeline.get_params())

run = mlflow.get_run(run_id)
assert (run.info.status == 'FINISHED')
Можно совмещать признаки, выбранные по sfs и sbs: брать их объединение или пересечение. Можно комбинировать с признаками, выделенными разными подходами - целое поле для исследований
HYPERPARAMS
Gridsearch
from sklearn.model_selection import GridSearchCV

# Search only over the CatBoost tree depth.
param_grid = {
    'model__depth': [1, 3, 5]
}

gs = GridSearchCV(rfe_sfs_pipeline, param_grid, cv=2, scoring='neg_mean_absolute_error')
gs.fit(X_train, y_train)
print("Лучшие гиперпараметры:", gs.best_params_)
# Rebuild the pipeline with the best depth found by GridSearch.
gs_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
    ('model', CatBoostRegressor(depth=5))
])

# Run the standard test-set evaluation and log the run.
Вместо GridSearch можно использовать RandomSearch
Optuna
import optuna


def objective(trial):
    """Optuna objective: fit the pipeline with trial hyperparameters, return test MAE."""
    # Suggest hyperparameters.
    depth = trial.suggest_int('depth', 1, 10)
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1)

    # Build and train the model.
    opt_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor_sklearn),
        ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
        ('model', CatBoostRegressor(depth=depth, learning_rate=learning_rate, verbose=0))
    ])
    opt_pipeline.fit(X_train, y_train)

    # Predict and compute MAE (comment previously said RMSE — the code computes MAE).
    preds = opt_pipeline.predict(X_test)
    mae = mean_absolute_error(y_test, preds)

    return mae


study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Report results.
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
# Rebuild the pipeline with the best hyperparameters found by Optuna.
opt_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_sklearn),
    ('rfe_extractor', ColumnExtractor(rfe_sfs_idx)),
    ('model', CatBoostRegressor(depth=3, learning_rate=0.02789))
])

# Run the standard test-set evaluation and log the run.
Выбираем лучшую модель. Обучаем ее на всей выборке (а не только на train-части). Далее будем деплоить именно её