{ "cells": [ { "cell_type": "markdown", "id": "5c39c249", "metadata": {}, "source": [ "# Исследование и настройка предсказательной модели для цен на подержанные автомобили" ] }, { "cell_type": "markdown", "id": "f8ee2da9", "metadata": {}, "source": [ "Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill." ] }, { "cell_type": "code", "execution_count": 1, "id": "030077f5-b1e3-4b5a-9e2f-4dc83a7bfa1e", "metadata": {}, "outputs": [], "source": [ "#XXX: разделить блокнот штук на 5" ] }, { "cell_type": "code", "execution_count": 2, "id": "2a9483a4", "metadata": {}, "outputs": [], "source": [ "from typing import Optional" ] }, { "cell_type": "code", "execution_count": 3, "id": "3d7aae3e", "metadata": { "tags": [ "parameters" ] }, "outputs": [], "source": [ "data_aug_pickle_path: Optional[str] = None\n", "# Полный путь к файлу (pickle), из которого загружается аугментированный датасет. Если не установлен, используется `data/<data_aug_pickle_relpath>`.\n", "data_aug_pickle_relpath: str = 'cars.aug.pickle'\n", "# Путь к файлу (pickle) с аугментированным датасетом относительно директории данных `data`. Игнорируется, если установлен `data_aug_pickle_path`.\n", "\n", "#model_global_comment_path: Optional[str] = None\n", "## Полный путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью. Если не установлен, используется `research/<model_comment_relpath>`.\n", "#model_comment_relpath: str = 'comment.txt'\n", "## Путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью относительно директории `research`. 
Игнорируется, если установлен comment_path.\n", "\n", "mlflow_tracking_server_uri: str = 'http://localhost:5000'\n", "# URL tracking-сервера MLFlow.\n", "mlflow_registry_uri: Optional[str] = None\n", "# URL сервера registry MLFlow (если не указан, используется `mlflow_tracking_server_uri`).\n", "\n", "mlflow_do_log: bool = False\n", "# Записывать ли прогоны (runs) в MLFlow.\n", "mlflow_experiment_id: Optional[str] = None\n", "# ID эксперимента MLFlow, имеет приоритет над `mlflow_experiment_name`.\n", "mlflow_experiment_name: Optional[str] = 'Current price predicion for used cars'\n", "# Имя эксперимента MLFlow (ниже приоритетом, чем `mlflow_experiment_id`).\n", "mlflow_root_run_name: str = 'Models'\n", "# Имя корневого прогона MLFlow (остальные прогоны будут созданы блокнотом внутри этого, как nested)" ] }, { "cell_type": "code", "execution_count": 4, "id": "7afe82f3", "metadata": {}, "outputs": [], "source": [ "from collections.abc import Collection, Sequence\n", "import os\n", "import pathlib\n", "import pickle\n", "import sys" ] }, { "cell_type": "code", "execution_count": 5, "id": "a02f69a7", "metadata": {}, "outputs": [], "source": [ "import matplotlib\n", "import mlflow\n", "import mlflow.models\n", "import mlflow.sklearn\n", "import mlxtend.feature_selection\n", "import mlxtend.plotting\n", "import optuna\n", "import optuna.samplers\n", "import sklearn.compose\n", "import sklearn.ensemble\n", "import sklearn.metrics\n", "import sklearn.model_selection\n", "import sklearn.pipeline\n", "import sklearn.preprocessing" ] }, { "cell_type": "code", "execution_count": 6, "id": "f9a47ec5", "metadata": {}, "outputs": [], "source": [ "BASE_PATH = pathlib.Path('..')" ] }, { "cell_type": "code", "execution_count": 7, "id": "66d6fe3d", "metadata": {}, "outputs": [], "source": [ "CODE_PATH = BASE_PATH\n", "sys.path.insert(0, str(CODE_PATH.resolve()))" ] }, { "cell_type": "code", "execution_count": 8, "id": "1c227e7d", "metadata": {}, "outputs": [], "source": [ "from 
iis_project.mlxtend_utils.feature_selection import SEQUENTIAL_FEATURE_SELECTOR_PARAMS_COMMON_INCLUDE\n", "from iis_project.sklearn_utils import filter_params\n", "from iis_project.sklearn_utils.compose import COLUMN_TRANSFORMER_PARAMS_COMMON_INCLUDE\n", "from iis_project.sklearn_utils.ensemble import RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE\n", "from iis_project.sklearn_utils.pandas import pandas_dataframe_from_transformed_artifacts\n", "from iis_project.sklearn_utils.preprocessing import STANDARD_SCALER_PARAMS_COMMON_EXCLUDE" ] }, { "cell_type": "code", "execution_count": 9, "id": "0b847527", "metadata": {}, "outputs": [], "source": [ "MODEL_INOUT_EXAMPLE_SIZE = 0x10" ] }, { "cell_type": "code", "execution_count": 10, "id": "2a3a7a2e", "metadata": {}, "outputs": [], "source": [ "mlflow.set_tracking_uri(mlflow_tracking_server_uri)\n", "if mlflow_registry_uri is not None:\n", " mlflow.set_registry_uri(mlflow_registry_uri)" ] }, { "cell_type": "code", "execution_count": 11, "id": "4f60bfaa", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025/11/02 01:54:17 INFO mlflow.tracking.fluent: Experiment with name 'Current price predicion for used cars' does not exist. 
Creating a new experiment.\n" ] } ], "source": [ "if mlflow_do_log:\n", " mlflow_experiment = mlflow.set_experiment(experiment_name=mlflow_experiment_name, experiment_id=mlflow_experiment_id)\n", " mlflow_root_run_id = None # изменяется позже" ] }, { "cell_type": "code", "execution_count": 12, "id": "97d23eb9", "metadata": {}, "outputs": [], "source": [ "DATA_PATH = (\n", " pathlib.Path(os.path.dirname(data_aug_pickle_path))\n", " if data_aug_pickle_path is not None\n", " else (BASE_PATH / 'data')\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "id": "493e9bd3-463c-41f6-b32f-f08bdf4a6323", "metadata": {}, "outputs": [], "source": [ "def build_sequential_feature_selector(*args, **kwargs):\n", " return mlxtend.feature_selection.SequentialFeatureSelector(*args, **kwargs)\n", "\n", "def plot_sequential_feature_selection(feature_selector, *args_rest, **kwargs):\n", " metric_dict = feature_selector.get_metric_dict()\n", " return mlxtend.plotting.plot_sequential_feature_selection(metric_dict, *args_rest, **kwargs)" ] }, { "cell_type": "markdown", "id": "4b20cbda", "metadata": {}, "source": [ "## Загрузка и обзор данных" ] }, { "cell_type": "code", "execution_count": 14, "id": "e2b45fd1", "metadata": {}, "outputs": [], "source": [ "with open(\n", " (\n", " data_aug_pickle_path\n", " if data_aug_pickle_path is not None\n", " else (DATA_PATH / data_aug_pickle_relpath)\n", " ),\n", " 'rb',\n", ") as input_file:\n", " df_orig = pickle.load(input_file)" ] }, { "cell_type": "markdown", "id": "c3ef97d1", "metadata": {}, "source": [ "Обзор датасета:" ] }, { "cell_type": "code", "execution_count": 15, "id": "d45da024", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "299" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_orig)" ] }, { "cell_type": "code", "execution_count": 16, "id": "75b0feea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 299 entries, 0 to 
300\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 car_name 299 non-null object \n", " 1 year 299 non-null int64 \n", " 2 selling_price 299 non-null float64 \n", " 3 present_price 299 non-null float64 \n", " 4 driven_kms 299 non-null int64 \n", " 5 fuel_type 299 non-null category\n", " 6 selling_type 299 non-null category\n", " 7 transmission 299 non-null category\n", " 8 owner 299 non-null category\n", " 9 age 299 non-null float64 \n", " 10 present_price_ratio 299 non-null float64 \n", " 11 log_selling_price 299 non-null float64 \n", " 12 log_present_price 299 non-null float64 \n", " 13 log_driven_kms 299 non-null float64 \n", " 14 log_age 299 non-null float64 \n", "dtypes: category(4), float64(8), int64(2), object(1)\n", "memory usage: 29.3+ KB\n" ] } ], "source": [ "df_orig.info()" ] }, { "cell_type": "code", "execution_count": 17, "id": "5b336654", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
car_nameyearselling_pricepresent_pricedriven_kmsfuel_typeselling_typetransmissionowneragepresent_price_ratiolog_selling_pricelog_present_pricelog_driven_kmslog_age
0ritz20145.593.3527000petroldealermanual05.00.5992840.7474120.5250454.4313640.698970
1sx420139.544.7543000dieseldealermanual06.00.4979040.9795480.6766944.6334680.778151
2ciaz20179.857.256900petroldealermanual02.00.7360410.9934360.8603383.8388490.301030
3wagon r20114.152.855200petroldealermanual08.00.6867470.6180480.4548453.7160030.903090
4swift20146.874.6042450dieseldealermanual05.00.6695780.8369570.6627584.6278780.698970
5vitara brezza20189.839.252071dieseldealermanual01.00.9409970.9925540.9661423.3161800.000000
6ciaz20158.126.7518796petroldealermanual04.00.8312810.9095560.8293044.2740650.602060
7s cross20158.616.5033429dieseldealermanual04.00.7549360.9350030.8129134.5241230.602060
8ciaz20168.898.7520273dieseldealermanual03.00.9842520.9489020.9420084.3069180.477121
9ciaz20158.927.4542367dieseldealermanual04.00.8352020.9503650.8721564.6270280.602060
10alto 80020173.602.852135petroldealermanual02.00.7916670.5563030.4548453.3293980.301030
11ciaz201510.386.8551000dieseldealermanual04.00.6599231.0161970.8356914.7075700.602060
12ciaz20159.947.5015000petroldealerautomatic04.00.7545270.9973860.8750614.1760910.602060
13ertiga20157.716.1026000petroldealermanual04.00.7911800.8870540.7853304.4149730.602060
14dzire20097.212.2577427petroldealermanual010.00.3120670.8579350.3521834.8888921.000000
15ertiga201610.797.7543000dieseldealermanual03.00.7182581.0330210.8893024.6334680.477121
\n", "
" ], "text/plain": [ " car_name year selling_price present_price driven_kms fuel_type \\\n", "0 ritz 2014 5.59 3.35 27000 petrol \n", "1 sx4 2013 9.54 4.75 43000 diesel \n", "2 ciaz 2017 9.85 7.25 6900 petrol \n", "3 wagon r 2011 4.15 2.85 5200 petrol \n", "4 swift 2014 6.87 4.60 42450 diesel \n", "5 vitara brezza 2018 9.83 9.25 2071 diesel \n", "6 ciaz 2015 8.12 6.75 18796 petrol \n", "7 s cross 2015 8.61 6.50 33429 diesel \n", "8 ciaz 2016 8.89 8.75 20273 diesel \n", "9 ciaz 2015 8.92 7.45 42367 diesel \n", "10 alto 800 2017 3.60 2.85 2135 petrol \n", "11 ciaz 2015 10.38 6.85 51000 diesel \n", "12 ciaz 2015 9.94 7.50 15000 petrol \n", "13 ertiga 2015 7.71 6.10 26000 petrol \n", "14 dzire 2009 7.21 2.25 77427 petrol \n", "15 ertiga 2016 10.79 7.75 43000 diesel \n", "\n", " selling_type transmission owner age present_price_ratio \\\n", "0 dealer manual 0 5.0 0.599284 \n", "1 dealer manual 0 6.0 0.497904 \n", "2 dealer manual 0 2.0 0.736041 \n", "3 dealer manual 0 8.0 0.686747 \n", "4 dealer manual 0 5.0 0.669578 \n", "5 dealer manual 0 1.0 0.940997 \n", "6 dealer manual 0 4.0 0.831281 \n", "7 dealer manual 0 4.0 0.754936 \n", "8 dealer manual 0 3.0 0.984252 \n", "9 dealer manual 0 4.0 0.835202 \n", "10 dealer manual 0 2.0 0.791667 \n", "11 dealer manual 0 4.0 0.659923 \n", "12 dealer automatic 0 4.0 0.754527 \n", "13 dealer manual 0 4.0 0.791180 \n", "14 dealer manual 0 10.0 0.312067 \n", "15 dealer manual 0 3.0 0.718258 \n", "\n", " log_selling_price log_present_price log_driven_kms log_age \n", "0 0.747412 0.525045 4.431364 0.698970 \n", "1 0.979548 0.676694 4.633468 0.778151 \n", "2 0.993436 0.860338 3.838849 0.301030 \n", "3 0.618048 0.454845 3.716003 0.903090 \n", "4 0.836957 0.662758 4.627878 0.698970 \n", "5 0.992554 0.966142 3.316180 0.000000 \n", "6 0.909556 0.829304 4.274065 0.602060 \n", "7 0.935003 0.812913 4.524123 0.602060 \n", "8 0.948902 0.942008 4.306918 0.477121 \n", "9 0.950365 0.872156 4.627028 0.602060 \n", "10 0.556303 0.454845 3.329398 
0.301030 \n", "11 1.016197 0.835691 4.707570 0.602060 \n", "12 0.997386 0.875061 4.176091 0.602060 \n", "13 0.887054 0.785330 4.414973 0.602060 \n", "14 0.857935 0.352183 4.888892 1.000000 \n", "15 1.033021 0.889302 4.633468 0.477121 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_orig.head(0x10)" ] }, { "cell_type": "markdown", "id": "e39f88d0", "metadata": {}, "source": [ "## Разделение датасета на выборки" ] }, { "cell_type": "markdown", "id": "df5b723b", "metadata": {}, "source": [ "Выделение признаков и целевых переменных:" ] }, { "cell_type": "code", "execution_count": 18, "id": "7a24a133", "metadata": {}, "outputs": [], "source": [ "feature_columns = (\n", " 'selling_price',\n", " 'driven_kms',\n", " 'fuel_type',\n", " 'selling_type',\n", " 'transmission',\n", " #'owner',\n", " 'age',\n", ")\n", "\n", "target_columns = (\n", " 'present_price',\n", ")" ] }, { "cell_type": "code", "execution_count": 19, "id": "f527d556", "metadata": {}, "outputs": [], "source": [ "features_to_scale_to_standard_columns = (\n", " 'selling_price',\n", " 'driven_kms',\n", " 'age',\n", ")\n", "assert all(\n", " (col in df_orig.select_dtypes(('number',)).columns)\n", " for col in features_to_scale_to_standard_columns\n", ")\n", "\n", "features_to_encode_wrt_target_columns = (\n", " 'fuel_type',\n", " 'selling_type',\n", " 'transmission',\n", " #'owner',\n", ")\n", "assert all(\n", " (col in df_orig.select_dtypes(('category', 'object')).columns)\n", " for col in features_to_encode_wrt_target_columns\n", ")" ] }, { "cell_type": "code", "execution_count": 20, "id": "8ce8c469", "metadata": {}, "outputs": [], "source": [ "df_orig_features = df_orig[list(feature_columns)]\n", "df_target = df_orig[list(target_columns)]" ] }, { "cell_type": "markdown", "id": "c82f9d7a", "metadata": {}, "source": [ "Разделение на обучающую и тестовую выборки:" ] }, { "cell_type": "code", "execution_count": 21, "id": "c9ba918e", "metadata": {}, "outputs": [], 
"source": [ "DF_TEST_PORTION = 0.25" ] }, { "cell_type": "code", "execution_count": 22, "id": "0147b1d6", "metadata": {}, "outputs": [], "source": [ "df_orig_features_train, df_orig_features_test, df_target_train, df_target_test = (\n", " sklearn.model_selection.train_test_split(\n", " df_orig_features, df_target, test_size=DF_TEST_PORTION, random_state=0x7AE6,\n", " )\n", ")" ] }, { "cell_type": "markdown", "id": "2f2e9fad", "metadata": {}, "source": [ "Размеры обучающей и тестовой выборки соответственно:" ] }, { "cell_type": "code", "execution_count": 23, "id": "dc58ff10", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(224, 75)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tuple(map(len, (df_target_train, df_target_test)))" ] }, { "cell_type": "markdown", "id": "d9ddbdc7", "metadata": {}, "source": [ "## Модели" ] }, { "cell_type": "code", "execution_count": 24, "id": "97a58917", "metadata": {}, "outputs": [], "source": [ "# XXX: один файл requirements для всех моделей\n", "MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt'" ] }, { "cell_type": "markdown", "id": "4639cc98", "metadata": {}, "source": [ "Сигнатура модели для MLFlow:" ] }, { "cell_type": "code", "execution_count": 25, "id": "a78986be", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "D:\\studying\\university\\projects\\sem_03_iis\\mpei-iis-project\\.venv\\Lib\\site-packages\\mlflow\\types\\utils.py:452: UserWarning: Hint: Inferred schema contains integer column(s). Integer columns in Python cannot represent missing values. If your input data contains missing values at inference time, it will be encoded as floats and will cause a schema enforcement error. The best way to avoid this problem is to infer the model schema based on a realistic data sample (training dataset) that includes missing values. 
Alternatively, you can declare integer columns as doubles (float64) whenever these columns may have missing values. See `Handling Integers With Missing Values `_ for more details.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "inputs: \n", " ['selling_price': double (required), 'driven_kms': long (required), 'fuel_type': string (required), 'selling_type': string (required), 'transmission': string (required), 'age': double (required)]\n", "outputs: \n", " ['present_price': double (required)]\n", "params: \n", " None" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mlflow_model_signature = mlflow.models.infer_signature(model_input=df_orig_features, model_output=df_target)\n", "mlflow_model_signature" ] }, { "cell_type": "raw", "id": "691d63bf", "metadata": { "vscode": { "languageId": "raw" } }, "source": [ "input_schema = mlflow.types.schema.Schema([\n", " mlflow.types.schema.ColSpec(\"double\", \"selling_price\"),\n", " mlflow.types.schema.ColSpec(\"double\", \"driven_kms\"),\n", " mlflow.types.schema.ColSpec(\"string\", \"fuel_type\"),\n", " mlflow.types.schema.ColSpec(\"string\", \"selling_type\"),\n", " mlflow.types.schema.ColSpec(\"string\", \"transmission\"),\n", " mlflow.types.schema.ColSpec(\"double\", \"age\"),\n", "])\n", "\n", "output_schema = mlflow.types.schema.Schema([\n", " mlflow.types.schema.ColSpec(\"double\", \"present_price\"),\n", "])\n", "\n", "mlflow_model_signature = mlflow.models.ModelSignature(inputs=input_schema, outputs=output_schema)" ] }, { "cell_type": "code", "execution_count": 26, "id": "e014b988", "metadata": {}, "outputs": [], "source": [ "def build_features_scaler_standard():\n", " return sklearn.preprocessing.StandardScaler()" ] }, { "cell_type": "code", "execution_count": 27, "id": "4e513ece", "metadata": {}, "outputs": [], "source": [ "#def build_categorical_features_encoder_onehot():\n", "# return sklearn.preprocessing.OneHotEncoder()\n", "\n", "def 
build_categorical_features_encoder_target(*, random_state=None):\n", " return sklearn.preprocessing.TargetEncoder(\n", " target_type='continuous', smooth='auto', shuffle=True, random_state=random_state,\n", " )" ] }, { "cell_type": "markdown", "id": "814626b7", "metadata": {}, "source": [ "Регрессор — небольшой случайный лес, цель — минимизация квадрата ошибки предсказания:" ] }, { "cell_type": "code", "execution_count": 28, "id": "e46bedcf", "metadata": {}, "outputs": [], "source": [ "def build_regressor(n_estimators, *, max_depth=None, max_features='sqrt', random_state=None):\n", " return sklearn.ensemble.RandomForestRegressor(\n", " n_estimators, criterion='squared_error',\n", " max_depth=max_depth, max_features=max_features,\n", " random_state=random_state,\n", " )\n", "\n", "def build_regressor_baseline(*, random_state=None):\n", " return build_regressor(10, max_depth=8, max_features='sqrt')" ] }, { "cell_type": "code", "execution_count": 29, "id": "e3d8b2f0-e0cd-4fcf-9dd2-bd01f903b9ad", "metadata": {}, "outputs": [], "source": [ "def score_predictions(target_test, target_test_predicted):\n", " return {\n", " 'mse': sklearn.metrics.mean_squared_error(target_test, target_test_predicted),\n", " 'mae': sklearn.metrics.mean_absolute_error(target_test, target_test_predicted),\n", " 'mape': sklearn.metrics.mean_absolute_percentage_error(target_test, target_test_predicted),\n", " }" ] }, { "cell_type": "code", "execution_count": 30, "id": "b62aca3d-d1c6-4075-aded-d4017bdc2129", "metadata": {}, "outputs": [], "source": [ "# использует глобальные переменные mlflow_do_log, mlflow_experiment, mlflow_root_run_name\n", "def mlflow_log_model(\n", " model,\n", " model_params,\n", " metrics,\n", " *,\n", " nested_run_name,\n", " model_signature=None,\n", " input_example=None,\n", " pip_requirements=None,\n", " #global_comment_file_path=None,\n", " extra_logs_handler=None,\n", "):\n", " global mlflow_root_run_id\n", " if not mlflow_do_log:\n", " return\n", " experiment_id = 
mlflow_experiment.experiment_id\n", " start_run_root_kwargs_extra = {}\n", " if mlflow_root_run_id is not None:\n", " start_run_root_kwargs_extra['run_id'] = mlflow_root_run_id\n", " else:\n", " start_run_root_kwargs_extra['run_name'] = mlflow_root_run_name\n", " with mlflow.start_run(experiment_id=experiment_id, **start_run_root_kwargs_extra) as root_run:\n", " if root_run.info.status not in ('RUNNING',):\n", " raise RuntimeError('Cannot get the root run to run')\n", " if mlflow_root_run_id is None:\n", " mlflow_root_run_id = root_run.info.run_id\n", " # важно одновременно использовать nested=True и parent_run_id=...:\n", " with mlflow.start_run(experiment_id=experiment_id, run_name=nested_run_name, nested=True, parent_run_id=mlflow_root_run_id):\n", " if isinstance(pip_requirements, pathlib.PurePath):\n", " pip_requirements = str(pip_requirements)\n", " _ = mlflow.sklearn.log_model(\n", " model,\n", " 'model',\n", " signature=model_signature,\n", " input_example=input_example,\n", " pip_requirements=pip_requirements,\n", " )\n", " if model_params is not None:\n", " _ = mlflow.log_params(model_params)\n", " if metrics is not None:\n", " _ = mlflow.log_metrics(metrics)\n", " #if (global_comment_file_path is not None) and global_comment_file_path.exists():\n", " # mlflow.log_artifact(str(global_comment_file_path))\n", " if extra_logs_handler is not None:\n", " if callable(extra_logs_handler) and (not isinstance(extra_logs_handler, Collection)):\n", " extra_logs_handler = (extra_logs_handler,)\n", " for extr_logs_handler_fn in extra_logs_handler:\n", " extr_logs_handler_fn(mlflow)" ] }, { "cell_type": "markdown", "id": "9271ef07", "metadata": {}, "source": [ "### Baseline модель" ] }, { "cell_type": "markdown", "id": "80a5e4c5", "metadata": {}, "source": [ "Пайплайн предобработки признаков:" ] }, { "cell_type": "code", "execution_count": 31, "id": "869bae01", "metadata": {}, "outputs": [], "source": [ "preprocess_transformer = sklearn.compose.ColumnTransformer(\n", " 
[\n", " ('scale_to_standard', build_features_scaler_standard(), features_to_scale_to_standard_columns),\n", " (\n", " #'encode_categoricals_one_hot',\n", " 'encode_categoricals_wrt_target',\n", " #build_categorical_features_encoder_onehot(),\n", " build_categorical_features_encoder_target(random_state=0x2ED6),\n", " features_to_encode_wrt_target_columns,\n", " ),\n", " ],\n", " remainder='drop',\n", ")" ] }, { "cell_type": "code", "execution_count": 32, "id": "8959cb29", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=10)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor = build_regressor_baseline(random_state=0x016B)\n", "regressor" ] }, { "cell_type": "markdown", "id": "cb0f1a67", "metadata": {}, "source": [ "Составной пайплайн:" ] }, { "cell_type": "code", "execution_count": 33, "id": "2ef69753", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('preprocess',\n",
       "                 ColumnTransformer(transformers=[('scale_to_standard',\n",
       "                                                  StandardScaler(),\n",
       "                                                  ('selling_price',\n",
       "                                                   'driven_kms', 'age')),\n",
       "                                                 ('encode_categoricals_wrt_target',\n",
       "                                                  TargetEncoder(random_state=11990,\n",
       "                                                                target_type='continuous'),\n",
       "                                                  ('fuel_type', 'selling_type',\n",
       "                                                   'transmission'))])),\n",
       "                ('regress',\n",
       "                 RandomForestRegressor(max_depth=8, max_features='sqrt',\n",
       "                                       n_estimators=10))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('scale_to_standard',\n", " StandardScaler(),\n", " ('selling_price',\n", " 'driven_kms', 'age')),\n", " ('encode_categoricals_wrt_target',\n", " TargetEncoder(random_state=11990,\n", " target_type='continuous'),\n", " ('fuel_type', 'selling_type',\n", " 'transmission'))])),\n", " ('regress',\n", " RandomForestRegressor(max_depth=8, max_features='sqrt',\n", " n_estimators=10))])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline = sklearn.pipeline.Pipeline([\n", " ('preprocess', preprocess_transformer),\n", " ('regress', regressor),\n", "])\n", "pipeline" ] }, { "cell_type": "code", "execution_count": 34, "id": "a38b50f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'preprocess__remainder': 'drop',\n", " 'preprocess__sparse_threshold': 0.3,\n", " 'preprocess__transformer_weights': None,\n", " 'preprocess__scale_to_standard__with_mean': True,\n", " 'preprocess__scale_to_standard__with_std': True,\n", " 'regress__bootstrap': True,\n", " 'regress__ccp_alpha': 0.0,\n", " 'regress__criterion': 'squared_error',\n", " 'regress__max_depth': 8,\n", " 'regress__max_features': 'sqrt',\n", " 'regress__max_leaf_nodes': None,\n", " 'regress__max_samples': None,\n", " 'regress__min_impurity_decrease': 0.0,\n", " 'regress__min_samples_leaf': 1,\n", " 'regress__min_samples_split': 2,\n", " 'regress__min_weight_fraction_leaf': 0.0,\n", " 'regress__monotonic_cst': None,\n", " 'regress__n_estimators': 10,\n", " 'regress__oob_score': False,\n", " 'regress__random_state': None}" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_params = filter_params(\n", " pipeline.get_params(),\n", " include={\n", " 'preprocess': (\n", " False,\n", " {\n", " **{k: True for k in COLUMN_TRANSFORMER_PARAMS_COMMON_INCLUDE},\n", " 'scale_to_standard': True,\n", " 'encode_categorical_wrt_target': 
True,\n", " },\n", " ),\n", " 'regress': (False, True),\n", " },\n", " exclude={\n", " 'preprocess': {'scale_to_standard': STANDARD_SCALER_PARAMS_COMMON_EXCLUDE},\n", " 'regress': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,\n", " },\n", ")\n", "model_params" ] }, { "cell_type": "markdown", "id": "4064c359", "metadata": {}, "source": [ "Обучение модели:" ] }, { "cell_type": "code", "execution_count": 35, "id": "9639f2f4", "metadata": {}, "outputs": [], "source": [ "_ = pipeline.fit(df_orig_features_train, df_target_train.iloc[:, 0])" ] }, { "cell_type": "markdown", "id": "d385bf67", "metadata": {}, "source": [ "Оценка качества:" ] }, { "cell_type": "code", "execution_count": 36, "id": "c15e4e08", "metadata": {}, "outputs": [], "source": [ "target_test_predicted = pipeline.predict(df_orig_features_test)" ] }, { "cell_type": "markdown", "id": "24e1b454", "metadata": {}, "source": [ "Метрики качества (MAPE, а также MSE, MAE):" ] }, { "cell_type": "code", "execution_count": 37, "id": "ec74bb87", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mse': 1.1769122812432413,\n", " 'mae': 0.7433282022345273,\n", " 'mape': 0.3469466962984192}" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics = score_predictions(df_target_test, target_test_predicted)\n", "metrics" ] }, { "cell_type": "code", "execution_count": 38, "id": "1f6b1ca5", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9ebfedda037646158f6e4acd2cbab0e5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading artifacts: 0%| | 0/7 [00:00#sk-container-id-3 {\n", " /* Definition of color scheme common for light and dark mode */\n", " --sklearn-color-text: #000;\n", " --sklearn-color-text-muted: #666;\n", " --sklearn-color-line: gray;\n", " /* Definition of color scheme for unfitted estimators */\n", " --sklearn-color-unfitted-level-0: #fff5e6;\n", " --sklearn-color-unfitted-level-1: 
#f6e4d2;\n", " --sklearn-color-unfitted-level-2: #ffe0b3;\n", " --sklearn-color-unfitted-level-3: chocolate;\n", " /* Definition of color scheme for fitted estimators */\n", " --sklearn-color-fitted-level-0: #f0f8ff;\n", " --sklearn-color-fitted-level-1: #d4ebff;\n", " --sklearn-color-fitted-level-2: #b3dbfd;\n", " --sklearn-color-fitted-level-3: cornflowerblue;\n", "\n", " /* Specific color for light theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-icon: #696969;\n", "\n", " @media (prefers-color-scheme: dark) {\n", " /* Redefinition of color scheme for dark theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-icon: #878787;\n", " }\n", "}\n", "\n", "#sk-container-id-3 {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "#sk-container-id-3 pre {\n", " padding: 0;\n", "}\n", "\n", "#sk-container-id-3 input.sk-hidden--visually {\n", " border: 0;\n", " clip: rect(1px 1px 1px 1px);\n", " clip: rect(1px, 1px, 1px, 1px);\n", " height: 1px;\n", " margin: -1px;\n", " overflow: hidden;\n", " padding: 0;\n", " position: absolute;\n", " width: 1px;\n", "}\n", "\n", "#sk-container-id-3 div.sk-dashed-wrapped {\n", " border: 1px dashed var(--sklearn-color-line);\n", " margin: 0 0.4em 0.5em 0.4em;\n", " box-sizing: border-box;\n", " padding-bottom: 0.4em;\n", " 
background-color: var(--sklearn-color-background);\n", "}\n", "\n", "#sk-container-id-3 div.sk-container {\n", " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", " so we also need the `!important` here to be able to override the\n", " default hidden behavior on the sphinx rendered scikit-learn.org.\n", " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", " display: inline-block !important;\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-3 div.sk-text-repr-fallback {\n", " display: none;\n", "}\n", "\n", "div.sk-parallel-item,\n", "div.sk-serial,\n", "div.sk-item {\n", " /* draw centered vertical line to link estimators */\n", " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", " background-size: 2px 100%;\n", " background-repeat: no-repeat;\n", " background-position: center center;\n", "}\n", "\n", "/* Parallel-specific style estimator block */\n", "\n", "#sk-container-id-3 div.sk-parallel-item::after {\n", " content: \"\";\n", " width: 100%;\n", " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", " flex-grow: 1;\n", "}\n", "\n", "#sk-container-id-3 div.sk-parallel {\n", " display: flex;\n", " align-items: stretch;\n", " justify-content: center;\n", " background-color: var(--sklearn-color-background);\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-3 div.sk-parallel-item {\n", " display: flex;\n", " flex-direction: column;\n", "}\n", "\n", "#sk-container-id-3 div.sk-parallel-item:first-child::after {\n", " align-self: flex-end;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-3 div.sk-parallel-item:last-child::after {\n", " align-self: flex-start;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-3 div.sk-parallel-item:only-child::after {\n", " width: 0;\n", "}\n", "\n", "/* Serial-specific style estimator 
block */\n", "\n", "#sk-container-id-3 div.sk-serial {\n", " display: flex;\n", " flex-direction: column;\n", " align-items: center;\n", " background-color: var(--sklearn-color-background);\n", " padding-right: 1em;\n", " padding-left: 1em;\n", "}\n", "\n", "\n", "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", "clickable and can be expanded/collapsed.\n", "- Pipeline and ColumnTransformer use this feature and define the default style\n", "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", "*/\n", "\n", "/* Pipeline and ColumnTransformer style (default) */\n", "\n", "#sk-container-id-3 div.sk-toggleable {\n", " /* Default theme specific background. It is overwritten whether we have a\n", " specific estimator or a Pipeline/ColumnTransformer */\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "/* Toggleable label */\n", "#sk-container-id-3 label.sk-toggleable__label {\n", " cursor: pointer;\n", " display: flex;\n", " width: 100%;\n", " margin-bottom: 0;\n", " padding: 0.5em;\n", " box-sizing: border-box;\n", " text-align: center;\n", " align-items: start;\n", " justify-content: space-between;\n", " gap: 0.5em;\n", "}\n", "\n", "#sk-container-id-3 label.sk-toggleable__label .caption {\n", " font-size: 0.6rem;\n", " font-weight: lighter;\n", " color: var(--sklearn-color-text-muted);\n", "}\n", "\n", "#sk-container-id-3 label.sk-toggleable__label-arrow:before {\n", " /* Arrow on the left of the label */\n", " content: \"▸\";\n", " float: left;\n", " margin-right: 0.25em;\n", " color: var(--sklearn-color-icon);\n", "}\n", "\n", "#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "/* Toggleable content - dropdown */\n", "\n", "#sk-container-id-3 div.sk-toggleable__content {\n", " display: none;\n", " text-align: left;\n", " /* unfitted */\n", " background-color: 
var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-3 div.sk-toggleable__content.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-3 div.sk-toggleable__content pre {\n", " margin: 0.2em;\n", " border-radius: 0.25em;\n", " color: var(--sklearn-color-text);\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-3 div.sk-toggleable__content.fitted pre {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", " /* Expand drop-down */\n", " display: block;\n", " width: 100%;\n", " overflow: visible;\n", "}\n", "\n", "#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", " content: \"▾\";\n", "}\n", "\n", "/* Pipeline/ColumnTransformer-specific style */\n", "\n", "#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-3 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator-specific style */\n", "\n", "/* Colorize estimator box */\n", "#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-3 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "#sk-container-id-3 div.sk-label label.sk-toggleable__label,\n", "#sk-container-id-3 div.sk-label 
label {\n", " /* The background is the default theme color */\n", " color: var(--sklearn-color-text-on-default-background);\n", "}\n", "\n", "/* On hover, darken the color of the background */\n", "#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "/* Label box, darken color on hover, fitted */\n", "#sk-container-id-3 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator label */\n", "\n", "#sk-container-id-3 div.sk-label label {\n", " font-family: monospace;\n", " font-weight: bold;\n", " display: inline-block;\n", " line-height: 1.2em;\n", "}\n", "\n", "#sk-container-id-3 div.sk-label-container {\n", " text-align: center;\n", "}\n", "\n", "/* Estimator-specific */\n", "#sk-container-id-3 div.sk-estimator {\n", " font-family: monospace;\n", " border: 1px dotted var(--sklearn-color-border-box);\n", " border-radius: 0.25em;\n", " box-sizing: border-box;\n", " margin-bottom: 0.5em;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-3 div.sk-estimator.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "/* on hover */\n", "#sk-container-id-3 div.sk-estimator:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-3 div.sk-estimator.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", "\n", "/* Common style for \"i\" and \"?\" */\n", "\n", ".sk-estimator-doc-link,\n", "a:link.sk-estimator-doc-link,\n", "a:visited.sk-estimator-doc-link {\n", " float: right;\n", " font-size: smaller;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1em;\n", " height: 1em;\n", " width: 1em;\n", " text-decoration: none !important;\n", " margin-left: 0.5em;\n", " text-align: center;\n", " /* unfitted */\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-unfitted-level-1);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted,\n", "a:link.sk-estimator-doc-link.fitted,\n", "a:visited.sk-estimator-doc-link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "/* Span, style for the box shown on hovering the info icon */\n", ".sk-estimator-doc-link span {\n", " display: none;\n", " z-index: 9999;\n", " position: relative;\n", " font-weight: normal;\n", " right: .2ex;\n", " padding: .5ex;\n", " margin: .5ex;\n", " width: min-content;\n", " min-width: 20ex;\n", " 
max-width: 50ex;\n", " color: var(--sklearn-color-text);\n", " box-shadow: 2pt 2pt 4pt #999;\n", " /* unfitted */\n", " background: var(--sklearn-color-unfitted-level-0);\n", " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted span {\n", " /* fitted */\n", " background: var(--sklearn-color-fitted-level-0);\n", " border: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link:hover span {\n", " display: block;\n", "}\n", "\n", "/* \"?\"-specific style due to the `` HTML tag */\n", "\n", "#sk-container-id-3 a.estimator_doc_link {\n", " float: right;\n", " font-size: 1rem;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1rem;\n", " height: 1rem;\n", " width: 1rem;\n", " text-decoration: none;\n", " /* unfitted */\n", " color: var(--sklearn-color-unfitted-level-1);\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", "}\n", "\n", "#sk-container-id-3 a.estimator_doc_link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "#sk-container-id-3 a.estimator_doc_link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "#sk-container-id-3 a.estimator_doc_link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".estimator-table summary {\n", " padding: .5rem;\n", " font-family: monospace;\n", " cursor: pointer;\n", "}\n", "\n", ".estimator-table details[open] {\n", " padding-left: 0.1rem;\n", " padding-right: 0.1rem;\n", " padding-bottom: 0.3rem;\n", "}\n", "\n", ".estimator-table .parameters-table {\n", " margin-left: auto !important;\n", " margin-right: auto !important;\n", "}\n", "\n", 
".estimator-table .parameters-table tr:nth-child(odd) {\n", " background-color: #fff;\n", "}\n", "\n", ".estimator-table .parameters-table tr:nth-child(even) {\n", " background-color: #f6f6f6;\n", "}\n", "\n", ".estimator-table .parameters-table tr:hover {\n", " background-color: #e0e0e0;\n", "}\n", "\n", ".estimator-table table td {\n", " border: 1px solid rgba(106, 105, 104, 0.232);\n", "}\n", "\n", ".user-set td {\n", " color:rgb(255, 94, 0);\n", " text-align: left;\n", "}\n", "\n", ".user-set td.value pre {\n", " color:rgb(255, 94, 0) !important;\n", " background-color: transparent !important;\n", "}\n", "\n", ".default td {\n", " color: black;\n", " text-align: left;\n", "}\n", "\n", ".user-set td i,\n", ".default td i {\n", " color: black;\n", "}\n", "\n", ".copy-paste-icon {\n", " background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n", " background-repeat: no-repeat;\n", " background-size: 14px 14px;\n", " background-position: 0;\n", " display: inline-block;\n", " width: 14px;\n", " height: 14px;\n", " cursor: pointer;\n", "}\n", "
ColumnTransformer(transformers=[('extend_features_as_polynomial',\n",
       "                                 Pipeline(steps=[('extend_features',\n",
       "                                                  PolynomialFeatures(include_bias=False)),\n",
       "                                                 ('scale_to_standard',\n",
       "                                                  StandardScaler())]),\n",
       "                                 ('selling_price', 'driven_kms')),\n",
       "                                ('extend_features_as_spline',\n",
       "                                 SplineTransformer(include_bias=False,\n",
       "                                                   knots='quantile',\n",
       "                                                   n_knots=4),\n",
       "                                 ('age',)),\n",
       "                                ('scale_to_standard', StandardScaler(),\n",
       "                                 ('age',)),\n",
       "                                ('encode_categoricals_wrt_target',\n",
       "                                 TargetEncoder(random_state=11990,\n",
       "                                               target_type='continuous'),\n",
       "                                 ('fuel_type', 'selling_type',\n",
       "                                  'transmission'))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "ColumnTransformer(transformers=[('extend_features_as_polynomial',\n", " Pipeline(steps=[('extend_features',\n", " PolynomialFeatures(include_bias=False)),\n", " ('scale_to_standard',\n", " StandardScaler())]),\n", " ('selling_price', 'driven_kms')),\n", " ('extend_features_as_spline',\n", " SplineTransformer(include_bias=False,\n", " knots='quantile',\n", " n_knots=4),\n", " ('age',)),\n", " ('scale_to_standard', StandardScaler(),\n", " ('age',)),\n", " ('encode_categoricals_wrt_target',\n", " TargetEncoder(random_state=11990,\n", " target_type='continuous'),\n", " ('fuel_type', 'selling_type',\n", " 'transmission'))])" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preprocess_transformer = build_preprocess_augmenting_transformer()\n", "preprocess_transformer" ] }, { "cell_type": "markdown", "id": "0c041b34-bd18-4f26-b6cb-9f4567a4bc65", "metadata": {}, "source": [ "Демонстрация предобработки данных:" ] }, { "cell_type": "code", "execution_count": 43, "id": "df3207ab-36ea-417d-b10c-05145d6e3777", "metadata": {}, "outputs": [], "source": [ "preprocess_transformer_tmp = build_preprocess_augmenting_transformer()\n", "df_augd_features_matrix_train = preprocess_transformer_tmp.fit_transform(df_orig_features_train, df_target_train.iloc[:, 0])\n", "df_augd_features_train = pandas_dataframe_from_transformed_artifacts(df_augd_features_matrix_train, preprocess_transformer_tmp)\n", "del preprocess_transformer_tmp" ] }, { "cell_type": "markdown", "id": "41cc8af7-56d8-4d2d-b536-cbff20bb2545", "metadata": {}, "source": [ "Обзор предобработанного датасета:" ] }, { "cell_type": "code", "execution_count": 44, "id": "ec3f4c72-edea-4260-9a26-9b0d57628e9e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 224 entries, 0 to 223\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 
extend_features_as_polynomial__selling_price 224 non-null float64\n", " 1 extend_features_as_polynomial__driven_kms 224 non-null float64\n", " 2 extend_features_as_polynomial__selling_price^2 224 non-null float64\n", " 3 extend_features_as_polynomial__selling_price driven_kms 224 non-null float64\n", " 4 extend_features_as_polynomial__driven_kms^2 224 non-null float64\n", " 5 extend_features_as_spline__age_sp_0 224 non-null float64\n", " 6 extend_features_as_spline__age_sp_1 224 non-null float64\n", " 7 extend_features_as_spline__age_sp_2 224 non-null float64\n", " 8 extend_features_as_spline__age_sp_3 224 non-null float64\n", " 9 extend_features_as_spline__age_sp_4 224 non-null float64\n", " 10 scale_to_standard__age 224 non-null float64\n", " 11 encode_categoricals_wrt_target__fuel_type 224 non-null float64\n", " 12 encode_categoricals_wrt_target__selling_type 224 non-null float64\n", " 13 encode_categoricals_wrt_target__transmission 224 non-null float64\n", "dtypes: float64(14)\n", "memory usage: 24.6 KB\n" ] } ], "source": [ "df_augd_features_train.info()" ] }, { "cell_type": "code", "execution_count": 45, "id": "393d1826-963c-4479-b004-6fedf2f6dc77", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
extend_features_as_polynomial__selling_priceextend_features_as_polynomial__driven_kmsextend_features_as_polynomial__selling_price^2extend_features_as_polynomial__selling_price driven_kmsextend_features_as_polynomial__driven_kms^2extend_features_as_spline__age_sp_0extend_features_as_spline__age_sp_1extend_features_as_spline__age_sp_2extend_features_as_spline__age_sp_3extend_features_as_spline__age_sp_4scale_to_standard__ageencode_categoricals_wrt_target__fuel_typeencode_categoricals_wrt_target__selling_typeencode_categoricals_wrt_target__transmission
0-0.104244-0.059337-0.160142-0.184156-0.2133920.0000000.0000000.2844440.6143430.0998790.9831593.4180666.7230444.251590
10.524405-0.9309840.023111-0.341051-0.4670470.0493830.5283950.4177780.0044440.000000-1.1412239.3746556.4008213.750236
2-0.364071-0.699614-0.204196-0.411821-0.4272500.0061730.3035490.6547220.0355560.000000-0.7871593.3134047.0181164.015122
3-0.686652-0.942552-0.233103-0.493887-0.4685140.0061730.3035490.6547220.0355560.000000-0.7871593.5320720.6731514.202766
4-0.2914070.090899-0.193742-0.236248-0.1411380.0000000.1000000.7800000.1200000.000000-0.4330964.9681117.1611094.059384
5-0.747205-0.236874-0.235345-0.474524-0.2879600.0000000.0000000.1905560.6402020.1647421.3372223.0806850.6971193.750236
60.0267711.112782-0.1309000.2464120.5729310.0000000.0000000.0000000.2272730.6060613.8156673.5320726.6754064.202766
7-0.180210-0.066162-0.174939-0.219328-0.2164750.0000000.1000000.7800000.1200000.000000-0.4330963.2843267.1611094.059384
\n", "
" ], "text/plain": [ " extend_features_as_polynomial__selling_price \\\n", "0 -0.104244 \n", "1 0.524405 \n", "2 -0.364071 \n", "3 -0.686652 \n", "4 -0.291407 \n", "5 -0.747205 \n", "6 0.026771 \n", "7 -0.180210 \n", "\n", " extend_features_as_polynomial__driven_kms \\\n", "0 -0.059337 \n", "1 -0.930984 \n", "2 -0.699614 \n", "3 -0.942552 \n", "4 0.090899 \n", "5 -0.236874 \n", "6 1.112782 \n", "7 -0.066162 \n", "\n", " extend_features_as_polynomial__selling_price^2 \\\n", "0 -0.160142 \n", "1 0.023111 \n", "2 -0.204196 \n", "3 -0.233103 \n", "4 -0.193742 \n", "5 -0.235345 \n", "6 -0.130900 \n", "7 -0.174939 \n", "\n", " extend_features_as_polynomial__selling_price driven_kms \\\n", "0 -0.184156 \n", "1 -0.341051 \n", "2 -0.411821 \n", "3 -0.493887 \n", "4 -0.236248 \n", "5 -0.474524 \n", "6 0.246412 \n", "7 -0.219328 \n", "\n", " extend_features_as_polynomial__driven_kms^2 \\\n", "0 -0.213392 \n", "1 -0.467047 \n", "2 -0.427250 \n", "3 -0.468514 \n", "4 -0.141138 \n", "5 -0.287960 \n", "6 0.572931 \n", "7 -0.216475 \n", "\n", " extend_features_as_spline__age_sp_0 extend_features_as_spline__age_sp_1 \\\n", "0 0.000000 0.000000 \n", "1 0.049383 0.528395 \n", "2 0.006173 0.303549 \n", "3 0.006173 0.303549 \n", "4 0.000000 0.100000 \n", "5 0.000000 0.000000 \n", "6 0.000000 0.000000 \n", "7 0.000000 0.100000 \n", "\n", " extend_features_as_spline__age_sp_2 extend_features_as_spline__age_sp_3 \\\n", "0 0.284444 0.614343 \n", "1 0.417778 0.004444 \n", "2 0.654722 0.035556 \n", "3 0.654722 0.035556 \n", "4 0.780000 0.120000 \n", "5 0.190556 0.640202 \n", "6 0.000000 0.227273 \n", "7 0.780000 0.120000 \n", "\n", " extend_features_as_spline__age_sp_4 scale_to_standard__age \\\n", "0 0.099879 0.983159 \n", "1 0.000000 -1.141223 \n", "2 0.000000 -0.787159 \n", "3 0.000000 -0.787159 \n", "4 0.000000 -0.433096 \n", "5 0.164742 1.337222 \n", "6 0.606061 3.815667 \n", "7 0.000000 -0.433096 \n", "\n", " encode_categoricals_wrt_target__fuel_type \\\n", "0 3.418066 \n", "1 9.374655 
\n", "2 3.313404 \n", "3 3.532072 \n", "4 4.968111 \n", "5 3.080685 \n", "6 3.532072 \n", "7 3.284326 \n", "\n", " encode_categoricals_wrt_target__selling_type \\\n", "0 6.723044 \n", "1 6.400821 \n", "2 7.018116 \n", "3 0.673151 \n", "4 7.161109 \n", "5 0.697119 \n", "6 6.675406 \n", "7 7.161109 \n", "\n", " encode_categoricals_wrt_target__transmission \n", "0 4.251590 \n", "1 3.750236 \n", "2 4.015122 \n", "3 4.202766 \n", "4 4.059384 \n", "5 3.750236 \n", "6 4.202766 \n", "7 4.059384 " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_augd_features_train.head(0x8)" ] }, { "cell_type": "code", "execution_count": 46, "id": "2bb56d09", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=10)" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor = build_regressor_baseline(random_state=0x3AEF)\n", "regressor" ] }, { "cell_type": "markdown", "id": "dd34c150", "metadata": {}, "source": [ "Составной пайплайн:" ] }, { "cell_type": "code", "execution_count": 47, "id": "ff9d2a85", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('preprocess',\n",
       "                 ColumnTransformer(transformers=[('extend_features_as_polynomial',\n",
       "                                                  Pipeline(steps=[('extend_features',\n",
       "                                                                   PolynomialFeatures(include_bias=False)),\n",
       "                                                                  ('scale_to_standard',\n",
       "                                                                   StandardScaler())]),\n",
       "                                                  ('selling_price',\n",
       "                                                   'driven_kms')),\n",
       "                                                 ('extend_features_as_spline',\n",
       "                                                  SplineTransformer(include_bias=False,\n",
       "                                                                    knots='quantile',\n",
       "                                                                    n_knots=4),\n",
       "                                                  ('age',)),\n",
       "                                                 ('scale_to_standard',\n",
       "                                                  StandardScaler(), ('age',)),\n",
       "                                                 ('encode_categoricals_wrt_target',\n",
       "                                                  TargetEncoder(random_state=11990,\n",
       "                                                                target_type='continuous'),\n",
       "                                                  ('fuel_type', 'selling_type',\n",
       "                                                   'transmission'))])),\n",
       "                ('regress',\n",
       "                 RandomForestRegressor(max_depth=8, max_features='sqrt',\n",
       "                                       n_estimators=10))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('extend_features_as_polynomial',\n", " Pipeline(steps=[('extend_features',\n", " PolynomialFeatures(include_bias=False)),\n", " ('scale_to_standard',\n", " StandardScaler())]),\n", " ('selling_price',\n", " 'driven_kms')),\n", " ('extend_features_as_spline',\n", " SplineTransformer(include_bias=False,\n", " knots='quantile',\n", " n_knots=4),\n", " ('age',)),\n", " ('scale_to_standard',\n", " StandardScaler(), ('age',)),\n", " ('encode_categoricals_wrt_target',\n", " TargetEncoder(random_state=11990,\n", " target_type='continuous'),\n", " ('fuel_type', 'selling_type',\n", " 'transmission'))])),\n", " ('regress',\n", " RandomForestRegressor(max_depth=8, max_features='sqrt',\n", " n_estimators=10))])" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline = sklearn.pipeline.Pipeline([\n", " ('preprocess', preprocess_transformer),\n", " ('regress', regressor),\n", "])\n", "pipeline" ] }, { "cell_type": "code", "execution_count": 48, "id": "eec22b97", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'preprocess__remainder': 'drop',\n", " 'preprocess__sparse_threshold': 0.3,\n", " 'preprocess__transformer_weights': None,\n", " 'preprocess__extend_features_as_spline': SplineTransformer(include_bias=False, knots='quantile', n_knots=4),\n", " 'preprocess__extend_features_as_polynomial__extend_features': PolynomialFeatures(include_bias=False),\n", " 'preprocess__extend_features_as_polynomial__extend_features__degree': 2,\n", " 'preprocess__extend_features_as_polynomial__extend_features__include_bias': False,\n", " 'preprocess__extend_features_as_polynomial__extend_features__interaction_only': False,\n", " 'preprocess__extend_features_as_polynomial__extend_features__order': 'C',\n", " 'preprocess__extend_features_as_polynomial__scale_to_standard__with_mean': True,\n", " 
'preprocess__extend_features_as_polynomial__scale_to_standard__with_std': True,\n", " 'preprocess__extend_features_as_spline__degree': 3,\n", " 'preprocess__extend_features_as_spline__extrapolation': 'constant',\n", " 'preprocess__extend_features_as_spline__include_bias': False,\n", " 'preprocess__extend_features_as_spline__knots': 'quantile',\n", " 'preprocess__extend_features_as_spline__n_knots': 4,\n", " 'preprocess__extend_features_as_spline__order': 'C',\n", " 'preprocess__extend_features_as_spline__sparse_output': False,\n", " 'preprocess__scale_to_standard__with_mean': True,\n", " 'preprocess__scale_to_standard__with_std': True,\n", " 'regress__bootstrap': True,\n", " 'regress__ccp_alpha': 0.0,\n", " 'regress__criterion': 'squared_error',\n", " 'regress__max_depth': 8,\n", " 'regress__max_features': 'sqrt',\n", " 'regress__max_leaf_nodes': None,\n", " 'regress__max_samples': None,\n", " 'regress__min_impurity_decrease': 0.0,\n", " 'regress__min_samples_leaf': 1,\n", " 'regress__min_samples_split': 2,\n", " 'regress__min_weight_fraction_leaf': 0.0,\n", " 'regress__monotonic_cst': None,\n", " 'regress__n_estimators': 10,\n", " 'regress__oob_score': False,\n", " 'regress__random_state': None}" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_params = filter_params(\n", " pipeline.get_params(),\n", " include={\n", " 'preprocess': (False, PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_INCLUDE.copy()),\n", " 'regress': (False, True),\n", " },\n", " exclude={\n", " 'preprocess': PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_EXCLUDE.copy(),\n", " 'regress': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,\n", " },\n", ")\n", "model_params" ] }, { "cell_type": "markdown", "id": "23519fd3", "metadata": {}, "source": [ "Обучение модели:" ] }, { "cell_type": "code", "execution_count": 49, "id": "95f8079b", "metadata": {}, "outputs": [], "source": [ "_ = pipeline.fit(df_orig_features_train, df_target_train.iloc[:, 0])" 
] }, { "cell_type": "markdown", "id": "5627ba08-b0a9-4316-a4a9-630047cec1cc", "metadata": {}, "source": [ "Оценка качества:" ] }, { "cell_type": "code", "execution_count": 50, "id": "a88ccce4-cb32-4810-982e-9b126778d611", "metadata": {}, "outputs": [], "source": [ "target_test_predicted = pipeline.predict(df_orig_features_test)" ] }, { "cell_type": "markdown", "id": "5cef4314-5872-4f6e-9355-316c0419158a", "metadata": {}, "source": [ "Метрики качества (MAPE, а также MSE, MAE):" ] }, { "cell_type": "code", "execution_count": 51, "id": "bfd94c07-05ec-45c9-b49e-ddf861d39d06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mse': 1.5006829920671902,\n", " 'mae': 0.7582020656775502,\n", " 'mape': 0.30794862210624835}" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics = score_predictions(df_target_test, target_test_predicted)\n", "metrics" ] }, { "cell_type": "code", "execution_count": 52, "id": "80b5ab1d-234c-4b18-98a6-eef0520677ec", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5821a1adbbe242a882fed4dd765843c8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading artifacts: 0%| | 0/7 [00:00#sk-container-id-6 {\n", " /* Definition of color scheme common for light and dark mode */\n", " --sklearn-color-text: #000;\n", " --sklearn-color-text-muted: #666;\n", " --sklearn-color-line: gray;\n", " /* Definition of color scheme for unfitted estimators */\n", " --sklearn-color-unfitted-level-0: #fff5e6;\n", " --sklearn-color-unfitted-level-1: #f6e4d2;\n", " --sklearn-color-unfitted-level-2: #ffe0b3;\n", " --sklearn-color-unfitted-level-3: chocolate;\n", " /* Definition of color scheme for fitted estimators */\n", " --sklearn-color-fitted-level-0: #f0f8ff;\n", " --sklearn-color-fitted-level-1: #d4ebff;\n", " --sklearn-color-fitted-level-2: #b3dbfd;\n", " --sklearn-color-fitted-level-3: cornflowerblue;\n", "\n", " /* Specific color for light 
theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-icon: #696969;\n", "\n", " @media (prefers-color-scheme: dark) {\n", " /* Redefinition of color scheme for dark theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-icon: #878787;\n", " }\n", "}\n", "\n", "#sk-container-id-6 {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "#sk-container-id-6 pre {\n", " padding: 0;\n", "}\n", "\n", "#sk-container-id-6 input.sk-hidden--visually {\n", " border: 0;\n", " clip: rect(1px 1px 1px 1px);\n", " clip: rect(1px, 1px, 1px, 1px);\n", " height: 1px;\n", " margin: -1px;\n", " overflow: hidden;\n", " padding: 0;\n", " position: absolute;\n", " width: 1px;\n", "}\n", "\n", "#sk-container-id-6 div.sk-dashed-wrapped {\n", " border: 1px dashed var(--sklearn-color-line);\n", " margin: 0 0.4em 0.5em 0.4em;\n", " box-sizing: border-box;\n", " padding-bottom: 0.4em;\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "#sk-container-id-6 div.sk-container {\n", " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", " so we also need the `!important` here to be able to override the\n", " default hidden behavior on the sphinx rendered scikit-learn.org.\n", " See: 
https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", " display: inline-block !important;\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-6 div.sk-text-repr-fallback {\n", " display: none;\n", "}\n", "\n", "div.sk-parallel-item,\n", "div.sk-serial,\n", "div.sk-item {\n", " /* draw centered vertical line to link estimators */\n", " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", " background-size: 2px 100%;\n", " background-repeat: no-repeat;\n", " background-position: center center;\n", "}\n", "\n", "/* Parallel-specific style estimator block */\n", "\n", "#sk-container-id-6 div.sk-parallel-item::after {\n", " content: \"\";\n", " width: 100%;\n", " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", " flex-grow: 1;\n", "}\n", "\n", "#sk-container-id-6 div.sk-parallel {\n", " display: flex;\n", " align-items: stretch;\n", " justify-content: center;\n", " background-color: var(--sklearn-color-background);\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-6 div.sk-parallel-item {\n", " display: flex;\n", " flex-direction: column;\n", "}\n", "\n", "#sk-container-id-6 div.sk-parallel-item:first-child::after {\n", " align-self: flex-end;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-6 div.sk-parallel-item:last-child::after {\n", " align-self: flex-start;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-6 div.sk-parallel-item:only-child::after {\n", " width: 0;\n", "}\n", "\n", "/* Serial-specific style estimator block */\n", "\n", "#sk-container-id-6 div.sk-serial {\n", " display: flex;\n", " flex-direction: column;\n", " align-items: center;\n", " background-color: var(--sklearn-color-background);\n", " padding-right: 1em;\n", " padding-left: 1em;\n", "}\n", "\n", "\n", "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", "clickable and can be expanded/collapsed.\n", "- 
Pipeline and ColumnTransformer use this feature and define the default style\n", "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", "*/\n", "\n", "/* Pipeline and ColumnTransformer style (default) */\n", "\n", "#sk-container-id-6 div.sk-toggleable {\n", " /* Default theme specific background. It is overwritten whether we have a\n", " specific estimator or a Pipeline/ColumnTransformer */\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "/* Toggleable label */\n", "#sk-container-id-6 label.sk-toggleable__label {\n", " cursor: pointer;\n", " display: flex;\n", " width: 100%;\n", " margin-bottom: 0;\n", " padding: 0.5em;\n", " box-sizing: border-box;\n", " text-align: center;\n", " align-items: start;\n", " justify-content: space-between;\n", " gap: 0.5em;\n", "}\n", "\n", "#sk-container-id-6 label.sk-toggleable__label .caption {\n", " font-size: 0.6rem;\n", " font-weight: lighter;\n", " color: var(--sklearn-color-text-muted);\n", "}\n", "\n", "#sk-container-id-6 label.sk-toggleable__label-arrow:before {\n", " /* Arrow on the left of the label */\n", " content: \"▸\";\n", " float: left;\n", " margin-right: 0.25em;\n", " color: var(--sklearn-color-icon);\n", "}\n", "\n", "#sk-container-id-6 label.sk-toggleable__label-arrow:hover:before {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "/* Toggleable content - dropdown */\n", "\n", "#sk-container-id-6 div.sk-toggleable__content {\n", " display: none;\n", " text-align: left;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-6 div.sk-toggleable__content.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-6 div.sk-toggleable__content pre {\n", " margin: 0.2em;\n", " border-radius: 0.25em;\n", " color: var(--sklearn-color-text);\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", 
"}\n", "\n", "#sk-container-id-6 div.sk-toggleable__content.fitted pre {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-6 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", " /* Expand drop-down */\n", " display: block;\n", " width: 100%;\n", " overflow: visible;\n", "}\n", "\n", "#sk-container-id-6 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", " content: \"▾\";\n", "}\n", "\n", "/* Pipeline/ColumnTransformer-specific style */\n", "\n", "#sk-container-id-6 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-6 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator-specific style */\n", "\n", "/* Colorize estimator box */\n", "#sk-container-id-6 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-6 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "#sk-container-id-6 div.sk-label label.sk-toggleable__label,\n", "#sk-container-id-6 div.sk-label label {\n", " /* The background is the default theme color */\n", " color: var(--sklearn-color-text-on-default-background);\n", "}\n", "\n", "/* On hover, darken the color of the background */\n", "#sk-container-id-6 div.sk-label:hover label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "/* Label box, darken color on hover, fitted */\n", 
"#sk-container-id-6 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator label */\n", "\n", "#sk-container-id-6 div.sk-label label {\n", " font-family: monospace;\n", " font-weight: bold;\n", " display: inline-block;\n", " line-height: 1.2em;\n", "}\n", "\n", "#sk-container-id-6 div.sk-label-container {\n", " text-align: center;\n", "}\n", "\n", "/* Estimator-specific */\n", "#sk-container-id-6 div.sk-estimator {\n", " font-family: monospace;\n", " border: 1px dotted var(--sklearn-color-border-box);\n", " border-radius: 0.25em;\n", " box-sizing: border-box;\n", " margin-bottom: 0.5em;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-6 div.sk-estimator.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "/* on hover */\n", "#sk-container-id-6 div.sk-estimator:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-6 div.sk-estimator.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", "\n", "/* Common style for \"i\" and \"?\" */\n", "\n", ".sk-estimator-doc-link,\n", "a:link.sk-estimator-doc-link,\n", "a:visited.sk-estimator-doc-link {\n", " float: right;\n", " font-size: smaller;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1em;\n", " height: 1em;\n", " width: 1em;\n", " text-decoration: none !important;\n", " margin-left: 0.5em;\n", " text-align: center;\n", " /* unfitted */\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-unfitted-level-1);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted,\n", "a:link.sk-estimator-doc-link.fitted,\n", "a:visited.sk-estimator-doc-link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "/* Span, style for the box shown on hovering the info icon */\n", ".sk-estimator-doc-link span {\n", " display: none;\n", " z-index: 9999;\n", " position: relative;\n", " font-weight: normal;\n", " right: .2ex;\n", " padding: .5ex;\n", " margin: .5ex;\n", " width: min-content;\n", " min-width: 20ex;\n", " 
max-width: 50ex;\n", " color: var(--sklearn-color-text);\n", " box-shadow: 2pt 2pt 4pt #999;\n", " /* unfitted */\n", " background: var(--sklearn-color-unfitted-level-0);\n", " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted span {\n", " /* fitted */\n", " background: var(--sklearn-color-fitted-level-0);\n", " border: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link:hover span {\n", " display: block;\n", "}\n", "\n", "/* \"?\"-specific style due to the `` HTML tag */\n", "\n", "#sk-container-id-6 a.estimator_doc_link {\n", " float: right;\n", " font-size: 1rem;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1rem;\n", " height: 1rem;\n", " width: 1rem;\n", " text-decoration: none;\n", " /* unfitted */\n", " color: var(--sklearn-color-unfitted-level-1);\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", "}\n", "\n", "#sk-container-id-6 a.estimator_doc_link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "#sk-container-id-6 a.estimator_doc_link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "#sk-container-id-6 a.estimator_doc_link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".estimator-table summary {\n", " padding: .5rem;\n", " font-family: monospace;\n", " cursor: pointer;\n", "}\n", "\n", ".estimator-table details[open] {\n", " padding-left: 0.1rem;\n", " padding-right: 0.1rem;\n", " padding-bottom: 0.3rem;\n", "}\n", "\n", ".estimator-table .parameters-table {\n", " margin-left: auto !important;\n", " margin-right: auto !important;\n", "}\n", "\n", 
".estimator-table .parameters-table tr:nth-child(odd) {\n", " background-color: #fff;\n", "}\n", "\n", ".estimator-table .parameters-table tr:nth-child(even) {\n", " background-color: #f6f6f6;\n", "}\n", "\n", ".estimator-table .parameters-table tr:hover {\n", " background-color: #e0e0e0;\n", "}\n", "\n", ".estimator-table table td {\n", " border: 1px solid rgba(106, 105, 104, 0.232);\n", "}\n", "\n", ".user-set td {\n", " color:rgb(255, 94, 0);\n", " text-align: left;\n", "}\n", "\n", ".user-set td.value pre {\n", " color:rgb(255, 94, 0) !important;\n", " background-color: transparent !important;\n", "}\n", "\n", ".default td {\n", " color: black;\n", " text-align: left;\n", "}\n", "\n", ".user-set td i,\n", ".default td i {\n", " color: black;\n", "}\n", "\n", ".copy-paste-icon {\n", " background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n", " background-repeat: no-repeat;\n", " background-size: 14px 14px;\n", " background-position: 0;\n", " display: inline-block;\n", " width: 14px;\n", " height: 14px;\n", " cursor: pointer;\n", "}\n", "
RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=10)" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor = build_regressor_baseline(random_state=0x8EDD)\n", "regressor" ] }, { "cell_type": "markdown", "id": "36ebb8f4-bb00-4d20-82e8-940a8798f4b1", "metadata": {}, "source": [ "Отбор признаков из дополненного набора по критерию минимизации MAPE:" ] }, { "cell_type": "code", "execution_count": 56, "id": "3cc243e6-f4e0-4b03-a4f2-8e8e4835466a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "14" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_augd_features_train.columns)" ] }, { "cell_type": "code", "execution_count": 57, "id": "9c5645b1-e052-43a8-9766-9ed7e62f7ebc", "metadata": {}, "outputs": [], "source": [ "FILTERED_FEATURES_NUM = (4, 8)" ] }, { "cell_type": "code", "execution_count": 58, "id": "8e897be5-ba9a-427a-9c36-dd1cdfa2727e", "metadata": {}, "outputs": [], "source": [ "def build_feature_selector(*, verbose=0):\n", " return build_sequential_feature_selector(\n", " regressor, k_features=FILTERED_FEATURES_NUM, forward=True, floating=True, cv=4, scoring='neg_mean_absolute_percentage_error',\n", " verbose=verbose,\n", " )" ] }, { "cell_type": "code", "execution_count": 59, "id": "e2294a0a-5a8d-4daf-aa55-c099dd5085d0", "metadata": {}, "outputs": [], "source": [ "FEATURE_SELECTOR_PARAMS_COMMON_INCLUDE = {\n", " **{k: True for k in SEQUENTIAL_FEATURE_SELECTOR_PARAMS_COMMON_INCLUDE},\n", " 'estimator': False,\n", "}\n", "FEATURE_SELECTOR_PARAMS_COMMON_EXCLUDE = () # TODO: ай-яй-яй" ] }, { "cell_type": "code", "execution_count": 60, "id": "dfd187a8-5b32-42c3-bc38-987b75bb2a2d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
SequentialFeatureSelector(cv=4,\n",
       "                          estimator=RandomForestRegressor(max_depth=8,\n",
       "                                                          max_features='sqrt',\n",
       "                                                          n_estimators=10),\n",
       "                          floating=True, k_features=(4, 8),\n",
       "                          scoring='neg_mean_absolute_percentage_error',\n",
       "                          verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "SequentialFeatureSelector(cv=4,\n", " estimator=RandomForestRegressor(max_depth=8,\n", " max_features='sqrt',\n", " n_estimators=10),\n", " floating=True, k_features=(4, 8),\n", " scoring='neg_mean_absolute_percentage_error',\n", " verbose=1)" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature_selector = build_feature_selector(verbose=1)\n", "feature_selector" ] }, { "cell_type": "code", "execution_count": 61, "id": "e8bf4bb3-e3af-4bc4-8ab5-7625e1e428e3", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=1)]: Done 14 out of 14 | elapsed: 0.6s finished\n", "Features: 1/8[Parallel(n_jobs=1)]: Done 13 out of 13 | elapsed: 0.5s finished\n", "Features: 2/8[Parallel(n_jobs=1)]: Done 12 out of 12 | elapsed: 0.5s finished\n", "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s finished\n", "Features: 3/8[Parallel(n_jobs=1)]: Done 11 out of 11 | elapsed: 0.4s finished\n", "[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 0.0s finished\n", "Features: 4/8[Parallel(n_jobs=1)]: Done 10 out of 10 | elapsed: 0.4s finished\n", "[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 0.1s finished\n", "Features: 5/8[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 0.1s finished\n", "Features: 6/8[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 0.2s finished\n", "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 0.1s finished\n", "[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 0.1s finished\n", "Features: 5/8[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 0.1s finished\n", "Features: 6/8[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 0.2s finished\n", "Features: 7/8[Parallel(n_jobs=1)]: Done 7 
out of 7 | elapsed: 0.2s finished\n", "[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 0.2s finished\n", "Features: 7/8[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 0.2s finished\n", "[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 0.2s finished\n", "Features: 8/8" ] } ], "source": [ "_ = feature_selector.fit(df_augd_features_train, df_target_train.iloc[:, 0])" ] }, { "cell_type": "markdown", "id": "ed67ab27-023f-4639-85a0-cd4c3ef85dc8", "metadata": {}, "source": [ "Выбранные признаки (имена и индексы):" ] }, { "cell_type": "code", "execution_count": 62, "id": "66be5774-0ff7-43be-99df-b56c9165a4f7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'names': ('extend_features_as_polynomial__selling_price',\n", " 'extend_features_as_polynomial__selling_price^2',\n", " 'extend_features_as_spline__age_sp_1',\n", " 'extend_features_as_spline__age_sp_2',\n", " 'scale_to_standard__age'),\n", " 'indices': (0, 2, 6, 7, 10)}" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector)" ] }, { "cell_type": "markdown", "id": "1c7498fd-669b-4fec-83ad-7d688fd23698", "metadata": {}, "source": [ "MAPE в зависимости от количества выбранных признаков (указан диапазон выбора, ограниченный `FILTERED_FEATURES_NUM`):" ] }, { "cell_type": "code", "execution_count": 63, "id": "0180f3da-9775-451f-8262-b825f69228fc", "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlAAAAG2CAYAAACwF7hHAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAbUxJREFUeJzt3Qd4VGX6NvB7eklP6NIEEbAjCGJHim1dRXcVO4ioq2ABC7giTUXsa9lFXUVdRdey+il/RUCwKyAKdiygKBBKejL1zMx3Pe/JhAnJhJyQMuX+eR2TKZmcHJKZe973Oc9rikQiERARERFRo5kbf1ciIiIiEgxQRERERAYxQBEREREZxABFREREZBADFBEREZFBDFBEREREBjFAERERERnEAEVERERkEAMUERERkUEMUERERESpHqAeffRR9OzZE06nE0OGDMGqVasavP/LL7+Mfv36qfsffPDBeOutt2rdLivZ3HbbbejcuTNcLhdGjBiBn376qYV/CiIiIkpmSRWg/vvf/2Ly5MmYMWMGvvjiCxx66KE46aSTsH379nrv/8knn+C8887D+PHj8eWXX+LMM89U2zfffFNzn7vvvhsPPfQQ5s+fj5UrVyIjI0M9ps/na8WfjIiIiJKJKZkWE5YRpyOOOAKPPPKIuhwOh9GtWzdMmjQJU6dOrXP/c889F1VVVVi0aFHNdUceeSQOO+wwFZjkR+/SpQumTJmCG264Qd1eVlaGjh074umnn8aYMWNa8acjIiKiZGFFkggEAlizZg2mTZtWc53ZbFZTbp9++mm9XyPXy4hVLBldev3119XnGzduRGFhoXqMqJycHBXU5GvjBSi/36+2KAlyxcXFKCgogMlk2uuflYiIiFqeDKRUVFSowRTJFCkZoHbu3IlQKKRGh2LJ5R9++KHer5FwVN/95fro7dHr4t2nPnPnzsWsWbOa/LMQERFR4vj999/RtWvX1AxQiURGwWJHtmTar3v37vjxxx+Rn5/fpvuWaILBIFasWIFhw4bBZrO19e4kDB6X+AK+AN774D0cc8AQWO18ioqlhUL46OvPcMzBR8JqsbT17iQUHpv4eGziKykuwYHHHIasrCwYlTTPTu3atYPFYsG2bdtqXS+XO3XqVO/XyPUN3T/6Ua6Ts/Bi7yN1UvE4HA617U7Ck0zjUe2g4Ha71XFhUNiFxyW+gNevjk1+QT7sDntb705CCWrart8ba9I8fbcKHpv4eGz2rCnlN0lzFp7dbsfAgQPx7rvv1qo9kstDhw6t92vk+tj7i6VLl9bcf99991UhKvY+5eXl6my8eI9JRERElFRRVKbNLrnkEgwaNAiDBw/Ggw8+qM6yGzdunLr94osvxj777KNqlMS1116L448/Hvfddx9OO+00vPjii/j888/x+OOP1yTO6667Drfffjv69OmjAtX06dNVMZm0OyAiIiJK+gAlbQl27NihGl9KkbdMsy1evLimCHzTpk21quiPOuooLFy4ELfeeituueUWFZLkDLyDDjqo5j433XSTCmGXX345SktLccwxx6jHlMabREREREkfoMTEiRPVVp/33nuvznV//etf1RaPjELNnj1bbUREREQpVQNFRERElCgYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBig
iIiIigxigiIiIiAxigCIiIiJK1QBVXFyMCy64ANnZ2cjNzcX48eNRWVnZ4Nf4fD5cffXVKCgoQGZmJs4++2xs27at1n1MJlOd7cUXX2zhn4aIiIiSWdIEKAlP3377LZYuXYpFixbhgw8+wOWXX97g11x//fV488038fLLL+P999/Hli1bcNZZZ9W534IFC7B169aa7cwzz2zBn4SIiIiSnRVJ4Pvvv8fixYuxevVqDBo0SF338MMP49RTT8W9996LLl261PmasrIyPPnkk1i4cCFOPPHEmqDUv39/fPbZZzjyyCNr7isjWp06dWrFn4iIiIiSWVIEqE8//VSFnGh4EiNGjIDZbMbKlSsxevToOl+zZs0aBINBdb+ofv36oXv37urxYgOUTPNddtll6NWrF6688kqMGzdOTeXF4/f71RZVXl6uPsr3k412iR4PHpfaeFziC2qa+qiFNJi0pBkkbxVyTGI/0i48NvHx2MSnhUJI6QBVWFiIDh061LrOarUiPz9f3Rbva+x2uwpesTp27Fjra2bPnq1GqNxuN5YsWYKrrrpK1VZdc801cfdn7ty5mDVrVp3rV6xYoR6H6pKpV6qLxyW+99Z+3Na7kLBWrPmwrXchYfHYxMdjU5fH40FSBqipU6di3rx5e5y+a0nTp0+v+XzAgAGoqqrCPffc02CAmjZtGiZPnlxrBKpbt24YNmyYKlinXWSERULCyJEjYbPZ2np3EgaPS3wBXwDLli/DCYcdDZvd3ta7k1BkBEFeBIcNPBZWS1K8/201PDbx8djEV1RUhKZq0yM5ZcoUjB07tsH7yLSa1Cdt37691vWapqkz8+LVLsn1gUAApaWltUah5Cy8huqdhgwZgjlz5qgpOofDUe995Pr6bpMXQr4Y1o/Hpn48LnVFtLD6KE/0Niuf7OvDYxMfj018PDZ1WS0WNFWbHsn27durbU+GDh2qgpDUNQ0cOFBdt3z5coTDYRV46iP3kxemd999V7UvEOvXr8emTZvU48Wzdu1a5OXlxQ1PREREREkRReXMuZNPPhkTJkzA/Pnz1fTHxIkTMWbMmJoz8DZv3ozhw4fj2WefxeDBg5GTk6N6RclUm9RKSf+oSZMmqfAULSCXFgcyIiWXnU6nmlK58847ccMNN7TxT0xERESJLCkClHj++edVaJKQJGffyajSQw89VHO7hCoZYYotCHvggQdq7itTcieddBL++c9/1twuI1SPPvqo6hcViUSw33774f7771dBjYiIiCjpA5SMIklPp3h69uypQlAsGVWSgCRbfWRUSzYiIiIiI9hkhYiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIUjVAFRcX44ILLkB2djZyc3Mxfvx4VFZWNvg1jz/+OE444QT1NSaTCaWlpc3yuERERJTekiZAScj59ttvsXTpUixatAgffPABLr/88ga/xuPx4OSTT8Ytt9zSrI9LRERE6c2KJPD9999j8eLFWL16NQYNGqSue/jhh3Hqqafi3nvvRZcuXer9uuuuu059fO+995r1cYmIiCi9JUWA+vTTT9X0WjTkiBEjRsBsNmPlypUYPXp0qz6u3+9XW1R5ebn6GAwG1Ua7RI8Hj0ttPC7xBTV
NfdRCGkxa0gyStwo5JrEfaRcem/h4bOLTQiGkdIAqLCxEhw4dal1ntVqRn5+vbmvtx507dy5mzZpV5/oVK1bA7XY3eX9SmUyRUl08LvG9t/bjtt6FhLVizYdtvQsJi8cmPh6b+kt9kjJATZ06FfPmzWvwPjLNlmimTZuGyZMn1xqB6tatG4YNG4aCgoI23bdEIyMsEhJGjhwJm83W1ruTMHhc4gv4Ali2fBlOOOxo2Oz2tt6dhCIjCPIiOGzgsbBakuL9b6vhsYmPxya+oqIiNFWbHskpU6Zg7NixDd6nV69e6NSpE7Zv317rek3T1Bl0cltTNfVxHQ6H2nYnL4R8Mawfj039eFzqimhh9VGe6G1WPtnXh8cmPh6b+Hhs6rJaLGiqNj2S7du3V9ueDB06VLUgWLNmDQYOHKiuW758OcLhMIYMGdLk799Sj0tERESpLSkqNPv376/aEUyYMAGrVq3Cxx9/jIkTJ2LMmDE1Z8pt3rwZ/fr1U7dHSR3T2rVr8fPPP6vLX3/9tbosI0yNfVwiIiKipAxQ4vnnn1cBafjw4arNwDHHHKMaZcbWlKxfv75WQdj8+fMxYMAAFZDEcccdpy6/8cYbjX5cIiIiot0lzWSonBm3cOHCuLf37NkTkUik1nUzZ85U2948LhEREVHSjkARERERJQoGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiIiIiAxigCIiIiIyiAGKiIiIyCAGKCIiIiKDGKCIiIiIDGKAIiIiIjKIAYqIiIiotQKUpmlYtmwZHnvsMVRUVKjrtmzZgsrKyqY+JBEREVFSsDbli3777TecfPLJ2LRpE/x+P0aOHImsrCzMmzdPXZ4/f37z7ykRERFRMo9AXXvttRg0aBBKSkrgcrlqrh89ejTefffd5tw/IiIiotQYgfrwww/xySefwG6317q+Z8+e2Lx5c3PtGxEREVHqjECFw2GEQqE61//xxx9qKo+IiIgolTUpQI0aNQoPPvhgzWWTyaSKx2fMmIFTTz21OfePiIiIKDWm8O677z6cdNJJOOCAA+Dz+XD++efjp59+Qrt27fDCCy80/14SERERJXuA6tq1K9atW4f//ve/6qOMPo0fPx4XXHBBraJyIiIiolRkbfIXWq0qMMlGREQtK6CFUVzuVZ9v2F6OTKcDDqsFVosZNqsZNvloMcNkaus9JUoPTQpQc+fORceOHXHppZfWuv6pp57Cjh07cPPNNzfX/hERpTV/MITSKj+KKvzw+ALqukAgjJ0BH0LhsFShwmw2wWo2wWI2w2mzwOWwwG6xwGY1VQcsi7qdiNo4QEn38YULF9a5/sADD8SYMWMYoIiI9pLXr6Gkyo/iSr8KUW6HFXlZdmzfAWQ4rbBYdj19h8IRFaa0UBiV/iBKPX4got9msUi40kepnHarClgyUmW1mvSQxVErotYLUIWFhejcuXOd69u3b4+tW7c2bU+IiNJcJAJUBYIoqfCjtCqAYEiCkw1ZLr3nXiik1ft1FjX6ZIHdatnt8SRYRaCFwwgEw/D6fSpkCbPZXBOuHDJqZTfDYbXWjFqpzcxwRdSsAapbt274+OOPse+++9a6Xq7r0qVLUx6SiCitg1OlL6Cm6cq9AWihCDKdVmS7azcrNkpazFglJFnMgK32beHqYCWBqsofRLkngnAkDBNMNcFKPrpl1MpurR7FklErGb2ygDOClO6aFKAmTJiA6667DsFgECeeeKK6TpZwuemmmzBlypTm3kciopQUjgAV3gCKyn0o9walnEkFJ5ul9khSS5C6KfseRq1CoYiaRtTKfXILzKboqJUJdqsZLrt
VjV5FC9itVo5aUfpoUoC68cYbUVRUhKuuugqBgF7U6HQ6Ve3TtGnT0BKKi4sxadIkvPnmm2ro+eyzz8Y//vEPZGZmxv2axx9/XNVqffHFF6ioqFBr9+Xm5tZZfkYWR969SH7q1Kkt8nMQEUlAKfcEUVTpRYU3qKbgslw2faSojTU4aqXCVRhBLQxPIIQKn6ZGsiRcSQG7hCeprZI6K1VrZZWAVn2GoNUMM5MVpXuAkj+wefPmYfr06fj+++9V76c+ffrA4XCgpUi7BKmvWrp0qRr5GjduHC6//PJ6i9mjPB4PTj75ZLU1FOxmz56tRtWiuBwNEbWEYCiMck8AOyt8qPJpKlTkuO0qfCQDCUBmVXhed4RMpgL1QvYIyoJBFIX0QnbJTHpNlT5q5dxt1EqOAUetKK36QAkZ/TniiCPQ0iSkLV68GKtXr8agQYPUdQ8//LBaNubee++NW3cl04zivffea/DxJTB16tSpBfaciEjv4aS3IvDB49fgtFmRl+FQ02ipIlp47ogzaiXByhsIo9In7RciMcXv+miXy75r1MpmMamQJuFKbidKmQBVVVWFu+66S9U9bd++XS0uHGvDhg1oTp9++qmaeouGJzFixAg1lbdy5UqMHj16rx5ffpY5c+age/fualma66+/XjUKjcfv96stqry8XH2UkTHZaJfo8eBxqY3HJb6gpp9ppoU0mLTkGJmJR9oPlHn8KKkMwBvQVM1QToYVZpgQiYRQz5rsDQqFQ7U+Jgv5V7RbZJMwtGv0KhSRUSv97MKSqiBCWlh1X5DRKAlO0boqaRjqsllgiWkauvuolfy+xH6kXXhs4tOM/hHubYC67LLL8P777+Oiiy5S7QxkSq8lSduEDh061LpOAk5+fr66bW9cc801OPzww9VjffLJJ2qqT6YK77///rhfIzVSs2bNqnP9ihUr4Ha792p/UpVMvVJdPC7xvbf247behYT1069ftPUuJKwVaz5s611IWDw29Zf6tGqAevvtt/F///d/OProo7E3pFBbaqn2NH3XkiZPnlzz+SGHHAK73Y4rrrhChaR4NV0SsmK/TkagpLXDsGHDUFBQ0KL7m2xkhEVCwsiRI2Gz7Ta2n8Z4XOIL+AJYtnwZTjjsaNjse3caf2u3IvAEgiit9KPUE4CmhZHhsqnRk+YiI08Snvr0PFz1fUpHoeqzBEOhMDQ5WzAURiQcQSQSxo6d36Jnt0OR43aq0T7ZEqEwv63JyJOEp2EDj4U1pgErQZ0Q11RNOpJ5eXlqxGZvScuDsWPHNnifXr16qfokmSqMpWmaOjOvuWuXhgwZoh77119/Rd++feu9jwSr+sKVvBDyxbB+PDb143GpK6LpJQHyRG9rYCo9EXs4lXkC6qy0TJejTnuA5iThKbYTeTqJd1S1UBA7dsp0oBnby+Ts8IAqVpcO7nKGo7M6UKVzSVWy/E21JutetAxp0pGUeqHbbrsNzzzzzF5NWUnnctn2ZOjQoSgtLcWaNWswcOBAdd3y5ctV7ZUEnua0du1aVVu1+5QhEdHuxdGqFUGFT7UikEqGjFbq4UR1SQNQEV3mRv59AlpI9dcqrvCp+imn3YIsp011d5dgJWcFErVqgLrvvvvwyy+/qAWFpY/S7u+gpe9Sc+rfv79qRSCtBubPn6+mPyZOnKjW3Yuegbd582YMHz4czz77LAYPHqyuk/oo2X7++Wd1+euvv1Zn3EmxuIygSXG6FKHL1JtcL5elgPzCCy9Uo2xERLuTaSNpRSDBqVJ6OFnMCdPDiWq3XJCzHZ3VL08y1SdF/dvKvIhEvKoYXbqsSxsJCVYyOsUz/qjFA9SZZ56J1vb888+r0CQhKdpI86GHHqq5XULV+vXraxWESdiKLfY+7rjj1McFCxaoqUOZhnvxxRcxc+ZMdVadLE0jASq2vomIKNrDqSwanHyaGr3IyXDwRTfJ2ixkwKa6rcu/pyegqTUH5d/QYTMj02VHpsOmWirI9B97U1GzB6g
ZM2agtcmIUUNNM2UkTP4oYkkwki0eOfvus88+a9b9JKLUEgiG1HImxZX+mh5O+SnWwyndyJnjUqOm6tRcUpyvj05JON5R6lG9qGRUKtulT/VJ/ZS0TiCKxWoyIqJ6SN8maX4Z7eEkL6QFWc4Wb9tCrU86wbsdsumXg6EQ/IEwtnirVFMqOZNSaquynHa94afdytEpalqACoVCeOCBB/DSSy9h06ZNNevhRcnZcUREyUZvRSBNHSU4+REIhtULJ4NTelFd0F1yMoBNnVXp16Qhqiz67Fdr/cn0ntROSQ2VhCkWo6enJv2rS12RNJo899xzUVZWpmqGzjrrLFWb1NCUGRFR4rYiCGLTzkr8UliG7aVe9aLYLlvvJ8TwlL5kqlZ1kHc7UJDtRIbDhlAogi3FHvxSWI6ftpRhw7YKNf1X5dfU2X+UHqxNLeh+4okncNppp6nAdN5556F3796qEaXUFEl3byKiRCdLslV6Ayiq1Hs4SR1lptPWoj2cKEWK0Z22mlYJlf4gSip96nr53cl221TQctv1tf2Yv1NTkwKUtAY4+OCDaxYUllEo8ac//QnTp09v3j0kImqxHk5e1SfIYjKp/kBsRUBNbpXg0lslSKDaUeZDYUQfxZQQleWyqxo6tkpILU0KUF27dlXrxUk/JRl5WrJkiTqjbfXq1XGXPyEiSogeTlUB7Kz0qh5OVhktcNkZnKhZR6ekGH1XqwSpn6pSYUu1SnDa1OiV1E+xVUIaBqjRo0fj3XffVV3AJ02apBpPPvnkk6qgXPooERElkoAWRlmVX03VeXxB2G1W5GY4ORpArdMqQa1jGIE/qKl2GDvKvapQna0S0jBA3XXXXTWfSyG5jERJF+8+ffrg9NNPb879IyJqMuntI60I5EXL6w+pF6y8LKcaDSBqTRLW9SVkardK2Oz1qFGo2FYJ8nsqgYr5Pg36QMladbIRESUCrz/aiiAAX1Dv4ZSf5Ujqs+lCIWDlajvWfbsPyorsGHJEGFx2L/lbJWTu3iqhwg+rRW+VoI9OSWd0tkpIqQC1ZcsWfPTRR9i+fbta1DcWz8IjotYmZ49XBYIordSDk7zDlxefdi5XW+/aXntnqQO3z81E4TZJTO3UdZ06hnDrtEqcNNLf1rtHzdQqQbbYdfsKS7yIwKOmAeV3Wc7uk9EpqZ/iKGqSBqinn34aV1xxBex2OwoKCmq9q5PPGaCIqHV7OAXUNJ28g5cePTIVku22IxVIeJp0fbb6OWNt225W1z/8QDlDVAq3SojEtEoorfKpfosOq1n9fkurBOmMLgGLeSpJApS0Krjtttswbdo09Y9JRNQWPZwqpIdTuU+1IhCZLinETZ15LZm2k5EnPTzVfoWMRORyBDPnZGK/3hoyMyNwuyJwuSKwptkiXak8vSmDEg6bnLGnX46u2xfbKkFClJxNGi1Gt7J4qlU06c/M4/FgzJgxDE9E1OrkbCbVw6nSi4poDydXavZw+nyNrXraLh4TdhZZcPLpBbWutVkjcLn1MBUNVdGP6vOa21Bzncupf03D949AOtUk0mhHuk1vxq7bF22V4AuEUe6phNlkVq0SZORK2iWodftsaZamW1GTjuz48ePx8ssvY+rUqc2/R0RE9ZAXigpPEDsqvKjyabBZzWo9MnlBSUXBIPD2O43rq+dwhBEMmhAO68kmqJkQLDehvLz598tk2hWunC7sMXDVXBfz+a7L1V8fc38jI0fpPr1Zu1WCTb25kOk+qQPcWe5VbyqkZirTrv+NVPmDcIQAi8Ws6q7kzUciheG0CFBz585VXccXL16sOpLbbNVji9VknTwioubs4bSzwgePX1PvqPMyHOoFIBV5PMBLr7qw4Bk3tmxtXJp4cn4ZBh8RhKzr7vGa4POa1Edv7EeP/rHWdV7AK9f76rvNpN9W/XkgYKqZOqzyyNYyP7/dHhO2qkfEnM66I2MOZwQvv+KKO70pQe+OuzIx4kR/ykznNaZVQmwxerRVwtYqr7q
8obBCNY+V4KWHJ5M6489qNqs3JDaLSQ9XcrsErOqQJR9NZv1+DFzNEKDeeecd9O3bV13evYiciGhvSZ1HsdeD4go/vAFNvSjkp3APp6JiE/7zvBvPv+BCaZk+YpCfH1ahqLJSfua6P7eEhE4dwxg0MKj3EnLIFgFyIy1SZ1Q7nKHegBa9HP08NszVDWi7HkOv6YIKarKV6iuENZk83tZCi5oGHTJYr5FL11YJLrsJ27YD2Rk2mGBBBBE1WiXtEwLBCHwIIeKNIBSJ1BrNk38ReaMSO1plsZhgM1tgtZphl8Blrh7Nqt7k79NaHbjUban559r0AHXffffhqaeewtixY5t/j4goLcnUgy8YQkW5T13euK0CQUjzQSsKspwp++Zs0+9mPPW0G6+85oLfr/+MPbprGD/Og7PO8OG9D/RpKikYj4aMaHgSf59a2SojLPI9sjIjamtu8qLt90MfCfPUHT2rL6B9850F773v3ONj3/tgBsZe5MVxxwZaZN+TiVniU3WtoK0RvzNSYxWu3qKBS9Mi8EeCNYFLT1wmRGIDl6l6M+uXVZCT4GU1q1Es/TZ99Cs62hUNYcn0BqlJAUrWuzv66KObf2+IKC3ImUS+gB6YfAEN5d4AvIEQ/MEwNBlyUYXQUuO05xfIZPXtd1Y88ZRb1TlFa5cOPjCICeM9GDVi17ST1PBILc+uQmmdjDxJeEqFGh95zXQ6ZYsgr5GjZytX2RoVoNaus+O6dXZVWD/0yABGDvdj+LAA2rev3b+Q6opO9VmaELjCErgiMnIpdVlB/bawvpC3fv6oLjq6VStwVU8pqulFi0W/XQUsud2s75Ma/TJB/WdKogB17bXX4uGHH8ZDDz3U/HtERClFnjADwWhYCqHCF0ClT1MjTlpIfzKVJ0vpvCzNAhE0o1jeqNlSrzhc3qx/8pkNjz+ZgU8+3dWn6rhj/JhwqUdNNdX3YiAhSWp5Vq42Y923v+LQA3um1Kn6TSHTlnK2nRSMx47MxY7QyRSojOK9u8KBDRut+OAjh9pumx3BYYdqKkzJ1rNHqE1+hlQOXGh04NI/ypuqaOAKaiGE/Vp1CJOopT9PyB+HfA+ZFqwZuYrWcVmkhkuCV/2BK3aEq7kCV5MC1KpVq7B8+XIsWrQIBx54YJ0i8v/973/Ns3dElJxTcdWjS3LWj7QakHqmoBZWtRcynC+9a7Jc9noXT03FlzJN088YkxGnb7/Tny8tlghOO8WPy8Z50L+ftsfHkLA05IgAcgo2o1+vLrBY0vv0dDke0qpApjclLNU3vTlruj5Cd9OUKvyywYJl7zqw5F0Hvvrahi/X6tvd92Wiz36aCqgSpg46UGOhdKsGLqjJv8a2IZGpxEjstGIkAm9APoZqRr5qApcanaoe2YoWw5tkSnFX4Kqo0EsGmqJJf4G5ubk466yzmvxNiSg1yJITEpQkIHlkKs4TUOFJzpyTJZ6k3kJfhsIKm1vO4EmvVyYptP7f6y48+Ywbv/+uvyWXM8j+erYX4y72oOs+nEbaG0amN3v3CqF3Lw+umOBB4TazGpVa+q5DTQX+9LNVbf96PAOdOoUw8kQ/Rgz344iBQew2PkBtzKKq0hv/PBIdxYoGLglfvkAEIYTUbcVFVa0XoDRNw7BhwzBq1Ch06tSpyd+YiJKLPAFJUJKAJGfFyciShCY1FaeF9Z40Nn10SZr4pWqbgcYoKTXhuYUu/GehGyUl+jvrvLwwLjrfgwvP9za6zof2rCnTmxKwLhjjVVt5uQnvfWBXYeqDD+0oLLSofzfZcrLDGHaCPH4Axx7th9vdmj8ZNQdV1N5A4Kosb/pIruGvtFqtuPLKK/H99983+ZsSUWLT198K1xR5V/mCqPRr8MvoUiikno6kn4zDqi8hkYpdwJti8xYznnrGjZdfdakzxUTXriGMv8SDs0d7VeNIan57M72ZnR3Bn//kV5ucCfjJZ3qYkhGq4mIzXn/DpTZpD3HMUQE1MnXiCX7k5zEEp7s
mRa/Bgwfjyy+/RI8ePZp/j4ioTbp8R0eXPIGgWirFF9QQCOp1S3LqsYwuuZ1W5FjsaTcVtyc/rLfgiacy8H9vOxAK6cfmgP5BVRh+8ih/2q1Nl6ykj9aw4wNqC4Uq8MVaW03d1B9/WFSoks1sjqgi9mjdFKdi01OT/qyvuuoqTJkyBX/88QcGDhyIjIyMWrcfcsghzbV/RNTMpA5AhaWgBq9fQ4VPU1Nycp3cphoyVo8uZTntaT0V1xA5OWjVajmjzq3O7Io6amgAEy6twtFD6z+jjpJnVEtqoGSbemMl1v9oUSNTy5Y78N33NqxabVfbnfOy0L9fsOaMvr77h/jvniaaFKBkIWFxzTXX1Fwn70hl2F8+hqRlLZHML/v0DsA7yn1wOkKqM23NEgHVnWr1okBqCfI36ZepuICmpuPk36PSK3VLGoIhfQpCapak0Ds3I3XXlWtO8vQmL6RPPOnGV9/oFcYyInHKSfoZdXIWF6UWCUT9+obQr68Hk67y4I/NZhWkZHRq9Robvv9B3x56NFNN2UoR+sgRfhx+WDCtW02kuiYFqI0bNzb/nlDKkeLi33ZUqM83FJbBVF2XEF1fqWaJAHN0QUx5Idc71UabpMkLuiwLEO1UK7U2ydSptrXpK7Pro0uybpw0qJTLcr2MmEi/FDnWmU6pW9JP6aXGkfqY1/6fE08+7cavv+m/y1IX85fRXoy7xIse3fnGMV3IlJ10N5etuMSEFe/pI1MffmxXU30LnnWrTfpQSb2UjEwdPTSgpggpzQMUa5+oMQpLPaqWRrTLcanCzmjjNNU0rbpTrZwKL9NH0dNMo0sDYLdOtRKqootf2mVpAGm+aJWwpY9iqeAVHdmqvi7ZlgYwQo6VBCUVmFQ376A+FSctBEIRddwkkMoactlWBs+mkrO0Fv7XhWf+48LOIn04Qc7OkrPp5Ky6ggIWE6czKSY/e7RPbbIQ9Eef2NXI1PL39CL0V/7nUpvbFcaxxwTUyNSw4wKqeJ2SW5NLG3/55Rc8+OCDNWfjHXDAAapDee/evZtz/yhJychHYYkH2W4bSuppnGYxN25cO7ZhWnQtJmnIKKFBPo+uxbSrbZq+2OWuLrX6cgDR0S1pnCajWDVTiTEjXXoIS8xRGTUVF6ebt0zFyR7bY7p5c1p0720tNOOZ/7jx4ktOVHn0qc3OnUK4dKwHfz3Lh4wMvgBSbdLmYNSIgNqCwQq1kPHS6qk+Wdj4naVOtVmtEdV1Xqb6hp/oV20VKE0C1DvvvIM///nPOOyww2rWxPv4449VV/I333wTI0eObO79pCQio0tbiqoQCgNOl7V5lgZoZGlOfYFL1leTkRkVuKSR2q57q4AV26VWLtussYHLsmsqUYJZ9ahW7GhXSwSu6MK66qw4f1CNLu3q5i3rUun7J0XeMhJHzeenny349wI33lzkRFDT/2377q+pwvBTT/azsSI1ivyeDD0yqLbp0yrV2odSOyebNO38+BO72mbenoVDDtaL0OWsvv16cyo4WTTp1W3q1Km4/vrrcdddd9W5/uabb2aASnM7y33YWelHuywHEGndd1ZGA9euRS93hS5fIIyq6DpMewhc0WAVLcTeNcJVe2RLha7qkbHdA1fswrrSmLJCunmr7t6yNpQ+FeeonorLScNu3q1FRgtkqRWZeokafEQAl4/34LhjAjyzippMfnfk5ALZrr+mCr/+Vn1G37t2fLnOppaWke2+BzPRa199WRnpN3XowZp6vqEUClAybffSSy/Vuf7SSy9V03qUvmSkZ3NxFdx2qwoPIRmGSmBqBXB9MaZGqVl/KSZ0xQYuVcNVTS04UL0OUzRMyfezW2X9cP24/LKtDN4g6u3mzam4lierwy9/z67OqPtirb1mHTUZDZgw3oPDDuEZddT8ZPFi6REm244dsqyMXRWhSxNPWfT48Sdly0CH9iE1xTfyxACGDA7Avmv9aUrWANW+fXusXbsWffr0qXW9XNehQ4fm2jdKMjJ
9trW4SoWoDjmpueZBTeBq5KnJ0TqtaLiS0OXxhxEM6sX1pZUBOB32uAvrUsvwB4A3FjnVVN2GDfrToM0WwVln+DB+nAf79uQ0CrWO9u3DGHOOT20VlSa1nIyMTsnyMtt3WPDCf91qy8wM44TjAircH3dsAJmswUvOADVhwgRcfvnl2LBhA4466qiaGqh58+Zh8uTJzb2PlCRKqvzYXuZDbgbP1a2zDtNugSsUMmObLMyd6TC07ATtHXmBevEl/Yy6bdv1f5SsrDDOP9eLiy/0okP7xB4xpdSWlRnBaaf41SYhf+VKuypCf3e5HTt2WrDoLafaJOxLw1ZVhD7Mj3btIg32LVu52o513+6DsiL7HtcJpMZr0jP39OnTkZWVhfvuuw/Tpk1T13Xp0gUzZ86s1VyT0odMQcnUnYzOSC0QUSLZvkPOqHOpdgSVlfpIX8eOIYy72INz/uJTL1xEicRhhxppkm3WdGDdV7uK0KUP2fsfONQ2fVZENeyU9giy6HFsP7J3ljpw+9xMFG6T5+R26rpOHUO4dVqlWoSZWilAvfHGGzjllFNgs9lUnYYUkctWUaE3SpRARelrW5kX5VVBtMtxtvWuENXYsNGCJxe48dobTgSDej1Z714aLrvUgz+f5mNNCSUFqaMccJimthsnV+HnXyyqZmrpMge+/taGNV/a1XbXPcD+ffQidBlZvfu+TL2tXoxt282YdH02Hn6gnCGqtQLU6NGjUVhYqOqfLBYLtm7dquqdGJyowhtEYbEHWW4bmzVSQlj7lRThulX/nUhE/50cOEDWqPNg2AkBntlESUueYvvsF0Kf/Tz42+UebN0qRej6yNTK1Tb8+JNVbbraTYnVNRE5EziCO+7KVEGL03mtEKAkOH322Wc4/fTTa9a8I5LC6C0lVQiGw8i1s/aJ2o68037/Q7sKTqs/3zW0JDUiEpwGHq4X7lNyia5eEO3xFm09Ut1DFyEJCdJgN6SfMSlLGLmdev+2dNC5s94VX7ayMpMqPpc6v8+/kL+B+o+BhCi9sadDreHIl/MWDlBXXnklzjjjDBWcZOvUqVPc+3Ix4fRRVOHDzjIv8rM4dUdtQ05o/L+3naqHU/Sdt80awZ9P92H8WI96t05tL9r6Q/5TDW0l90Q/yrXVSzvJfWKpdiCqye2uPm/ygi+rCMjKAdJ3TTZTJIRt2/QTN8o8fvW4LrsFTptVXZcOcnIiOON0vxph1QNUw66dkoPbZoXRt6+Gvn001TBWPu/TO8RO+80ZoKRAfMyYMfj5559VF/IFCxYgNze3sV9OKUjWX5PCcZfDqp7AiFpTVZUJL73qxIJn3OrdtMjIkFPCvbjkQi86d+IZdS0ZgqJrWaqRoXDDIajmjNTdQ1B1g1m1SLjZDLvq9m+uvl1viKt/XfVWvai4frn24wc1fQRqv07Z8GsRVVpQ5gmgtEpGWExw2i1qqaN0KDNo7NmkZnMEZeVmrFptV1uUTPF16xpSYarf/iE9WO2vqes45dfEs/D69euHvn374pJLLsHZZ5+NzMxMI19OqdbzqaQKVT7p+cTRJ2oeMni96jMb1q3eB2U7HBgyJFLnCbuoyIRnn3fj+Rdc6slftCsIYexFXpx3rpeLtO5tCIp+Xl8Iqm4MGxuC5HOHvXp5I4ssfxRtGouazv3RJY/kayQA6eFJv09zk++T5bKp3modct3w+IIol60qgJJKv9oXFaaslpQtRRk0MKjOtpOC8WgNYCwJSLL+3uJFRfj1V71m6ocfLVi/3or1P1pVy4RNv1vVtnTZrq9zuSLos5+EKg3777/rY15uev7NGW5jIH9czz//PG655ZY6jTQpfZRWBVBY5kNepj1ln4Sodb3zlhV3THehcKuEokF1Trn+bZMFTz3twquvu+D3679zPXtouGycB2f+2QdHGpXgqaATCjccgqqnyGLFdsdXH7Hrc4etemkiqxmyvKLVIqM11WGpevRIRofko9wvGoL0x0RCkim+bLddbcE
cl3rDJwudl3sCqPIG1c8qSySlWusVedMhfzdytp2EpdgQJZfF36dWwu0CDuivqS1WUbEJP/4ooUoPVLLJ+n1er6lm2ZlY0hIkdgpQgpU0o031s1wNByj5A5LgVFRU1KoBqri4GJMmTVKLFcs+yAjYP/7xj7ijYHL/GTNmYMmSJdi0aZMqgj/zzDMxZ84c5OTk1NxPbvvb3/6GFStWqMeS0bW5c+fCamVzw3iCobCaupN3k6n2xENtF56umeCu95Triddlqz43a7+yIRzWXwhk8VVZoy6dziLSQmFU+PRCeGkZYrWGIYsCxY4K2atDkD4KpE+N1Yz+RKfQZFmh6AhSdEFs+S9BQ1BzkU7/uRl2tQW0MCp9+hSffKzwBGGzSZiS9SxT4xdK3nRIq4JdfaB0MvIk4amhFgYF+ZGahZBjR4flTYyEqR/WR0etrPjjDwu2bdO3Dz7a9S7Gao2gdy99+k9aK/STOqv9NXTsIEtWISU0KSXIIsI33ngj/vWvf+Gggw5Ca7jgggtU64SlS5eqZTDGjRunuqEvXLiw3vtv2bJFbffeey8OOOAA/Pbbb6oQXq575ZVXaordTzvtNFUQ/8knn6jHv/jii1WvqzvvvLNVfq5ktL3Uo+oK2me79jwd86kV6z7bB2UBKwYP1d8ZEe3+eyIjT3p4qnvKtYiuU3f8sfoadYMHBVPmSXhPgqEQKn2aOiPd7dD/gHp1zoLDoq+VmE4hqLnIWpP5mQ61+YMhNTIlKylU+YMIagE4bFYVppK9tlNCkrzJWLnajHXf/opDD+zZ5E7k8jW99g2pTc7ci+3u/9NPFqz/yVozBSjBShrWRkevYuXmhNW0n4SpftUfZVrQnYSrf5kiMtZrUF5eHjweDzRNg91uh8vlqjP605xk8WIJQatXr8agQfrQ/uLFi3Hqqafijz/+UF3QG+Pll1/GhRdeiKqqKjXC9Pbbb+NPf/qTClUdO3ZU95k/fz5uvvlm7NixQ/1sjVFeXq5GtXbu3ImCggKkMnm39v0fJWrkye2wNnI6Rtepcxh/n+PFSaem9wKterAE1n32Ew49sk9aBEv5mT1VeuF3VaUJHo9Jv1xpwldfWvDPf+y5ju7O2WX469np0/jPF9Tg8WnqbLNslx0FWQ44rGYsW70CIwcPg42j5HWKyJeuavqxkVdCOTFGFgaXMCVrega1CJw2C1wOaYuQvGEqFNLww4bV6NfriFZZOioSgepPFTsFKNvGXy0IheqvyereTR+tkpGq/fuEVLjq1i3U4j3bft+6DSeOOARlZWXIzs429LVNOpIPPvggWtOnn36qzviLhicxYsQINZW3cuVK1eSzMaIHKDo9J4978MEH14QncdJJJ6kpvW+//RYDBgxogZ8mecmpxzJ1F9TCDa53F3c6ptCkrn/oCU/ahqh663wSLFiqFxIvVMiRgFNVHXRiQ48KQlW7Lnvkc3V/1LpNrpfbfb69HxpxpsG5CvJ+Vl64pZeR3WZB+1yXmnLKsMsKELvONKPmJ8dXziiWrSDLCU9AQ6U3gFKP1EwFVVG9hCmn3Zo2Pab25lh26RJGly4BnHhCoOZ6vx/4eYM+UhWdAly/3oKdRRb8tsmqtiUxRetuVxh9+lTXV1VPAcrnuQlStN6kACV1Qq1JOqBL1/NYEoLy8/PVbY0ho0NS/yTTfrGPGxueRPRyQ4/r9/vVFjsCJWRqUbZUtaPch+0llcjLdKh3NPGnY6LLB8TpgHubE8NGlKb8qMvulrxtx3VXxA+WDz5WiVGn7HqyaSz5ldsVcHYLNrWu2zX6U+u2WtfpoSdaa9TcLJYIMjIjqseMO0P/GNRM+P6bPT8VFRQE4/7eJTt5cfb6Q2o6Sc4Q65zrQLbboU67F1r1z737R9qluY+N3QLkZ9qRl2GHJxBElS+I0qogSio9ajo1mdoihMKhWh/bitUKNbLUb//a1xcVmVWgWv+TDet/lM2Kn3+xweM1Y91Xsu1WtN5BRquCapP6qv3
7BLHvvvKmw9j+yOvVF1/Ymv7zNPULf/nlF9ULSj5KMbcEHJkS6969Ow488MBGPcbUqVMxb968PU7f7S0JOFLrJNOA0s9qb0mR+axZs+pcL4Xo7mScyDWooQnar78uQOHWY+LeLiGqcIsFxw5ww+mU4dlIrU1vkFf7utjb6rs+ev94t9fejN9HQl993y96v91vj71fdJ/kGff2249soM4nglun2PHDlz/D75ezXazw+fRN/9xSc518jL1O01ouiTqdGlwurdZH+XeLvS66yfX13V+/Xb/NZqtbQCpPYpdfPgpFRTLEVN+LUQTt2nmRmfsxftiAtLBpD7evWPNhK+1J8uGxie+nX79AosrrABwp29H6ZZnq27o1A7/+mo3fftM3+Xz79gxs225R2wcf7RqWlpMqunatQI8e5Wrr2VPf8vJ89dYGfvppZ/z73wejqCizdQPU+++/rxYWPvroo/HBBx/gjjvuUAFq3bp1ePLJJ2uKtPdkypQpGDt2bIP36dWrlyry3r59e63rpf5Kaq0a6oguZLHjk08+Wa3Z99prr6kC8Sj52lWrVtW6/zZpZVt9WzzTpk3D5MmTawW0bt26YdiwYSlZAyXTCpt2VmJLsQftc5wNti34+ffG1Y2VlDRcgJ5+TKiocOBf/2r6tLHNXntkx13rczRwW8zHzAjcbv2yy60vYrr7fupPG81bR3HbXA3XXSGf1X/K9W23+HBgnyOQKmTpI6lvkpGnTJcN+RlOZLtse+yYLaMrEhCGDTwW1laoZUkmrX1spKRBis4r1Jl8GrzBkGqbICNTUqQuRf2JQkaeJDz16Xk4LObkGfo/UE70Py56SZ4LylBRUY6fftbX/FOjVdUfq6rM+PXXHLXFys0NqVGqvn1kxEqmAYNqqvDuu3Or39A2fdaoSb9lMnJ0++23qxARu5jwiSeeiEceeaTRjyOtBWTbk6FDh6K0tBRr1qzBwIED1XXLly9HOBzGkCFD4n6dBBupaXI4HHjjjTfg3K2IQh5Xwp+Es+gUoZzlJ3VSMloVjzyebLuTcBYb0FKFnHG3szKI/Gw3rHtoW9Cxc+OeNGbc6UW/A0Kqb42MQMgm00bqo/pc/uj1z+WdSPR++mW19BXCoer717pNv2/NdTW37XbfmvuY9K/b7fHlRbzmcuxtYf3+6utr3WaK2V8gUrP/+uNIHVF52Z6rIQ84SMO+vcPIyERNsKkJQNFwo6bAdl2W2+S+e99zRXX0QVs45U9hmJ/wVNeHmeo55VqmZZI/MPiDmnqxlRqavEyXOgss02U33FBSAgKLyNv22MgzvdNuQ0GWW7V2kTP5pC1ChdRNVWmwWS3qTL5EavUi4ak1ishbkiyAcsSgMI4YJOUOesmDBKEtUrS+vm7RemmpBatWyxb7mh2to9i757smHcmvv/663vYBEkKk1qi59e/fX40iTZgwQZ0lJ3VGEydOVEvLRM/A27x5M4YPH45nn30WgwcPVuFp1KhR6mzB5557Tl2O1ipJaLNYLOp2CUoXXXQR7r77blX3dOutt+Lqq6+uNyClI+k9I4XjMugUrcdoyKAhIRS0D6NoR/0vxqoDbucIxlwUSKsaqJWfWHDRX/Y8VDxtpg9DjkrPtdukiH7ESRVY9VEE61b/jEP796i3E3nyFoaHYLeZ0T7bqeoIMxx6YTilWI+pYAiVfk01G9ZHqFKvx1SiMZmAfbqEsU+XAIYP21VH6vMBv2yo3bfqm2+tKK9ewWBvNSlAyRlx0jNp3333rXX9l19+iX322QctQbqfS2iSkBRtpPnQQw/V3C6hav369SowiS+++EKdoSf222+/Wo+1ceNG9OzZU4WoRYsWqbPuZDQqIyNDFcjPnj27RX6GZLS9zKuWP9hTz6fYX2RZSqNoh7k65dedjrlltjfpXxSNkmApZ9tJwXjcpRU6R9T90pn8Xgw+Mogc12b069kJFos9yQvDNRWepNv1PgVu5Ljt6nNKXXL2ZL7NgryM6h5Tfik
+lx5TWkr1mEoGMul04AGa2qLe/D8HJt9Ue5qvqZr0lywjP9IrSfoqST2MTKV9/PHHuOGGG1QjypYgZ9zFa5opJBDFtrQ64YQTal2Op0ePHnjrrbeabT9TiZx1sqWkCpnOPddmRL3+sg0bf7HAbo8gJzeCHdtjpmM6R1R4SpTT9Vs7GEirAjnbLt7SCukYLFNRKBxWL5YyEpHhtKF7u0zkZDhUXQylD3kzKfVQsuVnOlWPKemjJ20R5KP8njhtVnV7MveYStWFllssQEmXbhkNkjPupJhbpsGkq/f555+vpsAo+cm7Z5m6CwTDyMlp3HRmWSlw9+16ndm1N/pw6ZWBtGsY2RAJjg/VV+eTxsEy1aa75U1HSArDnTbsk5+h1mCTwmJKb7E9ptplu+rtMSWjUjI6xR5TbbvQcosFKBlpuueee1RBdiAQULVDMpVWWVmpmk5yceHUUVThw85yH3IzG18L9sA8J4qLzNhv/xAumaDXOA0eqiHHvhl9B+6b9MWLzVrnw2CZMgJaCJXeoFpSJdoxPKsJheGUPmEqw2FVW/scNzxSJ6UKzwMo8/hrekxJw85k6DGV/AstN/2xDL2iyRlr0kdJuoDL8i0ypSbTZE899VTT94ASjszby+iTFI1LcWRjfLXWgheetdecYZfqq3DvDQbL5CfPe75gSL34SWFwOxaGUxNIyJbRStk6VIepcm8Q5Z4ASip9qt5XdT+3WRpsH0N7s9AymszQM7ec4fbPf/4TV1yhGrZg2bJlqkHlv//9b/UPTalha4lHvaPukNO4wnE5TX/mNKcaDv3z2YG0PYuMUl9sYbjUr3TKdatiYZmaIdobMnUnI5eyyXOvLOcjQarCG0RRpR+2mh5TDFPNudDyW0vKMPmGpj2Gob/6TZs2qQV8o2QkSv4hZTHerl27Nm0PKKHI2SLbyjxqrbvG/pH+9zk7vllnRWZWBDdP97X4PhK1tmjTRCkMl0W0u7XLRK7brs64ImpuMvIvZ2zKFtD0HlPlXr8anZICdKsl8XpMJetswGGHNf01y1CAkoLx3ZtRSuPIVF7/Ld2KYLcUV6k5+Mb0fBJFO024/y79d+K6m3xo3yExFnkkarbCcL+swRdBhsuKffIykOW2NXpqm2hvydmbdlmTL7O6x5RPU8XncsKCjFBJiGePqbZhNTrvL0uvxDaZ9Pl8uPLKK1UPpaj//e9/zbuX1Cp2lHlRXOlXZ4k01j23O1FeZsIBB4Vw/iXGF8IlStTCcHnXL2SJlfwsJ7LdNhb1UmL0mMqs22NKCwXgsOptEdhjKgEDlDSZ3N2FF17YnPtDbUT+ELeUeFTfmsaeRvv5Sgv+95JeLT5zrlettE2UzKS2yesPwmIxIz/LoZZaYWE4JXqPKfm9lRGpEo+fPaZakaGXvAULFrTcnlCbFsbKQsFyVlFjC8c1DZh1i37fv54fwGEDWThOyfv7L00OpWhXpq475rpVDaDUOhElQ5iS31XZZPagKhBElTdY02NKZo7Y/L5l8LCSWqplR7lXnU3UWP95yo7131uQmxfGDbewcJySszBcQlNA05da6VqQoYJTY+v/iBIxTGU6bGprH3MmX0mlV90urREcdvkdN6s6Pp7Nt3cYoNKc1HpIzyf5Y7I1cqkJ6aL90D164biEp7x8Fo5TshWGS82IvtRKl7xsFoZTypF6vWiPqYJMO377A+ic54YnKItbh1R7BLmPhCk5m491U8YxQKU56fkk71Dk3Upj3TXLiaoqEw4bqOEv5/EMTEoOwZB0DNcLw7NcNhRkZbIwnNJCdC1T6XxutVjVG2cJUVL7KtN8qm4qFIbVaqkJVPy72DMGqDRW5gmgsNSjeo009o/l4w8seOsNO8zmCGbe6QXrEynR+YJSYKvBajGps5fyM+3IdNpZGE5pSX7vZZpattwMOzrn6SNSUgcoo1Iy7Vfq96slThwSqOwy3cdp7fowQKUpOUtDej7JH4msudQYAT8wu7pw/IKxARxwcPO
tap0KhchSrBkO65+rLSwf9fYfsZfV5yGtpnWExWqF1WxWZz/KMLr+0cSzZ/aCHHN1Rl1Ag80qheEuvTDcbmVwIoohb56ja/MVZDkRDIVrTqqQxp1efwjloYBabcRhNavgxecmHQNUmtpe5lULBhvp+fTkYw5s3GBBu/Zh1TQzlejhRt8kVMpHKTLWw091QIoJQLHk9ViKMeU5xQwTTPLRZFLXSRCS2ho9GJmrL1tUgCr+GejVMRsRk1m90PuDYfXkFQiE1PeWfdK/VQRmeQyzHqr0cGVSoSs6NE86OWbyxC+jTnJW0j75LAwnMkLVw9YsKaOvjSojVJX+ICq9+pl98gbcLqNYVqmdTd/pPgaoNCQvMFsN9nz643cT/vWgfpbe1Bk+ZGUjYURiQs/uIz01AWi3gCTkbz76uTkmAEkokU3+s1hMsEv4seiBxWqVIGNWi4BKmFFfo77WBEv0o3x99efxnlii3fulbYR084+S4CRFzrJJmNJC+mW/JkPsIfVRXQ5EoFX/TBKwovsaDVUqbFWHrnQ400YVhgc0aFoYGU4reuZlIdttZ2E4UTP1m5JO6KGwG96gpp6Lyj0y3afXT8nzT3RaMJ2K0Rmg0owEii0lVWpB1A657kZ/3R3TXfD5TBhylIbTRzdv4XitUZ7dp7qqQ0JNQJIbEJHVZqrHfqpHgFRYqQ4z1eFF/pMzC/XRn+p3VtUjQdFwEw1A0TAUDUF7CkAtRcKXxaw/EcU7VlpNyIrUBK2gJuEqDH9QRrIiav0sLaCpJUj046X/LHrIih3J0qcMk7owXDqGR6Qw3Ir8fL0wPJl/JqJEJX9X0TYJ7bKc6nlGTfcFNFVTK0XpmhZRzy0yQiXF6Kn8t8gAlWZkqRaZvsvNbHzPp+VLrHh3iQ1WawQz7vQaqiGRF/Zo/xGYrWoaTL3aVQ//yCUVdWICkPzBqSkxk/wRyuiPBB+9Pij64l/fSI8KQDWjQ/p1qTb6Ij+PHIuGRlYkaO4+gqWmBiVkBUOqYapc7/HHTBXWCnAxtVgSsCytHyT3RKboPD5N7Zss6luQ5UCGU06GaOs9I0qzdfqsdjXa2zHHrf4u1XSfL1izVl84ElbTfE6bJeV6TzFApWnPp8au4u31ALdP1+ukxl0ewH77N75wXF6YSyr19fEKMp1wOPQplWjgscR+rBWGUjcAtQYVPK0Nr9QuNQzB6nAVG7ZkBMsXlJEsfapQ3l2GqgvjhfxrRKcKY4NWNPS2RmG4BD8J1u1VYbgdGXYutULU1kwmqIa0sskSSDJSHi1Gr/AG4KnuPaXOArSmxnQfA1QakZYFRns+PfaIA3/8bkanzmFcdb2xwnEZ0pUXuBIA+3bMrlXrQ21Ln7pTFaN7nCrUw5UesGREUUawZFPD90EJWHK/aMCKwGTeNVIYe0ZhU4fyo4XhEuqkFmOfAjdy3Q71ORElJqt5VyPP9tku9dwhIUrqpipkhEqm+0LV030SqKQYPcmGkBmg0oQEp8ISjxpqbex0zMZfzHjin/pU39/neJGRYWy0S174OuVlYGNTd5oSYqowXtzeveBdLstHCTrRRn36cikhveA9HFbTtRK1okXu+jThrs9jfzfla6u8ARXU5BTrbrkZyHE71LQBESUPkwl6TVR17ymp5JDRKSlGr/AFVJ+2Uo9flXXYrNakWWqGASqNej7JIIEMrzaGzNjM/rsTwYAJxw4LYtQpet+ixiqtCui9d9z2Ju41Jbo9FbyLXQGr9miWjGDFFryHYqYKw9VnKMpZPjnZLtWKQIK/BC0iSn7mmAWQ87MctXpPyeiUvPmSHlSWBF9qhgEqDewo82FnpR/tshpfOL54kRUff2CD3RHBbbf7DNWY6Kvam9W6Swn+BoJamCr8t5ihr5wYv+A9ttjdW+VD8QagZ4dM5OVksjCcKJ16T0WQNEvNMEClOCm6lbYF0oG5sd1jKyuBO2fqEzeXX+1Hj30NFI5HIqj0BtWLX4bDVtPviKjhgvdd1wW
s+hOjPJkyPBGlF1MSLTXDAJXqPZ+Kq1SI6pDT+J5Pj9znxLatZnTrEVIBymitlfThMVKoTkRE1OSlZrSAWq2htZeaYYBKYSVV0vPJp5ayaKwffzDjmX/rdUvTb/fBaSAHqYaOobBanqSxbRKIiIiabakZb1CdsKKm+6wtO93HAJXiPZ/UUiSNDDMyJDpzmguhkAkjTwnihOEGC8elzirbqVa8JyIias2lZsIRt+qKHl1qRlbcqPJJ+x2Tmu6T+zVnMToDVIraVuZFeVUQ7XLile/W9forNny+0gqXK4K/z/Ia+n76qvdmdMnPSIjiPiIiSi9m056XmglqYRWiZKpvb2dKGKBSkBTaFRZ7kOW2NTrMlJUCd8/Rw9bV1/vQpWt0cY/GFY5L7VOPDlmqaRoREVGiLjUjy8zImX3yuuUPGJtpicUAlWKk+aBM3QXDYeTaGz+V9sA8J4p2mtG7TwhjL9eXXzES2GQ+uiMLx4mIKAmWmpHXSpk5ccPYbEusxOtMRXulqMKHonIv8gwUjn/zlRkvPKsXjstiwXYDvS+ld08gGEKXfDcLx4mIKGkaAcuMSX5W48tcdscAlUJkrldGn1wOa6ML5UIhYMZUFyIRE04fHcCRR4cMdxwvyHYiP7Ppv4RERETJhgEqhXo+bS2pUnO7RuqQXl5ox9drrcjMiuDm24wtFizDn5LTpON4UxeKJSIiSkYMUClCRoIKy3yqhUBjF2AsLjLhvrn6VN+1N/rQoWPEUGCTAryOuW5ku7jeHRERpRcGqBQgzStVz6fqZTEa6947nCgrNaPfASFcMNZ44XiG04ZOuY3vcE5ERJQqGKBSwLZSD0qr/MhxN34kaM0qC155Ub//rLu8sBo4HzMUDsMXDGGf/AzVS4OIiCjdMEAlOellsbXEoy+82sg6JE0DZt6itxz4y5gABgwyXjgup4EWZLHjOBERpScGqFTo+aSF4XY0fgjp+aftWP+dBbl5Ydzwd2OF47LukJRYScfx1lqwkYiIKNHwFTDJez7tlJ5PBtae21ZowoN36y0HJk/zIb/AWOG4jD51yHEh28WO40RElL4YoJKU1CDJ6JPT1vieT2LebCeqKk04ZICGc84PGvqelT4NbocFnXPdjT7Tj4iIKBUxQCV5z6csAyNBn3xowaLXpVYqgplzvTAyA6fa3vuDqueT084VgIiIKL0xQCUhWVV6W6kXuRmN7/kUCACz/64Xjp93cQAHHRI2+D39aqqwfTbXuyMiImKAStKeT5KbjLQQWPCYAxt+tqCgXRjX32yscDyghRCO6IXjRqYLiYiIUlXSvBoWFxfjggsuQHZ2NnJzczF+/HhUVlY2eP9Jkyahb9++cLlc6N69O6655hqUlZXVup+M4Oy+vfjii0hU20s9KKn0I9fd+MLxzX+Y8OgD+v1luZbsHGPfU3pMdcx2GuozRURElMqSpphFwtPWrVuxdOlSBINBjBs3DpdffjkWLlxY7/23bNmitnvvvRcHHHAAfvvtN1x55ZXquldeeaXWfRcsWICTTz655rIEtEQkNU9bSjxqrbvG9nwSd9zmgs9nwhFHajjjbKOF40FVqN4pL4OF40RERMkUoL7//nssXrwYq1evxqBBg9R1Dz/8ME499VQVkLp06VLnaw466CC8+uqrNZd79+6NO+64AxdeeCE0TYM1pvW2BKZOnTohkYUju3o+Se1TY61YZsWyxTZYrRHMuNOrpv4a/T3DERXaenXMNtRnioiIKNUlxavip59+qkJONDyJESNGwGw2Y+XKlRg9enSjHkem72QKMDY8iauvvhqXXXYZevXqpUapZHSrodEWv9+vtqjy8nL1UUbGZGsJOyt82F5SiZxMB0IhrVFf4/MCc27NVJ9fPN6H3n0CCIWMTd1lO63IdVub/HNFv66ljkuy4nGJLyit8qVjfkiDSUuaKoNWIcck9iPtwmMTH49NfJqRF8VkDFCFhYXo0KFDreskBOXn56vbGmPnzp2YM2e
OmvaLNXv2bJx44olwu91YsmQJrrrqKlVbJfVS8cydOxezZs2qc/2KFSvU47SkIgP3feGFvvhjUwEKCrwYecK7WL+mab8oG7/CXpOpV6qLxyW+99Z+3Na7kLBWrPmwrXchYfHYxMdjU5fH40FSBqipU6di3rx5e5y+21syQnTaaaepWqiZM2fWum369Ok1nw8YMABVVVW45557GgxQ06ZNw+TJk2s9frdu3TBs2DAUFBSguW3aUamm79rnOBtdh/TrRjNee12v5Zp+h4bDjhlq6HvuKPOifY4LvTpk7VXtk4ywSEgYOXIkbDZ2L4/icYkv4Atg2fJlOOGwo2Gz88SFWDKCIC+CwwYeC6slKd7/thoem/h4bOIrKjIyLFFbmx7JKVOmYOzYsQ3eR6bVpD5p+/btta6XOiY5025PtUsVFRWqQDwrKwuvvfbaHl+shgwZokaqZIrO4ai/1kiur+82eezmfjGUabSdsnhvthtWa+PaFkQiwJ23uRHwm3DM8UGccnoYJlPj/6k9fg0upx3d2mXDbm+en6cljk0q4HGpK6LpPcrkid6223Q76Xhs4uOxiY/Hpi6rxdL0r0Ubat++vdr2ZOjQoSgtLcWaNWswcOBAdd3y5csRDodV4IlHRoZOOukkFXbeeOMNOJ36GnANWbt2LfLy8uKGp9akhcLYUlwFRIz1fFrylhUfvmeDzR7BbXf4jBWORyKo9AbRs0MmMpx8YSciIqpPUkTR/v37q1GkCRMmYP78+Wr6Y+LEiRgzZkzNGXibN2/G8OHD8eyzz2Lw4MEqPI0aNUrNbz733HPqcrTYW0KbxWLBm2++iW3btuHII49U4UqmVO68807ccMMNSAQyjVZc6Uc7A92/q6qA22/T73/5VX707GWs43i5J4Ast01N3xEREVESByjx/PPPq9AkIUnOvjv77LPx0EMP1dwuoWr9+vU1BWFffPGFOkNP7LfffrUea+PGjejZs6eaOnn00Udx/fXXq/Xl5H7333+/Cmptrcqv93ySUSCLgZ5Pj97vxLatZnTtHsYVk3adKdjYLueySdsCeyOnC4mIiNJR0gQoOeMuXtNMIYFIQlDUCSecUOtyfWRUK7aBZqKQabQtxR74giF0MDAS9NN6M55+Qi+6nT7HC6fBQaRSNdrlVGveERERUXxsspKAiiv82FHuRZ6BhpmSFWdOc0HTTBh+UhDDRhrr9+ENaLBZzeiclwEzO44TERE1iAEqwfiDIdWyQKbQJNA01hv/s2H1Z1Y4nRHcOsdr6HvKSJ3UPnXMdSHLxcJxIiKiPWGASjCFpR5UeAPINhBkysuAebP1Mwyvut6Pfbo2PHW5uwpvEFkuOzrltmwTUCIiolTBAJVAyjwBFaBy3HZDzSsfvNuJnTvM2Ld3CJde4TfcKsEX1NAl383CcSIiokZigEoQoXBYTd1JLZPT3vja/m+/MmPhM3rhuCwWbLRxc2lVAAVZLuRn7rlHFhEREekYoBLE9jIvSip8yDVQOB4OAzNvcSEcNuG0MwI46lhja935AhosZqBLnttQqwQiIqJ0xwCVAGTplK0lHrgN9nx6+QUb1n1hRUZmBFNn+IwXjnulcNyNbDfXGyMiIjKCASoRej6VVMHr15BpYOmU4iIT7rtTn3a75gYfOnYyVjhe6QvC7bCpAEVERETGMEC1sZJKv5q+yzNYg3TvnU6UlpjR94AQLro0YLjeyhMIYZ/8DDgNrLFHREREOgaoNhTQ9J5PNovZUM+nLz+34JUX9Gm3mXd6YXRxbVU4nulAQRY7jhMRETUFA1QbkpYF0sDSSA2Spukdx8VZ5wQwcHDIcKNO6ZDQWRWO85+fiIioKfgK2kYkOBWWeFR4MrJ0ysKn7fj+WwtycsO4abrxwvHSKr9aX096TREREVHTMEC1Yc+nUARwGej5tH2bCQ/eo9dKTZ7qR36BscLxKr8Gt8OqOo4badRJREREtTFAtYEdZT4UVfqRl2FsFOjuOU5UVphw8GE
azrnAaOF4BB5fUE3dGQltREREVBcDVCvzBjTVtsBttxqqQfrsYwve+J8s8RLBzLk+WAyePFfm8SM304H22Xr9FBERETUdA1QrkhqkLcVVKkRlGVgsOBAAZt2iB5/zLg7g4ENDhs/2C0ek43gGrNJ6nIiIiPYKX01bUUmV9HwytlyLeOYJO375yYL8gjCuv9lY4bgok8LxbCdyDU4ZEhERUf0YoFq555PFYoLd2vj5ty1/mPDI/XrhuJx1l5Nr7PtW+YJw2KzonJfBwnEiIqJmwgDVSraVeVFeFTTcPuDOmS54vSYMHKxh9F+Dhr42HI6oJVukcFzOviMiIqLmwQDVCmTR3sJiD7LcNkM9n95fbsWSt2ywWKRw3KsaYBpR5g0gJ8OO9jksHCciImpODFAtTNoHbCn2IBgOG2of4PcBc27Vp+4uviyAvv3Dhr5vUAsjFIqownFZKoaIiIiaD19ZW1hRhU9t+ZnGCscff9SBTb9a0KFTGJOmGC8cl47j7bKdyDP4fYmIiGjPGKBakC+gqcJxl91iqOfTpl/NeOwRPfjcMtOHzExj39fj19TixF3y3IamDImIiKhxGKBasOfT1pIqdRZcprPxPZ8iEWD2rU4E/CYcdWwQp5xusHA8EkGlVy8czzDwfYmIiKjxGKBaSGlVAIWlPjWFZqR9wNLFVnyw3AabPYIZd/oMF47LIsVSrC4LBhMREVHLYIBqAcGQvliwxWys55PHA9wxXQ8+l/3Nj317GywcD4XVJoXjRr4vERERGcMA1QK2lXpUEbfRnk+PPuDA1i1m7NM1jCsn+ZtWOJ7lRH4WC8eJiIhaEgNUM5PGlVtLPMhy2WE2N37+7ecfzVjwmB58pt/uhctt7PvK+no2s1l1HGfhOBERUctigGrmnk8ydSc9mIx0/pbCcVksWNNMOHFkECeO0gwXrJd5AuiY5zK0SDERERE1Ddf3aObFgnf6gsjP1BtgNtai12xY+YkVDmcEf5/jNfx9K7xBFZw65hgctiIiIqIm4QhUMyos8cBps8JqoPN3RTkwd5YeuK661o9u3SOGvmcoHIYvqKnCcYeNheNEREStgQGqGUkDS6NTaP+4x4mdO8zYt1cI4680XjheUhVAQaYTBVnGRr2IiIio6RigmpGcdWek59N335jx3AL9TL3b7vDB7jDe6Vzq1LvkZ6iWCURERNQ6GKCakd3AFFo4DMyc5kI4bMIppwdw9PFNLBzPcSPbYLsEIiIi2jsMUG3k1RdtWLvGioyMCKbN9DWpXYIs1dIpj4XjRERErY0Bqg2UFJtwzx16zdLEKT506my0cDwCTyCkFgt2snCciIio1TFAtYH75jpRWmLG/v1CuHh8wPDXl1X5kZ9hR7tsFo4TERG1BQaoVrbuCwteXqifqTdjrhc2g30v/cEQZLxKLxznPx8REVFb4CtwKwqFgBlTXYhETBh9TgBHDAkZfowyjx8dc12G19kjIiKi5sMA1YoWPmPHd99YkJ0TwU23Nq1w3GW3olOu21C7BCIiImpeDFCtZMd2Ex68W69Zuv5mHwraGSscD4cjqPIF0TnXrUIUERERtR0GqFYyb44TFeUmHHSIhjEXNaFw3BNAXqYD7XNcLbJ/RERE1HgMUK1g1acWvPGqdCmPYOZdPlgMdh4IaCHVukDWuzOyzh4RERG1jKR5NS4uLsYFF1yA7Oxs5ObmYvz48aisrGzwa6644gr07t0bLpcL7du3xxlnnIEffvih1n02bdqE0047DW63Gx06dMCNN94ITTPWFbwhwaDecVyce2EAhxxmvHC8tNKPDjlO5GawcJyIiCgRJE2AkvD07bffYunSpVi0aBE++OADXH755Q1+zcCBA7FgwQJ8//33eOedd9TyJ6NGjUJITodTZ8WFVHgKBAL45JNP8Mwzz+Dpp5/Gbbfd1mz7/cwTdvz8owV5+WFMnmp8sWCpe3LYLarjOAvHiYiIEkNSBCgJQIsXL8a///1vDBkyBMcccww
efvhhvPjii9iyZUvcr5OAddxxx6Fnz544/PDDcfvtt+P333/Hr7/+qm5fsmQJvvvuOzz33HM47LDDcMopp2DOnDl49NFHVajaW4VbTHjkfr1wXM66y80zWDgeiagz76RwPMNhsGEUERERtZikOJ3r008/VdN2gwYNqrluxIgRMJvNWLlyJUaPHr3Hx6iqqlKjUfvuuy+6detW87gHH3wwOnbsWHO/k046CX/729/UaNeAAQPqfSy/36+2qPLycvUxFNLUFnXHjEx4PCYMGBTEn8/2qj5QRjuOZzktyMuwIShzgUkout/Juv8thcclvmD1FLoW0mDSkuI9XquRYxL7kXbhsYmPxyY+zegLc7IFqMLCQlWfFMtqtSI/P1/d1pB//vOfuOmmm1SA6tu3r5oCtNv1WiL52tjwJKKXG3rcuXPnYtasWXWu3/D1alVLJb78sj3e+b+jYDaHccmFH+GnL/WQ1RS/foWkJ8ed6uJxie+9tR+39S4krBVrPmzrXUhYPDbx8djU5fF4kJQBaurUqZg3b94ep+/2tnZq5MiR2Lp1K+69916cc845+Pjjj+F0Nn0duWnTpmHy5Mm1RqBkVKvXwUcgJzcPfh9wzXW56rYLx/lx0tmHGP4eO8u8aJfjxL4dsmFO4tonGWGRkCD/Bjaj69akMB6X+AK+AJYtX4YTDjsatuo3O4SaEQR5ERw28FhYLUnx/rfV8NjEx2MTX1FREZqqTY/klClTMHbs2Abv06tXL3Tq1Anbt2+vdb2cKSdn5sltDcnJyVFbnz59cOSRRyIvLw+vvfYazjvvPPW1q1atqnX/bdu2qY8NPa7D4VDb7iwWq9oWPO7Apl8t6NAxjOtuCqjrjPD4NTgcdnRtlwOHPTVeXCUkMCjUxeNSV0QLq4/yRG+z8sm+Pjw28fHYxMdjU5fVaF+h2K9FG5LWArLtydChQ1FaWoo1a9aoM+vE8uXLEQ6HVVF5Y8lZeLJF65fkce+44w4VzqJThDIqIK0SDjjggCb9TJt+M2H+w3q4mjrDh8wsGC4cr/AG0KNDFjKdfGElIiJKRElRodm/f3+cfPLJmDBhghoxkim4iRMnYsyYMejSpYu6z+bNm9GvX7+aEaUNGzaoWiUJXdLrSdoU/PWvf1U9oU499VR1H2lpIEHpoosuwrp161Srg1tvvRVXX311vSNMexKJAHNudcHvM+HIYzScdobxAuEKbxDZbjs6suM4ERFRwkqKACWef/55FZCGDx+uApC0Mnj88cdr1ZSsX7++piBMapw+/PBDdd/99tsP5557LrKyslSQio42WSwW1VNKPspo1IUXXoiLL74Ys2fPbtI+vveuFe+/K1MyEcy4wwujpUtaKIxAMKQ6jtutTR9WJCIiopaVNJOhcsbdwoUL494uvZ5kei5KRqbeeuutPT5ujx49GnW/xpg7M0N9vPRKP3r30es4jCip8qMg24n8LOOjX0RERNR6kmYEKhmUFFtgNkewfz/jfSW8AQ1Wswmd89xJfdYdERFROmCAambhMHDDRDfeeavxg3syclbuCaBjrhvZLp62TURElOgYoJqdPnp0522uRncel8LxTJcNnXL1JpxERESU2BigWkAkYsLWLWZ8vnLPheChcBi+gKYKxx02Fo4TERElAwaoFrRj254Pb0lVAAVZTrURERFRcmCAakHtOzZ8Jp4/GILZBHTOz4BFPiEiIqKkwADVAkymCDp3CWPQkFCDheOlVX7VMDPHzcJxIiKiZMIA1QLhSdwy24uGltip9GnIcLJwnIiIKBkxQDWzTp0jeOgJD046VYt7n1A4Ao8/iM65bjjtSdPLlIiIiKrx1bsZzX+6DMcPjzQ48iTKqvzIz3SgfQ4Lx4mIiJIRR6Ca0cDBoT2Gp4AWgpSWd1GF4zz8REREyYiv4K1MFY5nO1k4TkRElMQYoFpRpS8Il92q2haYuN4dERFR0mKAaiXhcARVPr1wXEIUERERJS8GqFZS5gkgN0MKx11
tvStERES0lxigWoEUjkvrgi75blgtPORERETJjq/mraC0KqBaFuRlONp6V4iIiKgZMEC1MI9fg8NmRuc8NwvHiYiIUgQDVAsKRyKo9OqF4xkOW1vvDhERETUTBqgWVO4JIMttY+E4ERFRimGAaiHBUFht++RnwG7dQ3tyIiIiSioMUC2ktNKPdllO5GWycJyIiCjVMEC1AG9Ag81qVuvdmVk4TkRElHIYoFqgcFxqnzrluZHpZOE4ERFRKmKAamYV3iCyXHZ0ZOE4ERFRymKAakZaKAx/UFMdx1k4TkRElLoYoJqRTN21y3YhP9PZ1rtCRERELYgBqhnJMnfScdxiZuE4ERFRKmOAakYy+pTtsrf1bhAREVELY4BqRu2zWThORESUDhigmpHDxsJxIiKidMAARURERGQQAxQRERGRQQxQRERERAYxQBEREREZxABFREREZBADFBEREZFBDFBEREREBjFAERERERlkNfoFVFckElEfKyoqYLPZ2np3EkowGITH40F5eTmPTQwel/gCXr9+bErLYLPz2MQKhjR1bMpKS2Gz8Ok7Fo9NfDw28VWUldd6HTeCR7IZFBUVqY/77rtvW+8KERERNeF1PCcnx9DXMEA1g/z8fPVx06ZNhv8BUp2MsHTr1g2///47srOz23p3EgaPS3w8NvHx2MTHYxMfj018ZWVl6N69e83ruBEMUM3AbNZLySQ88ZezfnJceGzq4nGJj8cmPh6b+Hhs4uOx2fPruBEsIiciIiIyiAGKiIiIyCAGqGbgcDgwY8YM9ZFq47GpH49LfDw28fHYxMdjEx+PTcscG1OkKefuEREREaUxjkARERERGcQARURERGQQAxQRERGRQQxQRERERAYxQO2FDz74AKeffjq6dOkCk8mE119/va13KSHMnTsXRxxxBLKystChQweceeaZWL9+fVvvVkL417/+hUMOOaSmod3QoUPx9ttvt/VuJaS77rpL/V1dd911SHczZ85UxyJ269evX1vvVsLYvHkzLrzwQhQUFMDlcuHggw/G559/jnTXs2fPOr83sl199dVIZ6FQCNOnT1fLr8nvS+/evTFnzhzD6+GxE/leqKqqwqGHHopLL70UZ511VlvvTsJ4//331R+ohChN03DLLbdg1KhR+O6775CRkYF01rVrVxUM+vTpo/5Yn3nmGZxxxhn48ssvceCBB7b17iWM1atX47HHHlNhk3Ty+7Fs2bKay1Yrn75FSUkJjj76aAwbNky9GWnfvj1++ukn5OXlId3J35GEhahvvvkGI0eOxF//+leks3nz5qk3s/L8K39XErbHjRunVhO55pprGv04/AvcC6eccoraqLbFixfXuvz000+rkag1a9bguOOOQzqTEctYd9xxh/pD/uyzzxigqlVWVuKCCy7AE088gdtvv72tdydhSGDq1KlTW+9GQr4YyjpvCxYsqLmOC7vrJEzGkjdvMtpy/PHHI5198skn6o3raaedVjNS98ILL2DVqlWGHodTeNQqizWKpizWmMrkneGLL76oRjJlKo90MnopT2wjRoxo611JKDKqIuUCvXr1UgFTFi8n4I033sCgQYPUqIq8URswYIAK31RbIBDAc889p2ZMZBovnR111FF499138eOPP6rL69atw0cffWR4QIQjUNSiwuGwqmGRIfaDDjqorXcnIXz99dcqMPl8PmRmZuK1117DAQcc0Na7lRAkUH7xxRdq6oF2GTJkiBrJ7du3L7Zu3YpZs2bh2GOPVVMyUmuYzjZs2KBGcSdPnqzKBeR3R6Zh7HY7LrnkkrbevYQhNbqlpaUYO3Ys0t3UqVNRXl6u6ggtFot6MyuzAfLGxAgGKGrx0QR5kpd0Tzp5EVy7dq0amXvllVfUk7zUjaV7iPr9999x7bXXYunSpXA6nW29Owkl9p2x1IVJoOrRowdeeukljB8/Hun+Jk1GoO688051WUag5Dln/vz5DFAxnnzySfV7JKOY6e6ll17C888
/j4ULF6rSCXk+ljf6cmyM/M4wQFGLmThxIhYtWqTOVpTiadLJO+P99ttPfT5w4ED1jvkf//iHKppOZ1Ijt337dhx++OE118k7Q/n9eeSRR+D3+9W7RQJyc3Ox//774+eff0a669y5c503H/3798err77aZvuUaH777Td1AsL//ve/tt6VhHDjjTeqUagxY8aoy3LWphwjOYOcAYralJxdNmnSJDU19d5777GgsxHvoCUcpLvhw4er6c1YcmaMDLPffPPNDE+7Fdr/8ssvuOiii5DupDxg9zYpUtsiI3SkkwJ7qQ+LFk2nO4/HA7O5dgm4PL/Ic7ERDFB7+SQW+w5w48aNaihQiqW7d++OdJ62k6HR//f//p+qzygsLFTXyymi0nMjnU2bNk0No8vvR0VFhTpOEjLfeecdpDv5Xdm9Tk7aXkhvn3Svn7vhhhvUGZwSCrZs2aJWj5cn/PPOOw/p7vrrr1dFwTKFd84556gzqR5//HG1kf4GTQKUjKyw9YVO/pak5kmeh2UKT9rI3H///arA3pAINdmKFSuk61ad7ZJLLomks/qOiWwLFiyIpLtLL7000qNHj4jdbo+0b98+Mnz48MiSJUvaercS1vHHHx+59tprI+nu3HPPjXTu3Fn93uyzzz7q8s8//9zWu5Uw3nzzzchBBx0UcTgckX79+kUef/zxtt6lhPHOO++o59/169e39a4kjPLycvW80r1794jT6Yz06tUr8ve//z3i9/sNPY5J/mcschERERGlN/aBIiIiIjKIAYqIiIjIIAYoIiIiIoMYoIiIiIgMYoAiIiIiMogBioiIiMggBigiIiIigxigiCjh/PrrrzCZTKqzf6L44YcfcOSRR6qFjg877LC23h0iamMMUERUx9ixY1WAueuuu2pd//rrr6vr05EsnyJLy8i6a++++26Dx233rbkW/X366afVQsJE1PYYoIioXjLSMm/ePJSUlCBVBAKBJn+tLN57zDHHqPXoZH2+eE4++WRs3bq11paIC2oHg8G23gWipMYARUT1GjFiBDp16oS5c+fGvc/MmTPrTGc9+OCD6NmzZ61RmTPPPFMt9tqxY0c1gjJ79mxomoYbb7xRLb7dtWtXteBpfdNmslCshDlZUPj999+vdfs333yjFmfOzMxUj33RRRdh586dNbefcMIJmDhxIq677jq0a9cOJ510UtwFV2WfZD8cDof6mRYvXlxzu4wirVmzRt1HPpefOx75ejlusZss/Ctkge3DDz9c/Ty9evXCrFmz1HGIkgVNDz74YDXS1a1bN1x11VVq0XIhi06PGzcOZWVlNSNb0f2Qz2V0MJYcZxmxip0S/e9//4vjjz9eff/nn39e3fbvf/8b/fv3V9f169cP//znP2sFTjl+nTt3VrdLeGzo94EonTBAEVG95EVfQs/DDz+MP/74Y68ea/ny5diyZQs++OADFRJkOuxPf/oT8vLysHLlSlx55ZW44oor6nwfCVhTpkxRq6UPHTpUraJeVFSkbistLcWJJ56IAQMG4PPPP1eBZ9u2bTjnnHNqPcYzzzwDu92Ojz/+GPPnz693//7xj3/gvvvuw7333ouvvvpKBa0///nP+Omnn9TtMookq7bLvsjnN9xwg+Fj8OGHH+Liiy/Gtddei++++w6PPfaYCjiyKnyU2WzGQw89hG+//Vbttxy3m266Sd0mQVLCaXZ2ds3IltH9mDp1qvr+33//vfoZJUTddtttah/kOvn3nj59uvreQvbljTfewEsvvaSmLuX+seGYKK211GrHRJS8LrnkksgZZ5yhPj/yyCMjl156qfr8tddeUyu7R82YMSNy6KGH1vraBx54INKjR49ajyWXQ6FQzXV9+/aNHHvssTWXNU2LZGRkRF544QV1eePGjer73HXXXTX3CQaDka5du0bmzZunLs+ZMycyatSoWt/7999/r7Xy/PHHHx8ZMGDAHn/eLl26RO64445a1x1xxBGRq666quay/Jzy8zZ
EflaLxaJ+luj2l7/8Rd02fPjwyJ133lnr/v/5z38inTt3jvt4L7/8cqSgoKDm8oIFCyI5OTl17ic/s/zbxJL7yf1jj+eDDz5Y6z69e/eOLFy4sNZ1clyHDh2qPp80aVLkxBNPjITD4QZ/bqJ0ZG3rAEdEiU3qoGSkpymjLlEyeiOjK1Ey3SZTcrGjXVJXtH379lpfJ6NOUVarFYMGDVIjJWLdunVYsWKFmr6rr15p//33V58PHDiwwX0rLy9Xo2NHH310revlsnwPo4YNG4Z//etfNZdlOi66vzIKFjviFAqF4PP54PF44Ha7sWzZMjVFJlOXsl8yvRd7+96S4xdVVVWljtP48eMxYcKEmuvle+bk5NRMv44cORJ9+/ZVtV0yajhq1Ki93g+iVMAARUQNOu6449R0z7Rp09QLaiwJRfoASMPFyTabrdZlqcep7zqpRWosqQ2SKT0JeLuTmp3dA0xrke+333771bu/UvN01lln1blN6oukTkkCyt/+9jcVsqQ27KOPPlIBR2qRGgpQcuwa8+8QeyyitVVPPPEEhgwZUut+0ZotqdfauHEj3n77bRXuZHpUauNeeeWVRh0LolTGAEVEeyTtDKSwWkYiYrVv3x6FhYXqxTva3qA5ezd99tlnKsBFR0akkFuKmqMv7q+++qqqyZHRqaaSmqIuXbqo0SEpsI6Sy4MHD0Zzkf2VOqL6wpWQn00CpNRiRUfrpPYoltRyyajV7uTfQWqioqR2S0atGiKjgPJzb9iwARdccEGDx+fcc89V21/+8hc1ElVcXKwCHlE6Y4Aioj2SM8PkRVaKimPJWW47duzA3XffrV5cpZBbRivkRbc5PProo+jTp486S+yBBx5QLRUuvfRSddvVV1+tRk/OO+88VWgtL+jSb+nFF19UZ5ZFR1EaQ4rVpbC9d+/eKijKGYESBKNnqjUHKdaWEabu3burYyUhSab15EzC22+/XQUrGTWSon0ZWauv6F3CoowcSR+qQw89VI1KySZTrI888oia8pSAdfPNN9cZ4auPjIhdc801aspOgpHf71cF+XKcJ0+erAr+ZTRPCvVlf19++WV1ViF7URHxLDwiaiQ5hX/3KTYJNnLauwQdeUFftWrVXtVK1TfyJZs8tkxnyRlh0o5AREeNJDBIXY6EPGlXIC/usfVWjSEhQgKDnGUnjyNBUL6XhLfmItOgixYtwpIlS3DEEUeoruYSCqU1gJCfUQKLTElKfZiEt91bBsiZeHLGoowGyaiTBFcho1bS9uDYY4/F+eefr/4NGlMzddlll6mwKYFRfm4ZgZMzA6N9q7KystT3kNop2WeZZnzrrbcMH1+iVGSSSvK23gkiIiKiZMK3EUREREQGMUARERERGcQARURERGQQAxQRERGRQQxQRERERAYxQBEREREZxABFREREZBADFBEREZFBDFBEREREBjFAERERERnEAEVERERkEAMUEREREYz5/w+Dfgdr4l9HAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Visualise the sequential feature selection results; kind='std_dev' chooses the error-band type.\n", "fig, ax = plot_sequential_feature_selection(feature_selector, kind='std_dev')\n", "ax.grid(True)\n", "features_num_is_range = isinstance(FILTERED_FEATURES_NUM, Sequence)\n", "if features_num_is_range:\n", "    # Shade the span between the smallest and the largest value of FILTERED_FEATURES_NUM.\n", "    _ = ax.axvspan(min(FILTERED_FEATURES_NUM), max(FILTERED_FEATURES_NUM), color=matplotlib.colormaps.get_cmap('tab10')(6), alpha=0.15)\n", "    x_upper = max(FILTERED_FEATURES_NUM)\n", "else:\n", "    x_upper = FILTERED_FEATURES_NUM\n", "# Ideally the upper x-limit would be `len(df_augd_features_train.columns)`, but SequentialFeatureSelector does not compute scores that far.\n", "_ = ax.set_xlim((1, x_upper))\n", "_ = ax.set_ylim((None, 0.))" ] }, { "cell_type": "markdown", "id": "1fc207ba-f324-4980-9f6f-b0c83ef2e127", "metadata": {}, "source": [ "Составной пайплайн:" ] }, { "cell_type": "code", "execution_count": 64, "id": "1ff048d8-63a9-45cc-b613-50891aab4612", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('preprocess',\n",
       "                 ColumnTransformer(transformers=[('extend_features_as_polynomial',\n",
       "                                                  Pipeline(steps=[('extend_features',\n",
       "                                                                   PolynomialFeatures(include_bias=False)),\n",
       "                                                                  ('scale_to_standard',\n",
       "                                                                   StandardScaler())]),\n",
       "                                                  ('selling_price',\n",
       "                                                   'driven_kms')),\n",
       "                                                 ('extend_features_as_spline',\n",
       "                                                  SplineTransformer(include_bias=False,\n",
       "                                                                    knots='quantile',\n",
       "                                                                    n_knots=4),\n",
       "                                                  ('age',)),\n",
       "                                                 ('s...\n",
       "                                                  ('fuel_type', 'selling_type',\n",
       "                                                   'transmission'))])),\n",
       "                ('select_features',\n",
       "                 SequentialFeatureSelector(cv=4,\n",
       "                                           estimator=RandomForestRegressor(max_depth=8,\n",
       "                                                                           max_features='sqrt',\n",
       "                                                                           n_estimators=10),\n",
       "                                           floating=True, k_features=(4, 8),\n",
       "                                           scoring='neg_mean_absolute_percentage_error',\n",
       "                                           verbose=1)),\n",
       "                ('regress',\n",
       "                 RandomForestRegressor(max_depth=8, max_features='sqrt',\n",
       "                                       n_estimators=10))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('extend_features_as_polynomial',\n", " Pipeline(steps=[('extend_features',\n", " PolynomialFeatures(include_bias=False)),\n", " ('scale_to_standard',\n", " StandardScaler())]),\n", " ('selling_price',\n", " 'driven_kms')),\n", " ('extend_features_as_spline',\n", " SplineTransformer(include_bias=False,\n", " knots='quantile',\n", " n_knots=4),\n", " ('age',)),\n", " ('s...\n", " ('fuel_type', 'selling_type',\n", " 'transmission'))])),\n", " ('select_features',\n", " SequentialFeatureSelector(cv=4,\n", " estimator=RandomForestRegressor(max_depth=8,\n", " max_features='sqrt',\n", " n_estimators=10),\n", " floating=True, k_features=(4, 8),\n", " scoring='neg_mean_absolute_percentage_error',\n", " verbose=1)),\n", " ('regress',\n", " RandomForestRegressor(max_depth=8, max_features='sqrt',\n", " n_estimators=10))])" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Chain preprocessing, feature selection and regression into one estimator;\n", "# `feature_selector` and `regressor` are objects created elsewhere in the notebook.\n", "pipeline = sklearn.pipeline.Pipeline(\n", "    steps=[\n", "        ('preprocess', build_preprocess_augmenting_transformer()),\n", "        ('select_features', feature_selector),\n", "        ('regress', regressor),\n", "    ],\n", ")\n", "pipeline" ] }, { "cell_type": "code", "execution_count": 65, "id": "857ca3e3-39c2-4bea-99fa-526f9bb4fcf3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'preprocess__remainder': 'drop',\n", " 'preprocess__sparse_threshold': 0.3,\n", " 'preprocess__transformer_weights': None,\n", " 'preprocess__extend_features_as_spline': SplineTransformer(include_bias=False, knots='quantile', n_knots=4),\n", " 'preprocess__extend_features_as_polynomial__extend_features': PolynomialFeatures(include_bias=False),\n", " 'preprocess__extend_features_as_polynomial__extend_features__degree': 2,\n", " 'preprocess__extend_features_as_polynomial__extend_features__include_bias': False,\n", " 'preprocess__extend_features_as_polynomial__extend_features__interaction_only': False,\n", " 
'preprocess__extend_features_as_polynomial__extend_features__order': 'C',\n", " 'preprocess__extend_features_as_polynomial__scale_to_standard__with_mean': True,\n", " 'preprocess__extend_features_as_polynomial__scale_to_standard__with_std': True,\n", " 'preprocess__extend_features_as_spline__degree': 3,\n", " 'preprocess__extend_features_as_spline__extrapolation': 'constant',\n", " 'preprocess__extend_features_as_spline__include_bias': False,\n", " 'preprocess__extend_features_as_spline__knots': 'quantile',\n", " 'preprocess__extend_features_as_spline__n_knots': 4,\n", " 'preprocess__extend_features_as_spline__order': 'C',\n", " 'preprocess__extend_features_as_spline__sparse_output': False,\n", " 'preprocess__scale_to_standard__with_mean': True,\n", " 'preprocess__scale_to_standard__with_std': True,\n", " 'select_features__cv': 4,\n", " 'select_features__feature_groups': None,\n", " 'select_features__fixed_features': None,\n", " 'select_features__floating': True,\n", " 'select_features__forward': True,\n", " 'select_features__k_features': (4, 8),\n", " 'select_features__scoring': 'neg_mean_absolute_percentage_error',\n", " 'regress__bootstrap': True,\n", " 'regress__ccp_alpha': 0.0,\n", " 'regress__criterion': 'squared_error',\n", " 'regress__max_depth': 8,\n", " 'regress__max_features': 'sqrt',\n", " 'regress__max_leaf_nodes': None,\n", " 'regress__max_samples': None,\n", " 'regress__min_impurity_decrease': 0.0,\n", " 'regress__min_samples_leaf': 1,\n", " 'regress__min_samples_split': 2,\n", " 'regress__min_weight_fraction_leaf': 0.0,\n", " 'regress__monotonic_cst': None,\n", " 'regress__n_estimators': 10,\n", " 'regress__oob_score': False,\n", " 'regress__random_state': None}" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_params = filter_params(\n", " pipeline.get_params(),\n", " include={\n", " 'preprocess': (False, PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_INCLUDE.copy()),\n", " 'select_features': 
(False, FEATURE_SELECTOR_PARAMS_COMMON_INCLUDE.copy()),\n", " 'regress': (False, True),\n", " },\n", " exclude={\n", " 'preprocess': PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_EXCLUDE.copy(),\n", " 'select_features': FEATURE_SELECTOR_PARAMS_COMMON_EXCLUDE,\n", " 'regress': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,\n", " },\n", ")\n", "model_params" ] }, { "cell_type": "markdown", "id": "f05a1163-dced-4f54-be05-8da7fac7d611", "metadata": {}, "source": [ "Обучение модели:" ] }, { "cell_type": "code", "execution_count": 66, "id": "1a22889d-8cc7-42a4-a3f0-51af14723db8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=1)]: Done 14 out of 14 | elapsed: 0.5s finished\n", "Features: 1/8[Parallel(n_jobs=1)]: Done 13 out of 13 | elapsed: 0.5s finished\n", "Features: 2/8[Parallel(n_jobs=1)]: Done 12 out of 12 | elapsed: 0.6s finished\n", "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s finished\n", "Features: 3/8[Parallel(n_jobs=1)]: Done 11 out of 11 | elapsed: 0.4s finished\n", "[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 0.0s finished\n", "Features: 4/8[Parallel(n_jobs=1)]: Done 10 out of 10 | elapsed: 0.4s finished\n", "[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 0.1s finished\n", "Features: 5/8[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 0.1s finished\n", "Features: 6/8[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 0.2s finished\n", "Features: 7/8[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 0.3s finished\n", "[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 0.2s finished\n", "Features: 8/8" ] } ], "source": [ "# XXX: SequentialFeatureSelector обучается опять!?\n", "_ = pipeline.fit(df_orig_features_train, df_target_train.iloc[:, 0])" ] }, { "cell_type": "markdown", "id": "8ebb22dc-2bb2-48b1-a80c-5cf73b414fd8", "metadata": {}, "source": [ "Оценка 
качества:" ] }, { "cell_type": "code", "execution_count": 67, "id": "f6a1ebfb-13b0-4c40-896c-dc6e5c588d11", "metadata": {}, "outputs": [], "source": [ "target_test_predicted = pipeline.predict(df_orig_features_test)" ] }, { "cell_type": "markdown", "id": "fca0ac78-1371-43e3-8b57-4f5921ccedbe", "metadata": {}, "source": [ "Метрики качества (MAPE, а также MSE, MAE):" ] }, { "cell_type": "code", "execution_count": 68, "id": "2690f68f-4e4e-456e-880d-7a13cf60b0ea", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mse': 1.0194872911964548,\n", " 'mae': 0.6263087407494466,\n", " 'mape': 0.20033337884798225}" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics = score_predictions(df_target_test, target_test_predicted)\n", "metrics" ] }, { "cell_type": "code", "execution_count": 69, "id": "1f3d069c-2c5b-4214-9bed-17e6fe92a8d3", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "15d75fa1d12046c8b197bf0ac21439b9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading artifacts: 0%| | 0/7 [00:00#sk-container-id-9 {\n", " /* Definition of color scheme common for light and dark mode */\n", " --sklearn-color-text: #000;\n", " --sklearn-color-text-muted: #666;\n", " --sklearn-color-line: gray;\n", " /* Definition of color scheme for unfitted estimators */\n", " --sklearn-color-unfitted-level-0: #fff5e6;\n", " --sklearn-color-unfitted-level-1: #f6e4d2;\n", " --sklearn-color-unfitted-level-2: #ffe0b3;\n", " --sklearn-color-unfitted-level-3: chocolate;\n", " /* Definition of color scheme for fitted estimators */\n", " --sklearn-color-fitted-level-0: #f0f8ff;\n", " --sklearn-color-fitted-level-1: #d4ebff;\n", " --sklearn-color-fitted-level-2: #b3dbfd;\n", " --sklearn-color-fitted-level-3: cornflowerblue;\n", "\n", " /* Specific color for light theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, 
var(--jp-content-font-color1, black)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-icon: #696969;\n", "\n", " @media (prefers-color-scheme: dark) {\n", " /* Redefinition of color scheme for dark theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-icon: #878787;\n", " }\n", "}\n", "\n", "#sk-container-id-9 {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "#sk-container-id-9 pre {\n", " padding: 0;\n", "}\n", "\n", "#sk-container-id-9 input.sk-hidden--visually {\n", " border: 0;\n", " clip: rect(1px 1px 1px 1px);\n", " clip: rect(1px, 1px, 1px, 1px);\n", " height: 1px;\n", " margin: -1px;\n", " overflow: hidden;\n", " padding: 0;\n", " position: absolute;\n", " width: 1px;\n", "}\n", "\n", "#sk-container-id-9 div.sk-dashed-wrapped {\n", " border: 1px dashed var(--sklearn-color-line);\n", " margin: 0 0.4em 0.5em 0.4em;\n", " box-sizing: border-box;\n", " padding-bottom: 0.4em;\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "#sk-container-id-9 div.sk-container {\n", " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", " so we also need the `!important` here to be able to override the\n", " default hidden behavior on the sphinx rendered scikit-learn.org.\n", " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", " display: inline-block !important;\n", " 
position: relative;\n", "}\n", "\n", "#sk-container-id-9 div.sk-text-repr-fallback {\n", " display: none;\n", "}\n", "\n", "div.sk-parallel-item,\n", "div.sk-serial,\n", "div.sk-item {\n", " /* draw centered vertical line to link estimators */\n", " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", " background-size: 2px 100%;\n", " background-repeat: no-repeat;\n", " background-position: center center;\n", "}\n", "\n", "/* Parallel-specific style estimator block */\n", "\n", "#sk-container-id-9 div.sk-parallel-item::after {\n", " content: \"\";\n", " width: 100%;\n", " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", " flex-grow: 1;\n", "}\n", "\n", "#sk-container-id-9 div.sk-parallel {\n", " display: flex;\n", " align-items: stretch;\n", " justify-content: center;\n", " background-color: var(--sklearn-color-background);\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-9 div.sk-parallel-item {\n", " display: flex;\n", " flex-direction: column;\n", "}\n", "\n", "#sk-container-id-9 div.sk-parallel-item:first-child::after {\n", " align-self: flex-end;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-9 div.sk-parallel-item:last-child::after {\n", " align-self: flex-start;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-9 div.sk-parallel-item:only-child::after {\n", " width: 0;\n", "}\n", "\n", "/* Serial-specific style estimator block */\n", "\n", "#sk-container-id-9 div.sk-serial {\n", " display: flex;\n", " flex-direction: column;\n", " align-items: center;\n", " background-color: var(--sklearn-color-background);\n", " padding-right: 1em;\n", " padding-left: 1em;\n", "}\n", "\n", "\n", "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", "clickable and can be expanded/collapsed.\n", "- Pipeline and ColumnTransformer use this feature and define the default style\n", "- Estimators will overwrite 
some part of the style using the `sk-estimator` class\n", "*/\n", "\n", "/* Pipeline and ColumnTransformer style (default) */\n", "\n", "#sk-container-id-9 div.sk-toggleable {\n", " /* Default theme specific background. It is overwritten whether we have a\n", " specific estimator or a Pipeline/ColumnTransformer */\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "/* Toggleable label */\n", "#sk-container-id-9 label.sk-toggleable__label {\n", " cursor: pointer;\n", " display: flex;\n", " width: 100%;\n", " margin-bottom: 0;\n", " padding: 0.5em;\n", " box-sizing: border-box;\n", " text-align: center;\n", " align-items: start;\n", " justify-content: space-between;\n", " gap: 0.5em;\n", "}\n", "\n", "#sk-container-id-9 label.sk-toggleable__label .caption {\n", " font-size: 0.6rem;\n", " font-weight: lighter;\n", " color: var(--sklearn-color-text-muted);\n", "}\n", "\n", "#sk-container-id-9 label.sk-toggleable__label-arrow:before {\n", " /* Arrow on the left of the label */\n", " content: \"▸\";\n", " float: left;\n", " margin-right: 0.25em;\n", " color: var(--sklearn-color-icon);\n", "}\n", "\n", "#sk-container-id-9 label.sk-toggleable__label-arrow:hover:before {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "/* Toggleable content - dropdown */\n", "\n", "#sk-container-id-9 div.sk-toggleable__content {\n", " display: none;\n", " text-align: left;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-9 div.sk-toggleable__content.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-9 div.sk-toggleable__content pre {\n", " margin: 0.2em;\n", " border-radius: 0.25em;\n", " color: var(--sklearn-color-text);\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-9 div.sk-toggleable__content.fitted pre {\n", " /* unfitted */\n", " 
background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-9 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", " /* Expand drop-down */\n", " display: block;\n", " width: 100%;\n", " overflow: visible;\n", "}\n", "\n", "#sk-container-id-9 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", " content: \"▾\";\n", "}\n", "\n", "/* Pipeline/ColumnTransformer-specific style */\n", "\n", "#sk-container-id-9 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-9 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator-specific style */\n", "\n", "/* Colorize estimator box */\n", "#sk-container-id-9 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-9 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "#sk-container-id-9 div.sk-label label.sk-toggleable__label,\n", "#sk-container-id-9 div.sk-label label {\n", " /* The background is the default theme color */\n", " color: var(--sklearn-color-text-on-default-background);\n", "}\n", "\n", "/* On hover, darken the color of the background */\n", "#sk-container-id-9 div.sk-label:hover label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "/* Label box, darken color on hover, fitted */\n", "#sk-container-id-9 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", " color: 
var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator label */\n", "\n", "#sk-container-id-9 div.sk-label label {\n", " font-family: monospace;\n", " font-weight: bold;\n", " display: inline-block;\n", " line-height: 1.2em;\n", "}\n", "\n", "#sk-container-id-9 div.sk-label-container {\n", " text-align: center;\n", "}\n", "\n", "/* Estimator-specific */\n", "#sk-container-id-9 div.sk-estimator {\n", " font-family: monospace;\n", " border: 1px dotted var(--sklearn-color-border-box);\n", " border-radius: 0.25em;\n", " box-sizing: border-box;\n", " margin-bottom: 0.5em;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-9 div.sk-estimator.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "/* on hover */\n", "#sk-container-id-9 div.sk-estimator:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-9 div.sk-estimator.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", "\n", "/* Common style for \"i\" and \"?\" */\n", "\n", ".sk-estimator-doc-link,\n", "a:link.sk-estimator-doc-link,\n", "a:visited.sk-estimator-doc-link {\n", " float: right;\n", " font-size: smaller;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1em;\n", " height: 1em;\n", " width: 1em;\n", " text-decoration: none !important;\n", " margin-left: 0.5em;\n", " text-align: center;\n", " /* unfitted */\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-unfitted-level-1);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted,\n", "a:link.sk-estimator-doc-link.fitted,\n", "a:visited.sk-estimator-doc-link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "/* Span, style for the box shown on hovering the info icon */\n", ".sk-estimator-doc-link span {\n", " display: none;\n", " z-index: 9999;\n", " position: relative;\n", " font-weight: normal;\n", " right: .2ex;\n", " padding: .5ex;\n", " margin: .5ex;\n", " width: min-content;\n", " min-width: 20ex;\n", " 
max-width: 50ex;\n", " color: var(--sklearn-color-text);\n", " box-shadow: 2pt 2pt 4pt #999;\n", " /* unfitted */\n", " background: var(--sklearn-color-unfitted-level-0);\n", " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted span {\n", " /* fitted */\n", " background: var(--sklearn-color-fitted-level-0);\n", " border: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link:hover span {\n", " display: block;\n", "}\n", "\n", "/* \"?\"-specific style due to the `` HTML tag */\n", "\n", "#sk-container-id-9 a.estimator_doc_link {\n", " float: right;\n", " font-size: 1rem;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1rem;\n", " height: 1rem;\n", " width: 1rem;\n", " text-decoration: none;\n", " /* unfitted */\n", " color: var(--sklearn-color-unfitted-level-1);\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", "}\n", "\n", "#sk-container-id-9 a.estimator_doc_link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "#sk-container-id-9 a.estimator_doc_link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "#sk-container-id-9 a.estimator_doc_link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".estimator-table summary {\n", " padding: .5rem;\n", " font-family: monospace;\n", " cursor: pointer;\n", "}\n", "\n", ".estimator-table details[open] {\n", " padding-left: 0.1rem;\n", " padding-right: 0.1rem;\n", " padding-bottom: 0.3rem;\n", "}\n", "\n", ".estimator-table .parameters-table {\n", " margin-left: auto !important;\n", " margin-right: auto !important;\n", "}\n", "\n", 
".estimator-table .parameters-table tr:nth-child(odd) {\n", " background-color: #fff;\n", "}\n", "\n", ".estimator-table .parameters-table tr:nth-child(even) {\n", " background-color: #f6f6f6;\n", "}\n", "\n", ".estimator-table .parameters-table tr:hover {\n", " background-color: #e0e0e0;\n", "}\n", "\n", ".estimator-table table td {\n", " border: 1px solid rgba(106, 105, 104, 0.232);\n", "}\n", "\n", ".user-set td {\n", " color:rgb(255, 94, 0);\n", " text-align: left;\n", "}\n", "\n", ".user-set td.value pre {\n", " color:rgb(255, 94, 0) !important;\n", " background-color: transparent !important;\n", "}\n", "\n", ".default td {\n", " color: black;\n", " text-align: left;\n", "}\n", "\n", ".user-set td i,\n", ".default td i {\n", " color: black;\n", "}\n", "\n", ".copy-paste-icon {\n", " background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n", " background-repeat: no-repeat;\n", " background-size: 14px 14px;\n", " background-position: 0;\n", " display: inline-block;\n", " width: 14px;\n", " height: 14px;\n", " cursor: pointer;\n", "}\n", "
Pipeline(steps=[('preprocess',\n",
       "                 ColumnTransformer(transformers=[('extend_features_as_polynomial',\n",
       "                                                  Pipeline(steps=[('extend_features',\n",
       "                                                                   PolynomialFeatures(include_bias=False)),\n",
       "                                                                  ('scale_to_standard',\n",
       "                                                                   StandardScaler())]),\n",
       "                                                  ('selling_price',\n",
       "                                                   'driven_kms')),\n",
       "                                                 ('extend_features_as_spline',\n",
       "                                                  SplineTransformer(include_bias=False,\n",
       "                                                                    knots='quantile',\n",
       "                                                                    n_knots=4),\n",
       "                                                  ('age',)),\n",
       "                                                 ('s...\n",
       "                                                  ('fuel_type', 'selling_type',\n",
       "                                                   'transmission'))])),\n",
       "                ('select_features',\n",
       "                 SequentialFeatureSelector(cv=4,\n",
       "                                           estimator=RandomForestRegressor(max_depth=8,\n",
       "                                                                           max_features='sqrt',\n",
       "                                                                           n_estimators=10),\n",
       "                                           floating=True, k_features=(4, 8),\n",
       "                                           scoring='neg_mean_absolute_percentage_error')),\n",
       "                ('regress',\n",
       "                 RandomForestRegressor(max_depth=10,\n",
       "                                       max_features=0.4752873867901817,\n",
       "                                       n_estimators=78))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('extend_features_as_polynomial',\n", " Pipeline(steps=[('extend_features',\n", " PolynomialFeatures(include_bias=False)),\n", " ('scale_to_standard',\n", " StandardScaler())]),\n", " ('selling_price',\n", " 'driven_kms')),\n", " ('extend_features_as_spline',\n", " SplineTransformer(include_bias=False,\n", " knots='quantile',\n", " n_knots=4),\n", " ('age',)),\n", " ('s...\n", " ('fuel_type', 'selling_type',\n", " 'transmission'))])),\n", " ('select_features',\n", " SequentialFeatureSelector(cv=4,\n", " estimator=RandomForestRegressor(max_depth=8,\n", " max_features='sqrt',\n", " n_estimators=10),\n", " floating=True, k_features=(4, 8),\n", " scoring='neg_mean_absolute_percentage_error')),\n", " ('regress',\n", " RandomForestRegressor(max_depth=10,\n", " max_features=0.4752873867901817,\n", " n_estimators=78))])" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline = build_pipeline_optimized_best()\n", "pipeline" ] }, { "cell_type": "code", "execution_count": 78, "id": "445380bd-a56f-41f6-b148-9e4fee189a09", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'preprocess__remainder': 'drop',\n", " 'preprocess__sparse_threshold': 0.3,\n", " 'preprocess__transformer_weights': None,\n", " 'preprocess__extend_features_as_spline': SplineTransformer(include_bias=False, knots='quantile', n_knots=4),\n", " 'preprocess__extend_features_as_polynomial__extend_features': PolynomialFeatures(include_bias=False),\n", " 'preprocess__extend_features_as_polynomial__extend_features__degree': 2,\n", " 'preprocess__extend_features_as_polynomial__extend_features__include_bias': False,\n", " 'preprocess__extend_features_as_polynomial__extend_features__interaction_only': False,\n", " 'preprocess__extend_features_as_polynomial__extend_features__order': 'C',\n", " 'preprocess__extend_features_as_polynomial__scale_to_standard__with_mean': 
True,\n", " 'preprocess__extend_features_as_polynomial__scale_to_standard__with_std': True,\n", " 'preprocess__extend_features_as_spline__degree': 3,\n", " 'preprocess__extend_features_as_spline__extrapolation': 'constant',\n", " 'preprocess__extend_features_as_spline__include_bias': False,\n", " 'preprocess__extend_features_as_spline__knots': 'quantile',\n", " 'preprocess__extend_features_as_spline__n_knots': 4,\n", " 'preprocess__extend_features_as_spline__order': 'C',\n", " 'preprocess__extend_features_as_spline__sparse_output': False,\n", " 'preprocess__scale_to_standard__with_mean': True,\n", " 'preprocess__scale_to_standard__with_std': True,\n", " 'select_features__cv': 4,\n", " 'select_features__feature_groups': None,\n", " 'select_features__fixed_features': None,\n", " 'select_features__floating': True,\n", " 'select_features__forward': True,\n", " 'select_features__k_features': (4, 8),\n", " 'select_features__scoring': 'neg_mean_absolute_percentage_error',\n", " 'regress__bootstrap': True,\n", " 'regress__ccp_alpha': 0.0,\n", " 'regress__criterion': 'squared_error',\n", " 'regress__max_depth': 10,\n", " 'regress__max_features': 0.4752873867901817,\n", " 'regress__max_leaf_nodes': None,\n", " 'regress__max_samples': None,\n", " 'regress__min_impurity_decrease': 0.0,\n", " 'regress__min_samples_leaf': 1,\n", " 'regress__min_samples_split': 2,\n", " 'regress__min_weight_fraction_leaf': 0.0,\n", " 'regress__monotonic_cst': None,\n", " 'regress__n_estimators': 78,\n", " 'regress__oob_score': False,\n", " 'regress__random_state': None}" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_params = filter_params(\n", " pipeline.get_params(),\n", " include={\n", " 'preprocess': (False, PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_INCLUDE.copy()),\n", " 'select_features': (False, FEATURE_SELECTOR_PARAMS_COMMON_INCLUDE.copy()),\n", " 'regress': (False, True),\n", " },\n", " exclude={\n", " 'preprocess': 
PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_EXCLUDE.copy(),\n", " 'select_features': FEATURE_SELECTOR_PARAMS_COMMON_EXCLUDE,\n", " 'regress': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,\n", " },\n", ")\n", "model_params" ] }, { "cell_type": "markdown", "id": "3f30dacc-3edd-4821-b45b-5dbb06327cbd", "metadata": {}, "source": [ "Обучение модели:" ] }, { "cell_type": "code", "execution_count": 79, "id": "3b4d37f6-e3e0-4dbf-98f4-2993b5e2216d", "metadata": {}, "outputs": [], "source": [ "_ = pipeline.fit(df_orig_features_train, df_target_train.iloc[:, 0])" ] }, { "cell_type": "markdown", "id": "dc586b98-7431-4fa6-848d-fa50c03d4952", "metadata": {}, "source": [ "Оценка качества:" ] }, { "cell_type": "code", "execution_count": 80, "id": "99b16840-f368-4b38-b3d5-e98cfd52ace8", "metadata": {}, "outputs": [], "source": [ "target_test_predicted = pipeline.predict(df_orig_features_test)" ] }, { "cell_type": "markdown", "id": "e4601f93-a431-494f-b047-6bcffb406c90", "metadata": {}, "source": [ "Метрики качества (MAPE, а также MSE, MAE):" ] }, { "cell_type": "code", "execution_count": 81, "id": "29bb2b58-fd88-40d0-9998-8376f72a83fb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mse': 0.9370236080018509,\n", " 'mae': 0.6048078379366015,\n", " 'mape': 0.19721535277529492}" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics = score_predictions(df_target_test, target_test_predicted)\n", "metrics" ] }, { "cell_type": "code", "execution_count": 82, "id": "57c13865-8763-41d6-9b1d-3103070be086", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6f4a84b68c834b93bc62c1982114ddea", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading artifacts: 0%| | 0/7 [00:00#sk-container-id-10 {\n", " /* Definition of color scheme common for light and dark mode */\n", " --sklearn-color-text: #000;\n", " --sklearn-color-text-muted: #666;\n", " --sklearn-color-line: 
gray;\n", " /* Definition of color scheme for unfitted estimators */\n", " --sklearn-color-unfitted-level-0: #fff5e6;\n", " --sklearn-color-unfitted-level-1: #f6e4d2;\n", " --sklearn-color-unfitted-level-2: #ffe0b3;\n", " --sklearn-color-unfitted-level-3: chocolate;\n", " /* Definition of color scheme for fitted estimators */\n", " --sklearn-color-fitted-level-0: #f0f8ff;\n", " --sklearn-color-fitted-level-1: #d4ebff;\n", " --sklearn-color-fitted-level-2: #b3dbfd;\n", " --sklearn-color-fitted-level-3: cornflowerblue;\n", "\n", " /* Specific color for light theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-icon: #696969;\n", "\n", " @media (prefers-color-scheme: dark) {\n", " /* Redefinition of color scheme for dark theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-icon: #878787;\n", " }\n", "}\n", "\n", "#sk-container-id-10 {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "#sk-container-id-10 pre {\n", " padding: 0;\n", "}\n", "\n", "#sk-container-id-10 input.sk-hidden--visually {\n", " border: 0;\n", " clip: rect(1px 1px 1px 1px);\n", " clip: rect(1px, 1px, 1px, 1px);\n", " height: 1px;\n", " margin: -1px;\n", " overflow: hidden;\n", " padding: 0;\n", " position: absolute;\n", " width: 1px;\n", "}\n", "\n", "#sk-container-id-10 
div.sk-dashed-wrapped {\n", " border: 1px dashed var(--sklearn-color-line);\n", " margin: 0 0.4em 0.5em 0.4em;\n", " box-sizing: border-box;\n", " padding-bottom: 0.4em;\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "#sk-container-id-10 div.sk-container {\n", " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", " so we also need the `!important` here to be able to override the\n", " default hidden behavior on the sphinx rendered scikit-learn.org.\n", " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", " display: inline-block !important;\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-10 div.sk-text-repr-fallback {\n", " display: none;\n", "}\n", "\n", "div.sk-parallel-item,\n", "div.sk-serial,\n", "div.sk-item {\n", " /* draw centered vertical line to link estimators */\n", " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", " background-size: 2px 100%;\n", " background-repeat: no-repeat;\n", " background-position: center center;\n", "}\n", "\n", "/* Parallel-specific style estimator block */\n", "\n", "#sk-container-id-10 div.sk-parallel-item::after {\n", " content: \"\";\n", " width: 100%;\n", " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", " flex-grow: 1;\n", "}\n", "\n", "#sk-container-id-10 div.sk-parallel {\n", " display: flex;\n", " align-items: stretch;\n", " justify-content: center;\n", " background-color: var(--sklearn-color-background);\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-10 div.sk-parallel-item {\n", " display: flex;\n", " flex-direction: column;\n", "}\n", "\n", "#sk-container-id-10 div.sk-parallel-item:first-child::after {\n", " align-self: flex-end;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-10 div.sk-parallel-item:last-child::after {\n", " 
align-self: flex-start;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-10 div.sk-parallel-item:only-child::after {\n", " width: 0;\n", "}\n", "\n", "/* Serial-specific style estimator block */\n", "\n", "#sk-container-id-10 div.sk-serial {\n", " display: flex;\n", " flex-direction: column;\n", " align-items: center;\n", " background-color: var(--sklearn-color-background);\n", " padding-right: 1em;\n", " padding-left: 1em;\n", "}\n", "\n", "\n", "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", "clickable and can be expanded/collapsed.\n", "- Pipeline and ColumnTransformer use this feature and define the default style\n", "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", "*/\n", "\n", "/* Pipeline and ColumnTransformer style (default) */\n", "\n", "#sk-container-id-10 div.sk-toggleable {\n", " /* Default theme specific background. It is overwritten whether we have a\n", " specific estimator or a Pipeline/ColumnTransformer */\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "/* Toggleable label */\n", "#sk-container-id-10 label.sk-toggleable__label {\n", " cursor: pointer;\n", " display: flex;\n", " width: 100%;\n", " margin-bottom: 0;\n", " padding: 0.5em;\n", " box-sizing: border-box;\n", " text-align: center;\n", " align-items: start;\n", " justify-content: space-between;\n", " gap: 0.5em;\n", "}\n", "\n", "#sk-container-id-10 label.sk-toggleable__label .caption {\n", " font-size: 0.6rem;\n", " font-weight: lighter;\n", " color: var(--sklearn-color-text-muted);\n", "}\n", "\n", "#sk-container-id-10 label.sk-toggleable__label-arrow:before {\n", " /* Arrow on the left of the label */\n", " content: \"▸\";\n", " float: left;\n", " margin-right: 0.25em;\n", " color: var(--sklearn-color-icon);\n", "}\n", "\n", "#sk-container-id-10 label.sk-toggleable__label-arrow:hover:before {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "/* Toggleable content - 
dropdown */\n", "\n", "#sk-container-id-10 div.sk-toggleable__content {\n", " display: none;\n", " text-align: left;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-10 div.sk-toggleable__content.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-10 div.sk-toggleable__content pre {\n", " margin: 0.2em;\n", " border-radius: 0.25em;\n", " color: var(--sklearn-color-text);\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-10 div.sk-toggleable__content.fitted pre {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-10 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", " /* Expand drop-down */\n", " display: block;\n", " width: 100%;\n", " overflow: visible;\n", "}\n", "\n", "#sk-container-id-10 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", " content: \"▾\";\n", "}\n", "\n", "/* Pipeline/ColumnTransformer-specific style */\n", "\n", "#sk-container-id-10 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-10 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator-specific style */\n", "\n", "/* Colorize estimator box */\n", "#sk-container-id-10 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-10 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* fitted */\n", " 
background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "#sk-container-id-10 div.sk-label label.sk-toggleable__label,\n", "#sk-container-id-10 div.sk-label label {\n", " /* The background is the default theme color */\n", " color: var(--sklearn-color-text-on-default-background);\n", "}\n", "\n", "/* On hover, darken the color of the background */\n", "#sk-container-id-10 div.sk-label:hover label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "/* Label box, darken color on hover, fitted */\n", "#sk-container-id-10 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator label */\n", "\n", "#sk-container-id-10 div.sk-label label {\n", " font-family: monospace;\n", " font-weight: bold;\n", " display: inline-block;\n", " line-height: 1.2em;\n", "}\n", "\n", "#sk-container-id-10 div.sk-label-container {\n", " text-align: center;\n", "}\n", "\n", "/* Estimator-specific */\n", "#sk-container-id-10 div.sk-estimator {\n", " font-family: monospace;\n", " border: 1px dotted var(--sklearn-color-border-box);\n", " border-radius: 0.25em;\n", " box-sizing: border-box;\n", " margin-bottom: 0.5em;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-10 div.sk-estimator.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "/* on hover */\n", "#sk-container-id-10 div.sk-estimator:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-10 div.sk-estimator.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", "\n", "/* Common style for \"i\" and \"?\" */\n", "\n", ".sk-estimator-doc-link,\n", "a:link.sk-estimator-doc-link,\n", "a:visited.sk-estimator-doc-link {\n", " float: right;\n", " font-size: smaller;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1em;\n", " height: 1em;\n", " width: 1em;\n", " text-decoration: none !important;\n", " margin-left: 0.5em;\n", " text-align: center;\n", " /* unfitted */\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-unfitted-level-1);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted,\n", "a:link.sk-estimator-doc-link.fitted,\n", "a:visited.sk-estimator-doc-link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "/* Span, style for the box shown on hovering the info icon */\n", ".sk-estimator-doc-link span {\n", " display: none;\n", " z-index: 9999;\n", " position: relative;\n", " font-weight: normal;\n", " right: .2ex;\n", " padding: .5ex;\n", " margin: .5ex;\n", " width: min-content;\n", " min-width: 20ex;\n", " 
max-width: 50ex;\n", " color: var(--sklearn-color-text);\n", " box-shadow: 2pt 2pt 4pt #999;\n", " /* unfitted */\n", " background: var(--sklearn-color-unfitted-level-0);\n", " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted span {\n", " /* fitted */\n", " background: var(--sklearn-color-fitted-level-0);\n", " border: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link:hover span {\n", " display: block;\n", "}\n", "\n", "/* \"?\"-specific style due to the `` HTML tag */\n", "\n", "#sk-container-id-10 a.estimator_doc_link {\n", " float: right;\n", " font-size: 1rem;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1rem;\n", " height: 1rem;\n", " width: 1rem;\n", " text-decoration: none;\n", " /* unfitted */\n", " color: var(--sklearn-color-unfitted-level-1);\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", "}\n", "\n", "#sk-container-id-10 a.estimator_doc_link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "#sk-container-id-10 a.estimator_doc_link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "#sk-container-id-10 a.estimator_doc_link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".estimator-table summary {\n", " padding: .5rem;\n", " font-family: monospace;\n", " cursor: pointer;\n", "}\n", "\n", ".estimator-table details[open] {\n", " padding-left: 0.1rem;\n", " padding-right: 0.1rem;\n", " padding-bottom: 0.3rem;\n", "}\n", "\n", ".estimator-table .parameters-table {\n", " margin-left: auto !important;\n", " margin-right: auto !important;\n", "}\n", "\n", 
".estimator-table .parameters-table tr:nth-child(odd) {\n", " background-color: #fff;\n", "}\n", "\n", ".estimator-table .parameters-table tr:nth-child(even) {\n", " background-color: #f6f6f6;\n", "}\n", "\n", ".estimator-table .parameters-table tr:hover {\n", " background-color: #e0e0e0;\n", "}\n", "\n", ".estimator-table table td {\n", " border: 1px solid rgba(106, 105, 104, 0.232);\n", "}\n", "\n", ".user-set td {\n", " color:rgb(255, 94, 0);\n", " text-align: left;\n", "}\n", "\n", ".user-set td.value pre {\n", " color:rgb(255, 94, 0) !important;\n", " background-color: transparent !important;\n", "}\n", "\n", ".default td {\n", " color: black;\n", " text-align: left;\n", "}\n", "\n", ".user-set td i,\n", ".default td i {\n", " color: black;\n", "}\n", "\n", ".copy-paste-icon {\n", " background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n", " background-repeat: no-repeat;\n", " background-size: 14px 14px;\n", " background-position: 0;\n", " display: inline-block;\n", " width: 14px;\n", " height: 14px;\n", " cursor: pointer;\n", "}\n", "
Pipeline(steps=[('preprocess',\n",
       "                 ColumnTransformer(transformers=[('extend_features_as_polynomial',\n",
       "                                                  Pipeline(steps=[('extend_features',\n",
       "                                                                   PolynomialFeatures(include_bias=False)),\n",
       "                                                                  ('scale_to_standard',\n",
       "                                                                   StandardScaler())]),\n",
       "                                                  ('selling_price',\n",
       "                                                   'driven_kms')),\n",
       "                                                 ('extend_features_as_spline',\n",
       "                                                  SplineTransformer(include_bias=False,\n",
       "                                                                    knots='quantile',\n",
       "                                                                    n_knots=4),\n",
       "                                                  ('age',)),\n",
       "                                                 ('s...\n",
       "                                                  ('fuel_type', 'selling_type',\n",
       "                                                   'transmission'))])),\n",
       "                ('select_features',\n",
       "                 SequentialFeatureSelector(cv=4,\n",
       "                                           estimator=RandomForestRegressor(max_depth=8,\n",
       "                                                                           max_features='sqrt',\n",
       "                                                                           n_estimators=10),\n",
       "                                           floating=True, k_features=(4, 8),\n",
       "                                           scoring='neg_mean_absolute_percentage_error')),\n",
       "                ('regress',\n",
       "                 RandomForestRegressor(max_depth=10,\n",
       "                                       max_features=0.4752873867901817,\n",
       "                                       n_estimators=78))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('extend_features_as_polynomial',\n", " Pipeline(steps=[('extend_features',\n", " PolynomialFeatures(include_bias=False)),\n", " ('scale_to_standard',\n", " StandardScaler())]),\n", " ('selling_price',\n", " 'driven_kms')),\n", " ('extend_features_as_spline',\n", " SplineTransformer(include_bias=False,\n", " knots='quantile',\n", " n_knots=4),\n", " ('age',)),\n", " ('s...\n", " ('fuel_type', 'selling_type',\n", " 'transmission'))])),\n", " ('select_features',\n", " SequentialFeatureSelector(cv=4,\n", " estimator=RandomForestRegressor(max_depth=8,\n", " max_features='sqrt',\n", " n_estimators=10),\n", " floating=True, k_features=(4, 8),\n", " scoring='neg_mean_absolute_percentage_error')),\n", " ('regress',\n", " RandomForestRegressor(max_depth=10,\n", " max_features=0.4752873867901817,\n", " n_estimators=78))])" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline = build_pipeline_optimized_best()\n", "pipeline" ] }, { "cell_type": "code", "execution_count": 84, "id": "02ed0ad8-4068-4007-97a1-ffad1a79839e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'preprocess__remainder': 'drop',\n", " 'preprocess__sparse_threshold': 0.3,\n", " 'preprocess__transformer_weights': None,\n", " 'preprocess__extend_features_as_spline': SplineTransformer(include_bias=False, knots='quantile', n_knots=4),\n", " 'preprocess__extend_features_as_polynomial__extend_features': PolynomialFeatures(include_bias=False),\n", " 'preprocess__extend_features_as_polynomial__extend_features__degree': 2,\n", " 'preprocess__extend_features_as_polynomial__extend_features__include_bias': False,\n", " 'preprocess__extend_features_as_polynomial__extend_features__interaction_only': False,\n", " 'preprocess__extend_features_as_polynomial__extend_features__order': 'C',\n", " 'preprocess__extend_features_as_polynomial__scale_to_standard__with_mean': 
True,\n", " 'preprocess__extend_features_as_polynomial__scale_to_standard__with_std': True,\n", " 'preprocess__extend_features_as_spline__degree': 3,\n", " 'preprocess__extend_features_as_spline__extrapolation': 'constant',\n", " 'preprocess__extend_features_as_spline__include_bias': False,\n", " 'preprocess__extend_features_as_spline__knots': 'quantile',\n", " 'preprocess__extend_features_as_spline__n_knots': 4,\n", " 'preprocess__extend_features_as_spline__order': 'C',\n", " 'preprocess__extend_features_as_spline__sparse_output': False,\n", " 'preprocess__scale_to_standard__with_mean': True,\n", " 'preprocess__scale_to_standard__with_std': True,\n", " 'select_features__cv': 4,\n", " 'select_features__feature_groups': None,\n", " 'select_features__fixed_features': None,\n", " 'select_features__floating': True,\n", " 'select_features__forward': True,\n", " 'select_features__k_features': (4, 8),\n", " 'select_features__scoring': 'neg_mean_absolute_percentage_error',\n", " 'regress__bootstrap': True,\n", " 'regress__ccp_alpha': 0.0,\n", " 'regress__criterion': 'squared_error',\n", " 'regress__max_depth': 10,\n", " 'regress__max_features': 0.4752873867901817,\n", " 'regress__max_leaf_nodes': None,\n", " 'regress__max_samples': None,\n", " 'regress__min_impurity_decrease': 0.0,\n", " 'regress__min_samples_leaf': 1,\n", " 'regress__min_samples_split': 2,\n", " 'regress__min_weight_fraction_leaf': 0.0,\n", " 'regress__monotonic_cst': None,\n", " 'regress__n_estimators': 78,\n", " 'regress__oob_score': False,\n", " 'regress__random_state': None}" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_params = filter_params(\n", " pipeline.get_params(),\n", " include={\n", " 'preprocess': (False, PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_INCLUDE.copy()),\n", " 'select_features': (False, FEATURE_SELECTOR_PARAMS_COMMON_INCLUDE.copy()),\n", " 'regress': (False, True),\n", " },\n", " exclude={\n", " 'preprocess': 
PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_EXCLUDE.copy(),\n", " 'select_features': FEATURE_SELECTOR_PARAMS_COMMON_EXCLUDE,\n", " 'regress': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,\n", " },\n", ")\n", "model_params" ] }, { "cell_type": "code", "execution_count": 85, "id": "5c8b4d0c-f777-4c2a-8263-b2dda900a577", "metadata": {}, "outputs": [], "source": [ "_ = pipeline.fit(df_orig_features, df_target.iloc[:, 0])" ] }, { "cell_type": "code", "execution_count": 86, "id": "e01a1fe1-0e58-418a-9f05-7f4d304da7e5", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "877854c58cbf4e3c959298d0959eea39", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading artifacts: 0%| | 0/7 [00:00