|
|
|
|
@ -51,6 +51,7 @@ mlflow_run_name: str = 'Baseline model'
|
|
|
|
|
import os
|
|
|
|
|
import pathlib
|
|
|
|
|
import pickle
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
import mlflow
|
|
|
|
|
@ -66,6 +67,17 @@ import sklearn.preprocessing
|
|
|
|
|
# %%
|
|
|
|
|
# Base directory from which other paths are derived; '..' is interpreted
# relative to the current working directory.
BASE_PATH = pathlib.Path('..')
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
# Directory to import code from; currently the same object as BASE_PATH.
CODE_PATH = BASE_PATH
|
|
|
|
|
# Put the resolved (absolute) code directory first on sys.path so that it takes
# precedence over later entries when modules are imported.
sys.path.insert(0, str(CODE_PATH.resolve()))
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
from iis_project.sklearn_utils import filter_params
|
|
|
|
|
from iis_project.sklearn_utils.compose import COLUMN_TRANSFORMER_PARAMS_COMMON_INCLUDE
|
|
|
|
|
from iis_project.sklearn_utils.ensemble import RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE
|
|
|
|
|
from iis_project.sklearn_utils.pipeline import PIPELINE_PARAMS_COMMON_INCLUDE
|
|
|
|
|
from iis_project.sklearn_utils.preprocessing import STANDARD_SCALER_PARAMS_COMMON_EXCLUDE
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
# Written in decimal; identical to the hexadecimal literal 0x10.
# NOTE(review): judging by the name, this is the number of rows taken for the
# model input/output example — confirm at the point of use.
MODEL_INOUT_EXAMPLE_SIZE = 16
|
|
|
|
|
|
|
|
|
|
@ -196,6 +208,22 @@ tuple(map(len, (df_target_train, df_target_test)))
|
|
|
|
|
# Let MLflow infer the model signature from df_orig_features (model input)
# and df_target (model output).
mlflow_model_signature = mlflow.models.infer_signature(
    model_input=df_orig_features,
    model_output=df_target,
)
# Bare expression: the notebook displays the inferred signature as cell output.
mlflow_model_signature
|
|
|
|
|
|
|
|
|
|
# %% [raw] vscode={"languageId": "raw"}
|
|
|
|
|
# input_schema = mlflow.types.schema.Schema([
|
|
|
|
|
# mlflow.types.schema.ColSpec("double", "selling_price"),
|
|
|
|
|
# mlflow.types.schema.ColSpec("double", "driven_kms"),
|
|
|
|
|
# mlflow.types.schema.ColSpec("string", "fuel_type"),
|
|
|
|
|
# mlflow.types.schema.ColSpec("string", "selling_type"),
|
|
|
|
|
# mlflow.types.schema.ColSpec("string", "transmission"),
|
|
|
|
|
# mlflow.types.schema.ColSpec("double", "age"),
|
|
|
|
|
# ])
|
|
|
|
|
#
|
|
|
|
|
# output_schema = mlflow.types.schema.Schema([
|
|
|
|
|
# mlflow.types.schema.ColSpec("double", "present_price"),
|
|
|
|
|
# ])
|
|
|
|
|
#
|
|
|
|
|
# mlflow_model_signature = mlflow.models.ModelSignature(inputs=input_schema, outputs=output_schema)
|
|
|
|
|
|
|
|
|
|
# %% [markdown]
|
|
|
|
|
# Пайплайн предобработки признаков:
|
|
|
|
|
|
|
|
|
|
@ -235,7 +263,25 @@ pipeline = sklearn.pipeline.Pipeline([
|
|
|
|
|
pipeline
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
# Pipeline parameters narrowed down with `filter_params`.
#
# `include` lists the common pipeline-level keys plus per-step specifications
# for the 'preprocess' and 'regress' steps; `exclude` lists per-step keys to
# drop from the 'scale_to_standard' sub-step and from 'regress'.
# NOTE(review): the exact include/exclude semantics are defined by
# iis_project.sklearn_utils.filter_params. The tuple form `(False, spec)` is
# presumably "skip the step object itself, but apply `spec` to its nested
# parameters" — confirm against the helper.
model_params = filter_params(
    pipeline.get_params(),
    include={
        **{k: True for k in PIPELINE_PARAMS_COMMON_INCLUDE},
        'preprocess': (
            False,
            {
                **{k: True for k in COLUMN_TRANSFORMER_PARAMS_COMMON_INCLUDE},
                'scale_to_standard': True,
                'encode_categorical_wrt_target': True,
            },
        ),
        'regress': (False, True),
    },
    exclude={
        'preprocess': {'scale_to_standard': STANDARD_SCALER_PARAMS_COMMON_EXCLUDE},
        'regress': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,
    },
)
# Bare expression: the notebook displays the filtered parameters as cell output.
model_params
|
|
|
|
|
|
|
|
|
|
# %% [markdown]
|
|
|
|
|
|