@ -19,6 +19,9 @@
# %% [markdown]
# %% [markdown]
# Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill.
# Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill.
# %%
#XXX: разделить блокнот штук на 5
# %%
# %%
from typing import Optional
from typing import Optional
@ -211,7 +214,8 @@ tuple(map(len, (df_target_train, df_target_test)))
# ## Модели
# ## Модели
# %%
# %%
# Pip requirements file passed as `pip_requirements` when models are logged to MLflow.
# XXX: one requirements file shared by all models
MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt'
# %% [markdown]
# %% [markdown]
# Сигнатура модели для MLFlow:
# Сигнатура модели для MLFlow:
@ -286,7 +290,7 @@ def mlflow_log_model(
nested_run_name ,
nested_run_name ,
model_signature = None ,
model_signature = None ,
input_example = None ,
input_example = None ,
#pip_requirements=None ,
pip_requirements = None ,
comment_file_path = None ,
comment_file_path = None ,
) :
) :
global mlflow_root_run_id
global mlflow_root_run_id
@ -305,12 +309,14 @@ def mlflow_log_model(
mlflow_root_run_id = root_run . info . run_id
mlflow_root_run_id = root_run . info . run_id
# важно одновременно использовать nested=True и parent_run_id=...:
# важно одновременно использовать nested=True и parent_run_id=...:
with mlflow . start_run ( experiment_id = experiment_id , run_name = nested_run_name , nested = True , parent_run_id = mlflow_root_run_id ) :
with mlflow . start_run ( experiment_id = experiment_id , run_name = nested_run_name , nested = True , parent_run_id = mlflow_root_run_id ) :
if isinstance ( pip_requirements , pathlib . PurePath ) :
pip_requirements = str ( pip_requirements )
_ = mlflow . sklearn . log_model (
_ = mlflow . sklearn . log_model (
model ,
model ,
' model ' ,
' model ' ,
signature = model_signature ,
signature = model_signature ,
input_example = input_example ,
input_example = input_example ,
#pip_requirements=pip_requirements ,
pip_requirements = pip_requirements ,
)
)
if model_params is not None :
if model_params is not None :
_ = mlflow . log_params ( model_params )
_ = mlflow . log_params ( model_params )
@ -403,7 +409,7 @@ mlflow_log_model(
nested_run_name = ' Baseline model ' ,
nested_run_name = ' Baseline model ' ,
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
comment_file_path = (
model_comment_path
model_comment_path
if model_comment_path is not None
if model_comment_path is not None
@ -556,7 +562,7 @@ mlflow_log_model(
nested_run_name = ' Model with engineered features ' ,
nested_run_name = ' Model with engineered features ' ,
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
comment_file_path = (
model_comment_path
model_comment_path
if model_comment_path is not None
if model_comment_path is not None
@ -676,7 +682,7 @@ mlflow_log_model(
nested_run_name = ' Model with filtered engineered features ' ,
nested_run_name = ' Model with filtered engineered features ' ,
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
comment_file_path = (
model_comment_path
model_comment_path
if model_comment_path is not None
if model_comment_path is not None
@ -802,7 +808,7 @@ mlflow_log_model(
nested_run_name = ' Optimized model with filtered engineered features ' ,
nested_run_name = ' Optimized model with filtered engineered features ' ,
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
comment_file_path = (
model_comment_path
model_comment_path
if model_comment_path is not None
if model_comment_path is not None
@ -847,7 +853,7 @@ mlflow_log_model(
nested_run_name = ' Final model ' ,
nested_run_name = ' Final model ' ,
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
comment_file_path = (
model_comment_path
model_comment_path
if model_comment_path is not None
if model_comment_path is not None