В блокнот research добавлено логирование Python requirements в MLflow

lab_2/master
syropiatovvv 2 дня назад
Родитель d39d8f98d6
Коммит bb1796e081
Подписано: syropiatovvv
Идентификатор GPG ключа: 297380B8143A31BD

@@ -19,6 +19,9 @@
# %% [markdown] # %% [markdown]
# Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill. # Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill.
# %%
#XXX: разделить блокнот штук на 5
# %% # %%
from typing import Optional from typing import Optional
@@ -211,7 +214,8 @@ tuple(map(len, (df_target_train, df_target_test)))
# ## Модели # ## Модели
# %% # %%
#MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt' # XXX: один файл requirements для всех моделей
MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt'
# %% [markdown] # %% [markdown]
# Сигнатура модели для MLFlow: # Сигнатура модели для MLFlow:
@@ -286,7 +290,7 @@ def mlflow_log_model(
nested_run_name, nested_run_name,
model_signature=None, model_signature=None,
input_example=None, input_example=None,
#pip_requirements=None, pip_requirements=None,
comment_file_path=None, comment_file_path=None,
): ):
global mlflow_root_run_id global mlflow_root_run_id
@@ -305,12 +309,14 @@ def mlflow_log_model(
mlflow_root_run_id = root_run.info.run_id mlflow_root_run_id = root_run.info.run_id
# важно одновременно использовать nested=True и parent_run_id=...: # важно одновременно использовать nested=True и parent_run_id=...:
with mlflow.start_run(experiment_id=experiment_id, run_name=nested_run_name, nested=True, parent_run_id=mlflow_root_run_id): with mlflow.start_run(experiment_id=experiment_id, run_name=nested_run_name, nested=True, parent_run_id=mlflow_root_run_id):
if isinstance(pip_requirements, pathlib.PurePath):
pip_requirements = str(pip_requirements)
_ = mlflow.sklearn.log_model( _ = mlflow.sklearn.log_model(
model, model,
'model', 'model',
signature=model_signature, signature=model_signature,
input_example=input_example, input_example=input_example,
#pip_requirements=pip_requirements, pip_requirements=pip_requirements,
) )
if model_params is not None: if model_params is not None:
_ = mlflow.log_params(model_params) _ = mlflow.log_params(model_params)
@@ -403,7 +409,7 @@ mlflow_log_model(
nested_run_name='Baseline model', nested_run_name='Baseline model',
model_signature=mlflow_model_signature, model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=( comment_file_path=(
model_comment_path model_comment_path
if model_comment_path is not None if model_comment_path is not None
@@ -556,7 +562,7 @@ mlflow_log_model(
nested_run_name='Model with engineered features', nested_run_name='Model with engineered features',
model_signature=mlflow_model_signature, model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=( comment_file_path=(
model_comment_path model_comment_path
if model_comment_path is not None if model_comment_path is not None
@@ -676,7 +682,7 @@ mlflow_log_model(
nested_run_name='Model with filtered engineered features', nested_run_name='Model with filtered engineered features',
model_signature=mlflow_model_signature, model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=( comment_file_path=(
model_comment_path model_comment_path
if model_comment_path is not None if model_comment_path is not None
@@ -802,7 +808,7 @@ mlflow_log_model(
nested_run_name='Optimized model with filtered engineered features', nested_run_name='Optimized model with filtered engineered features',
model_signature=mlflow_model_signature, model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=( comment_file_path=(
model_comment_path model_comment_path
if model_comment_path is not None if model_comment_path is not None
@@ -847,7 +853,7 @@ mlflow_log_model(
nested_run_name='Final model', nested_run_name='Final model',
model_signature=mlflow_model_signature, model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
#pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=( comment_file_path=(
model_comment_path model_comment_path
if model_comment_path is not None if model_comment_path is not None

Загрузка…
Отмена
Сохранить