From bb1796e081a92c1084db426fe53f78db4862d1e1 Mon Sep 17 00:00:00 2001 From: syropiatovvv Date: Sat, 1 Nov 2025 20:09:18 +0300 Subject: [PATCH] =?UTF-8?q?=D0=B2=20=D0=B1=D0=BB=D0=BE=D0=BA=D0=BD=D0=BE?= =?UTF-8?q?=D1=82=20research=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=BE=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20Python=20requirements=20=D0=B2=20MLFlow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ... requirements-isolated-research-model.txt} | 0 research/research.py | 22 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) rename requirements/{requirements-isolated-research-model.txt.unused => requirements-isolated-research-model.txt} (100%) diff --git a/requirements/requirements-isolated-research-model.txt.unused b/requirements/requirements-isolated-research-model.txt similarity index 100% rename from requirements/requirements-isolated-research-model.txt.unused rename to requirements/requirements-isolated-research-model.txt diff --git a/research/research.py b/research/research.py index 3439ef8..c39bb89 100644 --- a/research/research.py +++ b/research/research.py @@ -19,6 +19,9 @@ # %% [markdown] # Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill. +# %% +#XXX: разделить блокнот штук на 5 + # %% from typing import Optional @@ -211,7 +214,8 @@ tuple(map(len, (df_target_train, df_target_test))) # ## Модели # %% -#MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt' +# XXX: один файл requirements для всех моделей +MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt' # %% [markdown] # Сигнатура модели для MLFlow: @@ -286,7 +290,7 @@ def mlflow_log_model( nested_run_name, model_signature=None, input_example=None, - #pip_requirements=None, + pip_requirements=None, comment_file_path=None, ): global mlflow_root_run_id @@ -305,12 +309,14 @@ def mlflow_log_model( mlflow_root_run_id = root_run.info.run_id # важно одновременно использовать nested=True и parent_run_id=...: with mlflow.start_run(experiment_id=experiment_id, run_name=nested_run_name, nested=True, parent_run_id=mlflow_root_run_id): + if isinstance(pip_requirements, pathlib.PurePath): + pip_requirements = str(pip_requirements) _ = mlflow.sklearn.log_model( model, 'model', signature=model_signature, input_example=input_example, - #pip_requirements=pip_requirements, + pip_requirements=pip_requirements, ) if model_params is not None: _ = mlflow.log_params(model_params) @@ -403,7 +409,7 @@ mlflow_log_model( nested_run_name='Baseline model', model_signature=mlflow_model_signature, input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), - #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), + pip_requirements=MODEL_PIP_REQUIREMENTS_PATH, comment_file_path=( model_comment_path if model_comment_path is not None @@ -556,7 +562,7 @@ mlflow_log_model( nested_run_name='Model with engineered features', model_signature=mlflow_model_signature, input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), - #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), + pip_requirements=MODEL_PIP_REQUIREMENTS_PATH, comment_file_path=( model_comment_path if model_comment_path is not None @@ -676,7 +682,7 @@ mlflow_log_model( nested_run_name='Model with filtered engineered features', model_signature=mlflow_model_signature, input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), - #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), + pip_requirements=MODEL_PIP_REQUIREMENTS_PATH, comment_file_path=( model_comment_path if model_comment_path is not None @@ -802,7 +808,7 @@ mlflow_log_model( nested_run_name='Optimized model with filtered engineered features', model_signature=mlflow_model_signature, input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), - #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), + pip_requirements=MODEL_PIP_REQUIREMENTS_PATH, comment_file_path=( model_comment_path if model_comment_path is not None @@ -847,7 +853,7 @@ mlflow_log_model( nested_run_name='Final model', model_signature=mlflow_model_signature, input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE), - #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), + pip_requirements=MODEL_PIP_REQUIREMENTS_PATH, comment_file_path=( model_comment_path if model_comment_path is not None