From bb1796e081a92c1084db426fe53f78db4862d1e1 Mon Sep 17 00:00:00 2001
From: syropiatovvv <syrslava@yandex.ru>
Date: Sat, 1 Nov 2025 20:09:18 +0300
Subject: [PATCH] =?UTF-8?q?=D0=B2=20=D0=B1=D0=BB=D0=BE=D0=BA=D0=BD=D0=BE?=
 =?UTF-8?q?=D1=82=20research=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?=
 =?UTF-8?q?=D0=BD=D0=BE=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80=D0=BE=D0=B2=D0=B0?=
 =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20Python=20requirements=20=D0=B2=20MLFlow?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ... requirements-isolated-research-model.txt} |  0
 research/research.py                          | 22 ++++++++++++-------
 2 files changed, 14 insertions(+), 8 deletions(-)
 rename requirements/{requirements-isolated-research-model.txt.unused => requirements-isolated-research-model.txt} (100%)

diff --git a/requirements/requirements-isolated-research-model.txt.unused b/requirements/requirements-isolated-research-model.txt
similarity index 100%
rename from requirements/requirements-isolated-research-model.txt.unused
rename to requirements/requirements-isolated-research-model.txt
diff --git a/research/research.py b/research/research.py
index 3439ef8..c39bb89 100644
--- a/research/research.py
+++ b/research/research.py
@@ -19,6 +19,9 @@
 # %% [markdown]
 # Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill.
 
+# %%
+# XXX: разделить блокнот примерно на 5 частей
+
 # %%
 from typing import Optional
 
@@ -211,7 +214,8 @@ tuple(map(len, (df_target_train, df_target_test)))
 # ## Модели
 
 # %%
-#MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt'
+# XXX: один файл requirements для всех моделей
+MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt'
 
 # %% [markdown]
 # Сигнатура модели для MLFlow:
@@ -286,7 +290,7 @@ def mlflow_log_model(
     nested_run_name,
     model_signature=None,
     input_example=None,
-    #pip_requirements=None,
+    pip_requirements=None,
     comment_file_path=None,
 ):
     global mlflow_root_run_id
@@ -305,12 +309,14 @@ def mlflow_log_model(
             mlflow_root_run_id = root_run.info.run_id
         # важно одновременно использовать nested=True и parent_run_id=...:
         with mlflow.start_run(experiment_id=experiment_id, run_name=nested_run_name, nested=True, parent_run_id=mlflow_root_run_id):
+            if isinstance(pip_requirements, pathlib.PurePath):
+                pip_requirements = str(pip_requirements)
             _ = mlflow.sklearn.log_model(
                 model,
                 'model',
                 signature=model_signature,
                 input_example=input_example,
-                #pip_requirements=pip_requirements,
+                pip_requirements=pip_requirements,
             )
             if model_params is not None:
                 _ = mlflow.log_params(model_params)
@@ -403,7 +409,7 @@ mlflow_log_model(
     nested_run_name='Baseline model',
     model_signature=mlflow_model_signature,
     input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
-    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH),
+    pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
     comment_file_path=(
         model_comment_path
         if model_comment_path is not None
@@ -556,7 +562,7 @@ mlflow_log_model(
     nested_run_name='Model with engineered features',
     model_signature=mlflow_model_signature,
     input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
-    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH),
+    pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
     comment_file_path=(
         model_comment_path
         if model_comment_path is not None
@@ -676,7 +682,7 @@ mlflow_log_model(
     nested_run_name='Model with filtered engineered features',
     model_signature=mlflow_model_signature,
     input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
-    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH),
+    pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
     comment_file_path=(
         model_comment_path
         if model_comment_path is not None
@@ -802,7 +808,7 @@ mlflow_log_model(
     nested_run_name='Optimized model with filtered engineered features',
     model_signature=mlflow_model_signature,
     input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
-    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH),
+    pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
     comment_file_path=(
         model_comment_path
         if model_comment_path is not None
@@ -847,7 +853,7 @@ mlflow_log_model(
     nested_run_name='Final model',
     model_signature=mlflow_model_signature,
     input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
-    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH),
+    pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
     comment_file_path=(
         model_comment_path
         if model_comment_path is not None