в блокнот research добавить логирование списков выбранных признаков в MLFlow (через новый коллбек в mlflow_log_model), закомментировать логирование global_comment_file
Этот коммит содержится в:
@@ -31,10 +31,10 @@ data_aug_pickle_path: Optional[str] = None
|
||||
data_aug_pickle_relpath: str = 'cars.aug.pickle'
|
||||
# Путь к файлу (pickle) для сохранения очищенного датасета относительно директории данных `data`. Игнорируется, если установлен data_aug_pickle_path.
|
||||
|
||||
model_comment_path: Optional[str] = None
|
||||
# Полный путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью. Если не установлен, используется `research/<model_comment_relpath>`.
|
||||
model_comment_relpath: str = 'comment.txt'
|
||||
# Путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью относительно директории `research`. Игнорируется, если установлен model_comment_path.
|
||||
#model_global_comment_path: Optional[str] = None
|
||||
## Полный путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью. Если не установлен, используется `research/<comment_relpath>`.
|
||||
#model_comment_relpath: str = 'comment.txt'
|
||||
## Путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью относительно директории `research`. Игнорируется, если установлен comment_path.
|
||||
|
||||
mlflow_tracking_server_uri: str = 'http://localhost:5000'
|
||||
# URL tracking-сервера MLFlow.
|
||||
@@ -51,7 +51,7 @@ mlflow_root_run_name: str = 'Models'
|
||||
# Имя корневого прогона MLFlow (остальные прогоны будут созданы блокнотом внутри этого, как nested)
|
||||
|
||||
# %%
|
||||
from collections.abc import Sequence
|
||||
from collections.abc import Collection, Sequence
|
||||
import os
|
||||
import pathlib
|
||||
import pickle
|
||||
@@ -291,7 +291,8 @@ def mlflow_log_model(
|
||||
model_signature=None,
|
||||
input_example=None,
|
||||
pip_requirements=None,
|
||||
comment_file_path=None,
|
||||
#global_comment_file_path=None,
|
||||
extra_logs_handler=None,
|
||||
):
|
||||
global mlflow_root_run_id
|
||||
if not mlflow_do_log:
|
||||
@@ -322,8 +323,13 @@ def mlflow_log_model(
|
||||
_ = mlflow.log_params(model_params)
|
||||
if metrics is not None:
|
||||
_ = mlflow.log_metrics(metrics)
|
||||
if (comment_file_path is not None) and comment_file_path.exists():
|
||||
mlflow.log_artifact(str(comment_file_path))
|
||||
#if (global_comment_file_path is not None) and global_comment_file_path.exists():
|
||||
# mlflow.log_artifact(str(global_comment_file_path))
|
||||
if extra_logs_handler is not None:
|
||||
if callable(extra_logs_handler) and (not isinstance(extra_logs_handler, Collection)):
|
||||
extra_logs_handler = (extra_logs_handler,)
|
||||
for extr_logs_handler_fn in extra_logs_handler:
|
||||
extr_logs_handler_fn(mlflow)
|
||||
|
||||
|
||||
# %% [markdown]
|
||||
@@ -410,11 +416,11 @@ mlflow_log_model(
|
||||
model_signature=mlflow_model_signature,
|
||||
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
|
||||
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
|
||||
comment_file_path=(
|
||||
model_comment_path
|
||||
if model_comment_path is not None
|
||||
else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
),
|
||||
#global_comment_file_path=(
|
||||
# model_comment_path
|
||||
# if model_comment_path is not None
|
||||
# else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
#),
|
||||
)
|
||||
|
||||
# %% [markdown]
|
||||
@@ -563,16 +569,48 @@ mlflow_log_model(
|
||||
model_signature=mlflow_model_signature,
|
||||
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
|
||||
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
|
||||
comment_file_path=(
|
||||
model_comment_path
|
||||
if model_comment_path is not None
|
||||
else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
),
|
||||
#global_comment_file_path=(
|
||||
# model_comment_path
|
||||
# if model_comment_path is not None
|
||||
# else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
#),
|
||||
)
|
||||
|
||||
|
||||
# %% [markdown]
|
||||
# ### Модель с дополнительными и отфильтрованными признаками
|
||||
|
||||
# %%
|
||||
def build_selected_columns_info_for_mlflow(names=None, indices=None):
    """Collect the selected-column description into a plain dict.

    Args:
        names: Names of the selected columns, or ``None`` to omit them.
        indices: Positional indices of the selected columns, or ``None``
            to omit them.

    Returns:
        A dict holding the key ``'names'`` and/or ``'indices'`` for every
        argument that is not ``None``. The values are stored as passed in
        (no copy, no conversion). The dict is empty when both are ``None``.
    """
    # Only ``None`` means "not provided": an empty sequence is still recorded.
    provided = (('names', names), ('indices', indices))
    return {key: value for key, value in provided if value is not None}
|
||||
|
||||
def build_extra_logs_handler_selected_columns(names=None, indices=None):
    """Build a callback that logs the selected columns as a JSON artifact.

    The returned callable takes a single argument ``mlf`` (an object that
    provides ``log_dict``; ``mlflow_log_model`` passes the ``mlflow`` module)
    and writes the column description to ``selected_columns_info.json``.

    Args:
        names: Names of the selected columns, or ``None``.
        indices: Positional indices of the selected columns, or ``None``.

    Returns:
        A one-argument callable. It does nothing when both ``names`` and
        ``indices`` are ``None``.
    """
    def extra_log(mlf):
        # Nothing to describe: skip logging instead of writing an empty file.
        if names is None and indices is None:
            return
        info = build_selected_columns_info_for_mlflow(names=names, indices=indices)
        mlf.log_dict(info, 'selected_columns_info.json')

    return extra_log
|
||||
|
||||
|
||||
# %%
|
||||
def build_selected_columns_info_for_mlflow_from_sequential_feature_selector(
    feature_selector, *, take_names=True, take_indices=True
):
    """Describe the features chosen by a fitted sequential feature selector.

    Args:
        feature_selector: Object exposing ``k_feature_names_`` and
            ``k_feature_idx_`` (presumably a fitted mlxtend
            ``SequentialFeatureSelector`` -- confirm against the pipeline).
        take_names: When true, include ``k_feature_names_`` under ``'names'``.
        take_indices: When true, include ``k_feature_idx_`` (converted to a
            tuple) under ``'indices'``.

    Returns:
        The dict produced by ``build_selected_columns_info_for_mlflow``.
    """
    # An attribute is only read when its flag is set, so a selector lacking
    # one of them can still be described through the other.
    selected_names = feature_selector.k_feature_names_ if take_names else None
    selected_indices = tuple(feature_selector.k_feature_idx_) if take_indices else None
    return build_selected_columns_info_for_mlflow(
        names=selected_names,
        indices=selected_indices,
    )
|
||||
|
||||
def build_extra_logs_handler_selected_columns_from_sequential_feature_selector(feature_selector):
    """Build a callback that logs a feature selector's chosen columns.

    Args:
        feature_selector: Selector object handed to
            ``build_selected_columns_info_for_mlflow_from_sequential_feature_selector``
            when the callback runs.

    Returns:
        A one-argument callable. When invoked with ``mlf`` (an object that
        provides ``log_dict``), it writes the selected names and indices to
        ``selected_columns_info.json``.
    """
    def extra_log(mlf):
        # The selector is read here, at logging time, not when the handler
        # is built.
        info = build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector)
        mlf.log_dict(info, 'selected_columns_info.json')

    return extra_log
|
||||
|
||||
|
||||
# %%
|
||||
regressor = build_regressor_baseline(random_state=0x8EDD)
|
||||
regressor
|
||||
@@ -610,10 +648,10 @@ feature_selector
|
||||
_ = feature_selector.fit(df_augd_features_train, df_target_train.iloc[:, 0])
|
||||
|
||||
# %% [markdown]
|
||||
# Имена выбранных признаков:
|
||||
# Выбранные признаки (имена и индексы):
|
||||
|
||||
# %%
|
||||
feature_selector.k_feature_names_
|
||||
build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector)
|
||||
|
||||
# %% [markdown]
|
||||
# MAPE в зависимости от количества выбранных признаков (указан регион выбора, ограниченный `FILTERED_FEATURES_NUM`):
|
||||
@@ -683,11 +721,12 @@ mlflow_log_model(
|
||||
model_signature=mlflow_model_signature,
|
||||
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
|
||||
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
|
||||
comment_file_path=(
|
||||
model_comment_path
|
||||
if model_comment_path is not None
|
||||
else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
),
|
||||
#global_comment_file_path=(
|
||||
# model_comment_path
|
||||
# if model_comment_path is not None
|
||||
# else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
#),
|
||||
extra_logs_handler=(build_extra_logs_handler_selected_columns_from_sequential_feature_selector(pipeline.named_steps['select_features']),),
|
||||
)
|
||||
|
||||
|
||||
@@ -809,11 +848,12 @@ mlflow_log_model(
|
||||
model_signature=mlflow_model_signature,
|
||||
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
|
||||
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
|
||||
comment_file_path=(
|
||||
model_comment_path
|
||||
if model_comment_path is not None
|
||||
else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
),
|
||||
#global_comment_file_path=(
|
||||
# model_comment_path
|
||||
# if model_comment_path is not None
|
||||
# else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
#),
|
||||
extra_logs_handler=(build_extra_logs_handler_selected_columns_from_sequential_feature_selector(pipeline.named_steps['select_features']),),
|
||||
)
|
||||
|
||||
# %% [markdown]
|
||||
@@ -854,11 +894,12 @@ mlflow_log_model(
|
||||
model_signature=mlflow_model_signature,
|
||||
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
|
||||
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
|
||||
comment_file_path=(
|
||||
model_comment_path
|
||||
if model_comment_path is not None
|
||||
else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
),
|
||||
#global_comment_file_path=(
|
||||
# model_comment_path
|
||||
# if model_comment_path is not None
|
||||
# else (BASE_PATH / 'research' / model_comment_relpath)
|
||||
#),
|
||||
extra_logs_handler=(build_extra_logs_handler_selected_columns_from_sequential_feature_selector(pipeline.named_steps['select_features']),),
|
||||
)
|
||||
|
||||
# %%
|
||||
|
||||
Ссылка в новой задаче
Block a user