в блокнот research добавить логирование списков выбранных признаков в MLFlow (через новый коллбек в mlflow_log_model), закомментировать логирование global_comment_file

lab_2/master
syropiatovvv 2 дня назад
Родитель bb1796e081
Commit 59897fbe61
Подписано: syropiatovvv
Идентификатор GPG ключа: 297380B8143A31BD

@ -31,10 +31,10 @@ data_aug_pickle_path: Optional[str] = None
data_aug_pickle_relpath: str = 'cars.aug.pickle'
# Путь к файлу (pickle) для сохранения очищенного датасета относительно директории данных `data`. Игнорируется, если установлен data_aug_pickle_path.
model_comment_path: Optional[str] = None
# Полный путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью. Если не установлен, используется `research/<model_comment_relpath>`.
model_comment_relpath: str = 'comment.txt'
# Путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью относительно директории `research`. Игнорируется, если установлен model_comment_path.
#model_global_comment_path: Optional[str] = None
## Полный путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью. Если не установлен, используется `research/<comment_relpath>`.
#model_comment_relpath: str = 'comment.txt'
## Путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью относительно директории `research`. Игнорируется, если установлен comment_path.
mlflow_tracking_server_uri: str = 'http://localhost:5000'
# URL tracking-сервера MLFlow.
@ -51,7 +51,7 @@ mlflow_root_run_name: str = 'Models'
# Имя корневого прогона MLFlow (остальные прогоны будут созданы блокнотом внутри этого, как nested)
# %%
from collections.abc import Sequence
from collections.abc import Collection, Sequence
import os
import pathlib
import pickle
@ -291,7 +291,8 @@ def mlflow_log_model(
model_signature=None,
input_example=None,
pip_requirements=None,
comment_file_path=None,
#global_comment_file_path=None,
extra_logs_handler=None,
):
global mlflow_root_run_id
if not mlflow_do_log:
@ -322,8 +323,13 @@ def mlflow_log_model(
_ = mlflow.log_params(model_params)
if metrics is not None:
_ = mlflow.log_metrics(metrics)
if (comment_file_path is not None) and comment_file_path.exists():
mlflow.log_artifact(str(comment_file_path))
#if (global_comment_file_path is not None) and global_comment_file_path.exists():
# mlflow.log_artifact(str(global_comment_file_path))
if extra_logs_handler is not None:
if callable(extra_logs_handler) and (not isinstance(extra_logs_handler, Collection)):
extra_logs_handler = (extra_logs_handler,)
for extr_logs_handler_fn in extra_logs_handler:
extr_logs_handler_fn(mlflow)
# %% [markdown]
@ -410,11 +416,11 @@ mlflow_log_model(
model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=(
model_comment_path
if model_comment_path is not None
else (BASE_PATH / 'research' / model_comment_relpath)
),
#global_comment_file_path=(
# model_comment_path
# if model_comment_path is not None
# else (BASE_PATH / 'research' / model_comment_relpath)
#),
)
# %% [markdown]
@ -563,16 +569,48 @@ mlflow_log_model(
model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=(
model_comment_path
if model_comment_path is not None
else (BASE_PATH / 'research' / model_comment_relpath)
),
#global_comment_file_path=(
# model_comment_path
# if model_comment_path is not None
# else (BASE_PATH / 'research' / model_comment_relpath)
#),
)
# %% [markdown]
# ### Модель с дополнительными и отфильтрованными признаками
# %%
def build_selected_columns_info_for_mlflow(names=None, indices=None):
    """Assemble a dict describing the selected columns.

    Each argument that is not ``None`` is stored unchanged under the key
    matching its parameter name (``'names'`` first, then ``'indices'``).
    Arguments left as ``None`` are omitted, so the result may be empty.

    Args:
        names: Selected column names, or ``None`` to leave them out.
        indices: Selected column indices, or ``None`` to leave them out.

    Returns:
        A new dict with zero, one or two entries.
    """
    candidates = (('names', names), ('indices', indices))
    return {key: value for key, value in candidates if value is not None}
def build_extra_logs_handler_selected_columns(names=None, indices=None):
    """Create a callback that logs the given selected-column info.

    Args:
        names: Selected column names, or ``None``.
        indices: Selected column indices, or ``None``.

    Returns:
        A one-argument function. When called with an object exposing
        ``log_dict``, it builds the info dict via
        ``build_selected_columns_info_for_mlflow`` and passes it to
        ``log_dict`` under the artifact name ``selected_columns_info.json``.
        If both ``names`` and ``indices`` are ``None`` the callback does
        nothing.
    """
    def extra_log(mlf):
        # Nothing to record when neither names nor indices were supplied.
        if names is None and indices is None:
            return
        payload = build_selected_columns_info_for_mlflow(names=names, indices=indices)
        mlf.log_dict(payload, 'selected_columns_info.json')

    return extra_log
# %%
def build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector, *, take_names=True, take_indices=True):
    """Describe the columns chosen by a feature selector.

    Args:
        feature_selector: Object exposing ``k_feature_names_`` and
            ``k_feature_idx_`` (presumably a fitted sequential feature
            selector; confirm against the caller).
        take_names: When truthy, include ``k_feature_names_`` as is.
        take_indices: When truthy, include ``k_feature_idx_`` converted to
            a tuple.

    Returns:
        The dict produced by ``build_selected_columns_info_for_mlflow`` for
        the requested parts. An attribute is only read when its flag is
        truthy.
    """
    selected_names = None
    if take_names:
        selected_names = feature_selector.k_feature_names_

    selected_indices = None
    if take_indices:
        selected_indices = tuple(feature_selector.k_feature_idx_)

    return build_selected_columns_info_for_mlflow(names=selected_names, indices=selected_indices)
def build_extra_logs_handler_selected_columns_from_sequential_feature_selector(feature_selector):
    """Create a callback that logs the selector's chosen columns.

    Args:
        feature_selector: Selector object handed to
            ``build_selected_columns_info_for_mlflow_from_sequential_feature_selector``.

    Returns:
        A one-argument function. When called with an object exposing
        ``log_dict``, it builds the selected-columns info and passes it to
        ``log_dict`` under the artifact name ``selected_columns_info.json``.
        The selector is read when the callback runs, not when it is created.
    """
    def extra_log(mlf):
        mlf.log_dict(
            build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector),
            'selected_columns_info.json',
        )

    return extra_log
# %%
regressor = build_regressor_baseline(random_state=0x8EDD)
regressor
@ -610,10 +648,10 @@ feature_selector
_ = feature_selector.fit(df_augd_features_train, df_target_train.iloc[:, 0])
# %% [markdown]
# Имена выбранных признаков:
# Выбранные признаки (имена и индексы):
# %%
feature_selector.k_feature_names_
build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector)
# %% [markdown]
# MAPE в зависимости от количества выбранных признаков (указан регион выбора, ограниченный `FILTERED_FEATURES_NUM`):
@ -683,11 +721,12 @@ mlflow_log_model(
model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=(
model_comment_path
if model_comment_path is not None
else (BASE_PATH / 'research' / model_comment_relpath)
),
#global_comment_file_path=(
# model_comment_path
# if model_comment_path is not None
# else (BASE_PATH / 'research' / model_comment_relpath)
#),
extra_logs_handler=(build_extra_logs_handler_selected_columns_from_sequential_feature_selector(pipeline.named_steps['select_features']),),
)
@ -809,11 +848,12 @@ mlflow_log_model(
model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=(
model_comment_path
if model_comment_path is not None
else (BASE_PATH / 'research' / model_comment_relpath)
),
#global_comment_file_path=(
# model_comment_path
# if model_comment_path is not None
# else (BASE_PATH / 'research' / model_comment_relpath)
#),
extra_logs_handler=(build_extra_logs_handler_selected_columns_from_sequential_feature_selector(pipeline.named_steps['select_features']),),
)
# %% [markdown]
@ -854,11 +894,12 @@ mlflow_log_model(
model_signature=mlflow_model_signature,
input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
pip_requirements=MODEL_PIP_REQUIREMENTS_PATH,
comment_file_path=(
model_comment_path
if model_comment_path is not None
else (BASE_PATH / 'research' / model_comment_relpath)
),
#global_comment_file_path=(
# model_comment_path
# if model_comment_path is not None
# else (BASE_PATH / 'research' / model_comment_relpath)
#),
extra_logs_handler=(build_extra_logs_handler_selected_columns_from_sequential_feature_selector(pipeline.named_steps['select_features']),),
)
# %%

Загрузка…
Отмена
Сохранить