@ -31,10 +31,10 @@ data_aug_pickle_path: Optional[str] = None
data_aug_pickle_relpath : str = ' cars.aug.pickle '
data_aug_pickle_relpath : str = ' cars.aug.pickle '
# Путь к файлу (pickle) для сохранения очищенного датасета относительно директории данных `data`. Игнорируется, если установлен data_aug_pickle_path.
# Путь к файлу (pickle) для сохранения очищенного датасета относительно директории данных `data`. Игнорируется, если установлен data_aug_pickle_path.
model_comment_path : Optional [ str ] = None
#model_global_comment_path: Optional[str] = None
# Полный путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью. Если не установлен, используется `research/<model_comment_relpath>`.
# # Полный путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью. Если не установлен, используется `research/<comment_relpath>`.
model_comment_relpath : str = ' comment.txt '
#model_comment_relpath: str = 'comment.txt '
# Путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью относительно директории `research`. Игнорируется, если установлен model_comment_path.
# # Путь к текстовому файлу с произвольным комментарием для сохранения в MLFlow как артефакт вместе с моделью относительно директории `research`. Игнорируется, если установлен comment_path.
mlflow_tracking_server_uri : str = ' http://localhost:5000 '
mlflow_tracking_server_uri : str = ' http://localhost:5000 '
# URL tracking-сервера MLFlow.
# URL tracking-сервера MLFlow.
@ -51,7 +51,7 @@ mlflow_root_run_name: str = 'Models'
# Имя корневого прогона MLFlow (остальные прогоны будут созданы блокнотом внутри этого, как nested)
# Имя корневого прогона MLFlow (остальные прогоны будут созданы блокнотом внутри этого, как nested)
# %%
# %%
from collections . abc import Sequence
from collections . abc import Collection, Sequence
import os
import os
import pathlib
import pathlib
import pickle
import pickle
@ -291,7 +291,8 @@ def mlflow_log_model(
model_signature = None ,
model_signature = None ,
input_example = None ,
input_example = None ,
pip_requirements = None ,
pip_requirements = None ,
comment_file_path = None ,
#global_comment_file_path=None,
extra_logs_handler = None ,
) :
) :
global mlflow_root_run_id
global mlflow_root_run_id
if not mlflow_do_log :
if not mlflow_do_log :
@ -322,8 +323,13 @@ def mlflow_log_model(
_ = mlflow . log_params ( model_params )
_ = mlflow . log_params ( model_params )
if metrics is not None :
if metrics is not None :
_ = mlflow . log_metrics ( metrics )
_ = mlflow . log_metrics ( metrics )
if ( comment_file_path is not None ) and comment_file_path . exists ( ) :
#if (global_comment_file_path is not None) and global_comment_file_path.exists():
mlflow . log_artifact ( str ( comment_file_path ) )
# mlflow.log_artifact(str(global_comment_file_path))
if extra_logs_handler is not None :
if callable ( extra_logs_handler ) and ( not isinstance ( extra_logs_handler , Collection ) ) :
extra_logs_handler = ( extra_logs_handler , )
for extr_logs_handler_fn in extra_logs_handler :
extr_logs_handler_fn ( mlflow )
# %% [markdown]
# %% [markdown]
@ -410,11 +416,11 @@ mlflow_log_model(
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
#global_comment_file_path= (
model_comment_path
# model_comment_path
if model_comment_path is not None
# if model_comment_path is not None
else ( BASE_PATH / ' research ' / model_comment_relpath )
# else (BASE_PATH / 'research' / model_comment_relpath )
) ,
#) ,
)
)
# %% [markdown]
# %% [markdown]
@ -563,16 +569,48 @@ mlflow_log_model(
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
#global_comment_file_path= (
model_comment_path
# model_comment_path
if model_comment_path is not None
# if model_comment_path is not None
else ( BASE_PATH / ' research ' / model_comment_relpath )
# else (BASE_PATH / 'research' / model_comment_relpath)
) ,
#) ,
)
)
# %% [markdown]
# %% [markdown]
# ### Модель с дополнительными и отфильтрованными признаками
# ### Модель с дополнительными и отфильтрованными признаками
# %%
def build_selected_columns_info_for_mlflow(names=None, indices=None):
    """Collect the selected-column description into a plain dict.

    Only the arguments that are not ``None`` end up in the result; the
    names entry (when present) always precedes the indices entry.

    Args:
        names: Selected column names, or ``None`` to leave them out.
        indices: Selected column indices, or ``None`` to leave them out.

    Returns:
        A new dict holding the supplied values under their respective keys;
        empty when both arguments are ``None``.
    """
    # Pair every key with its candidate value, then drop the unset ones.
    candidates = (
        (' names ', names),
        (' indices ', indices),
    )
    return {key: value for key, value in candidates if value is not None}
def build_extra_logs_handler_selected_columns(names=None, indices=None):
    """Create a handler that logs the selected-column info as a dict artifact.

    Args:
        names: Selected column names, or ``None``.
        indices: Selected column indices, or ``None``.

    Returns:
        A one-argument callable. When called with an object exposing
        ``log_dict`` it logs the info dict built from ``names`` and
        ``indices``; it does nothing when both are ``None``.
    """
    def _log_selected_columns(mlf):
        # Nothing was selected explicitly, so there is nothing to log.
        if names is None and indices is None:
            return
        mlf.log_dict(
            build_selected_columns_info_for_mlflow(names=names, indices=indices),
            ' selected_columns_info.json ',
        )

    return _log_selected_columns
# %%
def build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector, *, take_names=True, take_indices=True):
    """Build the selected-column info dict from a feature selector.

    Args:
        feature_selector: Object exposing ``k_feature_names_`` and
            ``k_feature_idx_`` (presumably a fitted sequential feature
            selector -- confirm against the caller).
        take_names: When true, read ``k_feature_names_`` as the names.
        take_indices: When true, read ``k_feature_idx_`` (converted to a
            tuple) as the indices.

    Returns:
        The result of ``build_selected_columns_info_for_mlflow`` for the
        values that were requested.
    """
    selected_names = None
    if take_names:
        selected_names = feature_selector.k_feature_names_

    selected_indices = None
    if take_indices:
        selected_indices = tuple(feature_selector.k_feature_idx_)

    return build_selected_columns_info_for_mlflow(
        names=selected_names,
        indices=selected_indices,
    )
def build_extra_logs_handler_selected_columns_from_sequential_feature_selector(feature_selector):
    """Create a handler that logs a feature selector's chosen columns.

    The selector is inspected only when the returned handler is invoked,
    not when the handler is created.

    Args:
        feature_selector: Object accepted by
            ``build_selected_columns_info_for_mlflow_from_sequential_feature_selector``.

    Returns:
        A one-argument callable that passes the info dict to the
        ``log_dict`` method of the object it receives.
    """
    def _log_selected_columns(mlf):
        mlf.log_dict(
            build_selected_columns_info_for_mlflow_from_sequential_feature_selector(feature_selector),
            ' selected_columns_info.json ',
        )

    return _log_selected_columns
# %%
# %%
regressor = build_regressor_baseline ( random_state = 0x8EDD )
regressor = build_regressor_baseline ( random_state = 0x8EDD )
regressor
regressor
@ -610,10 +648,10 @@ feature_selector
_ = feature_selector . fit ( df_augd_features_train , df_target_train . iloc [ : , 0 ] )
_ = feature_selector . fit ( df_augd_features_train , df_target_train . iloc [ : , 0 ] )
# %% [markdown]
# %% [markdown]
# Имена выбранных признаков :
# Выбранные признаки (имена и индексы) :
# %%
# %%
feature_selector. k_feature_names_
build_selected_columns_info_for_mlflow_from_sequential_feature_selector( feature_selector )
# %% [markdown]
# %% [markdown]
# MAPE в зависимости от количества выбранных признаков (указан регион выбора, ограниченный `FILTERED_FEATURES_NUM`):
# MAPE в зависимости от количества выбранных признаков (указан регион выбора, ограниченный `FILTERED_FEATURES_NUM`):
@ -683,11 +721,12 @@ mlflow_log_model(
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
#global_comment_file_path=(
model_comment_path
# model_comment_path
if model_comment_path is not None
# if model_comment_path is not None
else ( BASE_PATH / ' research ' / model_comment_relpath )
# else (BASE_PATH / 'research' / model_comment_relpath)
) ,
#),
extra_logs_handler = ( build_extra_logs_handler_selected_columns_from_sequential_feature_selector ( pipeline . named_steps [ ' select_features ' ] ) , ) ,
)
)
@ -809,11 +848,12 @@ mlflow_log_model(
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
#global_comment_file_path=(
model_comment_path
# model_comment_path
if model_comment_path is not None
# if model_comment_path is not None
else ( BASE_PATH / ' research ' / model_comment_relpath )
# else (BASE_PATH / 'research' / model_comment_relpath)
) ,
#),
extra_logs_handler = ( build_extra_logs_handler_selected_columns_from_sequential_feature_selector ( pipeline . named_steps [ ' select_features ' ] ) , ) ,
)
)
# %% [markdown]
# %% [markdown]
@ -854,11 +894,12 @@ mlflow_log_model(
model_signature = mlflow_model_signature ,
model_signature = mlflow_model_signature ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
pip_requirements = MODEL_PIP_REQUIREMENTS_PATH ,
comment_file_path = (
#global_comment_file_path=(
model_comment_path
# model_comment_path
if model_comment_path is not None
# if model_comment_path is not None
else ( BASE_PATH / ' research ' / model_comment_relpath )
# else (BASE_PATH / 'research' / model_comment_relpath)
) ,
#),
extra_logs_handler = ( build_extra_logs_handler_selected_columns_from_sequential_feature_selector ( pipeline . named_steps [ ' select_features ' ] ) , ) ,
)
)
# %%
# %%