diff --git a/research/research.py b/research/research.py
index c6c29b0..3439ef8 100644
--- a/research/research.py
+++ b/research/research.py
@@ -742,15 +742,27 @@ repr(optuna_study.best_params)
 # %%
 regressor_best_params = dict(optuna_study.best_params.items())
 
+
 # %% [markdown]
 # Составной пайплайн:
 
 # %%
-pipeline = build_pipeline(
-    regressor_best_params['n_estimators'],
-    regressor_max_depth=regressor_best_params['max_depth'],
-    regressor_max_features=regressor_best_params['max_features'],
-)
+def build_pipeline_optimized_best():
+    """Build the pipeline from the best Optuna hyperparameters.
+
+    Reads the module-level ``regressor_best_params`` at call time and passes
+    its ``n_estimators``, ``max_depth`` and ``max_features`` values on to
+    ``build_pipeline``.
+    """
+    return build_pipeline(
+        regressor_best_params['n_estimators'],
+        regressor_max_depth=regressor_best_params['max_depth'],
+        regressor_max_features=regressor_best_params['max_features'],
+    )
+
+
+# %%
+pipeline = build_pipeline_optimized_best()
 pipeline
 
 # %%
@@ -804,4 +816,49 @@ mlflow_log_model(
     ),
 )
 
+# %% [markdown]
+# ### И в продакшн
+
+# %% [markdown]
+# Лучшая выбранная модель — с автоматически подобранными гиперпараметрами.
+
+# %%
+pipeline = build_pipeline_optimized_best()
+pipeline
+
+# %%
+model_params = filter_params(
+    pipeline.get_params(),
+    include={
+        'preprocess': (False, PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_INCLUDE.copy()),
+        'select_features': (False, FEATURE_SELECTOR_PARAMS_COMMON_INCLUDE.copy()),
+        'regress': (False, True),
+    },
+    exclude={
+        'preprocess': PREPROCESS_AUGMENTING_TRANSFORMER_PARAMS_COMMON_EXCLUDE.copy(),
+        'select_features': FEATURE_SELECTOR_PARAMS_COMMON_EXCLUDE,
+        'regress': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,
+    },
+)
+model_params
+
+# %%
+_ = pipeline.fit(df_orig_features, df_target.iloc[:, 0])
+
+# %%
+mlflow_log_model(
+    pipeline,
+    model_params=model_params,
+    metrics=None,
+    nested_run_name='Final model',
+    model_signature=mlflow_model_signature,
+    input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
+    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH),
+    comment_file_path=(
+        model_comment_path
+        if model_comment_path is not None
+        else (BASE_PATH / 'research' / model_comment_relpath)
+    ),
+)
+
 # %%