@ -39,13 +39,15 @@ mlflow_registry_uri: Optional[str] = None
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# URL сервера registry MLFlow (если не указан, используется `mlflow_tracking_server_uri`). 
 
					 
					 
					 
					# URL сервера registry MLFlow (если не указан, используется `mlflow_tracking_server_uri`). 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					mlflow_do_log :  bool  =  False 
 
					 
					 
					 
					mlflow_do_log :  bool  =  False 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Записывать ли прогон  (run) в MLFlow; если True, при каждом исполнении блокнота создаётся новый прогон с именем `mlflow_run_name` .
 
					 
					 
					 
					# Записывать ли прогоны (runs) в MLFlow.
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					mlflow_experiment_id :  Optional [ str ]  =  None 
 
					 
					 
					 
					mlflow_experiment_id :  Optional [ str ]  =  None 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# ID эксперимента MLFlow, имеет приоритет над `mlflow_experiment_name`. 
 
					 
					 
					 
					# ID эксперимента MLFlow, имеет приоритет над `mlflow_experiment_name`. 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					mlflow_experiment_name :  Optional [ str ]  =  ' Current price predicion for used cars ' 
 
					 
					 
					 
					mlflow_experiment_name :  Optional [ str ]  =  ' Current price predicion for used cars ' 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Имя эксперимента MLFlow (ниже приоритетом, чем `mlflow_experiment_id`). 
 
					 
					 
					 
					# Имя эксперимента MLFlow (ниже приоритетом, чем `mlflow_experiment_id`). 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					mlflow_run_name :  str  =  ' Baseline model ' 
 
					 
					 
					 
					mlflow_baseline_run_name :  str  =  ' Baseline model ' 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					# Имя нового прогона MLFlow (используется для создания нового прогона, если `mlflow_do_log` установлен в True). 
 
					 
					 
					 
					# Имя нового прогона MLFlow для baseline-модели.
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					mlflow_feateng_run_name :  str  =  ' Model with engineered features ' 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Имя нового прогона MLFlow для модели, использующей дополнительные признаки.
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					import  os 
 
					 
					 
					 
					import  os 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -75,7 +77,7 @@ sys.path.insert(0, str(CODE_PATH.resolve()))
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					from  iis_project . sklearn_utils  import  filter_params 
 
					 
					 
					 
					from  iis_project . sklearn_utils  import  filter_params 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					from  iis_project . sklearn_utils . compose  import  COLUMN_TRANSFORMER_PARAMS_COMMON_INCLUDE 
 
					 
					 
					 
					from  iis_project . sklearn_utils . compose  import  COLUMN_TRANSFORMER_PARAMS_COMMON_INCLUDE 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					from  iis_project . sklearn_utils . ensemble  import  RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE 
 
					 
					 
					 
					from  iis_project . sklearn_utils . ensemble  import  RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					from  iis_project . sklearn_utils . p ipeline import  PIPELINE_PARAMS_COMMON_INCLUDE  
 
					 
					 
					 
					from  iis_project . sklearn_utils . p andas import  pandas_dataframe_from_transformed_artifacts  
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					from  iis_project . sklearn_utils . preprocessing  import  STANDARD_SCALER_PARAMS_COMMON_EXCLUDE 
 
					 
					 
					 
					from  iis_project . sklearn_utils . preprocessing  import  STANDARD_SCALER_PARAMS_COMMON_EXCLUDE 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -112,23 +114,17 @@ with open(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    df_orig  =  pickle . load ( input_file ) 
 
					 
					 
					 
					    df_orig  =  pickle . load ( input_file ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Обзор строк датасета: 
 
					 
					 
					 
					# Обзор датасета: 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					df_orig . head ( 0x10 ) 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Размер датасета: 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					len ( df_orig ) 
 
					 
					 
					 
					len ( df_orig ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Количество непустых значений и тип каждого столбца: 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					df_orig . info ( ) 
 
					 
					 
					 
					df_orig . info ( ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					df_orig . head ( 0x10 ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# ## Разделение датасета на выборки 
 
					 
					 
					 
					# ## Разделение датасета на выборки 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -196,7 +192,7 @@ df_orig_features_train, df_orig_features_test, df_target_train, df_target_test =
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					tuple ( map ( len ,  ( df_target_train ,  df_target_test ) ) ) 
 
					 
					 
					 
					tuple ( map ( len ,  ( df_target_train ,  df_target_test ) ) ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# ## Создание пайплайнов обработки признаков и обучения модели
 
					 
					 
					 
					# ## Модели
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					#MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt' 
 
					 
					 
					 
					#MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt' 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -208,6 +204,7 @@ tuple(map(len, (df_target_train, df_target_test)))
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					mlflow_model_signature  =  mlflow . models . infer_signature ( model_input = df_orig_features ,  model_output = df_target ) 
 
					 
					 
					 
					mlflow_model_signature  =  mlflow . models . infer_signature ( model_input = df_orig_features ,  model_output = df_target ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					mlflow_model_signature 
 
					 
					 
					 
					mlflow_model_signature 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [raw] vscode={"languageId": "raw"} 
 
					 
					 
					 
					# %% [raw] vscode={"languageId": "raw"} 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# input_schema = mlflow.types.schema.Schema([ 
 
					 
					 
					 
					# input_schema = mlflow.types.schema.Schema([ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					#     mlflow.types.schema.ColSpec("double", "selling_price"), 
 
					 
					 
					 
					#     mlflow.types.schema.ColSpec("double", "selling_price"), 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -224,33 +221,95 @@ mlflow_model_signature
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# 
 
					 
					 
					 
					# 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# mlflow_model_signature = mlflow.models.ModelSignature(inputs=input_schema, outputs=output_schema) 
 
					 
					 
					 
					# mlflow_model_signature = mlflow.models.ModelSignature(inputs=input_schema, outputs=output_schema) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					def build_features_scaler_standard():
					    """Return a new ``StandardScaler`` built with its default arguments.

					    A fresh instance is created on every call, so callers never share
					    one scaler object between pipelines.
					    """
					    scaler = sklearn.preprocessing.StandardScaler()
					    return scaler
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					#def build_categorical_features_encoder_onehot(): 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					#    return sklearn.preprocessing.OneHotEncoder() 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					def build_categorical_features_encoder_target(*, random_state=None):
					    """Return a new ``TargetEncoder`` for encoding categorical features.

					    Args:
					        random_state: Forwarded unchanged to ``TargetEncoder``; keyword-only.

					    Returns:
					        A new ``sklearn.preprocessing.TargetEncoder`` configured with a
					        continuous target, automatic smoothing and shuffling enabled.
					    """
					    # Option strings must match scikit-learn's documented names exactly:
					    # whitespace-padded variants (e.g. ' continuous ') are not among the
					    # accepted values and are rejected by parameter validation.
					    return sklearn.preprocessing.TargetEncoder(
					        target_type='continuous',
					        smooth='auto',
					        shuffle=True,
					        random_state=random_state,
					    )
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Регрессор — небольшой случайный лес, цель — минимизация квадрата ошибки предсказания: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					def build_regressor(*, random_state=None):
					    """Return a new small random-forest regressor.

					    Args:
					        random_state: Forwarded unchanged to ``RandomForestRegressor``;
					            keyword-only.

					    Returns:
					        A new ``sklearn.ensemble.RandomForestRegressor`` with 10 trees,
					        the squared-error split criterion and ``sqrt`` feature sampling.
					    """
					    # Option strings must match scikit-learn's documented names exactly:
					    # whitespace-padded variants (e.g. ' squared_error ') are not among
					    # the accepted values and are rejected by parameter validation.
					    return sklearn.ensemble.RandomForestRegressor(
					        n_estimators=10,  # first positional parameter, spelled out for clarity
					        criterion='squared_error',
					        max_features='sqrt',
					        random_state=random_state,
					    )
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					def score_predictions(target_test, target_test_predicted):
					    """Score predictions against the true targets with three error metrics.

					    Args:
					        target_test: True target values, passed as the first argument to
					            each scikit-learn metric function.
					        target_test_predicted: Predicted target values, passed as the
					            second argument.

					    Returns:
					        A dict with one entry per metric, in this order: mean squared
					        error, mean absolute error, mean absolute percentage error.
					    """
					    # NOTE(review): the key strings carry leading/trailing spaces exactly
					    # as in the source; this looks like a rendering artifact -- confirm
					    # the intended key names before relying on them.
					    scorers = (
					        (' mse ', sklearn.metrics.mean_squared_error),
					        (' mae ', sklearn.metrics.mean_absolute_error),
					        (' mape ', sklearn.metrics.mean_absolute_percentage_error),
					    )
					    return {
					        metric_name: scorer(target_test, target_test_predicted)
					        for metric_name, scorer in scorers
					    }
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# использует глобальные переменные mlflow_do_log, mlflow_experiment 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					def mlflow_log_model(
					    model,
					    model_params,
					    metrics,
					    *,
					    run_name,
					    model_signature=None,
					    input_example=None,
					    #pip_requirements=None,
					    comment_file_path=None,
					):
					    """Log a scikit-learn model to a new MLflow run when logging is enabled.

					    Reads the module-level globals ``mlflow_do_log`` (on/off switch) and
					    ``mlflow_experiment`` (its ``experiment_id`` selects the experiment).
					    Does nothing and returns ``None`` when ``mlflow_do_log`` is falsy.

					    Args:
					        model: Model object handed to ``mlflow.sklearn.log_model``.
					        model_params: Parameters for ``mlflow.log_params``; skipped if
					            ``None``.
					        metrics: Metrics for ``mlflow.log_metrics``; skipped if ``None``.
					        run_name: Name given to the MLflow run that is started.
					        model_signature: Passed to ``log_model`` as ``signature``.
					        input_example: Passed to ``log_model`` as ``input_example``.
					        comment_file_path: Optional path (``str`` or path-like) of a file
					            to attach as a run artifact; attached only if it exists.
					    """
					    if not mlflow_do_log:
					        return

					    # Imported locally so the helper does not depend on the notebook's
					    # import cell having brought ``pathlib`` into scope.
					    import pathlib

					    with mlflow.start_run(experiment_id=mlflow_experiment.experiment_id, run_name=run_name):
					        # NOTE(review): the artifact name literal is reproduced exactly as
					        # in the source, padding included; it looks like a rendering
					        # artifact -- confirm the intended value.
					        mlflow.sklearn.log_model(
					            model,
					            ' model ',
					            signature=model_signature,
					            input_example=input_example,
					            #pip_requirements=pip_requirements,
					        )
					        if model_params is not None:
					            mlflow.log_params(model_params)
					        if metrics is not None:
					            mlflow.log_metrics(metrics)
					        if comment_file_path is not None:
					            # Coerce so plain string paths work too; calling ``.exists()``
					            # on a ``str`` would raise AttributeError.
					            comment_path = pathlib.Path(comment_file_path)
					            if comment_path.exists():
					                mlflow.log_artifact(str(comment_path))
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# ### Baseline модель 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Пайплайн предобработки признаков: 
 
					 
					 
					 
					# Пайплайн предобработки признаков: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					preprocess_transformer  =  sklearn . compose . ColumnTransformer ( 
 
					 
					 
					 
					preprocess_transformer  =  sklearn . compose . ColumnTransformer ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    [ 
 
					 
					 
					 
					    [ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        ( ' scale_to_standard ' ,  sklearn . preprocessing . StandardScaler ( ) ,  features_to_scale_to_standard_columns ) , 
 
					 
					 
					 
					        ( ' scale_to_standard ' ,  build_features_scaler_standard ( ) ,  features_to_scale_to_standard_columns ) , 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					        ( 
 
					 
					 
					 
					        ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            #'encode_categoricals_one_hot', 
 
					 
					 
					 
					            #'encode_categoricals_one_hot', 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            ' encode_categoricals_wrt_target ' , 
 
					 
					 
					 
					            ' encode_categoricals_wrt_target ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            #sklearn.preprocessing.OneHotEncoder(), 
 
					 
					 
					 
					            #build_categorical_features_encoder_onehot(), 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					            sklearn . preprocessing . TargetEncoder ( 
 
					 
					 
					 
					            build_categorical_features_encoder_target ( random_state = 0x2ED6 ) , 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					                target_type = ' continuous ' ,  smooth = ' auto ' ,  shuffle = True ,  random_state = 0x2ED6 , 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            ) , 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					            features_to_encode_wrt_target_columns , 
 
					 
					 
					 
					            features_to_encode_wrt_target_columns , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        ) , 
 
					 
					 
					 
					        ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    ] , 
 
					 
					 
					 
					    ] , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    remainder = ' drop ' , 
 
					 
					 
					 
					    remainder = ' drop ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					) 
 
					 
					 
					 
					) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Регрессор — небольшой случайный лес, цель — минимизация квадрата ошибки предсказания: 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					regressor  =  sklearn . ensemble . RandomForestRegressor ( 
 
					 
					 
					 
					regressor  =  build_regressor ( random_state = 0x016B ) 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					    10 ,  criterion = ' squared_error ' ,  max_features = ' sqrt ' ,  random_state = 0x016B , 
 
					 
					 
					 
					regressor 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					) 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Составной пайплайн: 
 
					 
					 
					 
					# Составной пайплайн: 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -266,7 +325,6 @@ pipeline
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					model_params  =  filter_params ( 
 
					 
					 
					 
					model_params  =  filter_params ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    pipeline . get_params ( ) , 
 
					 
					 
					 
					    pipeline . get_params ( ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    include = { 
 
					 
					 
					 
					    include = { 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        * * { k :  True  for  k  in  PIPELINE_PARAMS_COMMON_INCLUDE } , 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        ' preprocess ' :  ( 
 
					 
					 
					 
					        ' preprocess ' :  ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            False , 
 
					 
					 
					 
					            False , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            { 
 
					 
					 
					 
					            { 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -285,11 +343,14 @@ model_params = filter_params(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					model_params 
 
					 
					 
					 
					model_params 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					#  ## Baseline модель 
 
					 
					 
					 
					#  Обучение модели: 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					_  =  pipeline . fit ( df_orig_features_train ,  df_target_train . iloc [ : ,  0 ] ) 
 
					 
					 
					 
					_  =  pipeline . fit ( df_orig_features_train ,  df_target_train . iloc [ : ,  0 ] ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Оценка качества: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					target_test_predicted  =  pipeline . predict ( df_orig_features_test ) 
 
					 
					 
					 
					target_test_predicted  =  pipeline . predict ( df_orig_features_test ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -297,31 +358,168 @@ target_test_predicted = pipeline.predict(df_orig_features_test)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Метрики качества (MAPE, а также MSE, MAE): 
 
					 
					 
					 
					# Метрики качества (MAPE, а также MSE, MAE): 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					metrics  =  { 
 
					 
					 
					 
					metrics  =  score_predictions ( df_target_test ,  target_test_predicted ) 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					    ' mse ' :  sklearn . metrics . mean_squared_error ( df_target_test ,  target_test_predicted ) , 
 
					 
					 
					 
					metrics 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					    ' mae ' :  sklearn . metrics . mean_absolute_error ( df_target_test ,  target_test_predicted ) , 
 
					 
					 
					 
					
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					    ' mape ' :  sklearn . metrics . mean_absolute_percentage_error ( df_target_test ,  target_test_predicted ) , 
 
					 
					 
					 
					# %% 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					} 
 
					 
					 
					 
					mlflow_log_model ( 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    pipeline , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    model_params = model_params , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    metrics = { k :  float ( v )  for  k ,  v  in  metrics . items ( ) } , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    run_name = mlflow_baseline_run_name , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH), 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    comment_file_path = ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        model_comment_path 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        else  ( BASE_PATH  /  ' research '  /  model_comment_relpath ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# ### Модель с дополнительными признаками 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Пайплайн предобработки признаков: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Columns routed to the polynomial-expansion branch of the preprocessing
					# transformer.
					features_to_extend_as_polynomial = (
					    ' selling_price ',
					    ' driven_kms ',
					)
					# Columns routed to the spline-expansion branch.
					features_to_extend_as_spline = (
					    ' age ',
					)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Sanity checks: both extension subsets must be drawn from the columns
					# selected for standard scaling (`<=` on sets is the subset test).
					assert set(features_to_extend_as_polynomial) <= set(features_to_scale_to_standard_columns), (
					    'features_to_extend_as_polynomial must be a subset of features_to_scale_to_standard_columns'
					)
					assert set(features_to_extend_as_spline) <= set(features_to_scale_to_standard_columns), (
					    'features_to_extend_as_spline must be a subset of features_to_scale_to_standard_columns'
					)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Column-wise preprocessing. Columns not listed in any branch are dropped
					# (`remainder='drop'`).
					# NOTE(review): step names are kept byte-for-byte because the parameter
					# filter in a later cell refers to them by these exact strings.
					preprocess_transformer = sklearn.compose.ColumnTransformer(
					    [
					        # Degree-2 polynomial expansion (no bias column), followed by the
					        # scaler returned by build_features_scaler_standard().
					        (
					            ' extend_features_as_polynomial ',
					            sklearn.pipeline.Pipeline([
					                (
					                    ' extend_features ',
					                    sklearn.preprocessing.PolynomialFeatures(degree=2, include_bias=False),
					                ),
					                (' scale_to_standard ', build_features_scaler_standard()),
					            ]),
					            features_to_extend_as_polynomial,
					        ),
					        # Spline basis with 4 knots placed at quantiles of the data.
					        (
					            ' extend_features_as_spline ',
					            sklearn.preprocessing.SplineTransformer(
					                n_knots=4,
					                knots='quantile',
					                extrapolation='constant',
					                include_bias=False,
					            ),
					            features_to_extend_as_spline,
					        ),
					        # Plain scaling for every scalable column that is not already
					        # handled by the polynomial branch.
					        (
					            ' scale_to_standard ',
					            build_features_scaler_standard(),
					            tuple(
					                column
					                for column in features_to_scale_to_standard_columns
					                if column not in features_to_extend_as_polynomial
					            ),
					        ),
					        (
					            ' encode_categoricals_wrt_target ',
					            build_categorical_features_encoder_target(random_state=0x2ED6),
					            features_to_encode_wrt_target_columns,
					        ),
					    ],
					    remainder='drop',
					)
					# Bare expression: shows the transformer as the cell output.
					preprocess_transformer
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Демонстрация предобработки данных: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Demonstration only: the transformer is fitted on the test split here,
					# using the first column of `df_target_test` as the target.
					df_tfd_features_matrix_test = preprocess_transformer.fit_transform(
					    df_orig_features_test,
					    df_target_test.iloc[:, 0],
					)
					# Turn the transformed matrix into a frame via the project helper, which
					# also receives the fitted transformer.
					df_tfd_features_test = pandas_dataframe_from_transformed_artifacts(
					    df_tfd_features_matrix_test,
					    preprocess_transformer,
					)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Обзор предобработанного датасета: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Print a summary of the transformed test features.
					df_tfd_features_test.info()
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Show the first 8 rows of the transformed test features as the cell output.
					df_tfd_features_test.head(0x8)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Build the regressor with a fixed seed so repeated runs use the same
					# random state.
					regressor = build_regressor(random_state=0x3AEF)
					# Bare expression: shows the estimator as the cell output.
					regressor
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Составной пайплайн: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Composite model: preprocessing followed by the regressor.
					pipeline = sklearn.pipeline.Pipeline(
					    [
					        (' preprocess ', preprocess_transformer),
					        (' regress ', regressor),
					    ]
					)
					# Bare expression: shows the pipeline as the cell output.
					pipeline
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Select the subset of pipeline parameters to record for this model.
					# NOTE(review): the include/exclude trees mirror the pipeline's step names;
					# the exact matching rules live in `filter_params` — confirm there.
					model_params = filter_params(
					    pipeline.get_params(),
					    include={
					        ' preprocess ': (
					            False,
					            {
					                **{k: True for k in COLUMN_TRANSFORMER_PARAMS_COMMON_INCLUDE},
					                ' extend_features_as_polynomial ': {
					                    ' extend_features ': True,
					                    ' scale_to_standard ': True,
					                },
					                ' extend_features_as_spline ': True,
					                ' scale_to_standard ': True,
					                # Key must equal the transformer name used when
					                # `preprocess_transformer` is built ("categoricals", plural).
					                ' encode_categoricals_wrt_target ': True,
					            },
					        ),
					        ' regress ': (False, True),
					    },
					    exclude={
					        ' preprocess ': {
					            ' extend_features_as_polynomial ': {
					                ' scale_to_standard ': STANDARD_SCALER_PARAMS_COMMON_EXCLUDE,
					            },
					            ' scale_to_standard ': STANDARD_SCALER_PARAMS_COMMON_EXCLUDE,
					        },
					        ' regress ': RANDOM_FOREST_REGRESSOR_PARAMS_COMMON_EXCLUDE,
					    },
					)
					# Bare expression: shows the selected parameters as the cell output.
					model_params
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Обучение модели: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Fit the whole pipeline on the training split; the target is the first
					# column of `df_target_train`. The result is bound to `_` so the cell
					# shows no output.
					_ = pipeline.fit(
					    df_orig_features_train,
					    df_target_train.iloc[:, 0],
					)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Оценка качества: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Predict the target for the held-out test features with the pipeline.
					target_test_predicted = pipeline.predict(df_orig_features_test)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Метрики качества (MAPE, а также MSE, MAE): 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Score the test predictions against the true targets.
					metrics = score_predictions(df_target_test, target_test_predicted)
					# Bare expression: shows the metrics as the cell output.
					metrics
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Hand the feature-engineered pipeline, its filtered parameters and its
					# metrics to the `mlflow_log_model` helper under its own run name.
					# NOTE(review): unlike the previous inline code, this call is not wrapped
					# in `if mlflow_do_log:`; presumably the helper checks the flag itself, as
					# the baseline call earlier in the file is unguarded too — confirm.
					mlflow_log_model(
					    pipeline,
					    model_params=model_params,
					    # Every metric value is converted with float() before being passed on.
					    metrics={k: float(v) for k, v in metrics.items()},
					    run_name=mlflow_feateng_run_name,
					    model_signature=mlflow_model_signature,
					    input_example=df_orig_features.head(MODEL_INOUT_EXAMPLE_SIZE),
					    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH),
					    # An explicitly set `model_comment_path` wins; otherwise the path is
					    # built from BASE_PATH and `model_comment_relpath`.
					    comment_file_path=(
					        model_comment_path
					        if model_comment_path is not None
					        else (BASE_PATH / ' research ' / model_comment_relpath)
					    ),
					)
 
					 
					 
					 
					# %%