@ -19,6 +19,9 @@
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill. 
 
					 
					 
					 
					# Блокнот использует файл аугментированных данных датасета о подержанных автомобилях, создаваемый блокнотом `eda/cars_eda.py`. См. ниже параметры блокнота для papermill. 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					#XXX: разделить блокнот штук на 5 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					from  typing  import  Optional 
 
					 
					 
					 
					from  typing  import  Optional 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -211,7 +214,8 @@ tuple(map(len, (df_target_train, df_target_test)))
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# ## Модели 
 
					 
					 
					 
					# ## Модели 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% 
 
					 
					 
					 
					# %% 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					#MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt' 
 
					 
					 
					 
					# XXX: один файл requirements для всех моделей 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# Requirements file handed to mlflow_log_model(..., pip_requirements=...).
					# The path components must match the on-disk names exactly: no padding
					# spaces inside the literals, otherwise the path points at a different
					# (non-existent) directory/file.
					MODEL_PIP_REQUIREMENTS_PATH = BASE_PATH / 'requirements' / 'requirements-isolated-research-model.txt'
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# %% [markdown] 
 
					 
					 
					 
					# %% [markdown] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					# Сигнатура модели для MLFlow: 
 
					 
					 
					 
					# Сигнатура модели для MLFlow: 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -286,7 +290,7 @@ def mlflow_log_model(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    nested_run_name , 
 
					 
					 
					 
					    nested_run_name , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    model_signature = None , 
 
					 
					 
					 
					    model_signature = None , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    input_example = None , 
 
					 
					 
					 
					    input_example = None , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    #pip_requirements=None  ,
 
					 
					 
					 
					    pip_requirements = None  ,
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    comment_file_path = None , 
 
					 
					 
					 
					    comment_file_path = None , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					) : 
 
					 
					 
					 
					) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    global  mlflow_root_run_id 
 
					 
					 
					 
					    global  mlflow_root_run_id 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -305,12 +309,14 @@ def mlflow_log_model(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            mlflow_root_run_id  =  root_run . info . run_id 
 
					 
					 
					 
					            mlflow_root_run_id  =  root_run . info . run_id 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # важно одновременно использовать nested=True и parent_run_id=...: 
 
					 
					 
					 
					        # важно одновременно использовать nested=True и parent_run_id=...: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        with  mlflow . start_run ( experiment_id = experiment_id ,  run_name = nested_run_name ,  nested = True ,  parent_run_id = mlflow_root_run_id ) : 
 
					 
					 
					 
					        with  mlflow . start_run ( experiment_id = experiment_id ,  run_name = nested_run_name ,  nested = True ,  parent_run_id = mlflow_root_run_id ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					            if  isinstance ( pip_requirements ,  pathlib . PurePath ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					                pip_requirements  =  str ( pip_requirements ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            _  =  mlflow . sklearn . log_model ( 
 
					 
					 
					 
					            _  =  mlflow . sklearn . log_model ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                model , 
 
					 
					 
					 
					                model , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                ' model ' , 
 
					 
					 
					 
					                ' model ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                signature = model_signature , 
 
					 
					 
					 
					                signature = model_signature , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                input_example = input_example , 
 
					 
					 
					 
					                input_example = input_example , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                #pip_requirements=pip_requirements  ,
 
					 
					 
					 
					                pip_requirements = pip_requirements  ,
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					            ) 
 
					 
					 
					 
					            ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            if  model_params  is  not  None : 
 
					 
					 
					 
					            if  model_params  is  not  None : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                _  =  mlflow . log_params ( model_params ) 
 
					 
					 
					 
					                _  =  mlflow . log_params ( model_params ) 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -403,7 +409,7 @@ mlflow_log_model(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    nested_run_name = ' Baseline model ' , 
 
					 
					 
					 
					    nested_run_name = ' Baseline model ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH)  ,
 
					 
					 
					 
					    pip_requirements = MODEL_PIP_REQUIREMENTS_PATH  ,
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    comment_file_path = ( 
 
					 
					 
					 
					    comment_file_path = ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        model_comment_path 
 
					 
					 
					 
					        model_comment_path 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -556,7 +562,7 @@ mlflow_log_model(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    nested_run_name = ' Model with engineered features ' , 
 
					 
					 
					 
					    nested_run_name = ' Model with engineered features ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH)  ,
 
					 
					 
					 
					    pip_requirements = MODEL_PIP_REQUIREMENTS_PATH  ,
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    comment_file_path = ( 
 
					 
					 
					 
					    comment_file_path = ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        model_comment_path 
 
					 
					 
					 
					        model_comment_path 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -676,7 +682,7 @@ mlflow_log_model(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    nested_run_name = ' Model with filtered engineered features ' , 
 
					 
					 
					 
					    nested_run_name = ' Model with filtered engineered features ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH)  ,
 
					 
					 
					 
					    pip_requirements = MODEL_PIP_REQUIREMENTS_PATH  ,
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    comment_file_path = ( 
 
					 
					 
					 
					    comment_file_path = ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        model_comment_path 
 
					 
					 
					 
					        model_comment_path 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -802,7 +808,7 @@ mlflow_log_model(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    nested_run_name = ' Optimized model with filtered engineered features ' , 
 
					 
					 
					 
					    nested_run_name = ' Optimized model with filtered engineered features ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH)  ,
 
					 
					 
					 
					    pip_requirements = MODEL_PIP_REQUIREMENTS_PATH  ,
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    comment_file_path = ( 
 
					 
					 
					 
					    comment_file_path = ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        model_comment_path 
 
					 
					 
					 
					        model_comment_path 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -847,7 +853,7 @@ mlflow_log_model(
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    nested_run_name = ' Final model ' , 
 
					 
					 
					 
					    nested_run_name = ' Final model ' , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
					 
					 
					 
					    model_signature = mlflow_model_signature , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
					 
					 
					 
					    input_example = df_orig_features . head ( MODEL_INOUT_EXAMPLE_SIZE ) , 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    #pip_requirements=str(MODEL_PIP_REQUIREMENTS_PATH)  ,
 
					 
					 
					 
					    pip_requirements = MODEL_PIP_REQUIREMENTS_PATH  ,
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    comment_file_path = ( 
 
					 
					 
					 
					    comment_file_path = ( 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        model_comment_path 
 
					 
					 
					 
					        model_comment_path 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        if  model_comment_path  is  not  None 
 
					 
					 
					 
					        if  model_comment_path  is  not  None