Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

22 KiB

import os
import mlflow
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=3,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    random_state=0,
    shuffle=False,
)
X_train, X_test, y_train, y_test = train_test_split(X, y,  random_state=42)
from sklearn.ensemble import RandomForestClassifier

feature_names = [f"feature {i}" for i in range(X.shape[1])]
forest = RandomForestClassifier(random_state=0, n_estimators=1)
forest.fit(X_train, y_train)
RandomForestClassifier(n_estimators=1, random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(n_estimators=1, random_state=0)
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score
predictions = forest.predict(X_test)
metrics = {}
metrics["recall"] = recall_score(y_test, predictions)   
metrics["precision"] = precision_score(y_test, predictions)
metrics["f1"] = f1_score(y_test, predictions)
metrics["roc_auc"] = roc_auc_score(y_test, predictions)
metrics
{'recall': 0.5853658536585366,
 'precision': 0.7578947368421053,
 'f1': 0.6605504587155964,
 'roc_auc': 0.7021317457269061}

# Работаем с MLflow локально
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000

registry_uri = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"
tracking_uri = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"

mlflow.set_tracking_uri(tracking_uri)   
mlflow.set_registry_uri(registry_uri)   

# название тестового эксперимента и запуска (run) внутри него
EXPERIMENT_NAME = "lection_test"
RUN_NAME = "n_est=1 model"
REGISTRY_MODEL_NAME = "lection_model"
# Когда создаем новый эксперимент, то: 
experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)

# Впоследствии. чтобы добавлять запуски в этот же эксепримент мы должны получить его id:
#experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # получаем уникальный идентификатор запуска эксперимента
    run_id = run.info.run_id 
    mlflow.sklearn.log_model(forest, artifact_path="models")
    mlflow.log_metrics(metrics)
#experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)

run = mlflow.get_run(run_id) 



assert (run.info.status =='FINISHED')
#mlflow.delete_experiment(experiment_id)