diff --git a/README.md b/README.md index 8af1951..16007a6 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ | Дата |Лекция | |:----------:|:-------------------------------------------------------------------| -| 05.09.2024 | [Вводная лекция](./lectures/lec1.odp) | -| 12.09.2024 | [Изолирование окружения. Docker](./lectures/lec2-docker.odp) | -| 19.09.2024 | [Разведочный анализ данных](./lectures/lec3-eda.odp) | +| 05.09.2024 | [Вводная лекция](./lectures/lec1.odp) - [в формате pptx](./lectures/lec1.pptx)| +| 12.09.2024 | [Изолирование окружения. Docker](./lectures/lec2-docker.odp) - [в формате pptx](./lectures/lec2-docker.pptx) | +| 19.09.2024 | [Разведочный анализ данных](./lectures/lec3-eda.odp) - [в формате pptx](./lectures/lec3-eda.pptx) | +| 26.09.2024 | [MLFlow](./lectures/lec4-mlflow.odp) - [в формате pptx](./lectures/lec4-mlflow.pptx) | ## Лабораторные работы diff --git a/assets/mlflow/mlflow.ipynb b/assets/mlflow/mlflow.ipynb new file mode 100644 index 0000000..51a8700 --- /dev/null +++ b/assets/mlflow/mlflow.ipynb @@ -0,0 +1,599 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import mlflow" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import make_classification\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "X, y = make_classification(\n", + " n_samples=1000,\n", + " n_features=10,\n", + " n_informative=3,\n", + " n_redundant=0,\n", + " n_repeated=0,\n", + " n_classes=2,\n", + " random_state=0,\n", + " shuffle=False,\n", + ")\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
RandomForestClassifier(n_estimators=1, random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "RandomForestClassifier(n_estimators=1, random_state=0)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "feature_names = [f\"feature {i}\" for i in range(X.shape[1])]\n", + "forest = RandomForestClassifier(random_state=0, n_estimators=1)\n", + "forest.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score\n", + "predictions = forest.predict(X_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'recall': 0.5853658536585366,\n", + " 'precision': 0.7578947368421053,\n", + " 'f1': 0.6605504587155964,\n", + " 'roc_auc': 0.7021317457269061}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics = {}\n", + "metrics[\"recall\"] = recall_score(y_test, predictions) \n", + "metrics[\"precision\"] = precision_score(y_test, predictions)\n", + "metrics[\"f1\"] = f1_score(y_test, predictions)\n", + "metrics[\"roc_auc\"] = roc_auc_score(y_test, predictions)\n", + "metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Работаем с MLflow локально\n", + "TRACKING_SERVER_HOST = \"127.0.0.1\"\n", + "TRACKING_SERVER_PORT = 5000\n", + "\n", + "registry_uri = f\"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}\"\n", + "tracking_uri = f\"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}\"\n", + "\n", + "mlflow.set_tracking_uri(tracking_uri) \n", + "mlflow.set_registry_uri(registry_uri) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# название тестового эксперимента и 
запуска (run) внутри него\n", + "EXPERIMENT_NAME = \"lection_test\"\n", + "RUN_NAME = \"n_est=1 model\"\n", + "REGISTRY_MODEL_NAME = \"lection_model\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Когда создаем новый эксперимент, то: \n", + "experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)\n", + "\n", + "# Впоследствии, чтобы добавлять запуски в этот же эксперимент, мы должны получить его id:\n", + "#experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id\n", + "\n", + "with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:\n", + " # получаем уникальный идентификатор запуска эксперимента\n", + " run_id = run.info.run_id \n", + " mlflow.sklearn.log_model(forest, artifact_path=\"models\")\n", + " mlflow.log_metrics(metrics)\n", + "#experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)\n", + "\n", + "run = mlflow.get_run(run_id) \n", + "\n", + "\n", + "\n", + "assert (run.info.status =='FINISHED')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#mlflow.delete_experiment(experiment_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv_eda", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lectures/lec1.pptx b/lectures/lec1.pptx new file mode 100644 index 0000000..7d95b8e Binary files /dev/null and b/lectures/lec1.pptx differ diff --git a/lectures/lec2-docker.pptx b/lectures/lec2-docker.pptx new file mode 100644 index 0000000..0e62ed5 Binary 
files /dev/null and b/lectures/lec2-docker.pptx differ diff --git a/lectures/lec3-eda.pptx b/lectures/lec3-eda.pptx new file mode 100644 index 0000000..f253bb8 Binary files /dev/null and b/lectures/lec3-eda.pptx differ diff --git a/lectures/lec4-mlflow.odp b/lectures/lec4-mlflow.odp index 5b136ea..07b329c 100644 Binary files a/lectures/lec4-mlflow.odp and b/lectures/lec4-mlflow.odp differ diff --git a/lectures/lec4-mlflow.pptx b/lectures/lec4-mlflow.pptx new file mode 100644 index 0000000..09dcdfc Binary files /dev/null and b/lectures/lec4-mlflow.pptx differ