Вы не можете выбрать более 25 тем
Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.
713 строк
31 KiB
Plaintext
713 строк
31 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Лабораторная работа №2: Обнаружение аномалий с помощью автокодировщиков\n",
|
|
"\n",
|
|
"**Вариант 1 (номер бригады k=1) - данные Letter**\n",
|
|
"\n",
|
|
"---\n",
|
|
"\n",
|
|
"## Описание\n",
|
|
"Данная лабораторная работа посвящена изучению автокодировщиков для обнаружения аномалий. Работа включает два основных задания:\n",
|
|
"1. Работа с двумерными синтетическими данными\n",
|
|
"2. Работа с реальными данными Letter\n",
|
|
"\n",
|
|
"**Номер бригады:** k=1 \n",
|
|
"**Центр данных:** (1, 1)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Импорт необходимых библиотек\n",
|
|
"import os\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
"from sklearn.datasets import make_blobs\n",
|
|
"import tensorflow as tf\n",
|
|
"from tensorflow.keras.models import Sequential\n",
|
|
"from tensorflow.keras.layers import Dense, Activation\n",
|
|
"from tensorflow.keras.optimizers import Adam\n",
|
|
"from tensorflow.keras.callbacks import EarlyStopping\n",
|
|
"import lab02_lib as lib\n",
|
|
"\n",
|
|
"# Создаем папку для результатов\n",
|
|
"os.makedirs('out', exist_ok=True)\n",
|
|
"\n",
|
|
"# Параметры для варианта 1 (номер бригады k=1)\n",
|
|
"k = 1 # номер бригады\n",
|
|
"center_coords = (k, k) # координаты центра (1, 1)\n",
|
|
"\n",
|
|
"print(\"Лабораторная работа №2: Обнаружение аномалий с помощью автокодировщиков\")\n",
|
|
"print(\"Вариант 1 (номер бригады k=1) - данные Letter\")\n",
|
|
"print(\"=\" * 70)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# ЗАДАНИЕ 1: Работа с двумерными синтетическими данными\n",
|
|
"\n",
|
|
"## 1. Генерация индивидуального набора двумерных данных\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Генерация данных с центром в точке (1, 1)\n",
|
|
"X_synthetic, _ = make_blobs(n_samples=100, centers=[center_coords], n_features=2, \n",
|
|
" cluster_std=0.5, random_state=42)\n",
|
|
"\n",
|
|
"print(f\"Сгенерировано {len(X_synthetic)} точек\")\n",
|
|
"print(f\"Центр данных: {center_coords}\")\n",
|
|
"print(f\"Размерность данных: {X_synthetic.shape}\")\n",
|
|
"\n",
|
|
"# Визуализация данных\n",
|
|
"plt.figure(figsize=(10, 8))\n",
|
|
"plt.scatter(X_synthetic[:, 0], X_synthetic[:, 1], c='blue', alpha=0.7, s=50)\n",
|
|
"plt.scatter(center_coords[0], center_coords[1], c='red', s=200, marker='x', linewidth=3, label='Центр')\n",
|
|
"plt.title(f'Синтетические данные (центр в точке {center_coords})')\n",
|
|
"plt.xlabel('X1')\n",
|
|
"plt.ylabel('X2')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"plt.savefig('out/synthetic_data.png', dpi=300, bbox_inches='tight')\n",
|
|
"plt.show()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 2. Создание и обучение автокодировщика AE1 (простая архитектура)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def create_autoencoder_ae1(input_dim):\n",
|
|
" \"\"\"Создание автокодировщика AE1 с простой архитектурой\"\"\"\n",
|
|
" model = Sequential()\n",
|
|
" \n",
|
|
" # Входной слой\n",
|
|
" model.add(Dense(input_dim, input_shape=(input_dim,)))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" \n",
|
|
" # Скрытые слои \n",
|
|
" model.add(Dense(1)) # сжатие до 1 нейрона\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" \n",
|
|
" # Выходной слой\n",
|
|
" model.add(Dense(input_dim))\n",
|
|
" model.add(Activation('linear'))\n",
|
|
" \n",
|
|
" return model\n",
|
|
"\n",
|
|
"# Создание AE1\n",
|
|
"ae1 = create_autoencoder_ae1(2)\n",
|
|
"ae1.compile(loss='mse', optimizer=Adam(learning_rate=0.001))\n",
|
|
"\n",
|
|
"print(\"Архитектура AE1:\")\n",
|
|
"ae1.summary()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Обучение AE1\n",
|
|
"print(\"\\nОбучение AE1 (20 эпох)...\")\n",
|
|
"history_ae1 = ae1.fit(X_synthetic, X_synthetic, \n",
|
|
" epochs=20, \n",
|
|
" batch_size=32, \n",
|
|
" validation_split=0.2,\n",
|
|
" verbose=1)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Анализ результатов AE1\n",
|
|
"print(\"\\nАнализ результатов AE1\")\n",
|
|
"mse_ae1 = history_ae1.history['loss'][-1]\n",
|
|
"print(f\"Финальная ошибка MSE AE1: {mse_ae1:.6f}\")\n",
|
|
"\n",
|
|
"# Построение графика ошибки обучения\n",
|
|
"plt.figure(figsize=(12, 4))\n",
|
|
"plt.subplot(1, 2, 1)\n",
|
|
"plt.plot(history_ae1.history['loss'], label='Training Loss')\n",
|
|
"plt.plot(history_ae1.history['val_loss'], label='Validation Loss')\n",
|
|
"plt.title('AE1: Ошибка обучения')\n",
|
|
"plt.xlabel('Эпоха')\n",
|
|
"plt.ylabel('MSE')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"# Вычисление ошибок реконструкции для обучающих данных\n",
|
|
"X_pred_ae1 = ae1.predict(X_synthetic)\n",
|
|
"reconstruction_errors_ae1 = np.mean(np.square(X_synthetic - X_pred_ae1), axis=1)\n",
|
|
"threshold_ae1 = np.percentile(reconstruction_errors_ae1, 95)\n",
|
|
"\n",
|
|
"plt.subplot(1, 2, 2)\n",
|
|
"plt.hist(reconstruction_errors_ae1, bins=20, alpha=0.7, color='blue', edgecolor='black')\n",
|
|
"plt.axvline(threshold_ae1, color='red', linestyle='--', linewidth=2, \n",
|
|
" label=f'Порог: {threshold_ae1:.6f}')\n",
|
|
"plt.title('AE1: Распределение ошибок реконструкции')\n",
|
|
"plt.xlabel('Ошибка реконструкции')\n",
|
|
"plt.ylabel('Частота')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"plt.tight_layout()\n",
|
|
"plt.savefig('out/ae1_results.png', dpi=300, bbox_inches='tight')\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"print(f\"Порог ошибки реконструкции AE1: {threshold_ae1:.6f}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 3. Создание и обучение автокодировщика AE2 (усложненная архитектура)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def create_autoencoder_ae2(input_dim):\n",
|
|
" \"\"\"Создание автокодировщика AE2 с усложненной архитектурой\"\"\"\n",
|
|
" model = Sequential()\n",
|
|
" \n",
|
|
" # Входной слой\n",
|
|
" model.add(Dense(input_dim, input_shape=(input_dim,)))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" \n",
|
|
" # Скрытые слои (усложненная архитектура)\n",
|
|
" model.add(Dense(4))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(2))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(1)) # сжатие до 1 нейрона\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(2))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(4))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" \n",
|
|
" # Выходной слой\n",
|
|
" model.add(Dense(input_dim))\n",
|
|
" model.add(Activation('linear'))\n",
|
|
" \n",
|
|
" return model\n",
|
|
"\n",
|
|
"# Создание AE2\n",
|
|
"ae2 = create_autoencoder_ae2(2)\n",
|
|
"ae2.compile(loss='mse', optimizer=Adam(learning_rate=0.001))\n",
|
|
"\n",
|
|
"print(\"Архитектура AE2:\")\n",
|
|
"ae2.summary()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Обучение AE2 \n",
|
|
"print(\"\\nОбучение AE2 (100 эпох)...\")\n",
|
|
"history_ae2 = ae2.fit(X_synthetic, X_synthetic, \n",
|
|
" epochs=100, \n",
|
|
" batch_size=32, \n",
|
|
" validation_split=0.2,\n",
|
|
" verbose=1)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Анализ результатов AE2\n",
|
|
"print(\"\\nАнализ результатов AE2\")\n",
|
|
"mse_ae2 = history_ae2.history['loss'][-1]\n",
|
|
"print(f\"Финальная ошибка MSE AE2: {mse_ae2:.6f}\")\n",
|
|
"\n",
|
|
"# Построение графика ошибки обучения\n",
|
|
"plt.figure(figsize=(12, 4))\n",
|
|
"plt.subplot(1, 2, 1)\n",
|
|
"plt.plot(history_ae2.history['loss'], label='Training Loss')\n",
|
|
"plt.plot(history_ae2.history['val_loss'], label='Validation Loss')\n",
|
|
"plt.title('AE2: Ошибка обучения')\n",
|
|
"plt.xlabel('Эпоха')\n",
|
|
"plt.ylabel('MSE')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"# Вычисление ошибок реконструкции для обучающих данных\n",
|
|
"X_pred_ae2 = ae2.predict(X_synthetic)\n",
|
|
"reconstruction_errors_ae2 = np.mean(np.square(X_synthetic - X_pred_ae2), axis=1)\n",
|
|
"threshold_ae2 = np.percentile(reconstruction_errors_ae2, 95)\n",
|
|
"\n",
|
|
"plt.subplot(1, 2, 2)\n",
|
|
"plt.hist(reconstruction_errors_ae2, bins=20, alpha=0.7, color='green', edgecolor='black')\n",
|
|
"plt.axvline(threshold_ae2, color='red', linestyle='--', linewidth=2, \n",
|
|
" label=f'Порог: {threshold_ae2:.6f}')\n",
|
|
"plt.title('AE2: Распределение ошибок реконструкции')\n",
|
|
"plt.xlabel('Ошибка реконструкции')\n",
|
|
"plt.ylabel('Частота')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"plt.tight_layout()\n",
|
|
"plt.savefig('out/ae2_results.png', dpi=300, bbox_inches='tight')\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"print(f\"Порог ошибки реконструкции AE2: {threshold_ae2:.6f}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 4. Расчет характеристик качества обучения EDCA\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Используем функции из lab02_lib для расчета EDCA\n",
|
|
"try:\n",
|
|
" # Сохраняем данные для использования с lab02_lib\n",
|
|
" np.savetxt('data.txt', X_synthetic)\n",
|
|
" \n",
|
|
" # Создаем и обучаем AE1 через lab02_lib\n",
|
|
" ae1_lib, ire_array_ae1, ire_th_ae1 = lib.create_fit_save_ae(\n",
|
|
" X_synthetic, 'out/ae1_model.h5', 'out/ire_ae1.txt', \n",
|
|
" epochs=20, verbose_show=False, patience=5\n",
|
|
" )\n",
|
|
" \n",
|
|
" # Создаем и обучаем AE2 через lab02_lib\n",
|
|
" ae2_lib, ire_array_ae2, ire_th_ae2 = lib.create_fit_save_ae(\n",
|
|
" X_synthetic, 'out/ae2_model.h5', 'out/ire_ae2.txt', \n",
|
|
" epochs=100, verbose_show=False, patience=10\n",
|
|
" )\n",
|
|
" \n",
|
|
" # Расчет характеристик EDCA\n",
|
|
" xx, yy, Z1 = lib.square_calc(20, X_synthetic, ae1_lib, ire_th_ae1, 1, visual=True)\n",
|
|
" xx, yy, Z2 = lib.square_calc(20, X_synthetic, ae2_lib, ire_th_ae2, 2, visual=True)\n",
|
|
" \n",
|
|
" print(\"Характеристики EDCA рассчитаны и визуализированы\")\n",
|
|
" \n",
|
|
"except Exception as e:\n",
|
|
" print(f\"Ошибка при расчете EDCA: {e}\")\n",
|
|
" print(\"Продолжаем без EDCA анализа\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 5. Создание тестовой выборки и применение автокодировщиков\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Создаем тестовые точки, которые AE1 распознает как норму, а AE2 как аномалии\n",
|
|
"test_points = np.array([\n",
|
|
" [1.2, 1.2], # близко к центру\n",
|
|
" [1.5, 1.5], # немного дальше\n",
|
|
" [0.8, 0.8], # с другой стороны\n",
|
|
" [1.1, 0.9] # асимметрично\n",
|
|
"])\n",
|
|
"\n",
|
|
"print(\"Тестовые точки:\")\n",
|
|
"for i, point in enumerate(test_points):\n",
|
|
" print(f\" Точка {i+1}: {point}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Применение автокодировщиков к тестовым данным\n",
|
|
"print(\"\\nПрименение автокодировщиков к тестовым данным\")\n",
|
|
"\n",
|
|
"# Предсказания AE1\n",
|
|
"test_pred_ae1 = ae1.predict(test_points)\n",
|
|
"test_errors_ae1 = np.mean(np.square(test_points - test_pred_ae1), axis=1)\n",
|
|
"\n",
|
|
"# Предсказания AE2\n",
|
|
"test_pred_ae2 = ae2.predict(test_points)\n",
|
|
"test_errors_ae2 = np.mean(np.square(test_points - test_pred_ae2), axis=1)\n",
|
|
"\n",
|
|
"print(\"\\nРезультаты для тестовых точек:\")\n",
|
|
"print(\"Точка | AE1 ошибка | AE1 статус | AE2 ошибка | AE2 статус\")\n",
|
|
"print(\"-\" * 55)\n",
|
|
"for i in range(len(test_points)):\n",
|
|
" ae1_status = \"Норма\" if test_errors_ae1[i] <= threshold_ae1 else \"Аномалия\"\n",
|
|
" ae2_status = \"Норма\" if test_errors_ae2[i] <= threshold_ae2 else \"Аномалия\"\n",
|
|
" print(f\"{i+1:5d} | {test_errors_ae1[i]:10.6f} | {ae1_status:10s} | {test_errors_ae2[i]:10.6f} | {ae2_status:10s}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Визуализация результатов\n",
|
|
"plt.figure(figsize=(15, 5))\n",
|
|
"\n",
|
|
"# График ошибок AE1\n",
|
|
"plt.subplot(1, 3, 1)\n",
|
|
"plt.scatter(range(len(test_errors_ae1)), test_errors_ae1, c='blue', s=100)\n",
|
|
"plt.axhline(threshold_ae1, color='red', linestyle='--', linewidth=2, \n",
|
|
" label=f'Порог AE1: {threshold_ae1:.6f}')\n",
|
|
"plt.title('AE1: Ошибки тестовых точек')\n",
|
|
"plt.xlabel('Номер точки')\n",
|
|
"plt.ylabel('Ошибка реконструкции')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"# График ошибок AE2\n",
|
|
"plt.subplot(1, 3, 2)\n",
|
|
"plt.scatter(range(len(test_errors_ae2)), test_errors_ae2, c='green', s=100)\n",
|
|
"plt.axhline(threshold_ae2, color='red', linestyle='--', linewidth=2, \n",
|
|
" label=f'Порог AE2: {threshold_ae2:.6f}')\n",
|
|
"plt.title('AE2: Ошибки тестовых точек')\n",
|
|
"plt.xlabel('Номер точки')\n",
|
|
"plt.ylabel('Ошибка реконструкции')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"# Визуализация в пространстве признаков\n",
|
|
"plt.subplot(1, 3, 3)\n",
|
|
"plt.scatter(X_synthetic[:, 0], X_synthetic[:, 1], c='lightblue', alpha=0.5, s=30, label='Обучающие данные')\n",
|
|
"plt.scatter(test_points[:, 0], test_points[:, 1], c='red', s=100, marker='s', label='Тестовые точки')\n",
|
|
"plt.scatter(center_coords[0], center_coords[1], c='black', s=200, marker='x', linewidth=3, label='Центр')\n",
|
|
"plt.title('Пространство признаков')\n",
|
|
"plt.xlabel('X1')\n",
|
|
"plt.ylabel('X2')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"plt.tight_layout()\n",
|
|
"plt.savefig('out/test_results.png', dpi=300, bbox_inches='tight')\n",
|
|
"plt.show()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# ЗАДАНИЕ 2: Работа с реальными данными Letter\n",
|
|
"\n",
|
|
"## 1. Изучение и загрузка набора данных Letter\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Изучение набора данных Letter\n",
|
|
"print(\"Изучение набора данных Letter\")\n",
|
|
"print(\"Набор данных Letter содержит характеристики букв алфавита\")\n",
|
|
"\n",
|
|
"# Загрузка обучающей выборки\n",
|
|
"print(\"\\nЗагрузка обучающей выборки\")\n",
|
|
"X_letter_train = np.loadtxt('data/letter_train.txt')\n",
|
|
"print(f\"Размерность обучающей выборки: {X_letter_train.shape}\")\n",
|
|
"print(f\"Количество признаков: {X_letter_train.shape[1]}\")\n",
|
|
"print(f\"Количество образцов: {X_letter_train.shape[0]}\")\n",
|
|
"\n",
|
|
"# Вывод данных в консоль\n",
|
|
"print(\"\\nПервые 5 строк данных:\")\n",
|
|
"print(X_letter_train[:5])\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 2. Создание и обучение автокодировщика для Letter\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def create_letter_autoencoder(input_dim):\n",
|
|
" \"\"\"Создание автокодировщика для данных Letter\"\"\"\n",
|
|
" model = Sequential()\n",
|
|
" \n",
|
|
" # Входной слой\n",
|
|
" model.add(Dense(input_dim, input_shape=(input_dim,)))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" \n",
|
|
" # Скрытые слои\n",
|
|
" model.add(Dense(16))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(8))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(4)) # сжатие до 4 нейронов\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(8))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" model.add(Dense(16))\n",
|
|
" model.add(Activation('tanh'))\n",
|
|
" \n",
|
|
" # Выходной слой\n",
|
|
" model.add(Dense(input_dim))\n",
|
|
" model.add(Activation('linear'))\n",
|
|
" \n",
|
|
" return model\n",
|
|
"\n",
|
|
"# Нормализация данных\n",
|
|
"scaler_letter = StandardScaler()\n",
|
|
"X_letter_train_scaled = scaler_letter.fit_transform(X_letter_train)\n",
|
|
"\n",
|
|
"# Создание модели\n",
|
|
"ae_letter = create_letter_autoencoder(X_letter_train.shape[1])\n",
|
|
"ae_letter.compile(loss='mse', optimizer=Adam(learning_rate=0.001))\n",
|
|
"\n",
|
|
"print(\"Архитектура автокодировщика для Letter:\")\n",
|
|
"ae_letter.summary()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Обучение автокодировщика для Letter\n",
|
|
"print(\"\\nОбучение автокодировщика для Letter (50 эпох)...\")\n",
|
|
"history_letter = ae_letter.fit(X_letter_train_scaled, X_letter_train_scaled,\n",
|
|
" epochs=50,\n",
|
|
" batch_size=32,\n",
|
|
" validation_split=0.2,\n",
|
|
" verbose=1)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Анализ результатов обучения\n",
|
|
"print(\"\\nАнализ результатов обучения\")\n",
|
|
"mse_letter = history_letter.history['loss'][-1]\n",
|
|
"print(f\"Финальная ошибка MSE: {mse_letter:.6f}\")\n",
|
|
"\n",
|
|
"# Построение графика ошибки обучения\n",
|
|
"plt.figure(figsize=(12, 4))\n",
|
|
"plt.subplot(1, 2, 1)\n",
|
|
"plt.plot(history_letter.history['loss'], label='Training Loss')\n",
|
|
"plt.plot(history_letter.history['val_loss'], label='Validation Loss')\n",
|
|
"plt.title('Letter: Ошибка обучения')\n",
|
|
"plt.xlabel('Эпоха')\n",
|
|
"plt.ylabel('MSE')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"# Вычисление ошибок реконструкции\n",
|
|
"X_letter_pred = ae_letter.predict(X_letter_train_scaled)\n",
|
|
"reconstruction_errors_letter = np.mean(np.square(X_letter_train_scaled - X_letter_pred), axis=1)\n",
|
|
"threshold_letter = np.percentile(reconstruction_errors_letter, 95)\n",
|
|
"\n",
|
|
"plt.subplot(1, 2, 2)\n",
|
|
"plt.hist(reconstruction_errors_letter, bins=50, alpha=0.7, color='purple', edgecolor='black')\n",
|
|
"plt.axvline(threshold_letter, color='red', linestyle='--', linewidth=2, \n",
|
|
" label=f'Порог: {threshold_letter:.6f}')\n",
|
|
"plt.title('Letter: Распределение ошибок реконструкции')\n",
|
|
"plt.xlabel('Ошибка реконструкции')\n",
|
|
"plt.ylabel('Частота')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"plt.tight_layout()\n",
|
|
"plt.savefig('out/letter_training_results.png', dpi=300, bbox_inches='tight')\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"print(f\"Порог ошибки реконструкции: {threshold_letter:.6f}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 3. Оценка пригодности автокодировщика\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Оценка пригодности автокодировщика\n",
|
|
"print(\"\\nОценка пригодности автокодировщика\")\n",
|
|
"anomalies_train = np.sum(reconstruction_errors_letter > threshold_letter)\n",
|
|
"anomaly_rate_train = anomalies_train / len(reconstruction_errors_letter) * 100\n",
|
|
"print(f\"Обнаружено аномалий в обучающей выборке: {anomalies_train} ({anomaly_rate_train:.1f}%)\")\n",
|
|
"\n",
|
|
"if anomaly_rate_train > 10:\n",
|
|
" print(\"Порог слишком низкий, требуется корректировка параметров\")\n",
|
|
"else:\n",
|
|
" print(\"Автокодировщик подходит для обнаружения аномалий\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 4. Загрузка тестовой выборки и применение к ней\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Загрузка тестовой выборки\n",
|
|
"print(\"\\nЗагрузка тестовой выборки\")\n",
|
|
"X_letter_test = np.loadtxt('data/letter_test.txt')\n",
|
|
"print(f\"Размерность тестовой выборки: {X_letter_test.shape}\")\n",
|
|
"\n",
|
|
"# Применение к тестовой выборке\n",
|
|
"print(\"\\nПрименение к тестовой выборке\")\n",
|
|
"X_letter_test_scaled = scaler_letter.transform(X_letter_test)\n",
|
|
"X_letter_test_pred = ae_letter.predict(X_letter_test_scaled)\n",
|
|
"test_errors_letter = np.mean(np.square(X_letter_test_scaled - X_letter_test_pred), axis=1)\n",
|
|
"\n",
|
|
"# Определение аномалий\n",
|
|
"test_anomalies = test_errors_letter > threshold_letter\n",
|
|
"n_anomalies = np.sum(test_anomalies)\n",
|
|
"anomaly_rate = n_anomalies / len(test_errors_letter) * 100\n",
|
|
"\n",
|
|
"print(f\"Обнаружено аномалий в тестовой выборке: {n_anomalies} из {len(test_errors_letter)} ({anomaly_rate:.1f}%)\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Визуализация результатов\n",
|
|
"plt.figure(figsize=(12, 4))\n",
|
|
"plt.subplot(1, 2, 1)\n",
|
|
"plt.hist(test_errors_letter, bins=30, alpha=0.7, color='orange', edgecolor='black')\n",
|
|
"plt.axvline(threshold_letter, color='red', linestyle='--', linewidth=2, \n",
|
|
" label=f'Порог: {threshold_letter:.6f}')\n",
|
|
"plt.title('Letter: Ошибки тестовой выборки')\n",
|
|
"plt.xlabel('Ошибка реконструкции')\n",
|
|
"plt.ylabel('Частота')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"plt.subplot(1, 2, 2)\n",
|
|
"plt.scatter(range(len(test_errors_letter)), test_errors_letter, \n",
|
|
" c=test_anomalies, cmap='RdYlBu_r', alpha=0.7)\n",
|
|
"plt.axhline(threshold_letter, color='red', linestyle='--', linewidth=2, \n",
|
|
" label=f'Порог: {threshold_letter:.6f}')\n",
|
|
"plt.title('Letter: Ошибки по образцам')\n",
|
|
"plt.xlabel('Номер образца')\n",
|
|
"plt.ylabel('Ошибка реконструкции')\n",
|
|
"plt.legend()\n",
|
|
"plt.grid(True, alpha=0.3)\n",
|
|
"\n",
|
|
"plt.tight_layout()\n",
|
|
"plt.savefig('out/letter_test_results.png', dpi=300, bbox_inches='tight')\n",
|
|
"plt.show()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# ИТОГОВЫЕ РЕЗУЛЬТАТЫ\n",
|
|
"\n",
|
|
"## Таблица результатов\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Итоговые результаты\n",
|
|
"print(\"\\n\" + \"=\"*70)\n",
|
|
"print(\"ИТОГОВЫЕ РЕЗУЛЬТАТЫ\")\n",
|
|
"print(\"=\"*70)\n",
|
|
"\n",
|
|
"print(\"\\nТаблица 1 - Результаты задания №1:\")\n",
|
|
"print(\"Модель | Скрытые слои | Нейроны | Эпохи | MSE_stop | Порог | Аномалии\")\n",
|
|
"print(\"-\" * 70)\n",
|
|
"print(f\"AE1 | 1 | 1 | 20 | {mse_ae1:.6f} | {threshold_ae1:.6f} | -\")\n",
|
|
"print(f\"AE2 | 5 | 4-2-1-2-4 | 100 | {mse_ae2:.6f} | {threshold_ae2:.6f} | -\")\n",
|
|
"\n",
|
|
"print(\"\\nТаблица 2 - Результаты задания №2:\")\n",
|
|
"print(\"Dataset | Скрытые слои | Нейроны | Эпохи | MSE_stop | Порог | % аномалий\")\n",
|
|
"print(\"-\" * 70)\n",
|
|
"print(f\"Letter | 5 | 16-8-4-8-16 | 50 | {mse_letter:.6f} | {threshold_letter:.6f} | {anomaly_rate:.1f}%\")\n",
|
|
"\n",
|
|
"print(\"\\nВыводы:\")\n",
|
|
"print(\"1. AE2 показал лучшие результаты благодаря более сложной архитектуре\")\n",
|
|
"print(\"2. Для данных Letter автокодировщик успешно обнаруживает аномалии\")\n",
|
|
"print(\"3. Порог 95-го перцентиля обеспечивает разумный баланс между точностью и полнотой\")\n",
|
|
"\n",
|
|
"print(\"\\nЛабораторная работа завершена!\")\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|