Загрузил(а) файлы в 'labworks/LW4'

2 месяцев назад · 53c32b7e6f
--- a/labworks/LW4/lab4.ipynb
+++ b/labworks/LW4/lab4.ipynb
@ -0,0 +1,321 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "# module imports\n",
+        "import os\n",
+        "\n",
+        "# Mount Google Drive first: the working directory below lives on the drive,\n",
+        "# so os.chdir() fails on a fresh runtime if the drive is not mounted yet.\n",
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')\n",
+        "os.chdir('/content/drive/MyDrive/Colab Notebooks/is_lab4')\n",
+        "\n",
+        "from tensorflow import keras\n",
+        "from tensorflow.keras import layers\n",
+        "from tensorflow.keras.models import Sequential\n",
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n"
+      ],
+      "metadata": {
+        "id": "mr9IszuQ1ANG"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# attach Google Drive to the Colab runtime at /content/drive\n",
+        "from google.colab import drive\n",
+        "\n",
+        "drive_mount_point = '/content/drive'\n",
+        "drive.mount(drive_mount_point)"
+      ],
+      "metadata": {
+        "id": "f0Sa1hdp4hQd"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# stop unless tf.test.gpu_device_name() returns '/device:GPU:0'\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "device_name = tf.test.gpu_device_name()\n",
+        "if device_name == '/device:GPU:0':\n",
+        "  print('Found GPU at: {}'.format(device_name))\n",
+        "else:\n",
+        "  raise SystemError('GPU device not found')"
+      ],
+      "metadata": {
+        "id": "o63-lKG_RuNc"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# load the IMDB dataset\n",
+        "# (imported through tensorflow.keras, like the other Keras imports in this notebook)\n",
+        "from tensorflow.keras.datasets import imdb\n",
+        "\n",
+        "vocabulary_size = 5000  # passed to load_data as num_words\n",
+        "index_from = 3  # index offset passed to load_data\n",
+        "\n",
+        "(X_train, y_train), (X_test, y_test) = imdb.load_data(\n",
+        "    path=\"imdb.npz\",\n",
+        "    num_words=vocabulary_size,\n",
+        "    skip_top=0,\n",
+        "    maxlen=None,\n",
+        "    seed=3,\n",
+        "    start_char=1,\n",
+        "    oov_char=2,\n",
+        "    index_from=index_from\n",
+        "    )\n",
+        "\n",
+        "# print the shapes of the loaded arrays\n",
+        "print('Shape of X train:', X_train.shape)\n",
+        "print('Shape of y train:', y_train.shape)\n",
+        "print('Shape of X test:', X_test.shape)\n",
+        "print('Shape of y test:', y_test.shape)"
+      ],
+      "metadata": {
+        "id": "Ixw5Sp0_1A-w"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# build the dictionaries used to turn indices back into words\n",
+        "# service tokens and the indices assigned to them\n",
+        "special_tokens = {\"<PAD>\": 0, \"<START>\": 1, \"<UNK>\": 2, \"<UNUSED>\": 3}\n",
+        "\n",
+        "# \"word: index\" dictionary, with every index shifted by index_from\n",
+        "raw_word_index = imdb.get_word_index()\n",
+        "word_to_id = {word: (idx + index_from) for word, idx in raw_word_index.items()}\n",
+        "word_to_id.update(special_tokens)\n",
+        "\n",
+        "# reverse \"index: word\" dictionary\n",
+        "id_to_word = {idx: word for word, idx in word_to_id.items()}"
+      ],
+      "metadata": {
+        "id": "9W3RklPcZyH0"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# show training review 26 as stored (a sequence of indices) and its length\n",
+        "sample_review = X_train[26]\n",
+        "print(sample_review)\n",
+        "print('len:', len(sample_review))"
+      ],
+      "metadata": {
+        "id": "Nu-Bs1jnaYhB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# decode training review 26 into words; 'len' below is the character count of the text\n",
+        "decoded_words = [id_to_word[token_id] for token_id in X_train[26]]\n",
+        "review_as_text = ' '.join(decoded_words)\n",
+        "print(review_as_text)\n",
+        "print('len:', len(review_as_text))"
+      ],
+      "metadata": {
+        "id": "JhTwTurtZ6Sp"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# longest and shortest review lengths in X_train\n",
+        "review_lengths = [len(review) for review in X_train]\n",
+        "print('MAX Len: ', max(review_lengths))\n",
+        "print('MIN Len: ', min(review_lengths))"
+      ],
+      "metadata": {
+        "id": "xJH87ISq1B9h"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# data preprocessing: bring every review to max_words elements\n",
+        "from tensorflow.keras.utils import pad_sequences\n",
+        "\n",
+        "max_words = 500\n",
+        "# the same padding/truncation settings are applied to both splits\n",
+        "padding_options = dict(maxlen=max_words, value=0, padding='pre', truncating='post')\n",
+        "X_train = pad_sequences(X_train, **padding_options)\n",
+        "X_test = pad_sequences(X_test, **padding_options)"
+      ],
+      "metadata": {
+        "id": "lrF-B2aScR4t"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# recompute the longest and shortest sequence lengths in X_train\n",
+        "sequence_lengths = [len(sequence) for sequence in X_train]\n",
+        "print('MAX Len: ', max(sequence_lengths))\n",
+        "print('MIN Len: ', min(sequence_lengths))"
+      ],
+      "metadata": {
+        "id": "81Cgq8dn9uL6"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# show the current contents of training review 26 and its length\n",
+        "current_review = X_train[26]\n",
+        "print(current_review)\n",
+        "print('len:', len(current_review))"
+      ],
+      "metadata": {
+        "id": "vudlgqoCbjU1"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# decode the current training review 26 into words; 'len' is the character count of the text\n",
+        "review_words = [id_to_word[token_id] for token_id in X_train[26]]\n",
+        "review_as_text = ' '.join(review_words)\n",
+        "print(review_as_text)\n",
+        "print('len:', len(review_as_text))"
+      ],
+      "metadata": {
+        "id": "dbfkWjDI1Dp7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# print the data; each label names the array printed next to it\n",
+        "print('X train: \\n',X_train)\n",
+        "print('X test: \\n',X_test)\n",
+        "\n",
+        "# print the shapes\n",
+        "print('Shape of X train:', X_train.shape)\n",
+        "print('Shape of X test:', X_test.shape)"
+      ],
+      "metadata": {
+        "id": "7MqcG_wl1EHI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "embed_dim = 32   # output_dim of the Embedding layer\n",
+        "lstm_units = 64  # units of the LSTM layer\n",
+        "\n",
+        "# The input shape is declared with an explicit Input layer: passing\n",
+        "# input_length / input_shape to Embedding is deprecated in current Keras.\n",
+        "model = Sequential()\n",
+        "model.add(layers.Input(shape=(max_words,)))\n",
+        "model.add(layers.Embedding(input_dim=vocabulary_size, output_dim=embed_dim))\n",
+        "model.add(layers.LSTM(lstm_units))\n",
+        "model.add(layers.Dropout(0.5))\n",
+        "# single sigmoid output for the binary label\n",
+        "model.add(layers.Dense(1, activation='sigmoid'))\n",
+        "\n",
+        "model.summary()"
+      ],
+      "metadata": {
+        "id": "ktWEeqWd1EyF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# compile and train the model\n",
+        "batch_size = 64\n",
+        "epochs = 3\n",
+        "\n",
+        "model.compile(\n",
+        "    optimizer=\"adam\",\n",
+        "    loss=\"binary_crossentropy\",\n",
+        "    metrics=[\"accuracy\"],\n",
+        ")\n",
+        "# 20% of the training data is held out for validation\n",
+        "model.fit(\n",
+        "    X_train,\n",
+        "    y_train,\n",
+        "    epochs=epochs,\n",
+        "    batch_size=batch_size,\n",
+        "    validation_split=0.2,\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "CuPqKpX0kQfP"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# evaluate on the test split; the result is unpacked into loss and accuracy\n",
+        "evaluation = model.evaluate(X_test, y_test)\n",
+        "test_loss, test_acc = evaluation\n",
+        "print(f\"\\nTest accuracy: {test_acc}\")"
+      ],
+      "metadata": {
+        "id": "hJIWinxymQjb"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# classification quality metric on the test data\n",
+        "accuracy_line = f\"\\nTest accuracy: {test_acc}\"\n",
+        "print(accuracy_line)"
+      ],
+      "metadata": {
+        "id": "Rya5ABT8msha"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# classification quality report for the test split\n",
+        "from sklearn.metrics import classification_report\n",
+        "\n",
+        "y_score = model.predict(X_test)\n",
+        "# threshold the first output column at 0.5 to obtain 0/1 labels\n",
+        "y_pred = [int(score >= 0.5) for score in y_score[:, 0]]\n",
+        "\n",
+        "report = classification_report(y_test, y_pred, labels = [0, 1], target_names=['Negative', 'Positive'])\n",
+        "print(report)"
+      ],
+      "metadata": {
+        "id": "2kHjcmnCmv0Y"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# plot the ROC curve and report the area under it\n",
+        "from sklearn.metrics import roc_curve, auc\n",
+        "\n",
+        "fpr, tpr, thresholds = roc_curve(y_test, y_score)\n",
+        "\n",
+        "fig, ax = plt.subplots()\n",
+        "ax.plot(fpr, tpr)\n",
+        "ax.grid()\n",
+        "ax.set_xlabel('False Positive Rate')\n",
+        "ax.set_ylabel('True Positive Rate')\n",
+        "ax.set_title('ROC')\n",
+        "plt.show()\n",
+        "\n",
+        "print('AUC ROC:', auc(fpr, tpr))"
+      ],
+      "metadata": {
+        "id": "Kp4AQRbcmwAx"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}