From 53c32b7e6f0760bdd8f3db236b3e38dd8d5b45c4 Mon Sep 17 00:00:00 2001
From: TekotovaVA
Date: Sun, 14 Dec 2025 17:55:07 +0000
Subject: [PATCH] Upload files to 'labworks/LW4'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 labworks/LW4/lab4.ipynb | 321 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 321 insertions(+)
 create mode 100644 labworks/LW4/lab4.ipynb

diff --git a/labworks/LW4/lab4.ipynb b/labworks/LW4/lab4.ipynb
new file mode 100644
index 0000000..0977011
--- /dev/null
+++ b/labworks/LW4/lab4.ipynb
@@ -0,0 +1,321 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "# import modules\n",
+        "import os\n",
+        "\n",
+        "from tensorflow import keras\n",
+        "from tensorflow.keras import layers\n",
+        "from tensorflow.keras.models import Sequential\n",
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n"
+      ],
+      "metadata": {
+        "id": "mr9IszuQ1ANG"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')\n",
+        "os.chdir('/content/drive/MyDrive/Colab Notebooks/is_lab4')"
+      ],
+      "metadata": {
+        "id": "f0Sa1hdp4hQd"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import tensorflow as tf\n",
+        "device_name = tf.test.gpu_device_name()\n",
+        "if device_name != '/device:GPU:0':\n",
+        "  raise SystemError('GPU device not found')\n",
+        "print('Found GPU at: {}'.format(device_name))"
+      ],
+      "metadata": {
+        "id": "o63-lKG_RuNc"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# load the IMDB dataset\n",
+        "from tensorflow.keras.datasets import imdb\n",
+        "\n",
+        "vocabulary_size = 5000\n",
+        "index_from = 3\n",
+        "\n",
+        "(X_train, y_train), (X_test, y_test) = imdb.load_data(\n",
+        "    path=\"imdb.npz\",\n",
+        "    num_words=vocabulary_size,\n",
+        "    skip_top=0,\n",
+        "    maxlen=None,\n",
+        "    seed=3,\n",
+        "    start_char=1,\n",
+        "    oov_char=2,\n",
+        "    index_from=index_from\n",
+        ")\n",
+        "\n",
+        "# print the shapes\n",
+        "print('Shape of X train:', X_train.shape)\n",
+        "print('Shape of y train:', y_train.shape)\n",
+        "print('Shape of X test:', X_test.shape)\n",
+        "print('Shape of y test:', y_test.shape)"
+      ],
+      "metadata": {
+        "id": "Ixw5Sp0_1A-w"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# build a dictionary for translating indices back into words\n",
+        "# load the \"word: index\" dictionary\n",
+        "word_to_id = imdb.get_word_index()\n",
+        "# shift the indices and add the special tokens\n",
+        "word_to_id = {key: (value + index_from) for key, value in word_to_id.items()}\n",
+        "word_to_id[\"<PAD>\"] = 0\n",
+        "word_to_id[\"<START>\"] = 1\n",
+        "word_to_id[\"<UNK>\"] = 2\n",
+        "word_to_id[\"<UNUSED>\"] = 3\n",
+        "# build the reverse \"index: word\" dictionary\n",
+        "id_to_word = {value: key for key, value in word_to_id.items()}"
+      ],
+      "metadata": {
+        "id": "9W3RklPcZyH0"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(X_train[26])\n",
+        "print('len:', len(X_train[26]))"
+      ],
+      "metadata": {
+        "id": "Nu-Bs1jnaYhB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
+        "print(review_as_text)\n",
+        "print('len:', len(review_as_text))"
+      ],
+      "metadata": {
+        "id": "JhTwTurtZ6Sp"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print('MAX Len: ', len(max(X_train, key=len)))\n",
+        "print('MIN Len: ', len(min(X_train, key=len)))"
+      ],
+      "metadata": {
+        "id": "xJH87ISq1B9h"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# preprocess the data: pad/truncate every review to a fixed length\n",
+        "from tensorflow.keras.utils import pad_sequences\n",
+        "max_words = 500\n",
+        "X_train = pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
+        "X_test = pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"
+      ],
+      "metadata": {
+        "id": "lrF-B2aScR4t"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print('MAX Len: ', len(max(X_train, key=len)))\n",
+        "print('MIN Len: ', len(min(X_train, key=len)))"
+      ],
+      "metadata": {
+        "id": "81Cgq8dn9uL6"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(X_train[26])\n",
+        "print('len:', len(X_train[26]))"
+      ],
+      "metadata": {
+        "id": "vudlgqoCbjU1"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
+        "print(review_as_text)\n",
+        "print('len:', len(review_as_text))"
+      ],
+      "metadata": {
+        "id": "dbfkWjDI1Dp7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# print the data\n",
+        "print('X train: \\n', X_train)\n",
+        "print('X test: \\n', X_test)\n",
+        "\n",
+        "# print the shapes\n",
+        "print('Shape of X train:', X_train.shape)\n",
+        "print('Shape of X test:', X_test.shape)"
+      ],
+      "metadata": {
+        "id": "7MqcG_wl1EHI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "embed_dim = 32\n",
+        "lstm_units = 64\n",
+        "\n",
+        "model = Sequential()\n",
+        "model.add(layers.Embedding(input_dim=vocabulary_size, output_dim=embed_dim, input_shape=(max_words,)))\n",
+        "model.add(layers.LSTM(lstm_units))\n",
+        "model.add(layers.Dropout(0.5))\n",
+        "model.add(layers.Dense(1, activation='sigmoid'))\n",
+        "\n",
+        "model.summary()"
+      ],
+      "metadata": {
+        "id": "ktWEeqWd1EyF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# compile and train the model\n",
+        "batch_size = 64\n",
+        "epochs = 3\n",
+        "model.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n",
+        "model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)"
+      ],
+      "metadata": {
+        "id": "CuPqKpX0kQfP"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "test_loss, test_acc = model.evaluate(X_test, y_test)\n",
+        "print(f\"\\nTest accuracy: {test_acc}\")"
+      ],
+      "metadata": {
+        "id": "hJIWinxymQjb"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# classification quality metric on the test data\n",
+        "print(f\"\\nTest accuracy: {test_acc}\")"
+      ],
+      "metadata": {
+        "id": "Rya5ABT8msha"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# classification quality report on the test set\n",
+        "y_score = model.predict(X_test)\n",
+        "y_pred = [1 if y_score[i, 0] >= 0.5 else 0 for i in range(len(y_score))]\n",
+        "\n",
+        "from sklearn.metrics import classification_report\n",
+        "print(classification_report(y_test, y_pred, labels=[0, 1], target_names=['Negative', 'Positive']))"
+      ],
+      "metadata": {
+        "id": "2kHjcmnCmv0Y"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# plot the ROC curve and compute the ROC AUC\n",
+        "from sklearn.metrics import roc_curve, auc\n",
+        "\n",
+        "fpr, tpr, thresholds = roc_curve(y_test, y_score)\n",
+        "plt.plot(fpr, tpr)\n",
+        "plt.grid()\n",
+        "plt.xlabel('False Positive Rate')\n",
+        "plt.ylabel('True Positive Rate')\n",
+        "plt.title('ROC')\n",
+        "plt.show()\n",
+        "print('AUC ROC:', auc(fpr, tpr))"
+      ],
+      "metadata": {
+        "id": "Kp4AQRbcmwAx"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
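
As a follow-up to the notebook added by this patch, the sketch below shows how the trained network could score a single hand-written review using the same preprocessing conventions as the training data (start_char=1, oov_char=2, num_words=vocabulary_size, pre-padding to max_words). It is only an illustration and not part of the committed file: it assumes the notebook above has already been run so that model, word_to_id, vocabulary_size and max_words are in scope, and the helper encode_review and the sample text are hypothetical.

# Sketch only: assumes model, word_to_id, vocabulary_size and max_words
# are already defined by running the notebook; encode_review is a
# hypothetical helper, not part of the patch.
from tensorflow.keras.utils import pad_sequences

def encode_review(text, word_to_id, vocabulary_size, max_words):
    # The IMDB word index is built from lower-cased words, so lower-case and
    # split on whitespace (punctuation handling is deliberately naive here).
    words = text.lower().split()
    # start_char=1 opens the sequence; words that are unknown or fall outside
    # the top vocabulary_size indices map to the OOV id 2, mirroring
    # num_words=vocabulary_size and oov_char=2 in imdb.load_data above.
    ids = [1]
    for w in words:
        idx = word_to_id.get(w, 2)
        ids.append(idx if idx < vocabulary_size else 2)
    # Apply the same padding/truncation policy as the training data.
    return pad_sequences([ids], maxlen=max_words, value=0,
                         padding='pre', truncating='post')

sample = "this movie was surprisingly good and the acting felt honest"
x = encode_review(sample, word_to_id, vocabulary_size, max_words)
score = float(model.predict(x)[0, 0])
print('Positive probability:', score)
print('Predicted class:', 'Positive' if score >= 0.5 else 'Negative')

Keeping the helper's OOV handling and pre-padding identical to the imdb.load_data and pad_sequences calls in the notebook matters, because the LSTM was only trained on inputs with exactly that shape and vocabulary.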