diff --git a/labworks/LW4/is_lab4 clean.ipynb b/labworks/LW4/is_lab4 clean.ipynb
new file mode 100644
index 0000000..617c7ba
--- /dev/null
+++ b/labworks/LW4/is_lab4 clean.ipynb
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","mount_file_id":"1QDNLSNOEh8CzPlfGrXHMYrWq0jmFbU4-","authorship_tag":"ABX9TyMtUFVBwg6CyweIbh55cecX"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"collapsed":true,"id":"6ddB3mqgJ6Ma"},"outputs":[],"source":["# Lab 4: sentiment classification of IMDB reviews with an LSTM network.\n","# Module imports (kept in one cell so a fresh kernel can Run All).\n","import os\n","\n","import matplotlib.pyplot as plt\n","import numpy as np\n","import tensorflow as tf\n","from sklearn.metrics import auc, classification_report, roc_curve\n","from tensorflow import keras\n","from tensorflow.keras import layers\n","from tensorflow.keras.datasets import imdb\n","from tensorflow.keras.models import Sequential\n","from tensorflow.keras.utils import pad_sequences\n","\n","# Work from the lab folder; assumes Google Drive is already mounted at /content/drive.\n","os.chdir('/content/drive/MyDrive/Colab Notebooks/is_lab4')\n","\n","# Seed Python, NumPy and TensorFlow so repeated runs start from the same state\n","# (GPU kernels may still introduce small run-to-run differences).\n","SEED = 42\n","keras.utils.set_random_seed(SEED)"]},{"cell_type":"code","source":["# Fail fast when the runtime has no GPU attached.\n","device_name = tf.test.gpu_device_name()\n","if device_name != '/device:GPU:0':\n","    raise SystemError('GPU device not found')\n","print('Found GPU at: {}'.format(device_name))"],"metadata":{"id":"bzZZSexXKpxd"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Load the dataset.\n","vocabulary_size = 5000\n","index_from = 3\n","\n","(X_train, y_train), (X_test, y_test) = imdb.load_data(\n","    path=\"imdb.npz\",\n","    num_words=vocabulary_size,\n","    skip_top=0,\n","    maxlen=None,\n","    seed=11,\n","    start_char=1,\n","    oov_char=2,\n","    index_from=index_from\n",")\n","\n","# Print the array shapes.\n","print('Shape of X train:', X_train.shape)\n","print('Shape of y train:', y_train.shape)\n","print('Shape of X test:', X_test.shape)\n","print('Shape of y test:', y_test.shape)"],"metadata":{"id":"6_ALNo-gK6rl"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Build the dictionaries used to turn indices back into words.\n","# Load the \"word:index\" dictionary.\n","word_to_id = imdb.get_word_index()\n","# Shift every index by index_from so it matches the encoding produced by load_data.\n","word_to_id = {key: (value + index_from) for key, value in word_to_id.items()}\n","# Reserved ids: each one needs its own key. With a shared key the entries\n","# overwrite each other and ids 0-2 go missing from the reverse dictionary,\n","# which makes decoding fail with KeyError.\n","word_to_id[\"[PAD]\"] = 0      # padding value used by pad_sequences\n","word_to_id[\"[START]\"] = 1    # start_char passed to load_data\n","word_to_id[\"[OOV]\"] = 2      # oov_char passed to load_data\n","word_to_id[\"[UNUSED]\"] = 3   # gap left by index_from\n","# Build the reverse \"index:word\" dictionary.\n","id_to_word = {value: key for key, value in word_to_id.items()}\n","\n","\n","def decode_review(ids):\n","    \"\"\"Return the review text for a sequence of word indices.\"\"\"\n","    return ' '.join(id_to_word[i] for i in ids)"],"metadata":{"id":"Mbxh8wEbLGg5"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# One review as loaded: a list of word indices.\n","print(X_train[26])\n","print('len:', len(X_train[26]))"],"metadata":{"id":"VUwU74JwLKZB"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# The same review decoded back to text.\n","review_as_text = decode_review(X_train[26])\n","print(review_as_text)\n","print('len:', len(review_as_text))"],"metadata":{"id":"TUZVVH-kLP3F"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Longest and shortest review before padding.\n","print('MAX Len: ', len(max(X_train, key=len)))\n","print('MIN Len: ', len(min(X_train, key=len)))"],"metadata":{"id":"vXoobHZwLRpv"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Data preprocessing: pad (at the front) or truncate (at the end) every review to max_words.\n","max_words = 500\n","X_train = pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n","X_test = pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"],"metadata":{"id":"i96YT3MjLUdh"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Longest and shortest review after padding.\n","print('MAX Len: ', len(max(X_train, key=len)))\n","print('MIN Len: ', len(min(X_train, key=len)))"],"metadata":{"id":"fwnL3SflLbdc"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# The same review after padding.\n","print(X_train[26])\n","print('len:', len(X_train[26]))"],"metadata":{"id":"d7FdCsEyLfV0"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Decoded padded review; padding positions show up as [PAD].\n","review_as_text = decode_review(X_train[26])\n","print(review_as_text)\n","print('len:', len(review_as_text))"],"metadata":{"id":"CA5ZwsXyLh5C"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Print the data.\n","print('X train: \\n', X_train)\n","print('X test: \\n', X_test)\n","\n","# Print the array shapes.\n","print('Shape of X train:', X_train.shape)\n","print('Shape of X test:', X_test.shape)"],"metadata":{"id":"JZ5nEa1ILliE"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["embed_dim = 32\n","lstm_units = 64\n","\n","model = Sequential()\n","# Declare the input shape with an explicit Input object: passing\n","# input_length / input_shape to Embedding is deprecated in current Keras.\n","model.add(keras.Input(shape=(max_words,)))\n","model.add(layers.Embedding(input_dim=vocabulary_size, output_dim=embed_dim))\n","model.add(layers.LSTM(lstm_units))\n","model.add(layers.Dropout(0.5))\n","# Single sigmoid unit: output is the score for label 1.\n","model.add(layers.Dense(1, activation='sigmoid'))\n","\n","model.summary()"],"metadata":{"id":"kkrjirH4Lnuz"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Compile and train the model.\n","batch_size = 64\n","epochs = 3\n","model.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n","# Keep the History object instead of letting its repr become the cell output.\n","history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)"],"metadata":{"id":"xnVLvP8dLvQw"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Evaluate on the held-out test set.\n","test_loss, test_acc = model.evaluate(X_test, y_test)\n","print(f\"\\nTest accuracy: {test_acc}\")"],"metadata":{"id":"-c7bIgHhLy44"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Classification quality metric on the test data.\n","print(f\"\\nTest accuracy: {test_acc}\")"],"metadata":{"id":"AdPKXQ8RMBC8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Classification report for the test set.\n","y_score = model.predict(X_test)\n","# Threshold the sigmoid outputs at 0.5 to get class labels.\n","y_pred = (y_score[:, 0] >= 0.5).astype(int)\n","\n","print(classification_report(y_test, y_pred, labels=[0, 1], target_names=['Negative', 'Positive']))"],"metadata":{"id":"5cuHltNNMDwl"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Plot the ROC curve and compute ROC AUC.\n","fpr, tpr, thresholds = roc_curve(y_test, y_score)\n","plt.plot(fpr, tpr)\n","plt.grid()\n","plt.xlabel('False Positive Rate')\n","plt.ylabel('True Positive Rate')\n","plt.title('ROC')\n","plt.show()\n","print('AUC ROC:', auc(fpr, tpr))"],"metadata":{"id":"xWBS6H9-MIp0"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"OsxJzr8IMOuQ"},"execution_count":null,"outputs":[]}]}
\ No newline at end of file