Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

1 строка
59 KiB
Plaintext

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","mount_file_id":"1QDNLSNOEh8CzPlfGrXHMYrWq0jmFbU4-","authorship_tag":"ABX9TyMtUFVBwg6CyweIbh55cecX"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","execution_count":20,"metadata":{"collapsed":true,"id":"6ddB3mqgJ6Ma","executionInfo":{"status":"ok","timestamp":1764532393794,"user_tz":-180,"elapsed":24,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}}},"outputs":[],"source":["# импорт модулей\n","import os\n","os.chdir('/content/drive/MyDrive/Colab Notebooks/is_lab4')\n","\n","from tensorflow import keras\n","from tensorflow.keras import layers\n","from tensorflow.keras.models import Sequential\n","import matplotlib.pyplot as plt\n","import numpy as np"]},{"cell_type":"code","source":["import tensorflow as tf\n","device_name = tf.test.gpu_device_name()\n","if device_name != '/device:GPU:0':\n"," raise SystemError('GPU device not found')\n","print('Found GPU at: {}'.format(device_name))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bzZZSexXKpxd","executionInfo":{"status":"ok","timestamp":1764532393812,"user_tz":-180,"elapsed":15,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"d85b6fcb-3ab8-43c2-e36e-27c42dccbd47"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":["Found GPU at: /device:GPU:0\n"]}]},{"cell_type":"code","source":["# загрузка датасета\n","from keras.datasets import imdb\n","\n","vocabulary_size = 5000\n","index_from = 3\n","\n","(X_train, y_train), (X_test, y_test) = imdb.load_data(\n"," path=\"imdb.npz\",\n"," num_words=vocabulary_size,\n"," skip_top=0,\n"," maxlen=None,\n"," seed=11,\n"," start_char=1,\n"," oov_char=2,\n"," index_from=index_from\n"," )\n","\n","# вывод размерностей\n","print('Shape of X train:', X_train.shape)\n","print('Shape of y train:', 
y_train.shape)\n","print('Shape of X test:', X_test.shape)\n","print('Shape of y test:', y_test.shape)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6_ALNo-gK6rl","executionInfo":{"status":"ok","timestamp":1764532397478,"user_tz":-180,"elapsed":3664,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"48eea1b2-11b0-40ac-a500-ee949aa0db19"},"execution_count":22,"outputs":[{"output_type":"stream","name":"stdout","text":["Shape of X train: (25000,)\n","Shape of y train: (25000,)\n","Shape of X test: (25000,)\n","Shape of y test: (25000,)\n"]}]},{"cell_type":"code","source":["# создание словаря для перевода индексов в слова\n","# загрузка словаря \"слово:индекс\"\n","word_to_id = imdb.get_word_index()\n","# уточнение словаря\n","word_to_id = {key:(value + index_from) for key,value in word_to_id.items()}\n","word_to_id[\"<PAD>\"] = 0\n","word_to_id[\"<START>\"] = 1\n","word_to_id[\"<UNK>\"] = 2\n","word_to_id[\"<UNUSED>\"] = 3\n","# создание обратного словаря \"индекс:слово\"\n","id_to_word = {value:key for key,value in word_to_id.items()}"],"metadata":{"id":"Mbxh8wEbLGg5","executionInfo":{"status":"ok","timestamp":1764532397598,"user_tz":-180,"elapsed":117,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}}},"execution_count":23,"outputs":[]},{"cell_type":"code","source":["print(X_train[26])\n","print('len:',len(X_train[26]))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"VUwU74JwLKZB","executionInfo":{"status":"ok","timestamp":1764532397613,"user_tz":-180,"elapsed":11,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"216a791d-c3b5-43b5-ab34-2a2ae7a1a64b"},"execution_count":24,"outputs":[{"output_type":"stream","name":"stdout","text":["[1, 2489, 723, 2, 9, 399, 2301, 11, 551, 2, 29, 47, 1391, 6, 1692, 15, 29, 70, 361, 8, 97, 35, 3258, 40, 6, 2, 106, 42, 2, 4298, 64, 8, 28, 15, 3258, 796, 2, 11, 6, 275, 1622, 21, 50, 26, 148, 33, 27, 2301, 2, 15, 
81, 24, 40, 42, 2, 7, 27, 4646, 5, 80, 81, 845, 12, 304, 8, 67, 15, 29, 152, 3115, 103, 6, 1196, 2, 15, 238, 28, 1894, 27, 2, 2489, 2, 1068, 8, 2181, 27, 1692, 23, 309, 17, 873, 183, 140, 2357, 355, 5, 29, 9, 2, 83, 6, 2699, 1765, 2, 625, 2691, 1229, 80, 516, 10, 10, 11, 2, 279, 12, 286, 141, 6, 52, 326, 8, 796, 106, 4, 2, 132, 11, 4, 172, 1269, 13, 296, 4, 2223, 994, 7, 4, 2223, 5, 3176, 7, 4, 2223, 50, 186, 8, 30, 64, 38, 111, 102, 44, 551, 2, 5, 4, 4616, 3388, 302, 12, 70, 28, 23, 4, 406, 648, 15, 31, 415, 144, 30, 93, 8, 4325, 11, 6, 289, 42, 689, 251, 810, 146, 24, 252, 51, 148, 1893, 18, 4, 20, 1029, 17, 68, 2436, 819, 18, 4, 2, 132, 21, 76, 7, 12, 9, 38, 729, 8, 4, 2223, 102, 15, 12, 566, 30, 2691, 2, 190, 4, 2, 132, 218, 60, 754, 17, 52, 17, 4, 249, 7, 4, 2223, 2355, 10, 10, 1371, 112, 1905, 4981, 4, 2, 132, 47, 450, 85, 712, 15, 66, 1487, 4, 3129, 7, 4, 20, 6, 194, 1834, 13, 28, 9, 19, 2, 2, 11, 4, 485, 240, 141, 6, 2, 1995, 15, 24, 64, 81, 13, 24, 459, 44, 27, 2073, 13, 165, 3663, 18, 12, 696, 177, 1066, 1083, 2, 5, 2, 1602, 26, 220, 17, 78, 507, 38, 1904, 5, 753, 36, 983, 551, 11, 192, 225, 55, 117, 8, 79, 2229, 44, 137, 149, 4, 2, 132, 4, 816, 475, 24, 55, 906, 4, 168, 475, 13, 62, 1634, 76, 7, 12, 17, 2, 4, 114, 475, 727, 4, 206, 475, 50, 218, 101, 444, 14, 9, 31, 8, 798, 10, 10, 2994, 13, 296, 4, 2, 132, 2864, 6, 1039, 7, 4, 736, 1067, 750, 2, 390, 163, 538, 137, 24, 35, 1557, 55, 400, 4, 2, 4, 20, 475, 4, 128, 4, 3179, 2, 4, 493, 569, 220, 32, 7, 68, 3734, 19, 4, 2, 132, 637, 202, 12, 6, 55, 2, 470, 457, 23, 61, 3179, 675, 2407]\n","len: 413\n"]}]},{"cell_type":"code","source":["review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n","print(review_as_text)\n","print('len:',len(review_as_text))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TUZVVH-kLP3F","executionInfo":{"status":"ok","timestamp":1764532397648,"user_tz":-180,"elapsed":33,"user":{"displayName":"Александр 
Ли","userId":"11169366018002229978"}},"outputId":"3347ef8e-330f-4e33-95a8-ad28cfd8ab3d"},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["<START> professor paul <UNK> is doing research in matter <UNK> he has developed a machine that he can use to make an object like a <UNK> watch or <UNK> disappear only to have that object re <UNK> in a different location but there are those at his research <UNK> that do not like or <UNK> of his experiments and will do whatever it takes to see that he doesn't succeed after a failed <UNK> that might have saved his <UNK> professor <UNK> decides to test his machine on himself as expected things go horribly wrong and he is <UNK> into a heavily scared <UNK> whose mere touch will kill br br in <UNK> maybe it wasn't such a good idea to re watch the <UNK> man in the same week i watched the fly return of the fly and curse of the fly there seems to be only so many movies about matter <UNK> and the potentially horrendous effects it can have on the human body that one person should be made to endure in a three or four day period i'm not sure what those responsible for the movie list as their source material for the <UNK> man but much of it is so similar to the fly movies that it cannot be mere <UNK> however the <UNK> man isn't even nearly as good as the worst of the fly trilogy br br besides being terribly unoriginal the <UNK> man has several other problems that really hurt the enjoyment of the movie a big issue i have is with <UNK> <UNK> in the lead he's such a <UNK> ass that not only do i not care about his suffering i actually root for it supporting cast members mary <UNK> and <UNK> allen are almost as bad they're so bland and dull they hardly matter in fact there's very little to get excited about while watching the <UNK> man the soundtrack – not very memorable the look – i would describe much of it as <UNK> the plot – predictable the action – there isn't any overall this is one to avoid br br fortunately i 
watched the <UNK> man via a copy of the mystery science theater <UNK> episode funny stuff while not an absolute very often the <UNK> the movie – the better the mst3k <UNK> the guys hit almost all of their marks with the <UNK> man i'll give it a very <UNK> 4 5 on my mst3k rating scale\n","len: 2113\n"]}]},{"cell_type":"code","source":["print('MAX Len: ',len(max(X_train, key=len)))\n","print('MIN Len: ',len(min(X_train, key=len)))\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vXoobHZwLRpv","executionInfo":{"status":"ok","timestamp":1764532397662,"user_tz":-180,"elapsed":13,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"d5dad9ab-2639-44f7-8e4c-2f15ca8dc8fd"},"execution_count":26,"outputs":[{"output_type":"stream","name":"stdout","text":["MAX Len: 2494\n","MIN Len: 11\n"]}]},{"cell_type":"code","source":["# предобработка данных\n","from tensorflow.keras.utils import pad_sequences\n","max_words = 500\n","X_train = pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n","X_test = pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"],"metadata":{"id":"i96YT3MjLUdh","executionInfo":{"status":"ok","timestamp":1764532398524,"user_tz":-180,"elapsed":860,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}}},"execution_count":27,"outputs":[]},{"cell_type":"code","source":["print('MAX Len: ',len(max(X_train, key=len)))\n","print('MIN Len: ',len(min(X_train, key=len)))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fwnL3SflLbdc","executionInfo":{"status":"ok","timestamp":1764532398600,"user_tz":-180,"elapsed":63,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"e536562f-bb65-4349-9a37-8e908b8ea51c"},"execution_count":28,"outputs":[{"output_type":"stream","name":"stdout","text":["MAX Len: 500\n","MIN Len: 
500\n"]}]},{"cell_type":"code","source":["print(X_train[26])\n","print('len:',len(X_train[26]))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"d7FdCsEyLfV0","executionInfo":{"status":"ok","timestamp":1764532398643,"user_tz":-180,"elapsed":21,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"e6247a79-d731-4450-ff1d-3589c90f48b6"},"execution_count":29,"outputs":[{"output_type":"stream","name":"stdout","text":["[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 1 2489 723 2 9 399 2301 11 551 2 29\n"," 47 1391 6 1692 15 29 70 361 8 97 35 3258 40 6\n"," 2 106 42 2 4298 64 8 28 15 3258 796 2 11 6\n"," 275 1622 21 50 26 148 33 27 2301 2 15 81 24 40\n"," 42 2 7 27 4646 5 80 81 845 12 304 8 67 15\n"," 29 152 3115 103 6 1196 2 15 238 28 1894 27 2 2489\n"," 2 1068 8 2181 27 1692 23 309 17 873 183 140 2357 355\n"," 5 29 9 2 83 6 2699 1765 2 625 2691 1229 80 516\n"," 10 10 11 2 279 12 286 141 6 52 326 8 796 106\n"," 4 2 132 11 4 172 1269 13 296 4 2223 994 7 4\n"," 2223 5 3176 7 4 2223 50 186 8 30 64 38 111 102\n"," 44 551 2 5 4 4616 3388 302 12 70 28 23 4 406\n"," 648 15 31 415 144 30 93 8 4325 11 6 289 42 689\n"," 251 810 146 24 252 51 148 1893 18 4 20 1029 17 68\n"," 2436 819 18 4 2 132 21 76 7 12 9 38 729 8\n"," 4 2223 102 15 12 566 30 2691 2 190 4 2 132 218\n"," 60 754 17 52 17 4 249 7 4 2223 2355 10 10 1371\n"," 112 1905 4981 4 2 132 47 450 85 712 15 66 1487 4\n"," 3129 7 4 20 6 194 1834 13 28 9 19 2 2 11\n"," 4 485 240 141 6 2 1995 15 24 64 81 13 24 459\n"," 44 27 2073 13 165 3663 18 12 696 177 1066 1083 2 5\n"," 2 1602 26 220 17 78 507 38 1904 5 753 36 983 551\n"," 11 192 225 55 117 8 79 2229 44 137 149 4 2 132\n"," 4 816 475 24 55 906 4 168 475 13 62 1634 76 7\n"," 12 17 2 4 114 475 727 4 206 475 50 218 101 444\n"," 14 9 31 8 798 10 10 2994 13 296 4 2 132 2864\n"," 
6 1039 7 4 736 1067 750 2 390 163 538 137 24 35\n"," 1557 55 400 4 2 4 20 475 4 128 4 3179 2 4\n"," 493 569 220 32 7 68 3734 19 4 2 132 637 202 12\n"," 6 55 2 470 457 23 61 3179 675 2407]\n","len: 500\n"]}]},{"cell_type":"code","source":["review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n","print(review_as_text)\n","print('len:',len(review_as_text))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CA5ZwsXyLh5C","executionInfo":{"status":"ok","timestamp":1764532398734,"user_tz":-180,"elapsed":103,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"1d6fdfaa-9006-45f7-fac8-e42f51913896"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stdout","textprofessor paul <UNK> is doing research in matter <UNK> he has developed a machine that he can use to make an object like a <UNK> watch or <UNK> disappear only to have that object re <UNK> in a different location but there are those at his research <UNK> that do not like or <UNK> of his experiments and will do whatever it takes to see that he doesn't succeed after a failed <UNK> that might have saved his <UNK> professor <UNK> decides to test his machine on himself as expected things go horribly wrong and he is <UNK> into a heavily scared <UNK> whose mere touch will kill br br in <UNK> maybe it wasn't such a good idea to re watch the <UNK> man in the same week i watched the fly return of the fly and curse of the fly there seems to be only so many movies about matter <UNK> and the potentially horrendous effects it can have on the human body that one person should be made to endure in a three or four day period i'm not sure what those responsible for the movie list as their source material for the <UNK> man but much of it is so similar to the fly movies that it cannot be mere <UNK> however the <UNK> man isn't even nearly as good as the worst of the fly trilogy br br besides being terribly unoriginal the <UNK> man has several other problems that 
really hurt the enjoyment of the movie a big issue i have is with <UNK> <UNK> in the lead he's such a <UNK> ass that not only do i not care about his suffering i actually root for it supporting cast members mary <UNK> and <UNK> allen are almost as bad they're so bland and dull they hardly matter in fact there's very little to get excited about while watching the <UNK> man the soundtrack – not very memorable the look – i would describe much of it as <UNK> the plot – predictable the action – there isn't any overall this is one to avoid br br fortunately i watched the <UNK> man via a copy of the mystery science theater <UNK> episode funny stuff while not an absolute very often the <UNK> the movie – the better the mst3k <UNK> the guys hit almost all of their marks with the <UNK> man i'll give it a very <UNK> 4 5 on my mst3k rating scale\n","len: 2635\n"]}]},{"cell_type":"code","source":["# вывод данных\n","print('X train: \\n',X_train)\n","print('X test: \\n',X_test)\n","\n","# вывод размерностей\n","print('Shape of X train:', X_train.shape)\n","print('Shape of X test:', X_test.shape)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JZ5nEa1ILliE","executionInfo":{"status":"ok","timestamp":1764532398735,"user_tz":-180,"elapsed":89,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"4a7a5662-c91e-4fa6-b57c-1f9ab90a8afd"},"execution_count":31,"outputs":[{"output_type":"stream","name":"stdout","text":["X train: \n"," [[ 0 0 0 ... 6 2 2]\n"," [ 0 0 0 ... 10 10 2]\n"," [ 1 14 22 ... 171 153 303]\n"," ...\n"," [ 0 0 0 ... 17 2199 1262]\n"," [ 0 0 0 ... 606 5 1356]\n"," [ 0 0 0 ... 1026 5 804]]\n","X test: \n"," [[ 0 0 0 ... 10 10 2]\n"," [ 0 0 0 ... 43 1044 710]\n"," [ 0 0 0 ... 35 744 23]\n"," ...\n"," [ 0 0 0 ... 184 1543 616]\n"," [ 0 0 0 ... 38 2 78]\n"," [ 0 0 0 ... 
5 2 2]]\n","Shape of X train: (25000, 500)\n","Shape of X test: (25000, 500)\n"]}]},{"cell_type":"code","source":["embed_dim = 32\n","lstm_units = 64\n","\n","model = Sequential()\n","model.add(layers.Embedding(input_dim=vocabulary_size, output_dim=embed_dim, input_length=max_words, input_shape=(max_words,)))\n","model.add(layers.LSTM(lstm_units))\n","model.add(layers.Dropout(0.5))\n","model.add(layers.Dense(1, activation='sigmoid'))\n","\n","model.summary()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":346},"id":"kkrjirH4Lnuz","executionInfo":{"status":"ok","timestamp":1764532398747,"user_tz":-180,"elapsed":67,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"092339c6-e0f6-4a1f-e151-5a696541160a"},"execution_count":32,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.12/dist-packages/keras/src/layers/core/embedding.py:97: UserWarning: Argument `input_length` is deprecated. Just remove it.\n"," warnings.warn(\n","/usr/local/lib/python3.12/dist-packages/keras/src/layers/core/embedding.py:100: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. 
When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n"," super().__init__(**kwargs)\n"]},{"output_type":"display_data","data":{"text/plain":["\u001b[1mModel: \"sequential_1\"\u001b[0m\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential_1\"</span>\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n","┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","│ embedding_1 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m500\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m160,000\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ lstm_1 (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m24,832\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m65\u001b[0m │\n","└─────────────────────────────────┴────────────────────────┴───────────────┘\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier 
New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃\n","┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","│ embedding_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">500</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">160,000</span> │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ lstm_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">24,832</span> │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dropout_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">65</span> 
│\n","└─────────────────────────────────┴────────────────────────┴───────────────┘\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Total params: \u001b[0m\u001b[38;5;34m184,897\u001b[0m (722.25 KB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">184,897</span> (722.25 KB)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m184,897\u001b[0m (722.25 KB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">184,897</span> (722.25 KB)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n","</pre>\n"]},"metadata":{}}]},{"cell_type":"code","source":["# компилируем и обучаем модель\n","batch_size = 64\n","epochs = 3\n","model.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n","model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"xnVLvP8dLvQw","executionInfo":{"status":"ok","timestamp":1764532422590,"user_tz":-180,"elapsed":23842,"user":{"displayName":"Александр 
Ли","userId":"11169366018002229978"}},"outputId":"99024fc2-8eed-4624-c0c0-e507221cccbd"},"execution_count":33,"outputs":[{"output_type":"stream","name":"stdout","text":["Epoch 1/3\n","\u001b[1m313/313\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 25ms/step - accuracy: 0.6482 - loss: 0.6264 - val_accuracy: 0.8446 - val_loss: 0.3654\n","Epoch 2/3\n","\u001b[1m313/313\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 23ms/step - accuracy: 0.8459 - loss: 0.3677 - val_accuracy: 0.7632 - val_loss: 0.4948\n","Epoch 3/3\n","\u001b[1m313/313\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 22ms/step - accuracy: 0.8515 - loss: 0.3613 - val_accuracy: 0.8700 - val_loss: 0.3384\n"]},{"output_type":"execute_result","data":{"text/plain":["<keras.src.callbacks.history.History at 0x7a8e6bf1c350>"]},"metadata":{},"execution_count":33}]},{"cell_type":"code","source":["test_loss, test_acc = model.evaluate(X_test, y_test)\n","print(f\"\\nTest accuracy: {test_acc}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-c7bIgHhLy44","executionInfo":{"status":"ok","timestamp":1764532430821,"user_tz":-180,"elapsed":8228,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"58ebb0dd-b359-4022-d7e1-150e86192dea"},"execution_count":34,"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 10ms/step - accuracy: 0.8629 - loss: 0.3494\n","\n","Test accuracy: 0.8626800179481506\n"]}]},{"cell_type":"code","source":["#значение метрики качества классификации на тестовых данных\n","print(f\"\\nTest accuracy: {test_acc}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"AdPKXQ8RMBC8","executionInfo":{"status":"ok","timestamp":1764532430860,"user_tz":-180,"elapsed":36,"user":{"displayName":"Александр 
Ли","userId":"11169366018002229978"}},"outputId":"8fb7fc5d-fda4-4bb2-90d6-80a68f68ce0d"},"execution_count":35,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","Test accuracy: 0.8626800179481506\n"]}]},{"cell_type":"code","source":["#отчет о качестве классификации тестовой выборки\n","y_score = model.predict(X_test)\n","y_pred = [1 if y_score[i,0]>=0.5 else 0 for i in range(len(y_score))]\n","\n","from sklearn.metrics import classification_report\n","print(classification_report(y_test, y_pred, labels = [0, 1], target_names=['Negative', 'Positive']))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5cuHltNNMDwl","executionInfo":{"status":"ok","timestamp":1764532437250,"user_tz":-180,"elapsed":6388,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}},"outputId":"ff4ade8b-c58d-43a4-943a-720f4d459ce1"},"execution_count":36,"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 8ms/step\n"," precision recall f1-score support\n","\n"," Negative 0.87 0.85 0.86 12500\n"," Positive 0.85 0.87 0.86 12500\n","\n"," accuracy 0.86 25000\n"," macro avg 0.86 0.86 0.86 25000\n","weighted avg 0.86 0.86 0.86 25000\n","\n"]}]},{"cell_type":"code","source":["#построение ROC-кривой и AUC ROC\n","from sklearn.metrics import roc_curve, auc\n","\n","fpr, tpr, thresholds = roc_curve(y_test, y_score)\n","plt.plot(fpr, tpr)\n","plt.grid()\n","plt.xlabel('False Positive Rate')\n","plt.ylabel('True Positive Rate')\n","plt.title('ROC')\n","plt.show()\n","print('AUC ROC:', auc(fpr, tpr))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":489},"id":"xWBS6H9-MIp0","executionInfo":{"status":"ok","timestamp":1764532437410,"user_tz":-180,"elapsed":156,"user":{"displayName":"Александр 
Ли","userId":"11169366018002229978"}},"outputId":"7777e5b9-354f-4aa5-8147-ea6a0af75306"},"execution_count":37,"outputs":[{"output_type":"display_data","data":{"text/plain":["<Figure size 640x480 with 1 Axes>"],"image/png":"\n"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["AUC ROC: 0.9349818848\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"OsxJzr8IMOuQ","executionInfo":{"status":"ok","timestamp":1764532437418,"user_tz":-180,"elapsed":4,"user":{"displayName":"Александр Ли","userId":"11169366018002229978"}}},"execution_count":37,"outputs":[]}]}