Commit
2776de8a71
@@ -0,0 +1,159 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"#device_name=tf.test.gpu_device_name()\n",
"#if device_name != '/device:GPU:0':\n",
"#    raise SystemError('GPU device not found')\n",
"#print('Found GPU at: {}'.format(device_name))\n",
"# load the dataset\n",
"from keras.datasets import imdb\n",
"vocabulary_size=5000\n",
"index_from=3\n",
"(X_train,y_train),(X_test,y_test)=imdb.load_data(path=\"imdb.npz\",\n",
"                                                 num_words=vocabulary_size,\n",
"                                                 skip_top=0,\n",
"                                                 maxlen=None,\n",
"                                                 seed=4*8 - 1,\n",
"                                                 start_char=1,\n",
"                                                 oov_char=2,\n",
"                                                 index_from=index_from)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(X_train.shape)\n",
"print(y_train.shape)\n",
"print(X_test.shape)\n",
"print(y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"word_to_id=imdb.get_word_index()\n",
"# shift the index to account for the reserved special tokens\n",
"word_to_id={key:(value + index_from) for key,value in word_to_id.items()}\n",
"word_to_id[\"<PAD>\"]=0\n",
"word_to_id[\"<START>\"]=1\n",
"word_to_id[\"<UNK>\"]=2\n",
"word_to_id[\"<UNUSED>\"]=3\n",
"# build the reverse \"index: word\" dictionary\n",
"id_to_word={value:key for key,value in word_to_id.items()}\n",
"some_number=4*8-1\n",
"review_as_text=' '.join(id_to_word[id] for id in X_train[some_number])\n",
"print(X_train[some_number])\n",
"print(review_as_text)\n",
"print(len(X_train[some_number]))\n",
"if y_train[some_number] == 1:\n",
"    class_label='Positive'\n",
"else:\n",
"    class_label='Negative'\n",
"print('Review class:', y_train[some_number], f'({class_label})')\n",
"print(len(max(X_train, key=len)))\n",
"print(len(min(X_train, key=len)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"from keras.preprocessing import sequence\n",
"max_words=500\n",
"X_train=sequence.pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
"X_test=sequence.pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from keras.preprocessing import sequence\n",
"max_words=500\n",
"X_train=sequence.pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
"X_test=sequence.pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"word_to_id=imdb.get_word_index()\n",
"# shift the index to account for the reserved special tokens\n",
"word_to_id={key:(value + index_from) for key,value in word_to_id.items()}\n",
"word_to_id[\"<PAD>\"]=0\n",
"word_to_id[\"<START>\"]=1\n",
"word_to_id[\"<UNK>\"]=2\n",
"word_to_id[\"<UNUSED>\"]=3\n",
"# build the reverse \"index: word\" dictionary\n",
"id_to_word={value:key for key,value in word_to_id.items()}\n",
"some_number=4*8-1\n",
"review_as_text=' '.join(id_to_word[id] for id in X_train[some_number])\n",
"print(X_train[some_number])\n",
"print(review_as_text)\n",
"print(len(X_train[some_number]))\n",
"if y_train[some_number] == 1:\n",
"    class_label='Positive'\n",
"else:\n",
"    class_label='Negative'\n",
"print('Review class:', y_train[some_number], f'({class_label})')\n",
"print(len(max(X_train, key=len)))\n",
"print(len(min(X_train, key=len)))\n",
"\n",
"print(\"x_train: \", X_train)\n",
"print(X_train.shape)\n",
"print(\"x_test: \", X_test)\n",
"print(X_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from keras.models import Sequential\n",
"from keras.layers import Embedding, LSTM, Dropout, Dense\n",
"\n",
"model=Sequential()\n",
"model.add(Embedding(input_dim=len(word_to_id), output_dim=32, input_length=500))\n",
"model.add(LSTM(units=90))\n",
"model.add(Dropout(rate=0.4))\n",
"model.add(Dense(units=1, activation='sigmoid'))\n",
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
"H=model.fit(X_train, y_train, validation_split=0.1, epochs=5, batch_size=32)\n",
"model.summary()\n",
"scores=model.evaluate(X_test, y_test)\n",
"print(\"Loss: \", scores[0])\n",
"print(\"Accuracy: \", scores[1])\n",
"test_result=model.predict(X_test)\n",
"predicted_labels=[1 if test_result[i,0]>=0.5 else 0 for i in range(len(test_result))]\n",
"from sklearn.metrics import classification_report\n",
"print(classification_report(y_test, predicted_labels, labels=[0,1], target_names=[\"Negative\", \"Positive\"]))"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -0,0 +1,244 @@

**LAB WORK No. 4 "Sequence Recognition"**

Group А-02-22, team No. 8: Левшенко Д.И., Новиков Д.М., Шестов Д.Н.

**2) Load the IMDb dataset, which contains digitized movie reviews labeled into two classes: positive and negative. When loading the dataset, set the seed parameter equal to (4k – 1), where k is the team number. Print the shapes of the resulting training and test data arrays.**

```py
import tensorflow as tf
from keras.datasets import imdb
vocabulary_size=5000
index_from=3
(X_train,y_train),(X_test,y_test)=imdb.load_data(path="imdb.npz",
                                                 num_words=vocabulary_size,
                                                 skip_top=0,
                                                 maxlen=None,
                                                 seed=4*8 - 1,
                                                 start_char=1,
                                                 oov_char=2,
                                                 index_from=index_from)
```

```py
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(25000,)
(25000,)
(25000,)
(25000,)
```
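
Since num_words=vocabulary_size caps the word indices (any rarer word is replaced by oov_char=2), a quick sanity check can confirm that no id exceeds the vocabulary. This is an illustrative sketch, not part of the original notebook:

```py
# Sanity check (illustrative): with num_words=5000, every word id stays below 5000.
# At this point X_train is still an array of variable-length index lists.
print(max(max(seq) for seq in X_train))  # expected: at most 4999
```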

**3) Print one review from the training set as a list of word indices. Convert the list of indices to text and print the review as text. Print the length of the review. Print the class label of this review and the class name (1 – Positive, 0 – Negative).**

```py
word_to_id=imdb.get_word_index()
word_to_id={key:(value + index_from) for key,value in word_to_id.items()}
word_to_id["<PAD>"]=0
word_to_id["<START>"]=1
word_to_id["<UNK>"]=2
word_to_id["<UNUSED>"]=3

id_to_word={value:key for key,value in word_to_id.items()}
some_number=4*8-1
review_as_text=' '.join(id_to_word[id] for id in X_train[some_number])
print(X_train[some_number])
print(review_as_text)
print(len(X_train[some_number]))
if y_train[some_number] == 1:
    class_label='Positive'
else:
    class_label='Negative'


[1, 4, 2112, 512, 9, 150, 6, 4737, 875, 31, 15, 9, 99, 400, 2, 8, 2111, 11, 2, 4, 201, 9, 6, 2, 7, 960, 1807, 15, 28, 77, 2, 11, 45, 512, 2670, 4, 927, 28, 4677, 725, 14, 3279, 34, 1855, 6, 1882, 63, 47, 77, 2, 8, 12, 4, 2, 9, 35, 1711, 823, 4296, 15, 2, 45, 1500, 19, 1987, 1137, 15, 9, 2, 19, 1302, 2, 486, 5, 2, 567, 4, 1317, 2311, 1223, 2, 9, 2, 17, 6, 2, 831, 2, 7, 1092, 5, 1515, 1234, 34, 27, 1051, 190, 1223, 9, 7, 107, 2, 31, 63, 9, 2, 137, 4, 85, 9, 2, 19, 3237, 5, 2, 19, 46, 101, 2, 42, 2, 13, 131, 2, 264, 15, 4, 2, 47, 4, 1885, 3137, 177, 7, 1136, 1757, 32, 183, 1192, 13, 100, 97, 35, 3761, 2590, 23, 4, 201, 21, 13, 528, 48, 126, 50, 9, 6, 1114, 2, 11, 4564, 2, 1787, 18, 134, 2, 2, 2, 2, 1711, 2, 5, 4, 2, 2, 2, 80, 30, 4783, 208, 145, 33, 25]
<START> the gangster genre is now a worn subject one that is too often <UNK> to parody in <UNK> the series is a <UNK> of previous clichés that have been <UNK> in it's genre thankfully the writers have advanced upon this flaw by creating a realism which has been <UNK> to it the <UNK> is an epic crime saga that <UNK> it's content with psychological depth that is <UNK> with subtle <UNK> humor and <UNK> violence the key protagonist tony <UNK> is <UNK> as a <UNK> general <UNK> of fear and moral values by his crew however tony is of two <UNK> one which is <UNK> while the other is <UNK> with guilt and <UNK> without any <UNK> or <UNK> i still <UNK> believe that the <UNK> has the finest ensemble cast of recent memory all things considered i could make an elaborate statement on the series but i won't if ever there is a visual <UNK> in global <UNK> search for these <UNK> <UNK> <UNK> <UNK> epic <UNK> and the <UNK> <UNK> <UNK> will be smiling right back at you
182
```
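
The +index_from shift exists because load_data reserves ids 0–3 for the special tokens, while the raw dictionary from get_word_index() starts at 1. A small illustration (a sketch; raw_index is an illustrative name introduced here):

```py
# Illustration: "the" is index 1 in the raw dictionary, but appears as id 4
# in X_train because ids 0..3 are reserved for <PAD>/<START>/<UNK>/<UNUSED>.
raw_index = imdb.get_word_index()
print(raw_index["the"])               # 1
print(raw_index["the"] + index_from)  # 4 - the id right after <START> above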
```

**4) Print the maximum and minimum review lengths in the training set.**

```py
print('Review class:', y_train[some_number], f'({class_label})')
print(len(max(X_train, key=len)))
print(len(min(X_train, key=len)))

Review class: 1 (Positive)
2494
11
```

**5) Preprocess the data. Choose a single length to which all reviews will be brought. Pad short reviews with the special symbol and truncate long reviews to the chosen length.**

```py
from keras.preprocessing import sequence
max_words=500
X_train=sequence.pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')
X_test=sequence.pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')
```
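
To make the padding and truncation behaviour concrete, here is a tiny illustrative example with maxlen=5 (not from the original report; demo is a hypothetical input):

```py
# Toy example: 'pre' padding prepends the value 0 (<PAD>) to short sequences,
# 'post' truncating cuts long sequences off at the end.
demo = [[5, 6, 7], [1, 2, 3, 4, 5, 6]]
print(sequence.pad_sequences(demo, maxlen=5, value=0, padding='pre', truncating='post'))
# [[0 0 5 6 7]
#  [1 2 3 4 5]]
```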

**6–7) Repeat steps 3 and 4.**

```py
word_to_id=imdb.get_word_index()

word_to_id={key:(value + index_from) for key,value in word_to_id.items()}
word_to_id["<PAD>"]=0
word_to_id["<START>"]=1
word_to_id["<UNK>"]=2
word_to_id["<UNUSED>"]=3

id_to_word={value:key for key,value in word_to_id.items()}
some_number=4*8-1
review_as_text=' '.join(id_to_word[id] for id in X_train[some_number])
print(X_train[some_number])
print(review_as_text)
print(len(X_train[some_number]))
if y_train[some_number] == 1:
    class_label='Positive'
else:
    class_label='Negative'
print('Review class:', y_train[some_number], f'({class_label})')
print(len(max(X_train, key=len)))
print(len(min(X_train, key=len)))


[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 1 4 2112 512
9 150 6 4737 875 31 15 9 99 400 2 8 2111 11
2 4 201 9 6 2 7 960 1807 15 28 77 2 11
45 512 2670 4 927 28 4677 725 14 3279 34 1855 6 1882
63 47 77 2 8 12 4 2 9 35 1711 823 4296 15
2 45 1500 19 1987 1137 15 9 2 19 1302 2 486 5
2 567 4 1317 2311 1223 2 9 2 17 6 2 831 2
7 1092 5 1515 1234 34 27 1051 190 1223 9 7 107 2
31 63 9 2 137 4 85 9 2 19 3237 5 2 19
46 101 2 42 2 13 131 2 264 15 4 2 47 4
1885 3137 177 7 1136 1757 32 183 1192 13 100 97 35 3761
2590 23 4 201 21 13 528 48 126 50 9 6 1114 2
11 4564 2 1787 18 134 2 2 2 2 1711 2 5 4
2 2 2 80 30 4783 208 145 33 25]


<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <START> the gangster genre is now a worn subject one that is too often <UNK> to parody in <UNK> the series is a <UNK> of previous clichés that have been <UNK> in it's genre thankfully the writers have advanced upon this flaw by creating a realism which has been <UNK> to it the <UNK> is an epic crime saga that <UNK> it's content with psychological depth that is <UNK> with subtle <UNK> humor and <UNK> violence the key protagonist tony <UNK> is <UNK> as a <UNK> general <UNK> of fear and moral values by his crew however tony is of two <UNK> one which is <UNK> while the other is <UNK> with guilt and <UNK> without any <UNK> or <UNK> i still <UNK> believe that the <UNK> has the finest ensemble cast of recent memory all things considered i could make an elaborate statement on the series but i won't if ever there is a visual <UNK> in global <UNK> search for these <UNK> <UNK> <UNK> <UNK> epic <UNK> and the <UNK> <UNK> <UNK> will be smiling right back at you
500
Review class: 1 (Positive)
500
500
```

**8) Print the preprocessed training and test data arrays and their shapes.**

```py
print("x_train: ", X_train)
print(X_train.shape)
print("x_test: ", X_test)
print(X_test.shape)


x_train: [[ 0 0 0 ... 2 4050 2]
[ 0 0 0 ... 721 90 180]
[ 0 0 0 ... 1114 2 174]
...
[ 1 1065 2022 ... 7 1514 2]
[ 0 0 0 ... 6 879 132]
[ 0 0 0 ... 12 152 157]]
(25000, 500)
x_test: [[ 0 0 0 ... 10 342 158]
[ 0 0 0 ... 2 67 12]
[ 0 0 0 ... 1242 1095 1095]
...
[ 0 0 0 ... 4 2 136]
[ 0 0 0 ... 14 31 591]
[ 0 0 0 ... 7 3923 212]]
(25000, 500)
```

**9) Implement a recurrent neural network model consisting of Embedding, LSTM, Dropout, and Dense layers, and train it on the training data, setting aside part of the training data as a validation set. Print information about the network architecture. Achieve a training quality of at least 0.8 by the accuracy metric.**

```py
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dropout, Dense

model=Sequential()
model.add(Embedding(input_dim=len(word_to_id), output_dim=32, input_length=500))
model.add(LSTM(units=90))
model.add(Dropout(rate=0.4))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
H=model.fit(X_train, y_train, validation_split=0.1, epochs=5, batch_size=32)
model.summary()

Epoch 1/5
704/704 ━━━━━━━━━━━━━━━━━━━━ 232s 326ms/step - accuracy: 0.6650 - loss: 0.5766 - val_accuracy: 0.8552 - val_loss: 0.3558
Epoch 2/5
704/704 ━━━━━━━━━━━━━━━━━━━━ 253s 313ms/step - accuracy: 0.8611 - loss: 0.3344 - val_accuracy: 0.7944 - val_loss: 0.4277
Epoch 3/5
704/704 ━━━━━━━━━━━━━━━━━━━━ 264s 316ms/step - accuracy: 0.8672 - loss: 0.3235 - val_accuracy: 0.8584 - val_loss: 0.3852
Epoch 4/5
704/704 ━━━━━━━━━━━━━━━━━━━━ 263s 317ms/step - accuracy: 0.9013 - loss: 0.2555 - val_accuracy: 0.8688 - val_loss: 0.3254
Epoch 5/5
704/704 ━━━━━━━━━━━━━━━━━━━━ 225s 319ms/step - accuracy: 0.9245 - loss: 0.2050 - val_accuracy: 0.8752 - val_loss: 0.3455
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding (Embedding)           │ (None, 500, 32)        │     2,834,816 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm (LSTM)                     │ (None, 90)             │        44,280 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 90)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 1)              │            91 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 8,637,563 (32.95 MB)
Trainable params: 2,879,187 (10.98 MB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 5,758,376 (21.97 MB)
```
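
The history object H returned by model.fit is captured above but never inspected; a possible way to look at the training curves is sketched below (assuming matplotlib is available; this is not part of the original report):

```py
import matplotlib.pyplot as plt

# Plot training vs. validation accuracy per epoch from the fit history.
plt.plot(H.history['accuracy'], label='train accuracy')
plt.plot(H.history['val_accuracy'], label='validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()
```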

**10) Evaluate the training quality on the test data.**

```py
scores=model.evaluate(X_test, y_test)
print("Loss: ", scores[0])
print("Accuracy: ", scores[1])
test_result=model.predict(X_test)
predicted_labels=[1 if test_result[i,0]>=0.5 else 0 for i in range(len(test_result))]
from sklearn.metrics import classification_report
print(classification_report(y_test, predicted_labels, labels=[0,1], target_names=["Negative", "Positive"]))


782/782 ━━━━━━━━━━━━━━━━━━━━ 68s 87ms/step - accuracy: 0.8673 - loss: 0.3494
Loss:  0.35319784283638
Accuracy:  0.8662400245666504
782/782 ━━━━━━━━━━━━━━━━━━━━ 66s 84ms/step
              precision    recall  f1-score   support

    Negative       0.87      0.86      0.87     12500
    Positive       0.86      0.87      0.87     12500

    accuracy                           0.87     25000
   macro avg       0.87      0.87      0.87     25000
weighted avg       0.87      0.87      0.87     25000

```
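
As an optional complement to the classification report (a sketch, assuming the same sklearn installation as above), the confusion matrix shows the raw error counts behind the precision and recall figures:

```py
from sklearn.metrics import confusion_matrix

# Rows: true class (0=Negative, 1=Positive); columns: predicted class.
print(confusion_matrix(y_test, predicted_labels, labels=[0, 1]))
```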

**Conclusion:**

In the course of the work, the movie review data was loaded, preprocessed, and split into training and test sets. A recurrent neural network based on LSTM cells was created and trained. The accuracy of the trained model on the test data was 0.86624.