|
|
|
@ -233,6 +233,7 @@ print("Размер тестовых меток y_test:", y_test.shape)
|
|
|
|
[ 1 146 6 ... 15 12 16]
|
|
|
|
[ 1 146 6 ... 15 12 16]
|
|
|
|
[ 0 0 0 ... 141 17 134]
|
|
|
|
[ 0 0 0 ... 141 17 134]
|
|
|
|
[ 1 12 9 ... 320 7 51]
|
|
|
|
[ 1 12 9 ... 320 7 51]
|
|
|
|
|
|
|
|
|
|
|
|
Размер обучающего множества X_train: (25000, 500)
|
|
|
|
Размер обучающего множества X_train: (25000, 500)
|
|
|
|
Размер обучающих меток y_train: (25000,)
|
|
|
|
Размер обучающих меток y_train: (25000,)
|
|
|
|
Размер тестового множества X_test: (25000, 500)
|
|
|
|
Размер тестового множества X_test: (25000, 500)
|
|
|
|
@ -242,25 +243,121 @@ print("Размер тестовых меток y_test:", y_test.shape)
|
|
|
|
### Пункт №9. Реализация модели рекуррентной нейронной сети.
|
|
|
|
### Пункт №9. Реализация модели рекуррентной нейронной сети.
|
|
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
```python
|
|
|
|
|
|
|
|
from tensorflow.keras.models import Sequential
|
|
|
|
|
|
|
|
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vocabulary_size = 5000
|
|
|
|
|
|
|
|
embedding_dim = 32
|
|
|
|
|
|
|
|
lstm_units = 64
|
|
|
|
|
|
|
|
dropout_rate = 0.5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model = Sequential()
|
|
|
|
|
|
|
|
model.add(Embedding(
|
|
|
|
|
|
|
|
input_dim=vocabulary_size + index_from,
|
|
|
|
|
|
|
|
output_dim=embedding_dim,
|
|
|
|
|
|
|
|
input_length=max_words
|
|
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
model.add(LSTM(lstm_units))
|
|
|
|
|
|
|
|
model.add(Dropout(dropout_rate))
|
|
|
|
|
|
|
|
model.add(Dense(1, activation='sigmoid'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model.compile(
|
|
|
|
|
|
|
|
loss='binary_crossentropy',
|
|
|
|
|
|
|
|
optimizer='adam',
|
|
|
|
|
|
|
|
metrics=['accuracy']
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model.build(input_shape=(None, max_words))
|
|
|
|
|
|
|
|
model.summary()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Обучение модели
|
|
|
|
|
|
|
|
history = model.fit(
|
|
|
|
|
|
|
|
X_train,
|
|
|
|
|
|
|
|
y_train,
|
|
|
|
|
|
|
|
epochs=5,
|
|
|
|
|
|
|
|
batch_size=64,
|
|
|
|
|
|
|
|
validation_split=0.2,
|
|
|
|
|
|
|
|
verbose=1
|
|
|
|
|
|
|
|
)
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
**Результат выполнения:**
|
|
|
|
**Результат выполнения:**
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
| Layer (type) | Output Shape | Param # |
|
|
|
|
|
|
|
|
|-----------------------|-------------------|-----------|
|
|
|
|
|
|
|
|
| embedding_3 (Embedding) | (None, 500, 32) | 160,096 |
|
|
|
|
|
|
|
|
| lstm_3 (LSTM) | (None, 64) | 24,832 |
|
|
|
|
|
|
|
|
| dropout_3 (Dropout) | (None, 64) | 0 |
|
|
|
|
|
|
|
|
| dense_3 (Dense) | (None, 1) | 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
**Total params:** 184,993 (722.63 KB)
|
|
|
|
|
|
|
|
**Trainable params:** 184,993 (722.63 KB)
|
|
|
|
|
|
|
|
**Non-trainable params:** 0 (0.00 B)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
Качество обучения по эпохам
|
|
|
|
|
|
|
|
Эпоха 1: accuracy = 0.9302, val_accuracy = 0.8686
|
|
|
|
|
|
|
|
Эпоха 2: accuracy = 0.9298, val_accuracy = 0.8416
|
|
|
|
|
|
|
|
Эпоха 3: accuracy = 0.9351, val_accuracy = 0.8576
|
|
|
|
|
|
|
|
Эпоха 4: accuracy = 0.9311, val_accuracy = 0.8678
|
|
|
|
|
|
|
|
Эпоха 5: accuracy = 0.9522, val_accuracy = 0.8670
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Добились качества обучения по метрике accuracy не менее 0.8.
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
### Пункт №10. Оценка качества обучения на тестовых данных.
|
|
|
|
### Пункт №10.1 Оценка качества обучения на тестовых данных.
|
|
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
```python
|
|
|
|
|
|
|
|
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("Качество классификации на тестовой выборке")
|
|
|
|
|
|
|
|
print(f"Test accuracy: {test_accuracy:.4f}")
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
**Результат выполнения:**
|
|
|
|
**Результат выполнения:**
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
Качество классификации на тестовой выборке
|
|
|
|
|
|
|
|
Test accuracy: 0.8607
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Пункт №10.2 Оценка качества обучения на тестовых данных.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
|
|
|
y_score = model.predict(X_test)
|
|
|
|
|
|
|
|
# Threshold the sigmoid scores at 0.5 to get hard 0/1 class labels.
# Vectorized NumPy comparison replaces the per-index Python loop;
# y_score is indexed as a 2-D array with a single column, as in the original.
y_pred = (y_score[:, 0] >= 0.5).astype(int)
|
|
|
|
|
|
|
|
from sklearn.metrics import classification_report
|
|
|
|
|
|
|
|
print(classification_report(y_test, y_pred, labels = [0, 1], target_names=['Negative', 'Positive']))
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
**Результат выполнения:**
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
Negative 0.84 0.89 0.86 12500
|
|
|
|
|
|
|
|
Positive 0.88 0.83 0.86 12500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accuracy 0.86 25000
|
|
|
|
|
|
|
|
macro avg 0.86 0.86 0.86 25000
|
|
|
|
|
|
|
|
weighted avg 0.86 0.86 0.86 25000
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Пункт №10.3 Оценка качества обучения на тестовых данных.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
|
|
|
from sklearn.metrics import roc_curve, auc
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
fpr, tpr, thresholds = roc_curve(y_test, y_score)
|
|
|
|
|
|
|
|
plt.plot(fpr, tpr)
|
|
|
|
|
|
|
|
plt.grid()
|
|
|
|
|
|
|
|
plt.xlabel('False Positive Rate')
|
|
|
|
|
|
|
|
plt.ylabel('True Positive Rate')
|
|
|
|
|
|
|
|
plt.title('ROC')
|
|
|
|
|
|
|
|
plt.show()
|
|
|
|
|
|
|
|
print('Area under ROC is', auc(fpr, tpr))
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
![ROC-кривая](1.png)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Area under ROC is 0.9304564479999999
|
|
|
|
|
|
|
|
|
|
|
|
### Пункт №11. Выводы по результатам применения рекуррентной нейронной сети.
|
|
|
|
### Пункт №11. Выводы по результатам применения рекуррентной нейронной сети.
|
|
|
|
|
|
|
|
|
|
|
|
**Выводы по лабораторной работе:**
|
|
|
|
**Выводы по лабораторной работе:**
|
|
|
|
|