Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

5.5 KiB

import tensorflow as tf

# Load the IMDB movie-review sentiment dataset, keeping only the 5000 most
# frequent words; rarer words are replaced by the out-of-vocabulary token.
from keras.datasets import imdb

vocabulary_size = 5000
index_from = 3  # word indices are offset by 3 to reserve ids for special tokens
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    path="imdb.npz",
    num_words=vocabulary_size,
    skip_top=0,          # do not drop the most frequent words
    maxlen=None,         # keep reviews of every length
    seed=31,             # shuffle seed (originally written as 4*8 - 1)
    start_char=1,        # id that marks the beginning of a review
    oov_char=2,          # id substituted for out-of-vocabulary words
    index_from=index_from,
)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
# Build the word -> index dictionary and shift it by index_from so the
# reserved special-token ids 0..3 do not collide with real words.
word_to_id = imdb.get_word_index()
word_to_id = {key: (value + index_from) for key, value in word_to_id.items()}
word_to_id["<PAD>"] = 0
word_to_id["<START>"] = 1
word_to_id["<UNK>"] = 2
word_to_id["<UNUSED>"] = 3
# Inverse dictionary "index: word" for decoding reviews back to text.
id_to_word = {value: key for key, value in word_to_id.items()}

some_number = 31  # index of the sample review to inspect (was 4*8-1)
# Fix: join with a space so the decoded review is readable (was ''.join,
# which glued all words together). Also avoid shadowing the builtin `id`.
review_as_text = ' '.join(id_to_word[idx] for idx in X_train[some_number])
print(X_train[some_number])
print(review_as_text)
print(len(X_train[some_number]))
class_label = 'Positive' if y_train[some_number] == 1 else 'Negative'
print('Review class:', y_train[some_number], f'({class_label})')
print(len(max(X_train, key=len)))  # longest review, in tokens
print(len(min(X_train, key=len)))  # shortest review, in tokens

# Pad/truncate every review to exactly 500 tokens: short reviews are
# zero-padded at the front ('pre'), long ones are cut at the end ('post').
# NOTE(review): this code appeared twice in the original file — once
# collapsed onto a single garbled line — the duplicate run was removed
# (padding twice with identical parameters is a no-op the second time).
from keras.preprocessing import sequence

max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words, value=0,
                                 padding='pre', truncating='post')
X_test = sequence.pad_sequences(X_test, maxlen=max_words, value=0,
                                padding='pre', truncating='post')
# Re-inspect the same sample review now that sequences are padded to 500
# tokens. NOTE(review): the original rebuilt word_to_id / id_to_word here
# byte-for-byte identically to the construction above; that redundant
# rebuild was removed — the dictionaries do not depend on the padding.
some_number = 31  # same sample index as before (was 4*8-1)
# Fix: space-joined so the decoded review is readable (was ''.join).
# Leading "<PAD>" tokens are expected after 'pre' padding.
review_as_text = ' '.join(id_to_word[idx] for idx in X_train[some_number])
print(X_train[some_number])
print(review_as_text)
print(len(X_train[some_number]))
class_label = 'Positive' if y_train[some_number] == 1 else 'Negative'
print('Review class:', y_train[some_number], f'({class_label})')
print(len(max(X_train, key=len)))  # every review is 500 after padding
print(len(min(X_train, key=len)))  # every review is 500 after padding

print("x_train: ", X_train)
print(X_train.shape)
print("x_test: ", X_test)
print(X_test.shape)

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dropout, Dense

# Binary sentiment classifier: Embedding -> LSTM -> Dropout -> sigmoid.
model = Sequential()
# Fix: embed only the vocabulary actually present in the data. load_data was
# called with num_words=vocabulary_size, so every token id is < 5000; the
# original input_dim=len(word_to_id) (~88k rows) allocated embedding rows
# that can never be indexed.
model.add(Embedding(input_dim=vocabulary_size, output_dim=32, input_length=500))
model.add(LSTM(units=90))
model.add(Dropout(rate=0.4))  # regularization before the output layer
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
H = model.fit(X_train, y_train, validation_split=0.1, epochs=5, batch_size=32)
model.summary()  # fix: summary() prints itself; print(...) printed None

scores = model.evaluate(X_test, y_test)
print("Loss: ", scores[0])  # fix: "LOss" typo in the output string
print("Accuracy: ", scores[1])

test_result = model.predict(X_test)
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class labels.
predicted_labels = [1 if p >= 0.5 else 0 for p in test_result[:, 0]]

from sklearn.metrics import classification_report
print(classification_report(y_test, predicted_labels, labels=[0, 1],
                            target_names=["Negative", "Positive"]))