Вы не можете выбрать более 25 тем
Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.
160 строк
5.5 KiB
Plaintext
160 строк
5.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import tensorflow as tf\n",
|
|
"#device_name=tf.test.gpu_device_name()\n",
|
|
"#if device_name != '/device:GPU:0':\n",
|
|
" #raise SystemError('GPUdevicenotfound')\n",
|
|
"#print('FoundGPUat:{}'.format(device_name))\n",
|
|
"#загрузкадатасета\n",
|
|
"from keras.datasets import imdb\n",
|
|
"vocabulary_size=5000\n",
|
|
"index_from=3\n",
|
|
"(X_train,y_train),(X_test,y_test)=imdb.load_data(path=\"imdb.npz\",\n",
|
|
" num_words=vocabulary_size,\n",
|
|
" skip_top=0,\n",
|
|
" maxlen=None,\n",
|
|
" seed=4*8 - 1,\n",
|
|
" start_char=1,\n",
|
|
" oov_char=2,\n",
|
|
" index_from=index_from)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(X_train.shape)\n",
|
|
"print(y_train.shape)\n",
|
|
"print(X_test.shape)\n",
|
|
"print(y_test.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"word_to_id=imdb.get_word_index()\n",
|
|
"#уточнениесловаря\n",
|
|
"word_to_id={key:(value + index_from) for key,value in word_to_id.items()}\n",
|
|
"word_to_id[\"<PAD>\"]=0\n",
|
|
"word_to_id[\"<START>\"]=1\n",
|
|
"word_to_id[\"<UNK>\"]=2\n",
|
|
"word_to_id[\"<UNUSED>\"]=3\n",
|
|
"#созданиеобратногословаря\"индекс:слово\"\n",
|
|
"id_to_word={value:key for key,value in word_to_id.items()}\n",
|
|
"some_number=4*8-1\n",
|
|
"review_as_text=''.join(id_to_word[id] for id in X_train[some_number])\n",
|
|
"print(X_train[some_number])\n",
|
|
"print(review_as_text)\n",
|
|
"print(len(X_train[some_number]))\n",
|
|
"if y_train[some_number] == 1:\n",
|
|
" class_label='Positive'\n",
|
|
"else:\n",
|
|
" class_label='Negative'\n",
|
|
"print('Review class:', y_train[some_number], f'({class_label})')\n",
|
|
"print(len(max(X_train, key=len)))\n",
|
|
"print(len(min(X_train, key=len)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"from keras.preprocessing import sequence\n",
|
|
"max_words=500\n",
|
|
"X_train=sequence.pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
|
|
"X_test=sequence.pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from keras.preprocessing import sequence\n",
|
|
"max_words=500\n",
|
|
"X_train=sequence.pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
|
|
"X_test=sequence.pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"word_to_id=imdb.get_word_index()\n",
|
|
"#уточнениесловаря\n",
|
|
"word_to_id={key:(value + index_from) for key,value in word_to_id.items()}\n",
|
|
"word_to_id[\"<PAD>\"]=0\n",
|
|
"word_to_id[\"<START>\"]=1\n",
|
|
"word_to_id[\"<UNK>\"]=2\n",
|
|
"word_to_id[\"<UNUSED>\"]=3\n",
|
|
"#созданиеобратногословаря\"индекс:слово\"\n",
|
|
"id_to_word={value:key for key,value in word_to_id.items()}\n",
|
|
"some_number=4*8-1\n",
|
|
"review_as_text=''.join(id_to_word[id] for id in X_train[some_number])\n",
|
|
"print(X_train[some_number])\n",
|
|
"print(review_as_text)\n",
|
|
"print(len(X_train[some_number]))\n",
|
|
"if y_train[some_number] == 1:\n",
|
|
" class_label='Positive'\n",
|
|
"else:\n",
|
|
" class_label='Negative'\n",
|
|
"print('Review class:', y_train[some_number], f'({class_label})')\n",
|
|
"print(len(max(X_train, key=len)))\n",
|
|
"print(len(min(X_train, key=len)))\n",
|
|
"\n",
|
|
"print(\"x_train: \", X_train)\n",
|
|
"print(X_train.shape)\n",
|
|
"print(\"x_test: \", X_test)\n",
|
|
"print(X_test.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from keras.models import Sequential\n",
|
|
"from keras.layers import Embedding, LSTM, Dropout, Dense\n",
|
|
"\n",
|
|
"model=Sequential()\n",
|
|
"model.add(Embedding(input_dim=len(word_to_id), output_dim=32, input_length=500))\n",
|
|
"model.add(LSTM(units=90))\n",
|
|
"model.add(Dropout(rate=0.4))\n",
|
|
"model.add(Dense(units=1, activation='sigmoid'))\n",
|
|
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
|
|
"H=model.fit(X_train, y_train, validation_split=0.1, epochs=5, batch_size=32)\n",
|
|
"print(model.summary())\n",
|
|
"scores=model.evaluate(X_test, y_test)\n",
|
|
"print(\"LOss: \", scores[0])\n",
|
|
"print(\"Accuracy: \", scores[1])\n",
|
|
"test_result=model.predict(X_test)\n",
|
|
"predicted_labels=[1 if test_result[i,0]>=0.5 else 0 for i in range(len(test_result))]\n",
|
|
"from sklearn.metrics import classification_report\n",
|
|
"print(classification_report(y_test, predicted_labels, labels=[0,1], target_names=[\"Negative\", \"Positive\"]))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|