Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

160 строки
5.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from keras.datasets import imdb\n",
"\n",
"# Optional GPU availability check, left disabled:\n",
"# device_name = tf.test.gpu_device_name()\n",
"# if device_name != '/device:GPU:0':\n",
"#     raise SystemError('GPU device not found')\n",
"# print('Found GPU at: {}'.format(device_name))\n",
"\n",
"# Load the dataset.\n",
"# vocabulary_size and index_from are named because later cells reuse them.\n",
"vocabulary_size = 5000\n",
"index_from = 3\n",
"(X_train, y_train), (X_test, y_test) = imdb.load_data(\n",
"    path=\"imdb.npz\",\n",
"    num_words=vocabulary_size,\n",
"    skip_top=0,\n",
"    maxlen=None,\n",
"    seed=4*8 - 1,  # evaluates to 31\n",
"    start_char=1,\n",
"    oov_char=2,\n",
"    index_from=index_from,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the shape of the train/test inputs and labels, one shape per line.\n",
"print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Word -> index dictionary provided with the IMDB dataset.\n",
"word_to_id = imdb.get_word_index()\n",
"\n",
"# Refine the dictionary: shift every index by index_from (the same offset that was\n",
"# passed to imdb.load_data) and register the reserved service tokens.\n",
"word_to_id = {word: idx + index_from for word, idx in word_to_id.items()}\n",
"word_to_id[\"<PAD>\"] = 0\n",
"word_to_id[\"<START>\"] = 1\n",
"word_to_id[\"<UNK>\"] = 2\n",
"word_to_id[\"<UNUSED>\"] = 3\n",
"\n",
"# Build the reverse dictionary \"index: word\".\n",
"id_to_word = {idx: word for word, idx in word_to_id.items()}\n",
"\n",
"# Decode one sample review back into text.\n",
"some_number = 4*8 - 1\n",
"# Join with a space: an empty separator would glue all the words together.\n",
"review_as_text = ' '.join(id_to_word[token_id] for token_id in X_train[some_number])\n",
"print(X_train[some_number])\n",
"print(review_as_text)\n",
"print(len(X_train[some_number]))\n",
"\n",
"# Label 1 is reported as Positive, anything else as Negative.\n",
"class_label = 'Positive' if y_train[some_number] == 1 else 'Negative'\n",
"print('Review class:', y_train[some_number], f'({class_label})')\n",
"\n",
"# Lengths of the longest and the shortest review in the training set.\n",
"print(max(len(review) for review in X_train))\n",
"print(min(len(review) for review in X_train))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pad or truncate every review to exactly `max_words` tokens:\n",
"\n",
"```python\n",
"from keras.preprocessing import sequence\n",
"max_words=500\n",
"X_train=sequence.pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
"X_test=sequence.pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from keras.preprocessing import sequence\n",
"\n",
"max_words = 500\n",
"# Bring both splits to the same fixed length with identical settings:\n",
"# zeros are added at the front ('pre'), extra tokens are cut from the end ('post').\n",
"X_train, X_test = (\n",
"    sequence.pad_sequences(split, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
"    for split in (X_train, X_test)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Word -> index dictionary provided with the IMDB dataset.\n",
"word_to_id = imdb.get_word_index()\n",
"\n",
"# Refine the dictionary: shift every index by index_from (the same offset that was\n",
"# passed to imdb.load_data) and register the reserved service tokens.\n",
"word_to_id = {word: idx + index_from for word, idx in word_to_id.items()}\n",
"word_to_id[\"<PAD>\"] = 0\n",
"word_to_id[\"<START>\"] = 1\n",
"word_to_id[\"<UNK>\"] = 2\n",
"word_to_id[\"<UNUSED>\"] = 3\n",
"\n",
"# Build the reverse dictionary \"index: word\".\n",
"id_to_word = {idx: word for word, idx in word_to_id.items()}\n",
"\n",
"# Decode one sample review back into text.\n",
"some_number = 4*8 - 1\n",
"# Join with a space: an empty separator would glue all the words together.\n",
"review_as_text = ' '.join(id_to_word[token_id] for token_id in X_train[some_number])\n",
"print(X_train[some_number])\n",
"print(review_as_text)\n",
"print(len(X_train[some_number]))\n",
"\n",
"# Label 1 is reported as Positive, anything else as Negative.\n",
"class_label = 'Positive' if y_train[some_number] == 1 else 'Negative'\n",
"print('Review class:', y_train[some_number], f'({class_label})')\n",
"\n",
"# Lengths of the longest and the shortest row of X_train.\n",
"print(max(len(review) for review in X_train))\n",
"print(min(len(review) for review in X_train))\n",
"\n",
"# Dump both input arrays together with their shapes.\n",
"print(\"x_train: \", X_train)\n",
"print(X_train.shape)\n",
"print(\"x_test: \", X_test)\n",
"print(X_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from keras.layers import Dense, Dropout, Embedding, LSTM\n",
"from keras.models import Sequential\n",
"from sklearn.metrics import classification_report\n",
"\n",
"# Binary sentiment classifier: Embedding -> LSTM -> Dropout -> sigmoid unit.\n",
"# load_data(num_words=vocabulary_size) keeps token ids below vocabulary_size, so the\n",
"# embedding table needs only that many rows rather than one per entry of word_to_id.\n",
"# input_length follows max_words, the length the reviews were padded to.\n",
"model = Sequential()\n",
"model.add(Embedding(input_dim=vocabulary_size, output_dim=32, input_length=max_words))\n",
"model.add(LSTM(units=90))\n",
"model.add(Dropout(rate=0.4))\n",
"model.add(Dense(units=1, activation='sigmoid'))\n",
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
"\n",
"# Train, holding out 10% of the training data for validation.\n",
"H = model.fit(X_train, y_train, validation_split=0.1, epochs=5, batch_size=32)\n",
"\n",
"# summary() prints the table itself and returns None, so it is not wrapped in print().\n",
"model.summary()\n",
"\n",
"# Evaluate on the test split: scores[0] is the loss, scores[1] the accuracy metric.\n",
"scores = model.evaluate(X_test, y_test)\n",
"print(\"LOss: \", scores[0])\n",
"print(\"Accuracy: \", scores[1])\n",
"\n",
"# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.\n",
"test_result = model.predict(X_test)\n",
"predicted_labels = [1 if probability >= 0.5 else 0 for probability in test_result[:, 0]]\n",
"print(classification_report(y_test, predicted_labels, labels=[0,1], target_names=[\"Negative\", \"Positive\"]))"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}