{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from keras.datasets import imdb\n",
    "from keras.layers import Embedding, LSTM, Dropout, Dense\n",
    "from keras.models import Sequential\n",
    "from keras.preprocessing import sequence\n",
    "from sklearn.metrics import classification_report\n",
    "\n",
    "# Optional GPU sanity check (left disabled, as in the original notebook):\n",
    "# device_name = tf.test.gpu_device_name()\n",
    "# if device_name != '/device:GPU:0':\n",
    "#     raise SystemError('GPU device not found')\n",
    "# print('Found GPU at: {}'.format(device_name))\n",
    "\n",
    "# Load the dataset.\n",
    "# vocabulary_size is passed as num_words and index_from is the offset added to\n",
    "# every raw word index; both are reused by later cells.\n",
    "vocabulary_size = 5000\n",
    "index_from = 3\n",
    "(X_train, y_train), (X_test, y_test) = imdb.load_data(\n",
    "    path=\"imdb.npz\",\n",
    "    num_words=vocabulary_size,\n",
    "    skip_top=0,\n",
    "    maxlen=None,\n",
    "    seed=4*8 - 1,\n",
    "    start_char=1,\n",
    "    oov_char=2,\n",
    "    index_from=index_from,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(X_train.shape)\n",
    "print(y_train.shape)\n",
    "print(X_test.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_to_id = imdb.get_word_index()\n",
    "# Adjust the dictionary: shift every index by index_from so it lines up with\n",
    "# the ids returned by load_data(index_from=index_from).\n",
    "word_to_id = {key: (value + index_from) for key, value in word_to_id.items()}\n",
    "# Reserved ids 0-3. Each one needs its own distinct key: assigning the same key\n",
    "# four times keeps only the last value and leaves ids 0, 1 and 2 undecodable.\n",
    "word_to_id[\"[PAD]\"] = 0\n",
    "word_to_id[\"[START]\"] = 1\n",
    "word_to_id[\"[OOV]\"] = 2\n",
    "word_to_id[\"[UNUSED]\"] = 3\n",
    "# Build the reverse dictionary \"index: word\".\n",
    "id_to_word = {value: key for key, value in word_to_id.items()}\n",
    "\n",
    "\n",
    "def describe_review(reviews, labels, index):\n",
    "    \"\"\"Print one review as ids, as decoded text, its length and its class.\n",
    "\n",
    "    Args:\n",
    "        reviews: indexable collection of token-id sequences.\n",
    "        labels: indexable collection of labels; 1 is reported as Positive,\n",
    "            anything else as Negative.\n",
    "        index: position of the review to show.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (decoded_text, class_label).\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if the review contains an id missing from id_to_word.\n",
    "    \"\"\"\n",
    "    # Join with a space so that the decoded words stay readable.\n",
    "    text = ' '.join(id_to_word[token_id] for token_id in reviews[index])\n",
    "    label = 'Positive' if labels[index] == 1 else 'Negative'\n",
    "    print(reviews[index])\n",
    "    print(text)\n",
    "    print(len(reviews[index]))\n",
    "    print('Review class:', labels[index], f'({label})')\n",
    "    return text, label\n",
    "\n",
    "\n",
    "some_number = 4*8 - 1\n",
    "review_as_text, class_label = describe_review(X_train, y_train, some_number)\n",
    "# Length of the longest and of the shortest training review.\n",
    "print(len(max(X_train, key=len)))\n",
    "print(len(min(X_train, key=len)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pad the reviews to a fixed length\n",
    "\n",
    "The next cell runs `sequence.pad_sequences` on `X_train` and `X_test` with `maxlen=max_words` (500), `value=0`, `padding='pre'` and `truncating='post'`: shorter reviews get zeros added at the front, longer reviews are cut at the end."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_words = 500\n",
    "X_train = sequence.pad_sequences(X_train, maxlen=max_words, value=0, padding='pre', truncating='post')\n",
    "X_test = sequence.pad_sequences(X_test, maxlen=max_words, value=0, padding='pre', truncating='post')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the same review again, now in its padded form. The id/word mapping\n",
    "# built earlier is reused instead of being rebuilt.\n",
    "review_as_text, class_label = describe_review(X_train, y_train, some_number)\n",
    "print(len(max(X_train, key=len)))\n",
    "print(len(min(X_train, key=len)))\n",
    "\n",
    "print(\"x_train: \", X_train)\n",
    "print(X_train.shape)\n",
    "print(\"x_test: \", X_test)\n",
    "print(X_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "# load_data(num_words=vocabulary_size) keeps every token id below\n",
    "# vocabulary_size, so the embedding table needs exactly that many rows rather\n",
    "# than one row per entry of the full word index.\n",
    "model.add(Embedding(input_dim=vocabulary_size, output_dim=32, input_length=max_words))\n",
    "model.add(LSTM(units=90))\n",
    "model.add(Dropout(rate=0.4))\n",
    "model.add(Dense(units=1, activation='sigmoid'))\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "H = model.fit(X_train, y_train, validation_split=0.1, epochs=5, batch_size=32)\n",
    "# summary() prints the table itself and returns None, so it is not wrapped in print().\n",
    "model.summary()\n",
    "scores = model.evaluate(X_test, y_test)\n",
    "print(\"LOss: \", scores[0])\n",
    "print(\"Accuracy: \", scores[1])\n",
    "test_result = model.predict(X_test)\n",
    "# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.\n",
    "predicted_labels = (test_result[:, 0] >= 0.5).astype(int)\n",
    "print(classification_report(y_test, predicted_labels, labels=[0, 1], target_names=[\"Negative\", \"Positive\"]))"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}