diff --git a/labworks/LW4/lab4.ipynb b/labworks/LW4/lab4.ipynb index 67fe187..915bf9a 100644 --- a/labworks/LW4/lab4.ipynb +++ b/labworks/LW4/lab4.ipynb @@ -130,11 +130,24 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "code", + "source": [ + "print(X_train[26])\n", + "print('len:',len(X_train[26]))" + ], + "metadata": { + "id": "Nu-Bs1jnaYhB" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ "review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n", - "print(review_as_text)" + "print(review_as_text)\n", + "print('len:',len(review_as_text))" ], "metadata": { "id": "JhTwTurtZ6Sp" @@ -217,11 +230,24 @@ "id": "KzrVY1SR1DZh" } }, + { + "cell_type": "code", + "source": [ + "print(X_train[26])\n", + "print('len:',len(X_train[26]))" + ], + "metadata": { + "id": "vudlgqoCbjU1" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ "review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n", - "print(review_as_text)" + "print(review_as_text)\n", + "print('len:',len(review_as_text))" ], "metadata": { "id": "dbfkWjDI1Dp7" diff --git a/labworks/LW4/notebook с полными выводами/lab4_full.ipynb b/labworks/LW4/notebook с полными выводами/lab4_full.ipynb index 66b4e90..af3b3d0 100644 --- a/labworks/LW4/notebook с полными выводами/lab4_full.ipynb +++ b/labworks/LW4/notebook с полными выводами/lab4_full.ipynb @@ -41,7 +41,7 @@ "metadata": { "id": "mr9IszuQ1ANG" }, - "execution_count": 6, + "execution_count": 26, "outputs": [] }, { @@ -58,9 +58,9 @@ "base_uri": "https://localhost:8080/" }, "id": "o63-lKG_RuNc", - "outputId": "4c6df484-af24-4e87-cdf4-8917253924b1" + "outputId": "40173ad5-1e1c-4d91-b297-c732c5db0e57" }, - "execution_count": 7, + "execution_count": 27, "outputs": [ { "output_type": "stream", @@ -111,9 +111,9 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "7edbbdce-b2f6-4f0d-c0af-0b7af794d9b8" + "outputId": "cf28ed89-8490-4713-ea5c-7896c36e46d3" }, - "execution_count": 11, + "execution_count": 28, "outputs": [ { "output_type": "stream", @@ -154,29 +154,56 @@ "metadata": { "id": "9W3RklPcZyH0" }, - "execution_count": 12, + "execution_count": 29, "outputs": [] }, + { + "cell_type": "code", + "source": [ + "print(X_train[26])\n", + "print('len:',len(X_train[26]))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Nu-Bs1jnaYhB", + "outputId": "e4abd078-aea3-448f-b9aa-3b7142595f46" + }, + "execution_count": 33, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1, 4, 78, 46, 304, 39, 2, 7, 968, 2, 295, 209, 101, 147, 65, 10, 10, 2643, 2, 497, 8, 30, 6, 147, 284, 5, 996, 174, 10, 10, 11, 4, 130, 4, 2, 4979, 11, 2, 10, 10, 2]\n", + "len: 41\n" + ] + } + ] + }, { "cell_type": "code", "source": [ "review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n", - "print(review_as_text)" + "print(review_as_text)\n", + "print('len:',len(review_as_text))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JhTwTurtZ6Sp", - "outputId": "7f52f295-9195-4ce4-dedb-f69a2807c096" + "outputId": "1e65df22-05f7-4036-d3b1-7be8168a7c88" }, - "execution_count": 17, + "execution_count": 34, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - " the bad out takes from of fire together without any real story br br dean tries to be a real actor and fails again br br in the end the quit in br br \n" + " the bad out takes from of fire together without any real story br br dean tries to be a real actor and fails again br br in the end the quit in br br \n", + "len: 193\n" ] } ] @@ -201,9 +228,9 @@ "base_uri": "https://localhost:8080/" }, "id": "xJH87ISq1B9h", - "outputId": "8723c18f-9997-415d-f67e-e4644d4e81f5" + "outputId": "241e484b-d13f-462f-9bb3-214c81c00f77" }, - "execution_count": 19, + "execution_count": 35, "outputs": [ { "output_type": "stream", @@ -236,7 +263,7 @@ "metadata": { "id": "lrF-B2aScR4t" }, - "execution_count": 29, + "execution_count": 36, "outputs": [] }, { @@ -259,9 +286,9 @@ "base_uri": "https://localhost:8080/" }, "id": "81Cgq8dn9uL6", - "outputId": "0476b7c6-8255-4fe1-996c-7e7f1e260d04" + "outputId": "94d17c54-1e36-4036-d4a8-cb1c38a5c65a" }, - "execution_count": 23, + "execution_count": 37, "outputs": [ { "output_type": "stream", @@ -282,26 +309,88 @@ "id": "KzrVY1SR1DZh" } }, + { + "cell_type": "code", + "source": [ + "print(X_train[26])\n", + "print('len:',len(X_train[26]))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vudlgqoCbjU1", + "outputId": "3292596a-45a0-49d1-a723-4398190ddb47" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 1 4 78\n", + " 46 304 39 2 7 968 2 295 209 101 147 65 10 10\n", + " 2643 2 497 8 30 6 147 284 5 996 174 10 10 11\n", + " 4 130 4 2 4979 11 2 10 10 2]\n", + "len: 500\n" + ] + } + ] + }, { "cell_type": "code", "source": [ "review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n", - "print(review_as_text)" + "print(review_as_text)\n", + "print('len:',len(review_as_text))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dbfkWjDI1Dp7", - "outputId": "4b622bcb-f605-4988-af1b-018135847def" + "outputId": "7b53084e-40fc-47e6-ea72-4eb76d213e55" }, - "execution_count": 24, + "execution_count": 39, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - " the bad out takes from of fire together without any real story br br dean tries to be a real actor and fails again br br in the end the quit in br br \n" + " the bad out takes from of fire together without any real story br br dean tries to be a real actor and fails again br br in the end the quit in br br \n", + "len: 2947\n" ] } ] @@ -342,7 +431,7 @@ "id": "7MqcG_wl1EHI", "outputId": "b068f5d7-4f3a-41f8-90e8-4b2be5076170" }, - "execution_count": 37, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -401,7 +490,7 @@ "id": "ktWEeqWd1EyF", "outputId": "01618308-359e-40c5-d675-bea723939cd5" }, - "execution_count": 42, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -506,7 +595,7 @@ "id": "CuPqKpX0kQfP", "outputId": "297fa800-d027-4d99-cc37-16302d8190fb" }, - "execution_count": 46, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -545,7 +634,7 @@ "id": "hJIWinxymQjb", "outputId": "71a3184a-4574-40ed-f456-8f18db1fd4bf" }, - "execution_count": 47, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -583,7 +672,7 @@ "id": "Rya5ABT8msha", "outputId": "6f69b050-952c-4fd8-f5a4-de52004fdf39" }, - "execution_count": 51, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -612,7 +701,7 @@ "id": "2kHjcmnCmv0Y", "outputId": "2025617c-d67c-4c54-eda5-ee74671a41ab" }, - "execution_count": 52, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -655,7 +744,7 @@ "id": "Kp4AQRbcmwAx", "outputId": "7ba7f48a-0f40-4708-87ac-ef85edaf4de0" }, - "execution_count": 56, + "execution_count": null, "outputs": [ { "output_type": "display_data", diff --git a/labworks/LW4/report.md b/labworks/LW4/report.md index 39d9eea..5dd0a94 100644 --- a/labworks/LW4/report.md +++ b/labworks/LW4/report.md @@ -78,12 +78,21 @@ word_to_id[""] = 3 id_to_word = {value:key for key,value in word_to_id.items()} ``` ```python +print(X_train[26]) +print('len:',len(X_train[26])) +``` +``` +[1, 4, 78, 46, 304, 39, 2, 7, 968, 2, 295, 209, 101, 147, 65, 10, 10, 2643, 2, 497, 8, 30, 6, 147, 284, 5, 996, 174, 10, 10, 11, 4, 130, 4, 2, 4979, 11, 2, 10, 10, 2] +len: 41 +``` +```python review_as_text = ' '.join(id_to_word[id] for id in X_train[26]) print(review_as_text) +print('len:',len(review_as_text)) ``` ``` - the bad out takes from of fire together without any real story br br dean -tries to be a real actor and fails again br br in the end the quit in br br + the bad out takes from of fire together without any real story br br dean tries to be a real actor and fails again br br in the end the quit in br br +len: 193 ``` @@ -120,13 +129,58 @@ MIN Len: 500 ``` ### 7) Повторили пункт 3. Сделали вывод о том, как отзыв преобразовался после предобработки. +```python +print(X_train[26]) +print('len:',len(X_train[26])) +``` +``` +[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 1 4 78 + 46 304 39 2 7 968 2 295 209 101 147 65 10 10 + 2643 2 497 8 30 6 147 284 5 996 174 10 10 11 + 4 130 4 2 4979 11 2 10 10 2] +len: 500 +``` ```python review_as_text = ' '.join(id_to_word[id] for id in X_train[26]) print(review_as_text) +print('len:',len(review_as_text)) ``` ``` the bad out takes from of fire together without any real story br br dean tries to be a real actor and fails again br br in the end the quit in br br +len: 2947 ``` #### После обработки в начало отзыва добавилось необходимое количество токенов , чтобы отзыв был длинной в 500 индексов.