fix: fix Lab4
Этот коммит содержится в:
@@ -130,11 +130,24 @@
|
|||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"print(X_train[26])\n",
|
||||||
|
"print('len:',len(X_train[26]))"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "Nu-Bs1jnaYhB"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
||||||
"print(review_as_text)"
|
"print(review_as_text)\n",
|
||||||
|
"print('len:',len(review_as_text))"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "JhTwTurtZ6Sp"
|
"id": "JhTwTurtZ6Sp"
|
||||||
@@ -217,11 +230,24 @@
|
|||||||
"id": "KzrVY1SR1DZh"
|
"id": "KzrVY1SR1DZh"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"print(X_train[26])\n",
|
||||||
|
"print('len:',len(X_train[26]))"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "vudlgqoCbjU1"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
||||||
"print(review_as_text)"
|
"print(review_as_text)\n",
|
||||||
|
"print('len:',len(review_as_text))"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "dbfkWjDI1Dp7"
|
"id": "dbfkWjDI1Dp7"
|
||||||
|
|||||||
@@ -41,7 +41,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "mr9IszuQ1ANG"
|
"id": "mr9IszuQ1ANG"
|
||||||
},
|
},
|
||||||
"execution_count": 6,
|
"execution_count": 26,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -58,9 +58,9 @@
|
|||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "o63-lKG_RuNc",
|
"id": "o63-lKG_RuNc",
|
||||||
"outputId": "4c6df484-af24-4e87-cdf4-8917253924b1"
|
"outputId": "40173ad5-1e1c-4d91-b297-c732c5db0e57"
|
||||||
},
|
},
|
||||||
"execution_count": 7,
|
"execution_count": 27,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -111,9 +111,9 @@
|
|||||||
"colab": {
|
"colab": {
|
||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"outputId": "7edbbdce-b2f6-4f0d-c0af-0b7af794d9b8"
|
"outputId": "cf28ed89-8490-4713-ea5c-7896c36e46d3"
|
||||||
},
|
},
|
||||||
"execution_count": 11,
|
"execution_count": 28,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -154,29 +154,56 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "9W3RklPcZyH0"
|
"id": "9W3RklPcZyH0"
|
||||||
},
|
},
|
||||||
"execution_count": 12,
|
"execution_count": 29,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"print(X_train[26])\n",
|
||||||
|
"print('len:',len(X_train[26]))"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "Nu-Bs1jnaYhB",
|
||||||
|
"outputId": "e4abd078-aea3-448f-b9aa-3b7142595f46"
|
||||||
|
},
|
||||||
|
"execution_count": 33,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"[1, 4, 78, 46, 304, 39, 2, 7, 968, 2, 295, 209, 101, 147, 65, 10, 10, 2643, 2, 497, 8, 30, 6, 147, 284, 5, 996, 174, 10, 10, 11, 4, 130, 4, 2, 4979, 11, 2, 10, 10, 2]\n",
|
||||||
|
"len: 41\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
||||||
"print(review_as_text)"
|
"print(review_as_text)\n",
|
||||||
|
"print('len:',len(review_as_text))"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"colab": {
|
"colab": {
|
||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "JhTwTurtZ6Sp",
|
"id": "JhTwTurtZ6Sp",
|
||||||
"outputId": "7f52f295-9195-4ce4-dedb-f69a2807c096"
|
"outputId": "1e65df22-05f7-4036-d3b1-7be8168a7c88"
|
||||||
},
|
},
|
||||||
"execution_count": 17,
|
"execution_count": 34,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"text": [
|
"text": [
|
||||||
"<START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK> tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>\n"
|
"<START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK> tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>\n",
|
||||||
|
"len: 193\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -201,9 +228,9 @@
|
|||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "xJH87ISq1B9h",
|
"id": "xJH87ISq1B9h",
|
||||||
"outputId": "8723c18f-9997-415d-f67e-e4644d4e81f5"
|
"outputId": "241e484b-d13f-462f-9bb3-214c81c00f77"
|
||||||
},
|
},
|
||||||
"execution_count": 19,
|
"execution_count": 35,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -236,7 +263,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "lrF-B2aScR4t"
|
"id": "lrF-B2aScR4t"
|
||||||
},
|
},
|
||||||
"execution_count": 29,
|
"execution_count": 36,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -259,9 +286,9 @@
|
|||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "81Cgq8dn9uL6",
|
"id": "81Cgq8dn9uL6",
|
||||||
"outputId": "0476b7c6-8255-4fe1-996c-7e7f1e260d04"
|
"outputId": "94d17c54-1e36-4036-d4a8-cb1c38a5c65a"
|
||||||
},
|
},
|
||||||
"execution_count": 23,
|
"execution_count": 37,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -282,26 +309,88 @@
|
|||||||
"id": "KzrVY1SR1DZh"
|
"id": "KzrVY1SR1DZh"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"print(X_train[26])\n",
|
||||||
|
"print('len:',len(X_train[26]))"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "vudlgqoCbjU1",
|
||||||
|
"outputId": "3292596a-45a0-49d1-a723-4398190ddb47"
|
||||||
|
},
|
||||||
|
"execution_count": 38,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
||||||
|
" 0 0 0 0 0 0 0 0 0 0 0 1 4 78\n",
|
||||||
|
" 46 304 39 2 7 968 2 295 209 101 147 65 10 10\n",
|
||||||
|
" 2643 2 497 8 30 6 147 284 5 996 174 10 10 11\n",
|
||||||
|
" 4 130 4 2 4979 11 2 10 10 2]\n",
|
||||||
|
"len: 500\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
"review_as_text = ' '.join(id_to_word[id] for id in X_train[26])\n",
|
||||||
"print(review_as_text)"
|
"print(review_as_text)\n",
|
||||||
|
"print('len:',len(review_as_text))"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"colab": {
|
"colab": {
|
||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "dbfkWjDI1Dp7",
|
"id": "dbfkWjDI1Dp7",
|
||||||
"outputId": "4b622bcb-f605-4988-af1b-018135847def"
|
"outputId": "7b53084e-40fc-47e6-ea72-4eb76d213e55"
|
||||||
},
|
},
|
||||||
"execution_count": 24,
|
"execution_count": 39,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"text": [
|
"text": [
|
||||||
"<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK> tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>\n"
|
"<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK> tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>\n",
|
||||||
|
"len: 2947\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -342,7 +431,7 @@
|
|||||||
"id": "7MqcG_wl1EHI",
|
"id": "7MqcG_wl1EHI",
|
||||||
"outputId": "b068f5d7-4f3a-41f8-90e8-4b2be5076170"
|
"outputId": "b068f5d7-4f3a-41f8-90e8-4b2be5076170"
|
||||||
},
|
},
|
||||||
"execution_count": 37,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -401,7 +490,7 @@
|
|||||||
"id": "ktWEeqWd1EyF",
|
"id": "ktWEeqWd1EyF",
|
||||||
"outputId": "01618308-359e-40c5-d675-bea723939cd5"
|
"outputId": "01618308-359e-40c5-d675-bea723939cd5"
|
||||||
},
|
},
|
||||||
"execution_count": 42,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "display_data",
|
"output_type": "display_data",
|
||||||
@@ -506,7 +595,7 @@
|
|||||||
"id": "CuPqKpX0kQfP",
|
"id": "CuPqKpX0kQfP",
|
||||||
"outputId": "297fa800-d027-4d99-cc37-16302d8190fb"
|
"outputId": "297fa800-d027-4d99-cc37-16302d8190fb"
|
||||||
},
|
},
|
||||||
"execution_count": 46,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -545,7 +634,7 @@
|
|||||||
"id": "hJIWinxymQjb",
|
"id": "hJIWinxymQjb",
|
||||||
"outputId": "71a3184a-4574-40ed-f456-8f18db1fd4bf"
|
"outputId": "71a3184a-4574-40ed-f456-8f18db1fd4bf"
|
||||||
},
|
},
|
||||||
"execution_count": 47,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -583,7 +672,7 @@
|
|||||||
"id": "Rya5ABT8msha",
|
"id": "Rya5ABT8msha",
|
||||||
"outputId": "6f69b050-952c-4fd8-f5a4-de52004fdf39"
|
"outputId": "6f69b050-952c-4fd8-f5a4-de52004fdf39"
|
||||||
},
|
},
|
||||||
"execution_count": 51,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -612,7 +701,7 @@
|
|||||||
"id": "2kHjcmnCmv0Y",
|
"id": "2kHjcmnCmv0Y",
|
||||||
"outputId": "2025617c-d67c-4c54-eda5-ee74671a41ab"
|
"outputId": "2025617c-d67c-4c54-eda5-ee74671a41ab"
|
||||||
},
|
},
|
||||||
"execution_count": 52,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@@ -655,7 +744,7 @@
|
|||||||
"id": "Kp4AQRbcmwAx",
|
"id": "Kp4AQRbcmwAx",
|
||||||
"outputId": "7ba7f48a-0f40-4708-87ac-ef85edaf4de0"
|
"outputId": "7ba7f48a-0f40-4708-87ac-ef85edaf4de0"
|
||||||
},
|
},
|
||||||
"execution_count": 56,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "display_data",
|
"output_type": "display_data",
|
||||||
|
|||||||
@@ -78,12 +78,21 @@ word_to_id["<UNUSED>"] = 3
|
|||||||
id_to_word = {value:key for key,value in word_to_id.items()}
|
id_to_word = {value:key for key,value in word_to_id.items()}
|
||||||
```
|
```
|
||||||
```python
|
```python
|
||||||
|
print(X_train[26])
|
||||||
|
print('len:',len(X_train[26]))
|
||||||
|
```
|
||||||
|
```
|
||||||
|
[1, 4, 78, 46, 304, 39, 2, 7, 968, 2, 295, 209, 101, 147, 65, 10, 10, 2643, 2, 497, 8, 30, 6, 147, 284, 5, 996, 174, 10, 10, 11, 4, 130, 4, 2, 4979, 11, 2, 10, 10, 2]
|
||||||
|
len: 41
|
||||||
|
```
|
||||||
|
```python
|
||||||
review_as_text = ' '.join(id_to_word[id] for id in X_train[26])
|
review_as_text = ' '.join(id_to_word[id] for id in X_train[26])
|
||||||
print(review_as_text)
|
print(review_as_text)
|
||||||
|
print('len:',len(review_as_text))
|
||||||
```
|
```
|
||||||
```
|
```
|
||||||
<START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK>
|
<START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK> tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>
|
||||||
tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>
|
len: 193
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -120,13 +129,58 @@ MIN Len: 500
|
|||||||
```
|
```
|
||||||
|
|
||||||
### 7) Повторили пункт 3. Сделали вывод о том, как отзыв преобразовался после предобработки.
|
### 7) Повторили пункт 3. Сделали вывод о том, как отзыв преобразовался после предобработки.
|
||||||
|
```python
|
||||||
|
print(X_train[26])
|
||||||
|
print('len:',len(X_train[26]))
|
||||||
|
```
|
||||||
|
```
|
||||||
|
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 1 4 78
|
||||||
|
46 304 39 2 7 968 2 295 209 101 147 65 10 10
|
||||||
|
2643 2 497 8 30 6 147 284 5 996 174 10 10 11
|
||||||
|
4 130 4 2 4979 11 2 10 10 2]
|
||||||
|
len: 500
|
||||||
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
review_as_text = ' '.join(id_to_word[id] for id in X_train[26])
|
review_as_text = ' '.join(id_to_word[id] for id in X_train[26])
|
||||||
print(review_as_text)
|
print(review_as_text)
|
||||||
|
print('len:',len(review_as_text))
|
||||||
```
|
```
|
||||||
```
|
```
|
||||||
<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK> tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>
|
<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <START> the bad out takes from <UNK> of fire <UNK> together without any real story br br dean <UNK> tries to be a real actor and fails again br br in the end the <UNK> quit in <UNK> br br <UNK>
|
||||||
|
len: 2947
|
||||||
```
|
```
|
||||||
#### После обработки в начало отзыва добавилось необходимое количество токенов <PAD>, чтобы отзыв был длинной в 500 индексов.
|
#### После обработки в начало отзыва добавилось необходимое количество токенов <PAD>, чтобы отзыв был длинной в 500 индексов.
|
||||||
|
|
||||||
|
|||||||
Ссылка в новой задаче
Block a user