Andrey 1 год назад
Родитель 72a7d78a92
Сommit a383f571b6

Двоичные данные
lections/OATD_lec_4.pdf

Двоичный файл не отображается.

Двоичные данные
lections/OATD_lec_4.pptx

Двоичный файл не отображается.

@ -10,18 +10,34 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Requirement already satisfied: gensim in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (4.3.0)\n", "Requirement already satisfied: gensim in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (4.3.0)"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.0.1 -> 24.0\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Requirement already satisfied: scipy>=1.7.0 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (1.10.0)\n",
"Requirement already satisfied: numpy>=1.18.5 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (1.23.3)\n", "Requirement already satisfied: numpy>=1.18.5 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (1.23.3)\n",
"Requirement already satisfied: FuzzyTM>=0.4.0 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (2.0.5)\n",
"Requirement already satisfied: Cython==0.29.32 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (0.29.32)\n",
"Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (6.3.0)\n", "Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (6.3.0)\n",
"Requirement already satisfied: scipy>=1.7.0 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (1.10.0)\n", "Requirement already satisfied: Cython==0.29.32 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (0.29.32)\n",
"Requirement already satisfied: FuzzyTM>=0.4.0 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from gensim) (2.0.5)\n",
"Requirement already satisfied: pyfume in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from FuzzyTM>=0.4.0->gensim) (0.2.25)\n", "Requirement already satisfied: pyfume in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from FuzzyTM>=0.4.0->gensim) (0.2.25)\n",
"Requirement already satisfied: pandas in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from FuzzyTM>=0.4.0->gensim) (1.5.3)\n", "Requirement already satisfied: pandas in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from FuzzyTM>=0.4.0->gensim) (1.5.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2.8.2)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2022.7.1)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2022.7.1)\n",
"Requirement already satisfied: simpful in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (2.10.0)\n",
"Requirement already satisfied: fst-pso in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (1.8.1)\n", "Requirement already satisfied: fst-pso in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (1.8.1)\n",
"Requirement already satisfied: simpful in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (2.10.0)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.8.1->pandas->FuzzyTM>=0.4.0->gensim) (1.16.0)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.8.1->pandas->FuzzyTM>=0.4.0->gensim) (1.16.0)\n",
"Requirement already satisfied: miniful in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from fst-pso->pyfume->FuzzyTM>=0.4.0->gensim) (0.0.6)\n", "Requirement already satisfied: miniful in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from fst-pso->pyfume->FuzzyTM>=0.4.0->gensim) (0.0.6)\n",
"Requirement already satisfied: requests in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.28.1)\n", "Requirement already satisfied: requests in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.28.1)\n",
@ -30,15 +46,6 @@
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2022.9.14)\n", "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2022.9.14)\n",
"Requirement already satisfied: charset-normalizer<3,>=2 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.1.1)\n" "Requirement already satisfied: charset-normalizer<3,>=2 in c:\\users\\андрей\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.1.1)\n"
] ]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip available: 22.2.2 -> 23.0.1\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
} }
], ],
"source": [ "source": [
@ -57,7 +64,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 3,
"id": "527c1b9a", "id": "527c1b9a",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -84,7 +91,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 4,
"id": "6f539977", "id": "6f539977",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -94,7 +101,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 5,
"id": "ca0e6543", "id": "ca0e6543",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -113,7 +120,7 @@
" 'самый_DET']" " 'самый_DET']"
] ]
}, },
"execution_count": 8, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -124,37 +131,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 6,
"id": "b7840d1e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('кошка_NOUN', 0.7570087909698486),\n",
" ('котенок_NOUN', 0.6676193475723267),\n",
" ('пес_NOUN', 0.5633267164230347),\n",
" ('мяукать_VERB', 0.561974287033081),\n",
" ('тобик_NOUN', 0.5586473941802979),\n",
" ('фоксик_NOUN', 0.5572988986968994),\n",
" ('собака_NOUN', 0.5567899942398071),\n",
" ('мяучать_VERB', 0.5535756349563599),\n",
" ('харлашка_NOUN', 0.551755428314209),\n",
" ('котяра_NOUN', 0.5508568286895752)]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"w2v_vectors.most_similar('кот_NOUN')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "de05204a", "id": "de05204a",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -239,7 +216,7 @@
" dtype=float32)" " dtype=float32)"
] ]
}, },
"execution_count": 13, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -248,6 +225,36 @@
"w2v_vectors['кот_NOUN']" "w2v_vectors['кот_NOUN']"
] ]
}, },
{
"cell_type": "code",
"execution_count": 7,
"id": "b7840d1e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('кошка_NOUN', 0.7570087909698486),\n",
" ('котенок_NOUN', 0.6676193475723267),\n",
" ('пес_NOUN', 0.5633267164230347),\n",
" ('мяукать_VERB', 0.561974287033081),\n",
" ('тобик_NOUN', 0.5586473941802979),\n",
" ('фоксик_NOUN', 0.5572988986968994),\n",
" ('собака_NOUN', 0.5567899942398071),\n",
" ('мяучать_VERB', 0.5535756349563599),\n",
" ('харлашка_NOUN', 0.551755428314209),\n",
" ('котяра_NOUN', 0.5508568286895752)]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"w2v_vectors.most_similar('кот_NOUN')"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "3f93b5f6", "id": "3f93b5f6",
@ -258,7 +265,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 8,
"id": "be870586", "id": "be870586",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -268,7 +275,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 9,
"id": "599d6406", "id": "599d6406",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -297,7 +304,7 @@
" ('lady', 0.8845519423484802)]" " ('lady', 0.8845519423484802)]"
] ]
}, },
"execution_count": 19, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -309,7 +316,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 10,
"id": "2db71cfb", "id": "2db71cfb",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -319,7 +326,7 @@
"0.60927683" "0.60927683"
] ]
}, },
"execution_count": 24, "execution_count": 10,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }

@ -594,27 +594,14 @@
" -0.34222007 1.78697008 -1.45404002 2.56643 -1.32184002\n", " -0.34222007 1.78697008 -1.45404002 2.56643 -1.32184002\n",
" -1.04677537 0.27867999 -12.95450976 -1.00809997 3.15975004\n", " -1.04677537 0.27867999 -12.95450976 -1.00809997 3.15975004\n",
" -0.52662008 1.93239999 -0.89686999 -0.60924001 1.51628\n", " -0.52662008 1.93239999 -0.89686999 -0.60924001 1.51628\n",
" -3.16624993 -0.89275002 1.86969995 -1.33607102 -2.23464306]\n", " -3.16624993 -0.89275002 1.86969995 -1.33607102 -2.23464306]\n"
"glove_data: 0 1 2 3 4 5 6 7 \\\n",
"0 -1.55058 -0.081683 0.279919 0.588469 -1.00551 -0.826139 6.18643 1.44845 \n",
"0 1.73610 0.742082 0.355460 -4.744110 1.41544 -0.342220 1.78697 -1.45404 \n",
"\n",
" 8 9 ... 15 16 17 18 19 \\\n",
"0 -0.71108 -1.14717 ... -0.430875 0.872347 -0.806399 0.27203 2.23922 \n",
"0 2.56643 -1.32184 ... -0.526620 1.932400 -0.896870 -0.60924 1.51628 \n",
"\n",
" 20 21 22 23 24 \n",
"0 -1.23572 -1.310711 -1.96934 -0.176410 -0.135300 \n",
"0 -3.16625 -0.892750 1.86970 -1.336071 -2.234643 \n",
"\n",
"[2 rows x 25 columns]\n"
] ]
}, },
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\Андрей\\AppData\\Local\\Temp\\ipykernel_29476\\129113310.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", "C:\\Users\\Андрей\\AppData\\Local\\Temp\\ipykernel_8524\\2010506005.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" glove_data=glove_data.append(pd.DataFrame([one_doc]))\n" " glove_data=glove_data.append(pd.DataFrame([one_doc]))\n"
] ]
} }
@ -623,7 +610,7 @@
"# Создадим датафрейм, в который будем сохранять вектор документа\n", "# Создадим датафрейм, в который будем сохранять вектор документа\n",
"glove_data=pd.DataFrame()\n", "glove_data=pd.DataFrame()\n",
"\n", "\n",
"# Пробегаем по каждой строке (по каждому документу)\n", "# Пробегаем по каждой строке датафрейма (по каждому документу)\n",
"for i in range(CV_text_data.shape[0]):\n", "for i in range(CV_text_data.shape[0]):\n",
" \n", " \n",
" # Вектор одного документа с размерностью glove-модели:\n", " # Вектор одного документа с размерностью glove-модели:\n",
@ -636,8 +623,7 @@
" print(word, ': ', glove_model[word])\n", " print(word, ': ', glove_model[word])\n",
" one_doc += glove_model[word]\n", " one_doc += glove_model[word]\n",
" print(text_data[i], ': ', one_doc)\n", " print(text_data[i], ': ', one_doc)\n",
" glove_data=glove_data.append(pd.DataFrame([one_doc])) \n", " glove_data=glove_data.append(pd.DataFrame([one_doc])) \n"
"print('glove_data: ', glove_data)"
] ]
}, },
{ {
@ -804,33 +790,6 @@
"glove_data\n" "glove_data\n"
] ]
}, },
{
"cell_type": "code",
"execution_count": 14,
"id": "cb6edbdf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1.73610002, 0.74208201, 0.35545996, -4.74411008,\n",
" 1.41543998, -0.34222007, 1.78697008, -1.45404002,\n",
" 2.56643 , -1.32184002, -1.04677537, 0.27867999,\n",
" -12.95450976, -1.00809997, 3.15975004, -0.52662008,\n",
" 1.93239999, -0.89686999, -0.60924001, 1.51628 ,\n",
" -3.16624993, -0.89275002, 1.86969995, -1.33607102,\n",
" -2.23464306])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"one_doc"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 15,
@ -1217,28 +1176,6 @@
"train_data_glove" "train_data_glove"
] ]
}, },
{
"cell_type": "code",
"execution_count": 16,
"id": "3a7ea7c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<1657x23297 sparse matrix of type '<class 'numpy.int64'>'\n",
"\twith 106580 stored elements in Compressed Sparse Row format>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data\n"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 17,

Загрузка…
Отмена
Сохранить