Вы не можете выбрать более 25 тем
Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.
16 KiB
16 KiB
!pip install gensim
Requirement already satisfied: gensim in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (4.3.0)
[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: scipy>=1.7.0 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from gensim) (1.10.0)
Requirement already satisfied: numpy>=1.18.5 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from gensim) (1.23.3)
Requirement already satisfied: smart-open>=1.8.1 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from gensim) (6.3.0)
Requirement already satisfied: Cython==0.29.32 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from gensim) (0.29.32)
Requirement already satisfied: FuzzyTM>=0.4.0 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from gensim) (2.0.5)
Requirement already satisfied: pyfume in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from FuzzyTM>=0.4.0->gensim) (0.2.25)
Requirement already satisfied: pandas in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from FuzzyTM>=0.4.0->gensim) (1.5.3)
Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2022.7.1)
Requirement already satisfied: fst-pso in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (1.8.1)
Requirement already satisfied: simpful in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (2.10.0)
Requirement already satisfied: six>=1.5 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from python-dateutil>=2.8.1->pandas->FuzzyTM>=0.4.0->gensim) (1.16.0)
Requirement already satisfied: miniful in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from fst-pso->pyfume->FuzzyTM>=0.4.0->gensim) (0.0.6)
Requirement already satisfied: requests in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.28.1)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (1.26.12)
Requirement already satisfied: idna<4,>=2.5 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (3.4)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2022.9.14)
Requirement already satisfied: charset-normalizer<3,>=2 in c:\users\андрей\appdata\local\programs\python\python39\lib\site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.1.1)
import gensim.downloader
# Show the names of all pretrained models the gensim downloader offers.
# Iterating a dict yields its keys, so an explicit .keys() call is unnecessary.
print(list(gensim.downloader.info()['models']))
['fasttext-wiki-news-subwords-300', 'conceptnet-numberbatch-17-06-300', 'word2vec-ruscorpora-300', 'word2vec-google-news-300', 'glove-wiki-gigaword-50', 'glove-wiki-gigaword-100', 'glove-wiki-gigaword-200', 'glove-wiki-gigaword-300', 'glove-twitter-25', 'glove-twitter-50', 'glove-twitter-100', 'glove-twitter-200', '__testing_word2vec-matrix-synopsis']
Word2Vec
# Load the pretrained 'word2vec-ruscorpora-300' vectors through the gensim
# downloader; keys are lemmas tagged with part of speech (e.g. 'кот_NOUN').
w2v_vectors = gensim.downloader.load('word2vec-ruscorpora-300')
# Preview the first ten vocabulary entries (iterating the mapping yields keys).
list(w2v_vectors.key_to_index)[:10]
['весь_DET',
'человек_NOUN',
'мочь_VERB',
'год_NOUN',
'сказать_VERB',
'время_NOUN',
'говорить_VERB',
'становиться_VERB',
'знать_VERB',
'самый_DET']
# Look up the embedding vector stored for the key 'кот_NOUN'.
w2v_vectors['кот_NOUN']
array([-1.52633622e-01, -6.22178875e-02, 8.02985206e-02, 3.76203880e-02,
-8.09977110e-03, -6.56392053e-02, 5.08799739e-02, 6.61313012e-02,
1.57197528e-02, 9.78986733e-03, 4.73552682e-02, -2.55929027e-02,
1.05717339e-01, -2.22761724e-02, 5.41505031e-02, -3.82993110e-02,
5.26556484e-02, 1.17264939e-02, -1.46230776e-02, -1.95544884e-02,
5.04205190e-02, -3.71097960e-02, 3.57442684e-02, 4.96631972e-02,
-5.57994805e-02, -3.11674438e-02, -2.20739599e-02, 1.07113965e-01,
-9.91705209e-02, -4.57583293e-02, -9.96095166e-02, 4.80931476e-02,
-1.33494905e-03, 3.51430699e-02, 2.42795311e-02, -2.34595835e-02,
5.17160492e-03, -2.06816625e-02, 4.38127927e-02, -3.31711844e-02,
-2.07874626e-02, 6.72167316e-02, -7.74500072e-02, 2.93545369e-02,
-1.46178985e-02, 4.10723649e-02, 8.69638026e-02, -3.46537703e-03,
3.90354246e-02, -2.03978154e-03, 5.43198660e-02, 7.57279024e-02,
1.48434611e-02, 8.33871886e-02, -2.87217349e-02, -3.09202913e-03,
-7.93954656e-02, -2.82405037e-02, -1.64566293e-01, -1.17127458e-02,
-2.68191863e-02, -1.14840917e-01, 4.07641158e-02, -1.52551448e-02,
1.05389841e-01, -2.80199181e-02, -1.25609236e-02, 1.09363765e-01,
-1.05669824e-02, 1.92236323e-02, -2.05025654e-02, 3.25121842e-02,
3.57208811e-02, -2.52568591e-02, 2.24481337e-02, 5.09182140e-02,
6.63011149e-02, -6.96184263e-02, -5.87991485e-03, 3.19263488e-02,
2.67947633e-02, 5.35315834e-02, 5.44695035e-02, 2.58983169e-02,
-7.08631724e-02, 1.04762614e-01, -6.68804273e-02, -1.38250962e-02,
1.44148827e-01, 6.52979612e-02, 1.60416458e-02, -2.04468183e-02,
3.70856933e-02, -3.04988828e-02, 1.09351687e-01, 1.64980050e-02,
2.36458685e-02, -1.01091415e-02, -6.50116727e-02, -1.13031827e-01,
-1.19736008e-01, -5.59152151e-03, 1.64195765e-02, 8.24512169e-03,
-8.84061214e-03, 7.30062574e-02, 2.95458623e-04, 3.91627736e-02,
6.22012243e-02, 1.01540620e-02, -2.01074360e-03, 9.14960168e-03,
-2.40149889e-02, -7.16753602e-02, -8.49208906e-02, 5.45662642e-02,
2.19109673e-02, 9.25432891e-03, 2.24880818e-02, -3.62291490e-03,
8.57939944e-02, -5.56841269e-02, -1.16740711e-01, 2.33066957e-02,
-8.18690881e-02, -1.44955916e-02, 3.33725065e-02, 3.03953364e-02,
2.25391071e-02, -3.46978344e-02, -6.41057938e-02, 7.33885840e-02,
-2.90144072e-03, -2.75960714e-02, -2.21674796e-02, -3.96765396e-02,
-3.22195105e-02, 4.82296161e-02, 4.16103862e-02, 3.63796987e-02,
2.58319732e-02, 7.23602101e-02, 1.09503092e-03, 8.37009493e-03,
5.09082936e-02, -3.29718776e-02, -5.68303093e-02, 1.01079745e-02,
-8.52582380e-02, 1.99150909e-02, 2.33987775e-02, -3.49289179e-02,
-2.18948033e-02, -1.17089637e-02, 1.78485103e-02, -5.88125037e-03,
2.24573947e-02, -7.76379481e-02, -2.46963687e-02, 2.34957393e-02,
-7.47927353e-02, -3.52633633e-02, 6.65142164e-02, -2.21630055e-02,
9.85186771e-02, -4.27325964e-02, 2.38673016e-02, 3.69326621e-02,
5.19271940e-03, -4.75301892e-02, -1.99485421e-02, 2.70965626e-03,
-7.23582553e-03, 8.48396868e-02, 6.64435774e-02, -9.35326666e-02,
4.94468771e-02, 8.26572999e-02, -1.33822160e-02, -5.32249734e-03,
4.29970361e-02, 8.93590376e-02, -1.27462680e-02, 2.74799261e-02,
-3.33027355e-02, 4.35785688e-02, 4.56295535e-02, 3.17847766e-02,
-9.68080908e-02, -6.77153543e-02, -9.52497870e-02, -8.87092575e-03,
-4.08960059e-02, -5.09431772e-02, 2.54585221e-02, 5.80319017e-02,
5.08921407e-02, -5.23761436e-02, -2.77449843e-02, 7.23702163e-02,
-9.36738960e-03, 8.10077041e-03, 3.52279693e-02, -1.19305283e-01,
-3.82529870e-02, -8.29238147e-02, -8.81364495e-02, 1.62167493e-02,
2.68793292e-02, -3.83929200e-02, -2.57957950e-02, -1.86822563e-02,
-5.47099225e-02, -5.65230772e-02, -1.98926777e-02, 3.54687981e-02,
1.35690883e-01, 8.04331973e-02, 1.92622133e-02, 5.81734739e-02,
-5.02377190e-02, 2.47635460e-03, -5.33336513e-02, 4.08107415e-02,
1.18754342e-01, -7.40583912e-02, 7.48252273e-02, 1.46314219e-01,
6.73391623e-03, -1.98812839e-02, -2.93681423e-05, -2.12224070e-02,
1.70804688e-03, 3.52822542e-02, -1.65668026e-01, -4.84176865e-03,
1.21439025e-02, 8.64505395e-02, -1.57235548e-01, 7.75721148e-02,
5.35202436e-02, 1.17224073e-02, -7.53299072e-02, -3.44986990e-02,
-1.58868451e-02, 7.00481758e-02, 7.96044394e-02, -4.09048088e-02,
-1.46982130e-02, -1.24979429e-01, -4.20956686e-02, -8.43289569e-02,
-6.92764968e-02, 5.16316369e-02, 2.03369856e-02, -4.73499410e-02,
9.15571675e-02, -5.96052743e-02, 1.10012911e-01, 2.55208667e-02,
-8.69148783e-03, -7.76273850e-03, 4.98862900e-02, 9.31067672e-03,
-3.49833667e-02, 1.33375779e-01, 8.40289332e-03, -3.45170535e-02,
-3.47062238e-02, -9.73994732e-02, -2.54784450e-02, -1.39390659e-02,
-3.32783237e-02, 9.36794057e-02, 3.47191617e-02, 2.80651636e-02,
6.58571906e-03, 3.73428725e-02, -3.32412347e-02, -9.73492190e-02,
-7.07265735e-02, -7.01062232e-02, 3.67225669e-02, -2.62719765e-02,
5.82991205e-02, -7.42069781e-02, 1.66096780e-02, -8.83689746e-02,
-1.62591994e-01, 4.79482487e-02, 5.83929494e-02, -1.04699671e-01,
3.52650951e-03, 2.50546616e-02, 3.84298228e-02, -4.36684191e-02,
5.68282753e-02, 6.57160487e-03, -3.02405991e-02, 2.51490474e-02],
dtype=float32)
# Retrieve the entries most similar to 'кот_NOUN' with their similarity scores.
w2v_vectors.most_similar('кот_NOUN')
[('кошка_NOUN', 0.7570087909698486),
('котенок_NOUN', 0.6676193475723267),
('пес_NOUN', 0.5633267164230347),
('мяукать_VERB', 0.561974287033081),
('тобик_NOUN', 0.5586473941802979),
('фоксик_NOUN', 0.5572988986968994),
('собака_NOUN', 0.5567899942398071),
('мяучать_VERB', 0.5535756349563599),
('харлашка_NOUN', 0.551755428314209),
('котяра_NOUN', 0.5508568286895752)]
GloVe
glove_model = gensim.downloader.load("glove-twitter-25")  # load GloVe vectors
print(glove_model['cat'])  # word embedding for 'cat'
glove_model.most_similar("cat")  # show the words most similar to 'cat'
[-0.96419 -0.60978 0.67449 0.35113 0.41317 -0.21241 1.3796
0.12854 0.31567 0.66325 0.3391 -0.18934 -3.325 -1.1491
-0.4129 0.2195 0.8706 -0.50616 -0.12781 -0.066965 0.065761
0.43927 0.1758 -0.56058 0.13529 ]
[('dog', 0.9590820074081421),
('monkey', 0.920357882976532),
('bear', 0.9143136739730835),
('pet', 0.9108031392097473),
('girl', 0.8880629539489746),
('horse', 0.8872726559638977),
('kitty', 0.8870542049407959),
('puppy', 0.886769711971283),
('hot', 0.886525571346283),
('lady', 0.8845519423484802)]
# Similarity score between the embeddings of 'cat' and 'bus'.
glove_model.similarity('cat', 'bus')
0.60927683