Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

254 KiB

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics.pairwise import euclidean_distances

# Build a reproducible toy dataset: 100 samples scattered around 4 blob centers.
X, y = datasets.make_blobs(
    n_samples=100,
    centers=4,
    random_state=1,
)

# Plot the raw points (first column on the x-axis, second on the y-axis).
plt.scatter(x=X[:, 0], y=X[:, 1])
plt.show()

from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
import matplotlib.pyplot as plt
import pandas as pd
 

# Hierarchical clustering with scipy's linkage(), using complete linkage.
mergings = linkage(X, 'complete')

# Draw the dendrogram on a large figure so the merge tree stays readable.
plt.figure(figsize=(15, 10))
dendrogram(mergings)
plt.show()
 

# Cut the merge tree at distance threshold 7 to get one flat cluster label per sample.
T = fcluster(mergings, t=7, criterion='distance')
print(T)
[2 3 2 3 1 1 4 4 4 2 4 4 3 4 2 4 4 2 1 1 2 2 2 3 1 4 4 2 3 1 1 2 1 3 2 1 2
 4 3 2 2 4 4 3 2 1 2 3 2 1 3 1 1 3 4 2 1 1 2 3 4 1 3 3 4 2 4 3 4 1 3 2 2 4
 3 1 3 2 4 1 3 2 2 2 4 1 1 1 1 3 1 4 1 3 3 2 4 4 3 4]
# Scatter plot of the samples, coloured by their hierarchical cluster label.
plt.figure(figsize=(10, 10))
plt.scatter(x=X[:, 0], y=X[:, 1], c=T)
plt.show()

def update_cluster_centers(X, c, n_clusters=None):
    """Return the centroid (mean point) of every cluster.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data points, one per row.
    c : array-like of shape (n_samples,)
        Cluster label of each row of ``X``. Labels are 1-based: the points
        labelled ``k`` produce row ``k - 1`` of the result.
    n_clusters : int, optional
        Number of centroid rows to return. Defaults to the largest label
        found in ``c``.

    Returns
    -------
    numpy.ndarray of shape (n_clusters, n_features)
        Row ``k - 1`` holds the mean of the points labelled ``k``. A label
        that has no points yields a row of NaN.
    """
    X = np.asarray(X, dtype=float)
    c = np.asarray(c)
    if n_clusters is None:
        n_clusters = int(c.max()) if c.size else 0

    # Allocate the result here rather than writing into a module-level
    # array, so the function works without any pre-existing global state.
    centers = np.full((n_clusters, X.shape[1]), np.nan)
    for k in range(n_clusters):
        members = X[c == k + 1]
        # Skip empty clusters: the row stays NaN and no mean-of-empty
        # warning is raised.
        if len(members):
            centers[k] = members.mean(axis=0)
    return centers
# Start from an all-zero 4x2 centroid table, then replace it with the
# per-cluster means of the hierarchical labels T.
mu = np.array([[0.0, 0.0]] * 4)
mu = update_cluster_centers(X, T)
print(mu)
[[-1.56129966  4.21898437]
 [-7.07614521 -7.97322589]
 [-9.96347347 -3.48454825]
 [-5.80277079 -3.06555518]]
# Samples coloured by cluster label, with the centroids overlaid in red.
plt.figure(figsize=(10, 10))
plt.scatter(x=X[:, 0], y=X[:, 1], c=T)
plt.scatter(x=mu[:, 0], y=mu[:, 1], c='red', marker='o')
plt.show()

# Row indices of the samples assigned to cluster 1 (np.where returns a 1-tuple).
c1 = np.where(T == 1)
print('objects of cluster #i:', c1)

# Mean of the first coordinate over the members of cluster 1.
s = X[c1, 0].mean()
print(s)
print('Dataset: ', X)

print('centroids: ', mu)
print('Number of centroids:', mu.shape[0])
print('Number of obj in cluster #i:', len(c1[0]))
objects of cluster #i: (array([ 4,  5, 18, 19, 24, 29, 30, 32, 35, 45, 49, 51, 52, 56, 57, 61, 69,
       75, 79, 85, 86, 87, 88, 90, 92], dtype=int64),)
-1.5612996617417532
Dataset:  [[-8.92286405e+00 -6.91706407e+00]
 [-1.02200406e+01 -4.15410662e+00]
 [-6.19599603e+00 -7.40281646e+00]
 [-8.86608312e+00 -2.43353173e+00]
 [ 8.52518583e-02  3.64528297e+00]
 [-7.57969185e-01  4.90898421e+00]
 [-5.11445720e+00 -2.71929274e+00]
 [-4.94333866e+00 -3.37609332e+00]
 [-6.41124052e+00 -3.20783965e+00]
 [-6.26302115e+00 -8.10666081e+00]
 [-5.28846058e+00 -2.87525156e+00]
 [-6.92171246e+00 -2.18729857e+00]
 [-8.79879462e+00 -3.76819213e+00]
 [-6.25738636e+00 -4.21080419e+00]
 [-5.79112625e+00 -6.18612636e+00]
 [-5.99413070e+00 -3.16189816e+00]
 [-5.77161096e+00 -1.79555958e+00]
 [-6.98706106e+00 -7.53484784e+00]
 [-2.33080604e+00  4.39382527e+00]
 [-1.60875215e+00  3.76949422e+00]
 [-6.21872154e+00 -9.01274405e+00]
 [-8.43799950e+00 -7.83806871e+00]
 [-7.10357769e+00 -9.76900046e+00]
 [-9.83767543e+00 -3.07717963e+00]
 [-1.97451969e-01  2.34634916e+00]
 [-5.71203468e+00 -2.84804837e+00]
 [-3.74647007e+00 -3.33742024e+00]
 [-6.02605758e+00 -5.96624846e+00]
 [-1.06177133e+01 -3.25531651e+00]
 [-2.80207810e+00  4.05714715e+00]
 [-1.98197711e+00  4.02243551e+00]
 [-7.25145196e+00 -8.25497398e+00]
 [-2.35122066e+00  4.00973634e+00]
 [-1.02066067e+01 -3.36672536e+00]
 [-6.83238762e+00 -7.47067670e+00]
 [-2.40671820e+00  6.09894447e+00]
 [-6.27207532e+00 -8.77675883e+00]
 [-5.77958445e+00 -3.26348862e+00]
 [-1.03729975e+01 -4.59207895e+00]
 [-5.94346448e+00 -7.74432757e+00]
 [-8.29494032e+00 -7.60269061e+00]
 [-5.46384410e+00 -2.04434336e+00]
 [-4.37013706e+00 -1.97772876e+00]
 [-9.57421815e+00 -3.87600848e+00]
 [-6.32432573e+00 -9.10692871e+00]
 [-5.25790464e-01  3.30659860e+00]
 [-6.54430585e+00 -9.29756949e+00]
 [-1.06683748e+01 -3.57578476e+00]
 [-6.71433620e+00 -9.46551152e+00]
 [-7.58703957e-01  3.72276201e+00]
 [-1.14418263e+01 -4.45781441e+00]
 [ 2.42271161e-04  5.14853403e+00]
 [-1.92744799e+00  4.93684534e+00]
 [-9.81115111e+00 -3.54329690e+00]
 [-6.02599661e+00 -3.38542661e+00]
 [-7.37499896e+00 -1.05880659e+01]
 [-1.34052081e+00  4.15711949e+00]
 [-1.35938959e+00  4.05424002e+00]
 [-8.26415022e+00 -7.28988279e+00]
 [-1.04448411e+01 -2.72884084e+00]
 [-4.08409604e+00 -4.98514638e+00]
 [-2.76017908e+00  5.55121358e+00]
 [-9.50919436e+00 -4.02892026e+00]
 [-9.79941278e+00 -3.83433990e+00]
 [-6.23112678e+00 -3.31509970e+00]
 [-5.43723143e+00 -7.81521641e+00]
 [-7.54735453e+00 -2.77523774e+00]
 [-9.48478268e+00 -4.25144138e+00]
 [-6.38524280e+00 -3.70614752e+00]
 [-1.61734616e+00  4.98930508e+00]
 [-1.03415662e+01 -3.90975169e+00]
 [-8.15979403e+00 -7.98384567e+00]
 [-7.33110069e+00 -8.12061356e+00]
 [-6.83710120e+00 -1.13390738e+00]
 [-9.71212518e+00 -3.06820738e+00]
 [-1.85139546e+00  3.51886090e+00]
 [-1.03039165e+01 -3.12537390e+00]
 [-6.53541686e+00 -8.01552689e+00]
 [-5.59472593e+00 -3.40858706e+00]
 [-7.94152277e-01  2.10495117e+00]
 [-9.59422086e+00 -3.35977002e+00]
 [-7.81213710e+00 -5.34984488e+00]
 [-7.24580249e+00 -8.75714873e+00]
 [-9.68207756e+00 -5.97554976e+00]
 [-5.67247649e+00 -2.66850326e+00]
 [-2.34673261e+00  3.56128423e+00]
 [-1.83198811e+00  3.52863145e+00]
 [-1.53940095e+00  5.02369298e+00]
 [-1.78245013e+00  3.47072043e+00]
 [-9.76761777e+00 -3.19133737e+00]
 [-2.77687025e+00  4.64090557e+00]
 [-5.61574598e+00 -4.71622380e+00]
 [-1.46864442e+00  6.50674501e+00]
 [-9.15872909e+00 -3.02224647e+00]
 [-9.87589123e+00 -2.82386464e+00]
 [-7.08949914e+00 -8.92838972e+00]
 [-5.83343133e+00 -3.18894069e+00]
 [-6.79189023e+00 -4.08581229e+00]
 [-1.07521104e+01 -2.70048039e+00]
 [-6.67567396e+00 -2.26477984e+00]]
centroids:  [[-1.56129966  4.21898437]
 [-7.07614521 -7.97322589]
 [-9.96347347 -3.48454825]
 [-5.80277079 -3.06555518]]
Number of centroids: 4
Number of obj in cluster #i: 25
# Sum of squared distances from every object to the centroid of its cluster.

print(c1)

print(np.shape(c1)[1])

cluster_dist = 0
for j, centroid in enumerate(mu):
    # euclidean_distances expects 2-D inputs, hence the (1, n_features) reshape.
    centroid_row = centroid.reshape(1, -1)
    summ = 0
    # Cluster labels in T are 1-based, so row j of mu belongs to label j + 1.
    for point in X[T == j + 1]:
        summ = summ + euclidean_distances(centroid_row, point.reshape(1, -1)) ** 2
    cluster_dist = cluster_dist + summ
    print(j,' custer dist: ', summ)
print ("Summary of squared cluster dist: ", cluster_dist)
(array([ 4,  5, 18, 19, 24, 29, 30, 32, 35, 45, 49, 51, 52, 56, 57, 61, 69,
       75, 79, 85, 86, 87, 88, 90, 92], dtype=int64),)
25
0  custer dist:  [[43.11598405]]
1  custer dist:  [[67.14489804]]
2  custer dist:  [[16.25475117]]
3  custer dist:  [[38.36151505]]
Summary of squared cluster dist:  [[164.87714831]]

K-means

from sklearn.cluster import KMeans
# Fit K-means with 3 clusters. n_init is pinned to 10 (the default in the
# installed sklearn) so the FutureWarning is not emitted and the behaviour
# does not change when the library default becomes 'auto'.
model = KMeans(n_clusters=3, n_init=10)
model.fit(X)

# Cluster index assigned to every sample of X.
all_predictions = model.predict(X)
print(all_predictions)
[2 0 2 0 1 1 0 0 0 2 0 0 0 0 2 0 0 2 1 1 2 2 2 0 1 0 0 2 0 1 1 2 1 0 2 1 2
 0 0 2 2 0 0 0 2 1 2 0 2 1 0 1 1 0 0 2 1 1 2 0 0 1 0 0 0 2 0 0 0 1 0 2 2 0
 0 1 0 2 0 1 0 0 2 0 0 1 1 1 1 0 1 0 1 0 0 2 0 0 0 0]
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
# Samples coloured by the K-means cluster they were assigned to.
plt.scatter(x=X[:, 0], y=X[:, 1], c=all_predictions)
plt.show()

# inertia_ is the fitted model's within-cluster sum of squared distances.
print(
    'Sum of squared distances of samples to their closest cluster center.:',
    model.inertia_,
)
Sum of squared distances of samples to their closest cluster center.: 766.1594927527818
# Inertia for k = 1..9, used for the elbow plot.
# n_init is pinned to 10 (the default in the installed sklearn) so the
# FutureWarning is not emitted on every iteration and results stay the same
# when the library default becomes 'auto'.
inertia = []
for k in range(1, 10):
    # `kmeans` deliberately stays bound after the loop: the last model
    # (k = 9) is reused further down for plotting.
    kmeans = KMeans(n_clusters=k, random_state=1, n_init=10).fit(X)
    inertia.append(kmeans.inertia_)
print(inertia)
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
C:\Users\Андрей\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
[3015.7692276196085, 766.1594927527818, 369.5556880065657, 158.76686919578418, 136.07776106430828, 119.41082903949317, 102.06541733084515, 90.40896447881985, 79.46925530063092]
# Elbow plot: inertia as a function of the number of clusters k = 1..9.
plt.figure(figsize=(5, 5))
plt.plot(range(1, 10), inertia, marker='s')

# show must be called; the bare name `plt.show` only evaluates to the
# function object and displays nothing.
plt.show()
<function matplotlib.pyplot.show(close=None, block=None)>

# Colour the samples with the labels predicted by `kmeans`, the model left
# over from the last iteration of the inertia loop.
plt.scatter(x=X[:, 0], y=X[:, 1], c=kmeans.predict(X))
plt.show()