"""Cluster MLB player measurements with K-means on 2-D t-SNE embeddings.

For each perplexity in ``best_perplexities`` the script embeds the
height/weight/age features with t-SNE, clusters the embedding with K-means,
computes the silhouette score of that clustering, shows a scatter plot and
prints a one-line summary.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score

# Number of K-means clusters and the seed shared by t-SNE and K-means.
N_CLUSTERS = 3
RANDOM_STATE = 42

# Load the MLB dataset (tab-separated) and give its columns short names.
mlb_data = pd.read_csv('SOCR_Data_MLB_HeightsWeights.txt', sep='\t')
mlb_data.columns = ['Name', 'Team', 'Position', 'Height', 'Weight', 'Age']

# Keep only the numeric features, as floats, under descriptive column names.
# Rows with a missing measurement are dropped: scikit-learn estimators reject
# NaN input, so a single incomplete row would otherwise abort the whole run.
mlb_pd = (
    mlb_data[['Height', 'Weight', 'Age']]
    .astype(float)
    .rename(columns={
        'Height': 'Height (inches)',
        'Weight': 'Weight (pounds)',
        'Age': 'Age (years)',
    })
    .dropna()
    .reset_index(drop=True)
)
# NOTE(review): the three features are in different units and are passed to
# t-SNE unscaled; consider standardizing them first. Left unchanged here so
# that the embeddings and scores stay comparable with earlier runs.
data = mlb_pd.to_numpy()

# Perplexity values selected for the cluster analysis.
best_perplexities = [10, 30, 60, 90]

for perplexity in best_perplexities:
    # Project the features to 2-D with t-SNE.
    data_embedded = TSNE(
        n_components=2,
        learning_rate='auto',
        init='random',
        perplexity=perplexity,
        random_state=RANDOM_STATE,
    ).fit_transform(data)

    # Cluster the embedded points (not the raw features) with K-means.
    kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=RANDOM_STATE, n_init=10)
    labels = kmeans.fit_predict(data_embedded)

    # Silhouette score of the clustering, measured in the embedded space.
    sil_score = silhouette_score(data_embedded, labels)

    # Scatter plot of the embedding, coloured by cluster label.
    plt.figure(figsize=(8, 6))
    scatter = plt.scatter(
        data_embedded[:, 0],
        data_embedded[:, 1],
        c=labels,
        cmap='tab10',
        alpha=0.7,
        s=40,
    )
    plt.title(f't-SNE + K-means\nPerplexity = {perplexity}\nSilhouette: {sil_score:.3f}')
    plt.xlabel('t-SNE Component 1')
    plt.ylabel('t-SNE Component 2')
    plt.colorbar(scatter, label='Cluster')
    plt.grid(True, alpha=0.3)
    plt.show()

    print(f"Perplexity {perplexity}: {len(np.unique(labels))} кластеров, Silhouette = {sil_score:.3f}")