# ==== ИАД/lr2/task2_Fisher.py ====
# Agglomerative clustering of three Iris features using a precomputed
# Chebyshev distance matrix, with a dendrogram and a 3D scatter plot.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram
from scipy.spatial import distance
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import load_iris

iris = load_iris()
iris_pd = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
data = iris_pd[
    ['petal length (cm)', 'petal width (cm)', 'sepal length (cm)']
].to_numpy()

# Pairwise Chebyshev distance matrix. pdist evaluates every unordered pair
# once and squareform mirrors the result into a symmetric square matrix with
# a zero diagonal, replacing the explicit double loop over all pairs.
distance_matrix = distance.squareform(
    distance.pdist(data, metric='chebyshev')
)


def plot_dendrogram(model, **kwargs):
    """Plot a dendrogram for a fitted hierarchical clustering model.

    Builds a linkage matrix from the model's merge tree and passes it to
    ``scipy.cluster.hierarchy.dendrogram``.

    Args:
        model: Fitted estimator exposing ``children_`` (one row of two child
            indices per merge), ``distances_`` (one distance per merge) and
            ``labels_`` (one label per sample).
        **kwargs: Forwarded unchanged to ``dendrogram``.
    """
    n_samples = len(model.labels_)

    # Number of original samples contained in the node created by each merge.
    counts = np.zeros(model.children_.shape[0])
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                # Indices below n_samples are leaves (single samples).
                current_count += 1
            else:
                # Larger indices refer to the node built by an earlier merge.
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    # Columns: child 1, child 2, merge distance, samples in the new node.
    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]
    ).astype(float)

    dendrogram(linkage_matrix, **kwargs)


# NOTE: the model constructed next does not set distance_threshold;
# compute_distances=True is what makes model.distances_ available to
# plot_dendrogram.
metric = 'precomputed'
linkage = "single"
# Single-linkage clustering on the precomputed distance matrix.
model = AgglomerativeClustering(
    compute_distances=True, metric=metric, linkage=linkage
)

model = model.fit(distance_matrix)
print(model.labels_)

# The closing quote after the linkage value was missing in the title text.
plt.title(
    f'Hierarchical Clustering Dendrogram \n metric="{metric}", '
    f'linkage="{linkage}"'
)
# plot the top three levels of the dendrogram
plot_dendrogram(model, truncate_mode="level", p=3)
plt.xlabel("Number of points in node")

# Separate figure: the same three features, coloured by cluster label.
fig1 = plt.figure()
ax = plt.axes(projection='3d')
ax.scatter3D(
    iris_pd['petal length (cm)'],
    iris_pd['petal width (cm)'],
    iris_pd['sepal length (cm)'],
    c=model.labels_,
    cmap='tab10',
)
ax.set_title(
    f'Agglomerative Clustering \n metric="{metric}", linkage="{linkage}"'
)
ax.set_xlabel('petal length (cm)')
ax.set_ylabel('petal width (cm)')
ax.set_zlabel('sepal length (cm)')

plt.show()


# ==== ИАД/lr2/task3_Fisher.py ====
# PCA projection of three Iris features onto two principal components.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

iris = load_iris()
iris_pd = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
data = iris_pd[
    ['petal length (cm)', 'petal width (cm)', 'sepal length (cm)']
].to_numpy()

pca = PCA(n_components=2)
# fit_transform fits the model itself, so no separate fit() call is needed.
principal_components = pca.fit_transform(data)

# Print the explained variance ratio of each component.
print(pca.explained_variance_ratio_)

pc_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])

plt.scatter(pc_df['PC1'], pc_df['PC2'])
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title("Projection on First Two Principal Components")
plt.show()


# ==== ИАД/lr2/task4_Fisher.py ====
# t-SNE embedding of three Iris features into two dimensions.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.manifold import TSNE

iris = load_iris()
iris_pd = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
data = iris_pd[
    ['petal length (cm)', 'petal width (cm)', 'sepal length (cm)']
].to_numpy()

# init='random' starts from a random layout; a fixed random_state keeps the
# printed embedding and the plot the same from run to run.
data_embedded = TSNE(
    n_components=2,
    learning_rate='auto',
    init='random',
    perplexity=3,
    random_state=0,
).fit_transform(data)
print(data_embedded)

plt.scatter(data_embedded[:, 0], data_embedded[:, 1])
plt.title('Visualization using t-SNE')

plt.show()


# ==== ИАД/lr2/task6_Fisher.py ====
# K-means clustering of three Iris features, shown as a 3D scatter plot.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris

iris = load_iris()
iris_pd = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
data = iris_pd[
    ['petal length (cm)', 'petal width (cm)', 'sepal length (cm)']
].to_numpy()

# One value drives both the model and the plot title so they cannot drift.
n_clusters = 2
model = KMeans(n_clusters=n_clusters, random_state=0, n_init="auto").fit(data)

fig1 = plt.figure()
ax = plt.axes(projection='3d')
ax.scatter3D(
    iris_pd['petal length (cm)'],
    iris_pd['petal width (cm)'],
    iris_pd['sepal length (cm)'],
    c=model.labels_,
    cmap='tab10',
)
ax.set_title(f'K-means Method \n n_clusters={n_clusters}')
ax.set_xlabel('petal length (cm)')
ax.set_ylabel('petal width (cm)')
ax.set_zlabel('sepal length (cm)')

plt.show()


# ==== ИАД/lr2/task_1_Fisher.py ====
# 3D scatter plot of three Iris features.
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fisher's Iris dataset. The 'target' column is stored in the frame but is
# not used by the plot below.
iris = load_iris()
iris_pd = pd.DataFrame(
    data=np.c_[iris['data'], iris['target']],
    columns=iris['feature_names'] + ['target'],
)

fig = plt.figure()
ax = plt.axes(projection='3d')

ax.scatter3D(
    iris_pd['petal length (cm)'],
    iris_pd['petal width (cm)'],
    iris_pd['sepal length (cm)'],
)
ax.set_title('3D Scatter Plot')
ax.set_xlabel('petal length (cm)')
ax.set_ylabel('petal width (cm)')
ax.set_zlabel('sepal length (cm)')

plt.show()