1.5 MiB
Лабораторная работа №1. Бинарная классификация фактографических данных
Ли Тэ Хо А-02-22
Вариант 8: moons, random_state=15, noise=0.2, n_samples=1000
Импорт библиотек
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

Функция для отображения области принятия решений
def plot_2d_separator(classifier, X, fill=False, line=True, ax=None, eps=None):
    """Draw the decision boundary of a binary classifier over 2-D data.

    The classifier is evaluated on a 100 x 100 grid that spans the range of
    the first two columns of ``X``, widened by ``eps`` on every side.  Scores
    are taken from ``decision_function`` (boundary at 0) when the classifier
    provides it, otherwise from column 1 of ``predict_proba`` (boundary at
    0.5).

    Args:
        classifier: Estimator exposing ``decision_function`` or
            ``predict_proba`` (expected to be fitted already).
        X: 2-D array; only columns 0 and 1 are read, to set the plot extent.
        fill: If true, color the regions on either side of the boundary.
        line: If true, draw the boundary itself as a black contour.
        ax: Axes to draw on; ``plt.gca()`` is used when omitted.
        eps: Margin added around the data range; 1.0 when omitted.
    """
    if eps is None:
        eps = 1.0

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps

    # Regular grid covering the padded bounding box of the data.
    xx = np.linspace(x_min, x_max, 100)
    yy = np.linspace(y_min, y_max, 100)
    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]

    # Only the call that may be missing is guarded, so unrelated errors in the
    # level computation below are never mistaken for "no decision_function".
    try:
        decision_values = classifier.decision_function(X_grid)
    except AttributeError:
        decision_values = classifier.predict_proba(X_grid)[:, 1]
        levels = [.5]
        fill_levels = [0, .5, 1]
    else:
        levels = [0]
        low, high = decision_values.min(), decision_values.max()
        # contourf needs strictly increasing levels.  When every grid score
        # lies on one side of zero, [min, 0, max] is not increasing, so the
        # empty side is widened to an arbitrary bound; that band stays empty
        # and the picture is otherwise unchanged.
        if low >= 0:
            low = -1.0
        if high <= 0:
            high = 1.0
        fill_levels = [low, 0, high]

    if ax is None:
        ax = plt.gca()

    # Reshape once; both contour calls use the same grid of scores.
    score_grid = decision_values.reshape(X1.shape)
    if fill:
        ax.contourf(X1, X2, score_grid,
                    levels=fill_levels, colors=['cyan', 'pink', 'yellow'])
    if line:
        ax.contour(X1, X2, score_grid, levels=levels, colors="black")

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())


# Notebook section: Generating and exploring the dataset
Сгенерируем данные с параметрами варианта 8:
- n_samples = 1000 — количество объектов
- noise = 0.2 — уровень шума
- random_state = 15 — основа для генерации случайных чисел
X, y = make_moons(n_samples=1000, noise=0.2, random_state=15)

Выведем первые 15 элементов выборки:
print("Координаты точек:")
print(X[:15])
print("Метки класса:")
print(y[:15])

Координаты точек:
,[[ 1.7271961 -0.39285757]
, [-0.91801735 0.81910014]
, [-0.91532959 -0.05460812]
, [ 0.14537408 0.2064726 ]
, [ 0.95552152 0.20921022]
, [ 1.85825106 -0.35738814]
, [ 0.0761107 0.90867532]
, [-0.66311624 1.08115035]
, [ 0.13798809 0.98723143]
, [ 1.85704117 0.34111441]
, [ 0.17994761 0.15344022]
, [ 0.91666297 0.49152481]
, [ 1.25585707 -0.50035284]
, [ 1.11412853 -0.36151518]
, [-0.41601705 0.82276341]]
,Метки класса:
,[1 0 0 1 0 1 0 0 0 1 1 0 1 1 0]
Отобразим на графике сгенерированную выборку:
Разбиение на обучающее и тестовое множества
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

Отобразим обучающую и тестовую выборки:
# Show the two partitions side by side: one scatter plot per subset, with
# every point colored by its class label.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Left panel: the training partition.
axes[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train)
axes[0].set_title('Обучающая выборка (75%)')
# Right panel: the test partition.
axes[1].scatter(X_test[:, 0], X_test[:, 1], c=y_test)
axes[1].set_title('Тестовая выборка (25%)')
plt.tight_layout()
plt.show()

Обучение моделей и классификация
def evaluate_classifier(clf, X_train, y_train, X_test, y_test, X_all, y_all, name):
    """Fit a classifier, print its test-set metrics and plot its decision regions.

    Args:
        clf: Estimator with ``fit`` and ``predict``; ``predict_proba`` or
            ``decision_function`` is used for ROC AUC when available.
        X_train, y_train: Data the classifier is fitted on.
        X_test, y_test: Data the metrics are computed on.
        X_all, y_all: Points drawn over the decision regions.
        name: Label used in the printed header, the plot title and the
            returned row.

    Returns:
        dict with the keys 'Метод', 'Precision', 'Recall', 'F1', 'Accuracy'
        and 'AUC ROC'.  Precision, recall and F1 are the weighted averages
        from ``classification_report``; all numbers are rounded to 4 places.
    """
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)

    print(f'=== {name} ===')
    print('Предсказанные метки:')
    print(prediction)
    print('Истинные метки:')
    print(y_test)
    print('\nConfusion matrix:')
    print(confusion_matrix(y_test, prediction))
    accuracy = accuracy_score(y_test, prediction)
    print('\nAccuracy score:', accuracy)
    print('\nClassification report:')
    print(classification_report(y_test, prediction))

    # ROC AUC measures how well the model ranks positives above negatives, so
    # it must be computed from continuous scores.  Hard 0/1 predictions reduce
    # the curve to a single threshold.  They are kept only as a last resort for
    # estimators that expose no score at all.
    if hasattr(clf, 'predict_proba'):
        scores = clf.predict_proba(X_test)[:, 1]
    elif hasattr(clf, 'decision_function'):
        scores = clf.decision_function(X_test)
    else:
        scores = prediction
    auc = roc_auc_score(y_test, scores)
    print('AUC ROC:', auc)

    # Decision regions with the points from X_all drawn on top.
    plt.figure()
    plt.xlabel('first feature')
    plt.ylabel('second feature')
    plot_2d_separator(clf, X_all, fill=True)
    plt.scatter(X_all[:, 0], X_all[:, 1], c=y_all, s=70)
    plt.title(name)
    plt.show()

    # Dict form of the report gives the weighted averages for the summary row.
    weighted = classification_report(y_test, prediction, output_dict=True)['weighted avg']
    return {
        'Метод': name,
        'Precision': round(weighted['precision'], 4),
        'Recall': round(weighted['recall'], 4),
        'F1': round(weighted['f1-score'], 4),
        'Accuracy': round(accuracy, 4),
        'AUC ROC': round(auc, 4)
    }


# Notebook section: KNN (k=1)
results = []
results.append(evaluate_classifier(
KNeighborsClassifier(n_neighbors=1, metric='euclidean'),
X_train, y_train, X_test, y_test, X, y, 'KNN (k=1)'))

=== KNN (k=1) ===
,Предсказанные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0
, 0 1 1 0 1 0 0 1 0 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,Истинные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0
, 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,
,Confusion matrix:
,[[119 5]
, [ 4 122]]
,
,Accuracy score: 0.964
,
,Classification report:
, precision recall f1-score support
,
, 0 0.97 0.96 0.96 124
, 1 0.96 0.97 0.96 126
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.9639656938044036
KNN (k=3)
results.append(evaluate_classifier(
KNeighborsClassifier(n_neighbors=3, metric='euclidean'),
X_train, y_train, X_test, y_test, X, y, 'KNN (k=3)'))

=== KNN (k=3) ===
,Предсказанные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0
, 0 1 1 0 1 1 0 1 0 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0]
,Истинные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0
, 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,
,Confusion matrix:
,[[118 6]
, [ 4 122]]
,
,Accuracy score: 0.96
,
,Classification report:
, precision recall f1-score support
,
, 0 0.97 0.95 0.96 124
, 1 0.95 0.97 0.96 126
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.9599334357398874
KNN (k=9)
results.append(evaluate_classifier(
KNeighborsClassifier(n_neighbors=9, metric='euclidean'),
X_train, y_train, X_test, y_test, X, y, 'KNN (k=9)'))

=== KNN (k=9) ===
,Предсказанные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0
, 0 1 1 0 1 0 0 1 0 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0]
,Истинные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0
, 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,
,Confusion matrix:
,[[117 7]
, [ 4 122]]
,
,Accuracy score: 0.956
,
,Classification report:
, precision recall f1-score support
,
, 0 0.97 0.94 0.96 124
, 1 0.95 0.97 0.96 126
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.9559011776753714
Наивный байесовский метод
results.append(evaluate_classifier(
GaussianNB(),
X_train, y_train, X_test, y_test, X, y, 'Naive Bayes'))

=== Naive Bayes ===
,Предсказанные метки:
,[0 1 1 1 0 0 0 1 0 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 0 0 1 1 0 1 0 1 0 1 1 0 0 0 1 0 0 1 0 1 1 0 1 1 1 1 0 1 0 0
, 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 0 1 0 1 1 0 1 1 0 0 1 1 0 0
, 0 1 1 0 0 1 0 1 0 1 1 1 1 0 0 0 1 0 1 0 1 0 1 1 0 0 1 0 0 1 1 0 1 0 1 0 1
, 0 0 1 1 0 1 1 0 0 0 0 1 0 1 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0
, 0 1 0 0 1 1 0 1 0 1 0 1 1 0 0 1 0 0 1 1 1 1 0 0 1 0 1 0 1 1 0 0 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 0 1 0 1 1 1 1 0 1 1]
,Истинные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0
, 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,
,Confusion matrix:
,[[104 20]
, [ 17 109]]
,
,Accuracy score: 0.852
,
,Classification report:
, precision recall f1-score support
,
, 0 0.86 0.84 0.85 124
, 1 0.84 0.87 0.85 126
,
, accuracy 0.85 250
, macro avg 0.85 0.85 0.85 250
,weighted avg 0.85 0.85 0.85 250
,
,AUC ROC: 0.85189452124936
Случайный лес (n_estimators=5)
results.append(evaluate_classifier(
RandomForestClassifier(n_estimators=5, random_state=42),
X_train, y_train, X_test, y_test, X, y, 'Random Forest (n=5)'))

=== Random Forest (n=5) ===
,Предсказанные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 1 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 1 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0
, 0 1 1 0 1 1 0 1 0 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 1 1 0 1 1 0 1 0 1 1]
,Истинные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0
, 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,
,Confusion matrix:
,[[113 11]
, [ 3 123]]
,
,Accuracy score: 0.944
,
,Classification report:
, precision recall f1-score support
,
, 0 0.97 0.91 0.94 124
, 1 0.92 0.98 0.95 126
,
, accuracy 0.94 250
, macro avg 0.95 0.94 0.94 250
,weighted avg 0.95 0.94 0.94 250
,
,AUC ROC: 0.9437403993855606
Случайный лес (n_estimators=15)
results.append(evaluate_classifier(
RandomForestClassifier(n_estimators=15, random_state=42),
X_train, y_train, X_test, y_test, X, y, 'Random Forest (n=15)'))

=== Random Forest (n=15) ===
,Предсказанные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0
, 0 1 1 0 1 1 0 1 0 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,Истинные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0
, 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,
,Confusion matrix:
,[[117 7]
, [ 2 124]]
,
,Accuracy score: 0.964
,
,Classification report:
, precision recall f1-score support
,
, 0 0.98 0.94 0.96 124
, 1 0.95 0.98 0.96 126
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.9638376856118792
Случайный лес (n_estimators=50)
results.append(evaluate_classifier(
RandomForestClassifier(n_estimators=50, random_state=42),
X_train, y_train, X_test, y_test, X, y, 'Random Forest (n=50)'))

=== Random Forest (n=50) ===
,Предсказанные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0
, 0 1 1 0 1 1 0 1 0 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,Истинные метки:
,[0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
, 0 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0
, 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0
, 0 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1
, 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0
, 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0
, 0 1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1]
,
,Confusion matrix:
,[[117 7]
, [ 2 124]]
,
,Accuracy score: 0.964
,
,Classification report:
, precision recall f1-score support
,
, 0 0.98 0.94 0.96 124
, 1 0.95 0.98 0.96 126
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.9638376856118792
Сводная таблица результатов (эксперимент 1, random_state=42)
df_results_1 = pd.DataFrame(results).set_index('Метод')
df_results_1

| Метод | Precision | Recall | F1 | Accuracy | AUC ROC |
|---|---|---|---|---|---|
| KNN (k=1) | 0.9640 | 0.964 | 0.9640 | 0.964 | 0.9640 |
| KNN (k=3) | 0.9601 | 0.960 | 0.9600 | 0.960 | 0.9599 |
| KNN (k=9) | 0.9563 | 0.956 | 0.9560 | 0.956 | 0.9559 |
| Naive Bayes | 0.8522 | 0.852 | 0.8520 | 0.852 | 0.8519 |
| Random Forest (n=5) | 0.9458 | 0.944 | 0.9439 | 0.944 | 0.9437 |
| Random Forest (n=15) | 0.9647 | 0.964 | 0.9640 | 0.964 | 0.9638 |
| Random Forest (n=50) | 0.9647 | 0.964 | 0.9640 | 0.964 | 0.9638 |
Эксперимент 2 (random_state=10)
# Experiment 2: same 75/25 split ratio, but partitioned with seed 10.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X, y, test_size=0.25, random_state=10
)

# (estimator, display name) pairs: three KNN settings, Gaussian naive Bayes
# and three random-forest sizes, in that order.
classifiers = (
    [
        (KNeighborsClassifier(n_neighbors=k, metric='euclidean'), f'KNN (k={k})')
        for k in (1, 3, 9)
    ]
    + [(GaussianNB(), 'Naive Bayes')]
    + [
        (RandomForestClassifier(n_estimators=n, random_state=42),
         f'Random Forest (n={n})')
        for n in (5, 15, 50)
    ]
)

# Evaluate every model on the new split and collect one summary row each.
results2 = []
for clf, name in classifiers:
    results2.append(
        evaluate_classifier(clf, X_train2, y_train2, X_test2, y_test2, X, y, name)
    )

# Summary table indexed by model name.
df_results_2 = pd.DataFrame(results2).set_index('Метод')
df_results_2

=== KNN (k=1) ===
,Предсказанные метки:
,[1 0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 1
, 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 1 1 0 0 0 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0
, 1 0 1 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 1 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0]
,Истинные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,
,Confusion matrix:
,[[135 5]
, [ 7 103]]
,
,Accuracy score: 0.952
,
,Classification report:
, precision recall f1-score support
,
, 0 0.95 0.96 0.96 140
, 1 0.95 0.94 0.94 110
,
, accuracy 0.95 250
, macro avg 0.95 0.95 0.95 250
,weighted avg 0.95 0.95 0.95 250
,
,AUC ROC: 0.9503246753246753
=== KNN (k=3) ===
,Предсказанные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 0 1 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 0 0 1 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,Истинные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,
,Confusion matrix:
,[[135 5]
, [ 4 106]]
,
,Accuracy score: 0.964
,
,Classification report:
, precision recall f1-score support
,
, 0 0.97 0.96 0.97 140
, 1 0.95 0.96 0.96 110
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.963961038961039
=== KNN (k=9) ===
,Предсказанные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 0 1 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,Истинные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,
,Confusion matrix:
,[[136 4]
, [ 3 107]]
,
,Accuracy score: 0.972
,
,Classification report:
, precision recall f1-score support
,
, 0 0.98 0.97 0.97 140
, 1 0.96 0.97 0.97 110
,
, accuracy 0.97 250
, macro avg 0.97 0.97 0.97 250
,weighted avg 0.97 0.97 0.97 250
,
,AUC ROC: 0.972077922077922
=== Naive Bayes ===
,Предсказанные метки:
,[0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 0 1 1 0 1 0 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 0 1 0 0 0 1 1 1 1
, 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1 0 1 0
, 0 0 1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0
, 0 0 1 1 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 1 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 1 0 1 1 0 1 0 0]
,Истинные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,
,Confusion matrix:
,[[122 18]
, [ 20 90]]
,
,Accuracy score: 0.848
,
,Classification report:
, precision recall f1-score support
,
, 0 0.86 0.87 0.87 140
, 1 0.83 0.82 0.83 110
,
, accuracy 0.85 250
, macro avg 0.85 0.84 0.85 250
,weighted avg 0.85 0.85 0.85 250
,
,AUC ROC: 0.8448051948051949
=== Random Forest (n=5) ===
,Предсказанные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 0 0 1 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0]
,Истинные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,
,Confusion matrix:
,[[135 5]
, [ 5 105]]
,
,Accuracy score: 0.96
,
,Classification report:
, precision recall f1-score support
,
, 0 0.96 0.96 0.96 140
, 1 0.95 0.95 0.95 110
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.9594155844155845
=== Random Forest (n=15) ===
,Предсказанные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0]
,Истинные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,
,Confusion matrix:
,[[134 6]
, [ 4 106]]
,
,Accuracy score: 0.96
,
,Classification report:
, precision recall f1-score support
,
, 0 0.97 0.96 0.96 140
, 1 0.95 0.96 0.95 110
,
, accuracy 0.96 250
, macro avg 0.96 0.96 0.96 250
,weighted avg 0.96 0.96 0.96 250
,
,AUC ROC: 0.9603896103896105
=== Random Forest (n=50) ===
,Предсказанные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,Истинные метки:
,[1 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1 0 1 1 1
, 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1
, 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1
, 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1
, 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 0
, 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1
, 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0]
,
,Confusion matrix:
,[[136 4]
, [ 2 108]]
,
,Accuracy score: 0.976
,
,Classification report:
, precision recall f1-score support
,
, 0 0.99 0.97 0.98 140
, 1 0.96 0.98 0.97 110
,
, accuracy 0.98 250
, macro avg 0.97 0.98 0.98 250
,weighted avg 0.98 0.98 0.98 250
,
,AUC ROC: 0.9766233766233767
| Метод | Precision | Recall | F1 | Accuracy | AUC ROC |
|---|---|---|---|---|---|
| KNN (k=1) | 0.9520 | 0.952 | 0.9520 | 0.952 | 0.9503 |
| KNN (k=3) | 0.9641 | 0.964 | 0.9640 | 0.964 | 0.9640 |
| KNN (k=9) | 0.9721 | 0.972 | 0.9720 | 0.972 | 0.9721 |
| Naive Bayes | 0.8478 | 0.848 | 0.8478 | 0.848 | 0.8448 |
| Random Forest (n=5) | 0.9600 | 0.960 | 0.9600 | 0.960 | 0.9594 |
| Random Forest (n=15) | 0.9602 | 0.960 | 0.9600 | 0.960 | 0.9604 |
| Random Forest (n=50) | 0.9762 | 0.976 | 0.9760 | 0.976 | 0.9766 |
Эксперимент 3 (random_state=99)
# Experiment 3: same 75/25 split ratio, partitioned with seed 99.
X_train3, X_test3, y_train3, y_test3 = train_test_split(
    X, y, test_size=0.25, random_state=99
)

# One summary row per (estimator, display name) pair in `classifiers`.
results3 = [
    evaluate_classifier(model, X_train3, y_train3, X_test3, y_test3, X, y, label)
    for model, label in classifiers
]

# Summary table indexed by model name.
df_results_3 = pd.DataFrame(results3).set_index('Метод')
df_results_3

=== KNN (k=1) ===
,Предсказанные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1
, 1 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0]
,Истинные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 1 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0]
,
,Confusion matrix:
,[[118 10]
, [ 9 113]]
,
,Accuracy score: 0.924
,
,Classification report:
, precision recall f1-score support
,
, 0 0.93 0.92 0.93 128
, 1 0.92 0.93 0.92 122
,
, accuracy 0.92 250
, macro avg 0.92 0.92 0.92 250
,weighted avg 0.92 0.92 0.92 250
,
,AUC ROC: 0.9240522540983607
=== KNN (k=3) ===
,Предсказанные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1
, 1 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0]
,Истинные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 1 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0]
,
,Confusion matrix:
,[[120 8]
, [ 9 113]]
,
,Accuracy score: 0.932
,
,Classification report:
, precision recall f1-score support
,
, 0 0.93 0.94 0.93 128
, 1 0.93 0.93 0.93 122
,
, accuracy 0.93 250
, macro avg 0.93 0.93 0.93 250
,weighted avg 0.93 0.93 0.93 250
,
,AUC ROC: 0.9318647540983607
=== KNN (k=9) ===
,Предсказанные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1
, 1 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0]
,Истинные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 1 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0]
,
,Confusion matrix:
,[[120 8]
, [ 9 113]]
,
,Accuracy score: 0.932
,
,Classification report:
, precision recall f1-score support
,
, 0 0.93 0.94 0.93 128
, 1 0.93 0.93 0.93 122
,
, accuracy 0.93 250
, macro avg 0.93 0.93 0.93 250
,weighted avg 0.93 0.93 0.93 250
,
,AUC ROC: 0.9318647540983607
=== Naive Bayes ===
,Предсказанные метки:
,[0 1 0 0 1 1 0 1 1 0 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 1 0 0 1 1 0
, 0 1 0 1 1 1 1 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1
, 0 0 0 1 1 1 0 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 0
, 1 1 0 0 0 1 0 0 1 0 1 0 1 0 1 1 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1
, 1 0 0 1 1 0 1 1 1 0 0 1 1 0 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
, 1 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0]
,Истинные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 1 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0]
,
,Confusion matrix:
,[[113 15]
, [ 22 100]]
,
,Accuracy score: 0.852
,
,Classification report:
, precision recall f1-score support
,
, 0 0.84 0.88 0.86 128
, 1 0.87 0.82 0.84 122
,
, accuracy 0.85 250
, macro avg 0.85 0.85 0.85 250
,weighted avg 0.85 0.85 0.85 250
,
,AUC ROC: 0.8512423155737705
=== Random Forest (n=5) ===
,Предсказанные метки:
,[0 1 0 0 1 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0
, 1 1 0 1 0 1 0 0 0 1 1 0 0 1 1 1 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 0 0 0
, 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0]
,Истинные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 1 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0]
,
,Confusion matrix:
,[[118 10]
, [ 7 115]]
,
,Accuracy score: 0.932
,
,Classification report:
, precision recall f1-score support
,
, 0 0.94 0.92 0.93 128
, 1 0.92 0.94 0.93 122
,
, accuracy 0.93 250
, macro avg 0.93 0.93 0.93 250
,weighted avg 0.93 0.93 0.93 250
,
,AUC ROC: 0.9322489754098361
=== Random Forest (n=15) ===
,Предсказанные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0]
,Истинные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 1 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0]
,
,Confusion matrix:
,[[122 6]
, [ 8 114]]
,
,Accuracy score: 0.944
,
,Classification report:
, precision recall f1-score support
,
, 0 0.94 0.95 0.95 128
, 1 0.95 0.93 0.94 122
,
, accuracy 0.94 250
, macro avg 0.94 0.94 0.94 250
,weighted avg 0.94 0.94 0.94 250
,
,AUC ROC: 0.9437756147540983
=== Random Forest (n=50) ===
,Предсказанные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0]
,Истинные метки:
,[0 1 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0
, 1 1 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 1 1
, 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0
, 1 1 1 0 0 1 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0
, 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0
, 1 0 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 1
, 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0]
,
,Confusion matrix:
,[[122 6]
, [ 8 114]]
,
,Accuracy score: 0.944
,
,Classification report:
, precision recall f1-score support
,
, 0 0.94 0.95 0.95 128
, 1 0.95 0.93 0.94 122
,
, accuracy 0.94 250
, macro avg 0.94 0.94 0.94 250
,weighted avg 0.94 0.94 0.94 250
,
,AUC ROC: 0.9437756147540983
| Метод | Precision | Recall | F1 | Accuracy | AUC ROC |
|---|---|---|---|---|---|
| KNN (k=1) | 0.9240 | 0.924 | 0.9240 | 0.924 | 0.9241 |
| KNN (k=3) | 0.9320 | 0.932 | 0.9320 | 0.932 | 0.9319 |
| KNN (k=9) | 0.9320 | 0.932 | 0.9320 | 0.932 | 0.9319 |
| Naive Bayes | 0.8529 | 0.852 | 0.8518 | 0.852 | 0.8512 |
| Random Forest (n=5) | 0.9323 | 0.932 | 0.9320 | 0.932 | 0.9322 |
| Random Forest (n=15) | 0.9441 | 0.944 | 0.9440 | 0.944 | 0.9438 |
| Random Forest (n=50) | 0.9441 | 0.944 | 0.9440 | 0.944 | 0.9438 |
Итоговая таблица (среднее по трём экспериментам)
# Combine the three per-experiment result tables into one long-format
# summary: one row per (method, metric) pair holding the value from each
# experiment and their arithmetic mean, all rounded to 4 decimal places.
methods = df_results_1.index.tolist()
metrics = ['Precision', 'Recall', 'F1', 'Accuracy', 'AUC ROC']

# Output column label -> results table of the corresponding experiment.
experiment_tables = {
    'Эксп. 1 (rs=42)': df_results_1,
    'Эксп. 2 (rs=10)': df_results_2,
    'Эксп. 3 (rs=99)': df_results_3,
}

rows = []
for method in methods:
    for metric in metrics:
        scores = {
            label: table.loc[method, metric]
            for label, table in experiment_tables.items()
        }
        first, second, third = scores.values()

        # Key insertion order defines the column order of the final table.
        row = {'Метод': method, 'Метрика': metric}
        row.update({label: round(score, 4) for label, score in scores.items()})
        # The mean is taken over the unrounded values, then rounded.
        row['Среднее'] = round((first + second + third) / 3, 4)
        rows.append(row)

# Index by (method, metric) so the table renders grouped by method.
df_final = pd.DataFrame(rows).set_index(['Метод', 'Метрика'])
df_final
| Метод | Метрика | Эксп. 1 (rs=42) | Эксп. 2 (rs=10) | Эксп. 3 (rs=99) | Среднее |
|---|---|---|---|---|---|
| KNN (k=1) | Precision | 0.9640 | 0.9520 | 0.9240 | 0.9467 |
| Recall | 0.9640 | 0.9520 | 0.9240 | 0.9467 | |
| F1 | 0.9640 | 0.9520 | 0.9240 | 0.9467 | |
| Accuracy | 0.9640 | 0.9520 | 0.9240 | 0.9467 | |
| AUC ROC | 0.9640 | 0.9503 | 0.9241 | 0.9461 | |
| KNN (k=3) | Precision | 0.9601 | 0.9641 | 0.9320 | 0.9521 |
| Recall | 0.9600 | 0.9640 | 0.9320 | 0.9520 | |
| F1 | 0.9600 | 0.9640 | 0.9320 | 0.9520 | |
| Accuracy | 0.9600 | 0.9640 | 0.9320 | 0.9520 | |
| AUC ROC | 0.9599 | 0.9640 | 0.9319 | 0.9519 | |
| KNN (k=9) | Precision | 0.9563 | 0.9721 | 0.9320 | 0.9535 |
| Recall | 0.9560 | 0.9720 | 0.9320 | 0.9533 | |
| F1 | 0.9560 | 0.9720 | 0.9320 | 0.9533 | |
| Accuracy | 0.9560 | 0.9720 | 0.9320 | 0.9533 | |
| AUC ROC | 0.9559 | 0.9721 | 0.9319 | 0.9533 | |
| Naive Bayes | Precision | 0.8522 | 0.8478 | 0.8529 | 0.8510 |
| Recall | 0.8520 | 0.8480 | 0.8520 | 0.8507 | |
| F1 | 0.8520 | 0.8478 | 0.8518 | 0.8505 | |
| Accuracy | 0.8520 | 0.8480 | 0.8520 | 0.8507 | |
| AUC ROC | 0.8519 | 0.8448 | 0.8512 | 0.8493 | |
| Random Forest (n=5) | Precision | 0.9458 | 0.9600 | 0.9323 | 0.9460 |
| Recall | 0.9440 | 0.9600 | 0.9320 | 0.9453 | |
| F1 | 0.9439 | 0.9600 | 0.9320 | 0.9453 | |
| Accuracy | 0.9440 | 0.9600 | 0.9320 | 0.9453 | |
| AUC ROC | 0.9437 | 0.9594 | 0.9322 | 0.9451 | |
| Random Forest (n=15) | Precision | 0.9647 | 0.9602 | 0.9441 | 0.9563 |
| Recall | 0.9640 | 0.9600 | 0.9440 | 0.9560 | |
| F1 | 0.9640 | 0.9600 | 0.9440 | 0.9560 | |
| Accuracy | 0.9640 | 0.9600 | 0.9440 | 0.9560 | |
| AUC ROC | 0.9638 | 0.9604 | 0.9438 | 0.9560 | |
| Random Forest (n=50) | Precision | 0.9647 | 0.9762 | 0.9441 | 0.9617 |
| Recall | 0.9640 | 0.9760 | 0.9440 | 0.9613 | |
| F1 | 0.9640 | 0.9760 | 0.9440 | 0.9613 | |
| Accuracy | 0.9640 | 0.9760 | 0.9440 | 0.9613 | |
| AUC ROC | 0.9638 | 0.9766 | 0.9438 | 0.9614 |
Выводы
По результатам трёх экспериментов с различными разбиениями выборки (random_state = 42, 10, 99) можно сделать следующие выводы:
Random Forest (n=50) показал лучшие результаты среди всех методов: средняя аккуратность — 0.9613, AUC ROC — 0.9614. Увеличение числа деревьев повышало среднюю аккуратность: 0.9453 (n=5) → 0.9560 (n=15) → 0.9613 (n=50), что говорит о преимуществе ансамблевого подхода на данных с нелинейной границей классов.
KNN показал хорошие результаты: KNN (k=9) — Accuracy 0.9533, KNN (k=3) — 0.9520, KNN (k=1) — 0.9467. Метод хорошо справляется с нелинейной формой классов
make_moons, поскольку строит локальную границу решений на основе ближайших соседей.
Наивный байесовский классификатор показал худшие результаты среди всех методов: Accuracy = 0.8507, AUC ROC = 0.8493. Это объясняется тем, что
make_moons генерирует два «полумесяца» с нелинейной границей, а GaussianNB предполагает условную независимость признаков и их нормальное распределение внутри каждого класса и потому строит лишь гладкую (в общем случае квадратичную) границу, что не соответствует структуре данных.
В отличие от данных
make_blobs (вариант 3), где Naive Bayes был лучшим методом, на данных make_moons он оказался худшим. Это подтверждает, что выбор метода классификации должен учитывать характер данных: для линейно разделимых кластеров подходит Naive Bayes, для нелинейных границ — Random Forest и KNN.
Все методы, кроме Naive Bayes, показали среднюю аккуратность выше 0.93, что свидетельствует о невысоком уровне шума (
noise=0.2) и хорошей различимости классов.
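Наиболее подходящий метод для классификации данных варианта 8 — случайный лес (Random Forest, n_estimators=50), показавший лучшие средние значения по всем метрикам; при этом разброс его результатов между экспериментами (Accuracy от 0.944 до 0.976) сопоставим с разбросом у KNN и других вариантов случайного леса.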






















