{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "2b6fb168", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "\n", "from sklearn import metrics\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, StratifiedKFold\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier\n", "\n", "from sklearn.manifold import TSNE\n", "from sklearn.manifold import MDS\n", "\n", "\n", "from sklearn import preprocessing" ] }, { "cell_type": "code", "execution_count": 2, "id": "75f28a82", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('titanic.csv')" ] }, { "cell_type": "markdown", "id": "34567fe6", "metadata": {}, "source": [ "# Знакомство с датасетом" ] }, { "cell_type": "code", "execution_count": 3, "id": "26b735e0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | PassengerId | \n", "Survived | \n", "Pclass | \n", "Name | \n", "Sex | \n", "Age | \n", "SibSp | \n", "Parch | \n", "Ticket | \n", "Fare | \n", "Cabin | \n", "Embarked | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0 | \n", "3 | \n", "Braund, Mr. Owen Harris | \n", "male | \n", "22.0 | \n", "1 | \n", "0 | \n", "A/5 21171 | \n", "7.2500 | \n", "NaN | \n", "S | \n", "
1 | \n", "2 | \n", "1 | \n", "1 | \n", "Cumings, Mrs. John Bradley (Florence Briggs Th... | \n", "female | \n", "38.0 | \n", "1 | \n", "0 | \n", "PC 17599 | \n", "71.2833 | \n", "C85 | \n", "C | \n", "
2 | \n", "3 | \n", "1 | \n", "3 | \n", "Heikkinen, Miss. Laina | \n", "female | \n", "26.0 | \n", "0 | \n", "0 | \n", "STON/O2. 3101282 | \n", "7.9250 | \n", "NaN | \n", "S | \n", "
3 | \n", "4 | \n", "1 | \n", "1 | \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n", "female | \n", "35.0 | \n", "1 | \n", "0 | \n", "113803 | \n", "53.1000 | \n", "C123 | \n", "S | \n", "
4 | \n", "5 | \n", "0 | \n", "3 | \n", "Allen, Mr. William Henry | \n", "male | \n", "35.0 | \n", "0 | \n", "0 | \n", "373450 | \n", "8.0500 | \n", "NaN | \n", "S | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
886 | \n", "887 | \n", "0 | \n", "2 | \n", "Montvila, Rev. Juozas | \n", "male | \n", "27.0 | \n", "0 | \n", "0 | \n", "211536 | \n", "13.0000 | \n", "NaN | \n", "S | \n", "
887 | \n", "888 | \n", "1 | \n", "1 | \n", "Graham, Miss. Margaret Edith | \n", "female | \n", "19.0 | \n", "0 | \n", "0 | \n", "112053 | \n", "30.0000 | \n", "B42 | \n", "S | \n", "
888 | \n", "889 | \n", "0 | \n", "3 | \n", "Johnston, Miss. Catherine Helen \"Carrie\" | \n", "female | \n", "NaN | \n", "1 | \n", "2 | \n", "W./C. 6607 | \n", "23.4500 | \n", "NaN | \n", "S | \n", "
889 | \n", "890 | \n", "1 | \n", "1 | \n", "Behr, Mr. Karl Howell | \n", "male | \n", "26.0 | \n", "0 | \n", "0 | \n", "111369 | \n", "30.0000 | \n", "C148 | \n", "C | \n", "
890 | \n", "891 | \n", "0 | \n", "3 | \n", "Dooley, Mr. Patrick | \n", "male | \n", "32.0 | \n", "0 | \n", "0 | \n", "370376 | \n", "7.7500 | \n", "NaN | \n", "Q | \n", "
891 rows × 12 columns
\n", "\n", " | PassengerId | \n", "Survived | \n", "Pclass | \n", "Age | \n", "SibSp | \n", "Parch | \n", "Fare | \n", "
---|---|---|---|---|---|---|---|
count | \n", "891.000000 | \n", "891.000000 | \n", "891.000000 | \n", "714.000000 | \n", "891.000000 | \n", "891.000000 | \n", "891.000000 | \n", "
mean | \n", "446.000000 | \n", "0.383838 | \n", "2.308642 | \n", "29.699118 | \n", "0.523008 | \n", "0.381594 | \n", "32.204208 | \n", "
std | \n", "257.353842 | \n", "0.486592 | \n", "0.836071 | \n", "14.526497 | \n", "1.102743 | \n", "0.806057 | \n", "49.693429 | \n", "
min | \n", "1.000000 | \n", "0.000000 | \n", "1.000000 | \n", "0.420000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
25% | \n", "223.500000 | \n", "0.000000 | \n", "2.000000 | \n", "20.125000 | \n", "0.000000 | \n", "0.000000 | \n", "7.910400 | \n", "
50% | \n", "446.000000 | \n", "0.000000 | \n", "3.000000 | \n", "28.000000 | \n", "0.000000 | \n", "0.000000 | \n", "14.454200 | \n", "
75% | \n", "668.500000 | \n", "1.000000 | \n", "3.000000 | \n", "38.000000 | \n", "1.000000 | \n", "0.000000 | \n", "31.000000 | \n", "
max | \n", "891.000000 | \n", "1.000000 | \n", "3.000000 | \n", "80.000000 | \n", "8.000000 | \n", "6.000000 | \n", "512.329200 | \n", "
\n", " | Survived | \n", "Pclass | \n", "Sex | \n", "Age | \n", "SibSp | \n", "Parch | \n", "Fare | \n", "Embarked | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "3 | \n", "True | \n", "22.0 | \n", "1 | \n", "0 | \n", "7.2500 | \n", "2 | \n", "
1 | \n", "1 | \n", "1 | \n", "False | \n", "38.0 | \n", "1 | \n", "0 | \n", "71.2833 | \n", "0 | \n", "
2 | \n", "1 | \n", "3 | \n", "False | \n", "26.0 | \n", "0 | \n", "0 | \n", "7.9250 | \n", "2 | \n", "
3 | \n", "1 | \n", "1 | \n", "False | \n", "35.0 | \n", "1 | \n", "0 | \n", "53.1000 | \n", "2 | \n", "
4 | \n", "0 | \n", "3 | \n", "True | \n", "35.0 | \n", "0 | \n", "0 | \n", "8.0500 | \n", "2 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
886 | \n", "0 | \n", "2 | \n", "True | \n", "27.0 | \n", "0 | \n", "0 | \n", "13.0000 | \n", "2 | \n", "
887 | \n", "1 | \n", "1 | \n", "False | \n", "19.0 | \n", "0 | \n", "0 | \n", "30.0000 | \n", "2 | \n", "
888 | \n", "0 | \n", "3 | \n", "False | \n", "NaN | \n", "1 | \n", "2 | \n", "23.4500 | \n", "2 | \n", "
889 | \n", "1 | \n", "1 | \n", "True | \n", "26.0 | \n", "0 | \n", "0 | \n", "30.0000 | \n", "0 | \n", "
890 | \n", "0 | \n", "3 | \n", "True | \n", "32.0 | \n", "0 | \n", "0 | \n", "7.7500 | \n", "1 | \n", "
891 rows × 8 columns
\n", "