diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..42bccd7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+invisible*
+.venv*
+.~lock*
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..92b96bb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# Интеллектуальные информационные системы
+
+## Лекции
+
+| Дата |Лекция |
+|:----------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 05.09.2024 | [Вводная лекция](lectures/lec1.odp) |
+| 12.09.2024 | [Изолирование окружения. Docker](lectures/lec2-docker.odp) |
+| 19.09.2024 | [Разведочный анализ данных](lectures/lec3-eda.odp) |
+
+## Лабораторные работы
diff --git a/assets/docker/Dockerfile b/assets/docker/Dockerfile
new file mode 100644
index 0000000..e079d9e
--- /dev/null
+++ b/assets/docker/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.11-slim
+
+WORKDIR /my_app
+
+# Install dependencies before copying the sources so this layer stays cached
+# when only application code changes. The version pin matches
+# assets/virtual_env/proj1/req.txt; --no-cache-dir keeps pip's download cache
+# out of the image layer.
+RUN pip install --no-cache-dir tqdm==4.66.5
+
+# Application sources change most often, so they are copied last.
+COPY . /my_app
+
+# Exec form: arguments given to `docker run <image> ...` are appended to this
+# command and reach main.py as sys.argv[1:].
+ENTRYPOINT [ "python", "main.py" ]
\ No newline at end of file
diff --git a/assets/docker/main.py b/assets/docker/main.py
new file mode 100644
index 0000000..92ea01b
--- /dev/null
+++ b/assets/docker/main.py
@@ -0,0 +1,6 @@
+import sys
+
+def main(a = 3, b = 5):
+ print(f"multiply {a} by {b} is {a * b}")
+
+main(int(sys.argv[1]), int(sys.argv[2]))
\ No newline at end of file
diff --git a/assets/eda/eda.ipynb b/assets/eda/eda.ipynb
new file mode 100644
index 0000000..726c7f5
--- /dev/null
+++ b/assets/eda/eda.ipynb
@@ -0,0 +1,449 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "e312113e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "import pandas as pd\n",
+ "import matplotlib as plt\n",
+ "import seaborn as sns\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de2c028d",
+ "metadata": {},
+ "source": [
+ "# Загрузка и знакомство с данными"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "5cd00195",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# dataset https://www.kaggle.com/datasets/mrdaniilak/russia-real-estate-20182021/data \n",
+ "\n",
+ "df = pd.read_csv('data/all_v2.csv')\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "05b57100",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
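+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>price</th>\n",
+ "      <th>date</th>\n",
+ "      <th>time</th>\n",
+ "      <th>geo_lat</th>\n",
+ "      <th>geo_lon</th>\n",
+ "      <th>region</th>\n",
+ "      <th>building_type</th>\n",
+ "      <th>level</th>\n",
+ "      <th>levels</th>\n",
+ "      <th>rooms</th>\n",
+ "      <th>area</th>\n",
+ "      <th>kitchen_area</th>\n",
+ "      <th>object_type</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>6050000</td>\n",
+ "      <td>2018-02-19</td>\n",
+ "      <td>20:00:21</td>\n",
+ "      <td>59.805808</td>\n",
+ "      <td>30.376141</td>\n",
+ "      <td>2661</td>\n",
+ "      <td>1</td>\n",
+ "      <td>8</td>\n",
+ "      <td>10</td>\n",
+ "      <td>3</td>\n",
+ "      <td>82.6</td>\n",
+ "      <td>10.8</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>8650000</td>\n",
+ "      <td>2018-02-27</td>\n",
+ "      <td>12:04:54</td>\n",
+ "      <td>55.683807</td>\n",
+ "      <td>37.297405</td>\n",
+ "      <td>81</td>\n",
+ "      <td>3</td>\n",
+ "      <td>5</td>\n",
+ "      <td>24</td>\n",
+ "      <td>2</td>\n",
+ "      <td>69.1</td>\n",
+ "      <td>12.0</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>4000000</td>\n",
+ "      <td>2018-02-28</td>\n",
+ "      <td>15:44:00</td>\n",
+ "      <td>56.295250</td>\n",
+ "      <td>44.061637</td>\n",
+ "      <td>2871</td>\n",
+ "      <td>1</td>\n",
+ "      <td>5</td>\n",
+ "      <td>9</td>\n",
+ "      <td>3</td>\n",
+ "      <td>66.0</td>\n",
+ "      <td>10.0</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>1850000</td>\n",
+ "      <td>2018-03-01</td>\n",
+ "      <td>11:24:52</td>\n",
+ "      <td>44.996132</td>\n",
+ "      <td>39.074783</td>\n",
+ "      <td>2843</td>\n",
+ "      <td>4</td>\n",
+ "      <td>12</td>\n",
+ "      <td>16</td>\n",
+ "      <td>2</td>\n",
+ "      <td>38.0</td>\n",
+ "      <td>5.0</td>\n",
+ "      <td>11</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>5450000</td>\n",
+ "      <td>2018-03-01</td>\n",
+ "      <td>17:42:43</td>\n",
+ "      <td>55.918767</td>\n",
+ "      <td>37.984642</td>\n",
+ "      <td>81</td>\n",
+ "      <td>3</td>\n",
+ "      <td>13</td>\n",
+ "      <td>14</td>\n",
+ "      <td>2</td>\n",
+ "      <td>60.0</td>\n",
+ "      <td>10.0</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>5</th>\n",
+ "      <td>3300000</td>\n",
+ "      <td>2018-03-02</td>\n",
+ "      <td>21:18:42</td>\n",
+ "      <td>55.908253</td>\n",
+ "      <td>37.726448</td>\n",
+ "      <td>81</td>\n",
+ "      <td>1</td>\n",
+ "      <td>4</td>\n",
+ "      <td>5</td>\n",
+ "      <td>1</td>\n",
+ "      <td>32.0</td>\n",
+ "      <td>6.0</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>6</th>\n",
+ "      <td>4704280</td>\n",
+ "      <td>2018-03-04</td>\n",
+ "      <td>12:35:25</td>\n",
+ "      <td>55.621097</td>\n",
+ "      <td>37.431002</td>\n",
+ "      <td>3</td>\n",
+ "      <td>2</td>\n",
+ "      <td>1</td>\n",
+ "      <td>25</td>\n",
+ "      <td>1</td>\n",
+ "      <td>31.7</td>\n",
+ "      <td>6.0</td>\n",
+ "      <td>11</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>7</th>\n",
+ "      <td>3600000</td>\n",
+ "      <td>2018-03-04</td>\n",
+ "      <td>20:52:38</td>\n",
+ "      <td>59.875526</td>\n",
+ "      <td>30.395457</td>\n",
+ "      <td>2661</td>\n",
+ "      <td>1</td>\n",
+ "      <td>2</td>\n",
+ "      <td>5</td>\n",
+ "      <td>1</td>\n",
+ "      <td>31.1</td>\n",
+ "      <td>6.0</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>8</th>\n",
+ "      <td>3390000</td>\n",
+ "      <td>2018-03-05</td>\n",
+ "      <td>07:07:05</td>\n",
+ "      <td>53.195031</td>\n",
+ "      <td>50.106952</td>\n",
+ "      <td>3106</td>\n",
+ "      <td>2</td>\n",
+ "      <td>4</td>\n",
+ "      <td>24</td>\n",
+ "      <td>2</td>\n",
+ "      <td>64.0</td>\n",
+ "      <td>13.0</td>\n",
+ "      <td>11</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>9</th>\n",
+ "      <td>2800000</td>\n",
+ "      <td>2018-03-06</td>\n",
+ "      <td>09:57:10</td>\n",
+ "      <td>55.736972</td>\n",
+ "      <td>38.846457</td>\n",
+ "      <td>81</td>\n",
+ "      <td>1</td>\n",
+ "      <td>9</td>\n",
+ "      <td>10</td>\n",
+ "      <td>2</td>\n",
+ "      <td>55.0</td>\n",
+ "      <td>8.0</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"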
+ ],
+ "text/plain": [
+ " price date time geo_lat geo_lon region building_type \\\n",
+ "0 6050000 2018-02-19 20:00:21 59.805808 30.376141 2661 1 \n",
+ "1 8650000 2018-02-27 12:04:54 55.683807 37.297405 81 3 \n",
+ "2 4000000 2018-02-28 15:44:00 56.295250 44.061637 2871 1 \n",
+ "3 1850000 2018-03-01 11:24:52 44.996132 39.074783 2843 4 \n",
+ "4 5450000 2018-03-01 17:42:43 55.918767 37.984642 81 3 \n",
+ "5 3300000 2018-03-02 21:18:42 55.908253 37.726448 81 1 \n",
+ "6 4704280 2018-03-04 12:35:25 55.621097 37.431002 3 2 \n",
+ "7 3600000 2018-03-04 20:52:38 59.875526 30.395457 2661 1 \n",
+ "8 3390000 2018-03-05 07:07:05 53.195031 50.106952 3106 2 \n",
+ "9 2800000 2018-03-06 09:57:10 55.736972 38.846457 81 1 \n",
+ "\n",
+ " level levels rooms area kitchen_area object_type \n",
+ "0 8 10 3 82.6 10.8 1 \n",
+ "1 5 24 2 69.1 12.0 1 \n",
+ "2 5 9 3 66.0 10.0 1 \n",
+ "3 12 16 2 38.0 5.0 11 \n",
+ "4 13 14 2 60.0 10.0 1 \n",
+ "5 4 5 1 32.0 6.0 1 \n",
+ "6 1 25 1 31.7 6.0 11 \n",
+ "7 2 5 1 31.1 6.0 1 \n",
+ "8 4 24 2 64.0 13.0 11 \n",
+ "9 9 10 2 55.0 8.0 1 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c892b3e",
+ "metadata": {},
+ "source": [
+ "# Очистка данных"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a3d3ad69",
+ "metadata": {},
+ "source": [
+ "# Анализ признаков для модели\n",
+ "\n",
+ "https://seaborn.pydata.org/examples/index.html - галерея примеров"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "78845b8b",
+ "metadata": {},
+ "source": [
+ "## histplot"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9318a819",
+ "metadata": {},
+ "source": [
+ "## heatmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f4ab2ef2",
+ "metadata": {},
+ "source": [
+ "# Групповые операции"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "11e4da4e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def flat_index(df_stats): \n",
+ " df_stats.columns = df_stats.columns.get_level_values(0) + '_' + df_stats.columns.get_level_values(1) \n",
+ " df_stats.columns = df_stats.columns.to_flat_index() \n",
+ " df_stats.reset_index(inplace=True) \n",
+ " return df_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0fbb62de",
+ "metadata": {},
+ "source": [
+ "## lineplot"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8bc652d",
+ "metadata": {},
+ "source": [
+ "## subplots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7fab88d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, axs = plt.pyplot.subplots(2,2)\n",
+ "fig.tight_layout(pad=1)\n",
+ "fig.set_size_inches(16.5, 14, forward=True)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ba7d6b7c",
+ "metadata": {},
+ "source": [
+ "## displot"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eca41c1e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for col in categorial_cols:\n",
+ " print(f'Unique categories in {col}: {df[col].nunique()}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ef2501d0",
+ "metadata": {},
+ "source": [
+ "## histplot"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5d64d58a",
+ "metadata": {},
+ "source": [
+ "# Bokeh\n",
+ "https://bokeh.org/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fddb38a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from bokeh.plotting import figure, show\n",
+ "from bokeh.models import ColumnDataSource, HoverTool, Legend\n",
+ "from bokeh.io import output_notebook \n",
+ "output_notebook()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3a7cbaaa",
+ "metadata": {},
+ "source": [
+ "# Выводы после EDA"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "695334e5",
+ "metadata": {},
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv_sprint02",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/assets/eda/requirements.txt b/assets/eda/requirements.txt
new file mode 100644
index 0000000..df2aea8
--- /dev/null
+++ b/assets/eda/requirements.txt
@@ -0,0 +1,4 @@
+pandas
+bokeh
+matplotlib
+seaborn
\ No newline at end of file
diff --git a/assets/mlflow/requirements b/assets/mlflow/requirements
new file mode 100644
index 0000000..acede8f
--- /dev/null
+++ b/assets/mlflow/requirements
@@ -0,0 +1 @@
+mlflow==2.7.1
diff --git a/assets/virtual_env/proj1/req.txt b/assets/virtual_env/proj1/req.txt
new file mode 100644
index 0000000..b5dcdea
--- /dev/null
+++ b/assets/virtual_env/proj1/req.txt
@@ -0,0 +1,2 @@
+tdqm==0.0.1
+tqdm==4.66.5
diff --git a/assets/virtual_env/proj1/requirements.txt b/assets/virtual_env/proj1/requirements.txt
new file mode 100644
index 0000000..78620c4
--- /dev/null
+++ b/assets/virtual_env/proj1/requirements.txt
@@ -0,0 +1 @@
+tqdm
diff --git a/lectures/lec1.odp b/lectures/lec1.odp
new file mode 100644
index 0000000..0e757fc
Binary files /dev/null and b/lectures/lec1.odp differ
diff --git a/lectures/lec2-docker.odp b/lectures/lec2-docker.odp
new file mode 100644
index 0000000..34ced31
Binary files /dev/null and b/lectures/lec2-docker.odp differ
diff --git a/lectures/lec3-eda.odp b/lectures/lec3-eda.odp
new file mode 100644
index 0000000..05ae0fc
Binary files /dev/null and b/lectures/lec3-eda.odp differ