Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

4638 строки
230 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"id": "662d04e7-1b0b-4e4a-9ddf-4526d7fef119",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "bf80fbc5-b660-4fac-8fbb-a5cae77313b3",
"metadata": {},
"source": [
"# === ЭТАП 1 ==="
]
},
{
"cell_type": "markdown",
"id": "5263a8b3-fe99-4204-8a2e-105182792c11",
"metadata": {},
"source": [
"# Загрузка первичных данных"
]
},
{
"cell_type": "markdown",
"id": "1b54a6a5-1656-4e3c-99d1-49dc39451d33",
"metadata": {},
"source": [
"Загружаем первичные данные из файлов:\n",
"- tracks.parquet\n",
"- catalog_names.parquet\n",
"- interactions.parquet"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "5d4b8961-3f35-4e58-9d6b-3e2dbd2c4224",
"metadata": {},
"outputs": [],
"source": [
"tracks = pd.read_parquet('../data/tracks.parquet')\n",
"interactions = pd.read_parquet('../data/interactions.parquet')\n",
"catalog = pd.read_parquet('../data/catalog_names.parquet')"
]
},
{
"cell_type": "markdown",
"id": "e8f2a1f7-a05f-4f39-af90-5f4018aa6f9d",
"metadata": {},
"source": [
"# Обзор данных"
]
},
{
"cell_type": "markdown",
"id": "46a85307-896c-4fac-9fcf-f0dffa90889e",
"metadata": {},
"source": [
"Проверяем данные, есть ли с ними явные проблемы."
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "c9f8f17e-9b56-4f5a-a463-f694a993effb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>track_id</th>\n",
" <th>albums</th>\n",
" <th>artists</th>\n",
" <th>genres</th>\n",
" <th>name</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>26</td>\n",
" <td>[3, 2490753]</td>\n",
" <td>[16]</td>\n",
" <td>[11, 21]</td>\n",
" <td>Complimentary Me</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>38</td>\n",
" <td>[3, 2490753]</td>\n",
" <td>[16]</td>\n",
" <td>[11, 21]</td>\n",
" <td>Momma's Boy</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>135</td>\n",
" <td>[12, 214, 2490809]</td>\n",
" <td>[84]</td>\n",
" <td>[11]</td>\n",
" <td>Atticus</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>136</td>\n",
" <td>[12, 214, 2490809]</td>\n",
" <td>[84]</td>\n",
" <td>[11]</td>\n",
" <td>24 Hours</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>138</td>\n",
" <td>[12, 214, 322, 72275, 72292, 91199, 213505, 24...</td>\n",
" <td>[84]</td>\n",
" <td>[11]</td>\n",
" <td>Don't Upset The Rhythm (Go Baby Go)</td>\n",
" <td>17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" track_id albums artists \\\n",
"0 26 [3, 2490753] [16] \n",
"1 38 [3, 2490753] [16] \n",
"2 135 [12, 214, 2490809] [84] \n",
"3 136 [12, 214, 2490809] [84] \n",
"4 138 [12, 214, 322, 72275, 72292, 91199, 213505, 24... [84] \n",
"\n",
" genres name count \n",
"0 [11, 21] Complimentary Me 5 \n",
"1 [11, 21] Momma's Boy 8 \n",
"2 [11] Atticus 16 \n",
"3 [11] 24 Hours 7 \n",
"4 [11] Don't Upset The Rhythm (Go Baby Go) 17 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tracks.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0d9d8b5d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 932664 entries, 0 to 932663\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 track_id 932664 non-null int64 \n",
" 1 albums 932664 non-null object\n",
" 2 artists 932664 non-null object\n",
" 3 genres 932664 non-null object\n",
" 4 name 932664 non-null object\n",
" 5 count 932664 non-null int64 \n",
"dtypes: int64(2), object(4)\n",
"memory usage: 42.7+ MB\n"
]
}
],
"source": [
"tracks.info()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "092a1f8b",
"metadata": {},
"outputs": [],
"source": [
"tracks['empty_genre'] = tracks['genres'].apply(lambda data: len(data) == 0)\n",
"tracks['empty_album'] = tracks['albums'].apply(lambda data: len(data) == 0)\n",
"tracks['empty_artist'] = tracks['artists'].apply(lambda data: len(data) == 0)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7bd05c0f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"932664"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tracks = tracks.loc[~tracks['empty_artist']]\n",
"tracks = tracks.loc[~tracks['empty_album']]\n",
"tracks = tracks.loc[~tracks['empty_genre']]\n",
"len(tracks)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "6e455a1f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>album</td>\n",
" <td>Taller Children</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>album</td>\n",
" <td>Wild Young Hearts</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13</td>\n",
" <td>album</td>\n",
" <td>Lonesome Crow</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17</td>\n",
" <td>album</td>\n",
" <td>Graffiti Soul</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>26</td>\n",
" <td>album</td>\n",
" <td>Blues Six Pack</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type name\n",
"0 3 album Taller Children\n",
"1 12 album Wild Young Hearts\n",
"2 13 album Lonesome Crow\n",
"3 17 album Graffiti Soul\n",
"4 26 album Blues Six Pack"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog.head()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "97a67071",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['album', 'artist', 'genre', 'track'], dtype=object)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog.type.unique()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5ceb5505",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>819116</th>\n",
" <td>38012</td>\n",
" <td>track</td>\n",
" <td>The Riddle</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type name\n",
"819116 38012 track The Riddle"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog.loc[(catalog.type == 'track') & (catalog.id == 38012 )]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "61fc07be",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"166\n",
"658724\n",
"153581\n"
]
}
],
"source": [
"genre_list = catalog.loc[catalog.type=='genre', 'id'].values\n",
"print(len(genre_list))\n",
"album_list = catalog.loc[catalog.type=='album', 'id'].values\n",
"print(len(album_list))\n",
"artist_list = catalog.loc[catalog.type=='artist', 'id'].values\n",
"print(len(artist_list))"
]
},
{
"cell_type": "markdown",
"id": "4b96df54",
"metadata": {},
"source": [
"Проверяем, есть ли неизвестные данные в столбцах (которых нет в каталоге)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "0c302f8d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>track_id</th>\n",
" <th>albums</th>\n",
" <th>artists</th>\n",
" <th>genres</th>\n",
" <th>name</th>\n",
" <th>count</th>\n",
" <th>empty_genre</th>\n",
" <th>empty_album</th>\n",
" <th>empty_artist</th>\n",
" <th>unknown_genre</th>\n",
" <th>unknown_artist</th>\n",
" <th>unknown_album</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [track_id, albums, artists, genres, name, count, empty_genre, empty_album, empty_artist, unknown_genre, unknown_artist, unknown_album]\n",
"Index: []"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tracks['unknown_genre'] = tracks['genres'].apply(lambda alb: sum([1 if a in genre_list else 0 for a in alb]) / len(alb))\n",
"tracks.query('unknown_genre < 1')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "efd2e2eb-3bec-4ce1-87ac-232bab8bc0d0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>track_id</th>\n",
" <th>albums</th>\n",
" <th>artists</th>\n",
" <th>genres</th>\n",
" <th>name</th>\n",
" <th>count</th>\n",
" <th>empty_genre</th>\n",
" <th>empty_album</th>\n",
" <th>empty_artist</th>\n",
" <th>unknown_genre</th>\n",
" <th>unknown_artist</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [track_id, albums, artists, genres, name, count, empty_genre, empty_album, empty_artist, unknown_genre, unknown_artist]\n",
"Index: []"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tracks['unknown_artist'] = tracks['artists'].apply(lambda alb: sum([1 if a in artist_list else 0 for a in alb]) / len(alb))\n",
"tracks.query('unknown_artist < 1')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f462a430",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>track_id</th>\n",
" <th>albums</th>\n",
" <th>artists</th>\n",
" <th>genres</th>\n",
" <th>name</th>\n",
" <th>count</th>\n",
" <th>empty_genre</th>\n",
" <th>empty_album</th>\n",
" <th>empty_artist</th>\n",
" <th>unknown_genre</th>\n",
" <th>unknown_artist</th>\n",
" <th>unknown_album</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [track_id, albums, artists, genres, name, count, empty_genre, empty_album, empty_artist, unknown_genre, unknown_artist, unknown_album]\n",
"Index: []"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 10 minutes\n",
"tracks['unknown_album'] = tracks['albums'].apply(lambda alb: sum([1 if a in album_list else 0 for a in alb]) / len(alb))\n",
"tracks.query('unknown_album < 1')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "2e0fa8d5",
"metadata": {},
"outputs": [],
"source": [
"tracks = tracks.loc[tracks['unknown_genre'] == 1]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "5f6d9ced",
"metadata": {},
"outputs": [],
"source": [
"track_list = tracks.track_id.unique()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "2a64e8e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 214714415 entries, 0 to 291\n",
"Data columns (total 4 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 user_id int32 \n",
" 1 track_id int32 \n",
" 2 track_seq int16 \n",
" 3 started_at datetime64[ns]\n",
"dtypes: datetime64[ns](1), int16(1), int32(2)\n",
"memory usage: 5.2 GB\n"
]
},
{
"data": {
"text/plain": [
"user_id 1368700\n",
"track_id 932664\n",
"track_seq 16636\n",
"started_at 365\n",
"dtype: int64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interactions.info()\n",
"interactions.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "5ea6c6df",
"metadata": {},
"outputs": [],
"source": [
"interactions = interactions.loc[interactions['track_id'].isin(track_list)]\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "55b0cb35",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 214714415 entries, 0 to 291\n",
"Data columns (total 4 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 user_id int32 \n",
" 1 track_id int32 \n",
" 2 track_seq int16 \n",
" 3 started_at datetime64[ns]\n",
"dtypes: datetime64[ns](1), int16(1), int32(2)\n",
"memory usage: 5.2 GB\n"
]
},
{
"data": {
"text/plain": [
"user_id 1368700\n",
"track_id 932664\n",
"track_seq 16636\n",
"started_at 365\n",
"dtype: int64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interactions.info()\n",
"interactions.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "267e9d6d",
"metadata": {},
"outputs": [],
"source": [
"def flat_index(df_stats): \n",
" df_stats.columns = df_stats.columns.get_level_values(0) + '_' + df_stats.columns.get_level_values(1) \n",
" df_stats.columns.to_flat_index() \n",
" df_stats.columns = df_stats.columns.to_flat_index() \n",
" df_stats.reset_index(inplace=True) \n",
" return df_stats"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "064423eb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['user_id', 'track_id', 'track_seq']"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"numeric_cols = interactions.select_dtypes(include=['int16','int32']).columns.tolist()\n",
"numeric_cols"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7b22d39e",
"metadata": {},
"outputs": [],
"source": [
"users_grouped = interactions[numeric_cols].groupby('user_id').agg(['count', 'sum', 'max'])\n",
"users_grouped = flat_index(users_grouped)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "28240cf5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>track_id_count</th>\n",
" <th>track_id_sum</th>\n",
" <th>track_id_max</th>\n",
" <th>track_seq_count</th>\n",
" <th>track_seq_sum</th>\n",
" <th>track_seq_max</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>26</td>\n",
" <td>222329191</td>\n",
" <td>20497621</td>\n",
" <td>26</td>\n",
" <td>351</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>36</td>\n",
" <td>1433967998</td>\n",
" <td>83436771</td>\n",
" <td>36</td>\n",
" <td>666</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>13</td>\n",
" <td>382479927</td>\n",
" <td>71650200</td>\n",
" <td>13</td>\n",
" <td>103</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>33</td>\n",
" <td>1437092275</td>\n",
" <td>78194999</td>\n",
" <td>33</td>\n",
" <td>561</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>245</td>\n",
" <td>8570567142</td>\n",
" <td>98766295</td>\n",
" <td>245</td>\n",
" <td>31738</td>\n",
" <td>256</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1368695</th>\n",
" <td>1374578</td>\n",
" <td>11</td>\n",
" <td>360121964</td>\n",
" <td>35785893</td>\n",
" <td>11</td>\n",
" <td>66</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1368696</th>\n",
" <td>1374579</td>\n",
" <td>23</td>\n",
" <td>178064881</td>\n",
" <td>41899516</td>\n",
" <td>23</td>\n",
" <td>289</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1368697</th>\n",
" <td>1374580</td>\n",
" <td>274</td>\n",
" <td>10513244081</td>\n",
" <td>96618101</td>\n",
" <td>274</td>\n",
" <td>37911</td>\n",
" <td>277</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1368698</th>\n",
" <td>1374581</td>\n",
" <td>503</td>\n",
" <td>21329665736</td>\n",
" <td>99242432</td>\n",
" <td>503</td>\n",
" <td>126756</td>\n",
" <td>503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1368699</th>\n",
" <td>1374582</td>\n",
" <td>290</td>\n",
" <td>17626701721</td>\n",
" <td>100736375</td>\n",
" <td>290</td>\n",
" <td>42717</td>\n",
" <td>292</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1368700 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" user_id track_id_count track_id_sum track_id_max track_seq_count \\\n",
"0 0 26 222329191 20497621 26 \n",
"1 1 36 1433967998 83436771 36 \n",
"2 2 13 382479927 71650200 13 \n",
"3 3 33 1437092275 78194999 33 \n",
"4 4 245 8570567142 98766295 245 \n",
"... ... ... ... ... ... \n",
"1368695 1374578 11 360121964 35785893 11 \n",
"1368696 1374579 23 178064881 41899516 23 \n",
"1368697 1374580 274 10513244081 96618101 274 \n",
"1368698 1374581 503 21329665736 99242432 503 \n",
"1368699 1374582 290 17626701721 100736375 290 \n",
"\n",
" track_seq_sum track_seq_max \n",
"0 351 26 \n",
"1 666 36 \n",
"2 103 14 \n",
"3 561 33 \n",
"4 31738 256 \n",
"... ... ... \n",
"1368695 66 11 \n",
"1368696 289 24 \n",
"1368697 37911 277 \n",
"1368698 126756 503 \n",
"1368699 42717 292 \n",
"\n",
"[1368700 rows x 7 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"users_grouped"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "93201b84",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>track_id</th>\n",
" <th>user_id_count</th>\n",
" <th>user_id_sum</th>\n",
" <th>user_id_max</th>\n",
" <th>track_seq_count</th>\n",
" <th>track_seq_sum</th>\n",
" <th>track_seq_max</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>26</td>\n",
" <td>5</td>\n",
" <td>3187389</td>\n",
" <td>1008720</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>38</td>\n",
" <td>8</td>\n",
" <td>6510921</td>\n",
" <td>1304168</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>135</td>\n",
" <td>16</td>\n",
" <td>11203352</td>\n",
" <td>1324586</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>136</td>\n",
" <td>7</td>\n",
" <td>5253974</td>\n",
" <td>1203184</td>\n",
" <td>7</td>\n",
" <td>10</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>138</td>\n",
" <td>17</td>\n",
" <td>15656791</td>\n",
" <td>1324586</td>\n",
" <td>17</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932659</th>\n",
" <td>101478482</td>\n",
" <td>6</td>\n",
" <td>2809606</td>\n",
" <td>1055764</td>\n",
" <td>6</td>\n",
" <td>5549</td>\n",
" <td>4910</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932660</th>\n",
" <td>101490148</td>\n",
" <td>72</td>\n",
" <td>44514110</td>\n",
" <td>1322150</td>\n",
" <td>72</td>\n",
" <td>38987</td>\n",
" <td>4911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932661</th>\n",
" <td>101493057</td>\n",
" <td>9</td>\n",
" <td>6855759</td>\n",
" <td>1294431</td>\n",
" <td>9</td>\n",
" <td>11969</td>\n",
" <td>4912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932662</th>\n",
" <td>101495927</td>\n",
" <td>20</td>\n",
" <td>15485868</td>\n",
" <td>1320566</td>\n",
" <td>20</td>\n",
" <td>9718</td>\n",
" <td>1146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932663</th>\n",
" <td>101521819</td>\n",
" <td>34</td>\n",
" <td>24517206</td>\n",
" <td>1350411</td>\n",
" <td>34</td>\n",
" <td>19324</td>\n",
" <td>2768</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>932664 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" track_id user_id_count user_id_sum user_id_max track_seq_count \\\n",
"0 26 5 3187389 1008720 5 \n",
"1 38 8 6510921 1304168 8 \n",
"2 135 16 11203352 1324586 16 \n",
"3 136 7 5253974 1203184 7 \n",
"4 138 17 15656791 1324586 17 \n",
"... ... ... ... ... ... \n",
"932659 101478482 6 2809606 1055764 6 \n",
"932660 101490148 72 44514110 1322150 72 \n",
"932661 101493057 9 6855759 1294431 9 \n",
"932662 101495927 20 15485868 1320566 20 \n",
"932663 101521819 34 24517206 1350411 34 \n",
"\n",
" track_seq_sum track_seq_max \n",
"0 5 1 \n",
"1 9 2 \n",
"2 16 1 \n",
"3 10 2 \n",
"4 25 3 \n",
"... ... ... \n",
"932659 5549 4910 \n",
"932660 38987 4911 \n",
"932661 11969 4912 \n",
"932662 9718 1146 \n",
"932663 19324 2768 \n",
"\n",
"[932664 rows x 7 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tracks_grouped = interactions[numeric_cols].groupby('track_id').agg(['count','sum','max'])\n",
"tracks_grouped = flat_index(tracks_grouped)\n",
"tracks_grouped"
]
},
{
"cell_type": "markdown",
"id": "1acece8d",
"metadata": {},
"source": [
"Найдем пользователей с малой историей - для них нет смысла считать персональные рекомендации. "
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "407cedc8",
"metadata": {},
"outputs": [],
"source": [
"small_users = users_grouped.query('track_id_count <= 3')['user_id'] # пользователи, прослушавшие менее 3 треков\n",
"interactions = interactions.loc[~interactions['user_id'].isin(small_users)]\n"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "f6b86360",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"932664"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(interactions.track_id.unique())"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "3b02415b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>812471</th>\n",
" <td>26</td>\n",
" <td>Complimentary Me</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812472</th>\n",
" <td>38</td>\n",
" <td>Momma's Boy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812473</th>\n",
" <td>135</td>\n",
" <td>Atticus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812474</th>\n",
" <td>136</td>\n",
" <td>24 Hours</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812475</th>\n",
" <td>138</td>\n",
" <td>Don't Upset The Rhythm (Go Baby Go)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1812466</th>\n",
" <td>101478482</td>\n",
" <td>На лицо</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1812467</th>\n",
" <td>101490148</td>\n",
" <td>Без капли мысли</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1812468</th>\n",
" <td>101493057</td>\n",
" <td>SKITTLES</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1812469</th>\n",
" <td>101495927</td>\n",
" <td>Москва</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1812470</th>\n",
" <td>101521819</td>\n",
" <td>Вокзал</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000000 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id name\n",
"812471 26 Complimentary Me\n",
"812472 38 Momma's Boy\n",
"812473 135 Atticus\n",
"812474 136 24 Hours\n",
"812475 138 Don't Upset The Rhythm (Go Baby Go)\n",
"... ... ...\n",
"1812466 101478482 На лицо\n",
"1812467 101490148 Без капли мысли\n",
"1812468 101493057 SKITTLES\n",
"1812469 101495927 Москва\n",
"1812470 101521819 Вокзал\n",
"\n",
"[1000000 rows x 2 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog_tracks = catalog.loc[catalog.type=='track'][['id', 'name']]\n",
"catalog_tracks.to_parquet('../data/catalog_tracks.parquet')\n",
"catalog_tracks\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e53cf75b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>812305</th>\n",
" <td>0</td>\n",
" <td>all</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812306</th>\n",
" <td>1</td>\n",
" <td>eastern</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812307</th>\n",
" <td>2</td>\n",
" <td>rusrock</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812308</th>\n",
" <td>3</td>\n",
" <td>rusrap</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812309</th>\n",
" <td>4</td>\n",
" <td>postrock</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812466</th>\n",
" <td>1182</td>\n",
" <td>balkan</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812467</th>\n",
" <td>1197</td>\n",
" <td>experimental</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812468</th>\n",
" <td>1370</td>\n",
" <td>europop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812469</th>\n",
" <td>1484</td>\n",
" <td>meditation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812470</th>\n",
" <td>1542</td>\n",
" <td>asiapop</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>166 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id name\n",
"812305 0 all\n",
"812306 1 eastern\n",
"812307 2 rusrock\n",
"812308 3 rusrap\n",
"812309 4 postrock\n",
"... ... ...\n",
"812466 1182 balkan\n",
"812467 1197 experimental\n",
"812468 1370 europop\n",
"812469 1484 meditation\n",
"812470 1542 asiapop\n",
"\n",
"[166 rows x 2 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog_genres = catalog.loc[catalog.type=='genre'][['id', 'name']]\n",
"catalog_genres.to_parquet('../data/catalog_genres.parquet')\n",
"catalog_genres"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "f261c887",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>Taller Children</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>Wild Young Hearts</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13</td>\n",
" <td>Lonesome Crow</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17</td>\n",
" <td>Graffiti Soul</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>26</td>\n",
" <td>Blues Six Pack</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658719</th>\n",
" <td>21458141</td>\n",
" <td>The Lazy Singles</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658720</th>\n",
" <td>21458207</td>\n",
" <td>Jackie Mittoo Anthology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658721</th>\n",
" <td>21458968</td>\n",
" <td>Master Composers: Johann Sebastian Bach</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658722</th>\n",
" <td>21459622</td>\n",
" <td>Take the Money and Run</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658723</th>\n",
" <td>21461648</td>\n",
" <td>Tropical</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>658724 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id name\n",
"0 3 Taller Children\n",
"1 12 Wild Young Hearts\n",
"2 13 Lonesome Crow\n",
"3 17 Graffiti Soul\n",
"4 26 Blues Six Pack\n",
"... ... ...\n",
"658719 21458141 The Lazy Singles\n",
"658720 21458207 Jackie Mittoo Anthology\n",
"658721 21458968 Master Composers: Johann Sebastian Bach\n",
"658722 21459622 Take the Money and Run\n",
"658723 21461648 Tropical\n",
"\n",
"[658724 rows x 2 columns]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog_albums = catalog.loc[catalog.type=='album'][['id', 'name']]\n",
"catalog_albums.to_parquet('../data/catalog_albums.parquet')\n",
"catalog_albums"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "d5095297",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>658724</th>\n",
" <td>4</td>\n",
" <td>Kenny Dorham</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658725</th>\n",
" <td>5</td>\n",
" <td>Max Roach</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658726</th>\n",
" <td>7</td>\n",
" <td>Francis Rossi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658727</th>\n",
" <td>9</td>\n",
" <td>Status Quo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658728</th>\n",
" <td>12</td>\n",
" <td>Phil Everly</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812300</th>\n",
" <td>16093680</td>\n",
" <td>Los Tiburones</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812301</th>\n",
" <td>16097398</td>\n",
" <td>AMELI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812302</th>\n",
" <td>16098445</td>\n",
" <td>2GANGSTA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812303</th>\n",
" <td>16099125</td>\n",
" <td>Daria</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812304</th>\n",
" <td>16102782</td>\n",
" <td>Pan dö Baré</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>153581 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id name\n",
"658724 4 Kenny Dorham\n",
"658725 5 Max Roach\n",
"658726 7 Francis Rossi\n",
"658727 9 Status Quo\n",
"658728 12 Phil Everly\n",
"... ... ...\n",
"812300 16093680 Los Tiburones\n",
"812301 16097398 AMELI\n",
"812302 16098445 2GANGSTA\n",
"812303 16099125 Daria\n",
"812304 16102782 Pan dö Baré\n",
"\n",
"[153581 rows x 2 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Artist rows of the combined catalog, reduced to the id / name columns\n",
"catalog_artists = catalog.loc[catalog['type'] == 'artist', ['id', 'name']]\n",
"# Write next to the input files ('../data/'), the same directory the albums catalog is saved to\n",
"catalog_artists.to_parquet('../data/catalog_artists.parquet')\n",
"catalog_artists"
]
},
{
"cell_type": "markdown",
"id": "f762beed",
"metadata": {},
"source": [
"## Анализ каталогов\n",
"## Genres"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "41225d91",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>812305</th>\n",
" <td>0</td>\n",
" <td>all</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812306</th>\n",
" <td>1</td>\n",
" <td>eastern</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812307</th>\n",
" <td>2</td>\n",
" <td>rusrock</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812308</th>\n",
" <td>3</td>\n",
" <td>rusrap</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812309</th>\n",
" <td>4</td>\n",
" <td>postrock</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812466</th>\n",
" <td>1182</td>\n",
" <td>balkan</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812467</th>\n",
" <td>1197</td>\n",
" <td>experimental</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812468</th>\n",
" <td>1370</td>\n",
" <td>europop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812469</th>\n",
" <td>1484</td>\n",
" <td>meditation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812470</th>\n",
" <td>1542</td>\n",
" <td>asiapop</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>166 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id name\n",
"812305 0 all\n",
"812306 1 eastern\n",
"812307 2 rusrock\n",
"812308 3 rusrap\n",
"812309 4 postrock\n",
"... ... ...\n",
"812466 1182 balkan\n",
"812467 1197 experimental\n",
"812468 1370 europop\n",
"812469 1484 meditation\n",
"812470 1542 asiapop\n",
"\n",
"[166 rows x 2 columns]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog_genres"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e3627b2a",
"metadata": {},
"outputs": [],
"source": [
"# One zero-initialised counter per genre id known to the genre catalog\n",
"genres_dict = dict.fromkeys(catalog_genres['id'], 0)\n",
"\n",
"# Count in how many tracks each genre occurs. Iterating the column directly avoids\n",
"# DataFrame.iterrows(), which builds a Series object for every row.\n",
"# A genre id missing from catalog_genres still raises KeyError, as before.\n",
"for track_genres in tracks['genres']:\n",
"    for g in track_genres:\n",
"        genres_dict[g] += 1\n",
"genres_df = pd.DataFrame.from_dict(genres_dict, orient='index')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "df97b13b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>count</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>all</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1186</td>\n",
" <td>eastern</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>36649</td>\n",
" <td>rusrock</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>65958</td>\n",
" <td>rusrap</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>2054</td>\n",
" <td>postrock</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>161</th>\n",
" <td>1182</td>\n",
" <td>454</td>\n",
" <td>balkan</td>\n",
" </tr>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>1197</td>\n",
" <td>1674</td>\n",
" <td>experimental</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163</th>\n",
" <td>1370</td>\n",
" <td>0</td>\n",
" <td>europop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>164</th>\n",
" <td>1484</td>\n",
" <td>1606</td>\n",
" <td>meditation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>1542</td>\n",
" <td>176</td>\n",
" <td>asiapop</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>166 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" id count name\n",
"0 0 0 all\n",
"1 1 1186 eastern\n",
"2 2 36649 rusrock\n",
"3 3 65958 rusrap\n",
"4 4 2054 postrock\n",
".. ... ... ...\n",
"161 1182 454 balkan\n",
"162 1197 1674 experimental\n",
"163 1370 0 europop\n",
"164 1484 1606 meditation\n",
"165 1542 176 asiapop\n",
"\n",
"[166 rows x 3 columns]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Turn the genre-id index into a regular column and name both columns\n",
"genres_df = genres_df.reset_index()\n",
"genres_df.columns = ['id', 'count']\n",
"# Single vectorised id -> name lookup instead of filtering catalog_genres once per row;\n",
"# drop_duplicates keeps the first name per id, matching the former .values[0]\n",
"genre_names = catalog_genres.drop_duplicates('id').set_index('id')['name']\n",
"genres_df['name'] = genres_df['id'].map(genre_names)\n",
"genres_df"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "bd810c63",
"metadata": {},
"outputs": [],
"source": [
"# Attach the per-genre track counts. A 'count' column left over from a previous run is\n",
"# dropped first, so re-running the cell does not produce count_x / count_y.\n",
"catalog_genres = (\n",
"    catalog_genres.drop(columns=['count'], errors='ignore')\n",
"    .merge(genres_df[['count', 'id']], on='id')\n",
")\n",
"# Write next to the input files ('../data/'), the directory the raw parquet files are read from\n",
"catalog_genres.to_parquet('../data/catalog_genres.parquet')"
]
},
{
"cell_type": "markdown",
"id": "3fe7c8ae",
"metadata": {},
"source": [
"## Tracks"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "faebd87d",
"metadata": {},
"outputs": [],
"source": [
"tracks_dict = dict(zip(catalog_tracks['id'], [0]*catalog_tracks.shape[0]))\n",
"# Enrich every track with its catalog row and its per-track interaction aggregate,\n",
"# then keep only the columns used downstream.\n",
"# NOTE(review): 'name_x' only exists when both sides of a merge carry a 'name' column -\n",
"# confirm this cell also works on a fresh kernel.\n",
"tracks = (\n",
"    tracks.merge(catalog_tracks, left_on='track_id', right_on='id')\n",
"    .merge(tracks_grouped, on='track_id')\n",
"    [['track_id', 'albums', 'artists', 'genres', 'name_x', 'user_id_count']]\n",
"    .rename(columns={'user_id_count': 'count', 'name_x': 'name'})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "9bdda605",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>track_id</th>\n",
" <th>albums</th>\n",
" <th>artists</th>\n",
" <th>genres</th>\n",
" <th>name</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>26</td>\n",
" <td>[3, 2490753]</td>\n",
" <td>[16]</td>\n",
" <td>[11, 21]</td>\n",
" <td>Complimentary Me</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>38</td>\n",
" <td>[3, 2490753]</td>\n",
" <td>[16]</td>\n",
" <td>[11, 21]</td>\n",
" <td>Momma's Boy</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>135</td>\n",
" <td>[12, 214, 2490809]</td>\n",
" <td>[84]</td>\n",
" <td>[11]</td>\n",
" <td>Atticus</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>136</td>\n",
" <td>[12, 214, 2490809]</td>\n",
" <td>[84]</td>\n",
" <td>[11]</td>\n",
" <td>24 Hours</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>138</td>\n",
" <td>[12, 214, 322, 72275, 72292, 91199, 213505, 24...</td>\n",
" <td>[84]</td>\n",
" <td>[11]</td>\n",
" <td>Don't Upset The Rhythm (Go Baby Go)</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932659</th>\n",
" <td>101478482</td>\n",
" <td>[21399811]</td>\n",
" <td>[5540395]</td>\n",
" <td>[3, 75]</td>\n",
" <td>На лицо</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932660</th>\n",
" <td>101490148</td>\n",
" <td>[21403052]</td>\n",
" <td>[9078726]</td>\n",
" <td>[11, 20]</td>\n",
" <td>Без капли мысли</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932661</th>\n",
" <td>101493057</td>\n",
" <td>[21403883]</td>\n",
" <td>[11865715]</td>\n",
" <td>[44, 75]</td>\n",
" <td>SKITTLES</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932662</th>\n",
" <td>101495927</td>\n",
" <td>[21404975]</td>\n",
" <td>[4462686]</td>\n",
" <td>[3, 75]</td>\n",
" <td>Москва</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932663</th>\n",
" <td>101521819</td>\n",
" <td>[21414638]</td>\n",
" <td>[5056591]</td>\n",
" <td>[3, 75]</td>\n",
" <td>Вокзал</td>\n",
" <td>34</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>932664 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" track_id albums \\\n",
"0 26 [3, 2490753] \n",
"1 38 [3, 2490753] \n",
"2 135 [12, 214, 2490809] \n",
"3 136 [12, 214, 2490809] \n",
"4 138 [12, 214, 322, 72275, 72292, 91199, 213505, 24... \n",
"... ... ... \n",
"932659 101478482 [21399811] \n",
"932660 101490148 [21403052] \n",
"932661 101493057 [21403883] \n",
"932662 101495927 [21404975] \n",
"932663 101521819 [21414638] \n",
"\n",
" artists genres name count \n",
"0 [16] [11, 21] Complimentary Me 5 \n",
"1 [16] [11, 21] Momma's Boy 8 \n",
"2 [84] [11] Atticus 16 \n",
"3 [84] [11] 24 Hours 7 \n",
"4 [84] [11] Don't Upset The Rhythm (Go Baby Go) 17 \n",
"... ... ... ... ... \n",
"932659 [5540395] [3, 75] На лицо 6 \n",
"932660 [9078726] [11, 20] Без капли мысли 72 \n",
"932661 [11865715] [44, 75] SKITTLES 9 \n",
"932662 [4462686] [3, 75] Москва 20 \n",
"932663 [5056591] [3, 75] Вокзал 34 \n",
"\n",
"[932664 rows x 6 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tracks"
]
},
{
"cell_type": "markdown",
"id": "cddebf8e",
"metadata": {},
"source": [
"## Artists"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "fdd805b2",
"metadata": {},
"outputs": [],
"source": [
"# One zero-initialised counter per artist id known to the artist catalog\n",
"artists_dict = dict.fromkeys(catalog_artists['id'], 0)\n",
"\n",
"# Count the tracks credited to each artist. Iterating the column directly avoids\n",
"# DataFrame.iterrows(), which builds a Series object for every row.\n",
"# An artist id missing from catalog_artists still raises KeyError, as before.\n",
"for track_artists in tracks['artists']:\n",
"    for artist_id in track_artists:\n",
"        artists_dict[artist_id] += 1\n",
"artists_df = pd.DataFrame.from_dict(artists_dict, orient='index')"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "064f70d0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>count</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>Kenny Dorham</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>Max Roach</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>Francis Rossi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>9</td>\n",
" <td>182</td>\n",
" <td>Status Quo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>1</td>\n",
" <td>Phil Everly</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153576</th>\n",
" <td>16093680</td>\n",
" <td>4</td>\n",
" <td>Los Tiburones</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153577</th>\n",
" <td>16097398</td>\n",
" <td>2</td>\n",
" <td>AMELI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153578</th>\n",
" <td>16098445</td>\n",
" <td>1</td>\n",
" <td>2GANGSTA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153579</th>\n",
" <td>16099125</td>\n",
" <td>1</td>\n",
" <td>Daria</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153580</th>\n",
" <td>16102782</td>\n",
" <td>1</td>\n",
" <td>Pan dö Baré</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>153581 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" id count name\n",
"0 4 1 Kenny Dorham\n",
"1 5 1 Max Roach\n",
"2 7 1 Francis Rossi\n",
"3 9 182 Status Quo\n",
"4 12 1 Phil Everly\n",
"... ... ... ...\n",
"153576 16093680 4 Los Tiburones\n",
"153577 16097398 2 AMELI\n",
"153578 16098445 1 2GANGSTA\n",
"153579 16099125 1 Daria\n",
"153580 16102782 1 Pan dö Baré\n",
"\n",
"[153581 rows x 3 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Turn the artist-id index into a regular column and name both columns\n",
"artists_df = artists_df.reset_index()\n",
"artists_df.columns = ['id', 'count']\n",
"# Single vectorised id -> name lookup; the former per-row filter of catalog_artists\n",
"# was quadratic in the number of artists.\n",
"# drop_duplicates keeps the first name per id, matching the former .values[0]\n",
"artist_names = catalog_artists.drop_duplicates('id').set_index('id')['name']\n",
"artists_df['name'] = artists_df['id'].map(artist_names)\n",
"artists_df"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "04e2830a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>count</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13930</th>\n",
" <td>118708</td>\n",
" <td>2038</td>\n",
" <td>Владимир Высоцкий</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>171</td>\n",
" <td>1363</td>\n",
" <td>сборник</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15138</th>\n",
" <td>139896</td>\n",
" <td>1273</td>\n",
" <td>Armin van Buuren</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2096</th>\n",
" <td>9170</td>\n",
" <td>1201</td>\n",
" <td>Wolfgang Amadeus Mozart</td>\n",
" </tr>\n",
" <tr>\n",
" <th>329</th>\n",
" <td>1227</td>\n",
" <td>1101</td>\n",
" <td>Johann Sebastian Bach</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2570</th>\n",
" <td>11818</td>\n",
" <td>1045</td>\n",
" <td>Hans Zimmer</td>\n",
" </tr>\n",
" <tr>\n",
" <th>411</th>\n",
" <td>1551</td>\n",
" <td>1030</td>\n",
" <td>Pyotr Ilyich Tchaikovsky</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2426</th>\n",
" <td>10987</td>\n",
" <td>973</td>\n",
" <td>Elvis Presley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18025</th>\n",
" <td>188963</td>\n",
" <td>966</td>\n",
" <td>Аквариум</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16706</th>\n",
" <td>164416</td>\n",
" <td>919</td>\n",
" <td>Михаил Шуфутинский</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id count name\n",
"13930 118708 2038 Владимир Высоцкий\n",
"26 171 1363 сборник\n",
"15138 139896 1273 Armin van Buuren\n",
"2096 9170 1201 Wolfgang Amadeus Mozart\n",
"329 1227 1101 Johann Sebastian Bach\n",
"2570 11818 1045 Hans Zimmer\n",
"411 1551 1030 Pyotr Ilyich Tchaikovsky\n",
"2426 10987 973 Elvis Presley\n",
"18025 188963 966 Аквариум\n",
"16706 164416 919 Михаил Шуфутинский"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"artists_df.sort_values(by='count', ascending=False).head(10)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "57e71559",
"metadata": {},
"outputs": [],
"source": [
"# Attach the per-artist track counts. A 'count' column left over from a previous run is\n",
"# dropped first, so re-running the cell does not produce count_x / count_y.\n",
"catalog_artists = (\n",
"    catalog_artists.drop(columns=['count'], errors='ignore')\n",
"    .merge(artists_df[['count', 'id']], on='id')\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "5dab64e2",
"metadata": {},
"outputs": [],
"source": [
"# Write next to the input files ('../data/'), the directory the raw parquet files are read from\n",
"catalog_artists.to_parquet('../data/catalog_artists.parquet')"
]
},
{
"cell_type": "markdown",
"id": "318b573a-9e2d-4808-95db-60cfb8bbdb73",
"metadata": {
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"# Выводы"
]
},
{
"cell_type": "markdown",
"id": "caa96e12-36a8-4401-8f11-98627a49ae9d",
"metadata": {},
"source": [
"Приведём выводы по первому знакомству с данными:\n",
"- Есть ли с данными явные проблемы:\n",
"1) Огромная таблица взаимодействий — 220 млн записей.\n",
"1) Жанры, артисты и альбомы представлены списками, что усложняет обработку.\n",
"\n",
"\n",
"- Какие корректирующие действия (в целом) были предприняты:\n",
"1) Были удалены треки без авторов, жанров и альбома.\n",
"1) Удалены треки, у которых хотя бы один жанр был неизвестным.\n",
"1) Удалены взаимодействия с треками, которых нет в tracks.\n",
"1) Из взаимодействий удалены пользователи с количеством прослушиваний менее 3: для них имеет смысл строить рекомендации только онлайн."
]
},
{
"cell_type": "markdown",
"id": "7bc3296b-eba6-4333-a78d-b9304aa87e3d",
"metadata": {},
"source": [
"# === ЭТАП 2 ==="
]
},
{
"cell_type": "markdown",
"id": "68e73960-fd38-4e15-8db0-9a25c35dfd25",
"metadata": {},
"source": [
"# EDA"
]
},
{
"cell_type": "markdown",
"id": "a30e823e-8e0f-4a76-a02e-8d1ba8bf0f8a",
"metadata": {},
"source": [
"Распределение количества прослушанных треков."
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "9bf5eaba-35f7-4da7-be59-9ab4a34b2423",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([3.0000e+00, 6.0000e+00, 2.5500e+02, 4.9647e+04, 7.2378e+04,\n",
" 5.6896e+04, 4.6211e+04, 3.8646e+04, 3.3147e+04, 2.8957e+04,\n",
" 2.5255e+04, 2.2636e+04, 2.0247e+04, 1.8258e+04, 1.6701e+04,\n",
" 1.5122e+04, 1.4046e+04, 1.2963e+04, 1.2195e+04, 1.1213e+04,\n",
" 1.0359e+04, 9.5520e+03, 9.1130e+03, 8.6450e+03, 8.1950e+03,\n",
" 7.6020e+03, 7.0220e+03, 6.9950e+03, 6.4610e+03, 6.2610e+03,\n",
" 5.8450e+03, 5.7380e+03, 5.4560e+03, 5.1840e+03, 4.9350e+03,\n",
" 4.7770e+03, 4.7050e+03, 4.4300e+03, 4.2880e+03, 4.1730e+03,\n",
" 3.8810e+03, 3.8060e+03, 3.6740e+03, 3.5720e+03, 3.3860e+03,\n",
" 3.3100e+03, 3.2350e+03, 3.2640e+03, 2.9770e+03, 0.0000e+00,\n",
" 3.0450e+03, 2.8090e+03, 2.7330e+03, 2.7090e+03, 2.6730e+03,\n",
" 2.6670e+03, 2.5620e+03, 2.3920e+03, 2.3700e+03, 2.3370e+03,\n",
" 2.2830e+03, 2.1520e+03, 2.1690e+03, 2.0780e+03, 2.1080e+03,\n",
" 2.1130e+03, 1.9710e+03, 1.9290e+03, 1.8570e+03, 1.7640e+03,\n",
" 1.7840e+03, 1.7810e+03, 1.6900e+03, 1.7460e+03, 1.6840e+03,\n",
" 1.5710e+03, 1.6040e+03, 1.5720e+03, 1.6200e+03, 1.4670e+03,\n",
" 1.4740e+03, 1.4300e+03, 1.4960e+03, 1.4090e+03, 1.3540e+03,\n",
" 1.4030e+03, 1.3710e+03, 1.3610e+03, 1.3190e+03, 1.3210e+03,\n",
" 1.2080e+03, 1.2230e+03, 1.1540e+03, 1.1670e+03, 1.1550e+03,\n",
" 1.1800e+03, 1.0870e+03, 1.0960e+03, 1.0900e+03, 1.0600e+03]),\n",
" array([ 2. , 2.98, 3.96, 4.94, 5.92, 6.9 , 7.88, 8.86,\n",
" 9.84, 10.82, 11.8 , 12.78, 13.76, 14.74, 15.72, 16.7 ,\n",
" 17.68, 18.66, 19.64, 20.62, 21.6 , 22.58, 23.56, 24.54,\n",
" 25.52, 26.5 , 27.48, 28.46, 29.44, 30.42, 31.4 , 32.38,\n",
" 33.36, 34.34, 35.32, 36.3 , 37.28, 38.26, 39.24, 40.22,\n",
" 41.2 , 42.18, 43.16, 44.14, 45.12, 46.1 , 47.08, 48.06,\n",
" 49.04, 50.02, 51. , 51.98, 52.96, 53.94, 54.92, 55.9 ,\n",
" 56.88, 57.86, 58.84, 59.82, 60.8 , 61.78, 62.76, 63.74,\n",
" 64.72, 65.7 , 66.68, 67.66, 68.64, 69.62, 70.6 , 71.58,\n",
" 72.56, 73.54, 74.52, 75.5 , 76.48, 77.46, 78.44, 79.42,\n",
" 80.4 , 81.38, 82.36, 83.34, 84.32, 85.3 , 86.28, 87.26,\n",
" 88.24, 89.22, 90.2 , 91.18, 92.16, 93.14, 94.12, 95.1 ,\n",
" 96.08, 97.06, 98.04, 99.02, 100. ]),\n",
" <BarContainer object of 100 artists>)"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Listen counts are integers, so use unit-wide bins centred on 1..100.\n",
"# bins=100 over the observed range 2..100 gives 0.98-wide bins, which leaves one bin\n",
"# empty and shifts the others relative to the integer values.\n",
"plt.hist(tracks.query('count <= 100')['count'], bins=np.arange(0.5, 101.5))\n",
"plt.xlabel('Listens per track')\n",
"plt.ylabel('Number of tracks')\n",
"# plt.show() renders the figure without echoing the raw bin arrays as cell output\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "0ede50ad",
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "3bd4b553",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 0, 'Tracks')"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Distribution of listen counts among the most popular tracks (at least 20000 listens).\n",
"# The former .fillna(0) was a no-op: rows with a missing count never pass the filter.\n",
"sns.histplot(tracks.query('count >= 20000')['count'])\n",
"# The x axis shows listens per track; the bar height is the number of tracks\n",
"plt.xlabel('Listens per track')\n",
"plt.ylabel('Number of tracks')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "d765519a-18dd-4d30-9e29-cc2d84cacd79",
"metadata": {},
"source": [
"Наиболее популярные треки"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "aecaf630-fde0-4860-b84a-42c933a9606e",
"metadata": {},
"outputs": [],
"source": [
"# Copy the per-track listen counts into catalog_tracks. A 'count' column left over from a\n",
"# previous run is dropped first, so re-running does not pile up count_x / count_y columns.\n",
"catalog_tracks = (\n",
"    catalog_tracks.drop(columns=['count'], errors='ignore')\n",
"    .merge(tracks[['count', 'track_id']], left_on='id', right_on='track_id')\n",
"    .drop(columns=['track_id'])\n",
")\n",
"# Kept as the last expression so the top-10 table is actually rendered\n",
"tracks[['track_id', 'count', 'name']].sort_values(by='count', ascending=False).head(10)\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "89335ddd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>count_x</th>\n",
" <th>count_y</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8215</th>\n",
" <td>53404</td>\n",
" <td>Smells Like Teen Spirit</td>\n",
" <td>111062</td>\n",
" <td>111062</td>\n",
" <td>111062</td>\n",
" </tr>\n",
" <tr>\n",
" <th>451664</th>\n",
" <td>33311009</td>\n",
" <td>Believer</td>\n",
" <td>106921</td>\n",
" <td>106921</td>\n",
" <td>106921</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24433</th>\n",
" <td>178529</td>\n",
" <td>Numb</td>\n",
" <td>101924</td>\n",
" <td>101924</td>\n",
" <td>101924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>478765</th>\n",
" <td>35505245</td>\n",
" <td>I Got Love</td>\n",
" <td>99490</td>\n",
" <td>99490</td>\n",
" <td>99490</td>\n",
" </tr>\n",
" <tr>\n",
" <th>774169</th>\n",
" <td>65851540</td>\n",
" <td>Юность</td>\n",
" <td>86670</td>\n",
" <td>86670</td>\n",
" <td>86670</td>\n",
" </tr>\n",
" <tr>\n",
" <th>341295</th>\n",
" <td>24692821</td>\n",
" <td>Way Down We Go</td>\n",
" <td>86246</td>\n",
" <td>86246</td>\n",
" <td>86246</td>\n",
" </tr>\n",
" <tr>\n",
" <th>443347</th>\n",
" <td>32947997</td>\n",
" <td>Shape of You</td>\n",
" <td>85886</td>\n",
" <td>85886</td>\n",
" <td>85886</td>\n",
" </tr>\n",
" <tr>\n",
" <th>651810</th>\n",
" <td>51241318</td>\n",
" <td>In The End</td>\n",
" <td>85244</td>\n",
" <td>85244</td>\n",
" <td>85244</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83809</th>\n",
" <td>795836</td>\n",
" <td>Shape Of My Heart</td>\n",
" <td>85042</td>\n",
" <td>85042</td>\n",
" <td>85042</td>\n",
" </tr>\n",
" <tr>\n",
" <th>607542</th>\n",
" <td>45499814</td>\n",
" <td>Life</td>\n",
" <td>84748</td>\n",
" <td>84748</td>\n",
" <td>84748</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name count_x count_y count\n",
"8215 53404 Smells Like Teen Spirit 111062 111062 111062\n",
"451664 33311009 Believer 106921 106921 106921\n",
"24433 178529 Numb 101924 101924 101924\n",
"478765 35505245 I Got Love 99490 99490 99490\n",
"774169 65851540 Юность 86670 86670 86670\n",
"341295 24692821 Way Down We Go 86246 86246 86246\n",
"443347 32947997 Shape of You 85886 85886 85886\n",
"651810 51241318 In The End 85244 85244 85244\n",
"83809 795836 Shape Of My Heart 85042 85042 85042\n",
"607542 45499814 Life 84748 84748 84748"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog_tracks.sort_values('count', ascending=False).head(10)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "da18b206",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_219026/1451962387.py:2: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
" p.set_xticklabels(\n"
]
},
{
"data": {
"text/plain": [
"[Text(0, 0, 'Smells Like Teen Spirit'),\n",
" Text(1, 0, 'Believer'),\n",
" Text(2, 0, 'Numb'),\n",
" Text(3, 0, 'I Got Love'),\n",
" Text(4, 0, 'Юность'),\n",
" Text(5, 0, 'Way Down We Go'),\n",
" Text(6, 0, 'Shape of You'),\n",
" Text(7, 0, 'In The End'),\n",
" Text(8, 0, 'Shape Of My Heart'),\n",
" Text(9, 0, 'Life')]"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmIAAAIaCAYAAACDJyFKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACD/UlEQVR4nO3dd3wN2f8/8NdNpEsEIUWJ6KwuRNQgK9oSvQuiixa9hVU/YkWweme1VXeVtXqLbllE3V1WjWgRgrT7/v2R351vroRll8wkXs/HIw8yc+69Z27unXnNmTPn6EREQERERERpzkTtChARERF9qRjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUkkmtStA76fX63H//n3Y2tpCp9OpXR0iIiL6ACKCFy9ewMXFBSYm7273YhDTuPv37yNPnjxqV4OIiIj+hTt37iB37tzvXM8gpnG2trYAkv6QdnZ2KteGiIiIPkR0dDTy5MmjHMffhUFM4wyXI+3s7BjEiIiI0pl/6lbEzvpEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilWRSuwL075QfslLtKrzT2Wkd1a4CERFRusAWMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQq4aTfpBotT1wOcPJyIiL6/NgiRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRSjKpXQGi9K78kJVqV+G9zk7rqHYViIjoHdgiRkRERKSSdBfEDh8+jG+++QYuLi7Q6XTYunWr0XoRQVBQEJydnWFlZQVvb2/cuHHDqMzTp0/Rrl072NnZwd7eHv7+/nj58qVRmQsXLqBatWqwtLREnjx5EBwcnKIuGzZsQNGiRWFpaYmSJUti586dH10XIiIi+nKluyAWExOD0qVLY86cOamuDw4OxqxZszB//nycPHkSNjY28PHxwZs3b5Qy7dq1Q3h4OPbs2YPt27fj8OHD6N69u7I+OjoaderUgaurK86ePYtp06Zh3LhxWLhwoVLm2LFjaNOmDfz9/XHu3Dn4+vrC19cXly5d+qi6EBER0ZdLJyKidiX+LZ1Ohy1btsDX1xdAUguUi4sLBg0ahMGDBwMAnj9/DkdHRyxfvhytW7fGlStXULx4cZw+fRru7u4AgF27dqF+/fq4e/cuXFxcMG/ePIwaNQoREREwNzcHAAwfPhxbt27F1atXAQCtWrVCTEwMtm/frtSnUqVKKFOmDObPn/9BdfkQ0dHRyJIlC54/fw47OztluZb7JX1onyQtbwPw5W0HERF9Ou86fr8t3bWIvc/NmzcREREBb29vZVmWLFng4eGB48ePAwCOHz8Oe3t7JYQBgLe3N0xMTHDy5EmlTPXq1ZUQBgA+Pj64du0anj17ppRJ/jqGMobX+ZC6pCY2NhbR0dFGP0RERJQxZaggFhERAQBwdHQ0Wu7o6Kisi4iIQM6cOY3WZ8qUCdmyZTMqk9pzJH+Nd5VJvv6f6pKaKVOmIEuWLMpPnjx5/mGriYiIKL3KUEEsIxgxYgSeP3+u/Ny5c0ftKhEREdFnkqHGEXNycgIAPHz4EM7Ozsryhw8fokyZMkqZyMhIo8clJCTg6dOnyuOdnJzw8OFDozKG3/+pTPL1/1SX1FhYWMDCwuKDtpfoU2E/NyIidWSoFjE3Nzc4OTlh3759yr
Lo6GicPHkSnp6eAABPT09ERUXh7NmzSpn9+/dDr9fDw8NDKXP48GHEx8crZfbs2YMiRYoga9asSpnkr2MoY3idD6kLERERfdnSXRB7+fIlzp8/j/PnzwNI6hR//vx53L59GzqdDgMGDMDEiRPx888/4+LFi+jYsSNcXFyUOyuLFSuGunXrolu3bjh16hTCwsIQEBCA1q1bw8XFBQDQtm1bmJubw9/fH+Hh4Vi/fj1mzpyJwMBApR79+/fHrl27MH36dFy9ehXjxo3DmTNnEBAQAAAfVBciIiL6sqW7S5NnzpxBzZo1ld8N4cjPzw/Lly/H0KFDERMTg+7duyMqKgpVq1bFrl27YGlpqTxm9erVCAgIQO3atWFiYoJmzZph1qxZyvosWbJg9+7d6NOnD8qXLw8HBwcEBQUZjTVWuXJlrFmzBqNHj8bIkSNRqFAhbN26FSVKlFDKfEhdiIiI6MuVrscR+xJwHDH1fEnbkRG2gYhIS77IccSIiIiI0hMGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCWZ1K4AEdGnUn7ISrWr8F5np3VUuwpEpDEMYkREGqPlQMkwSfRpMYgREdEnp+UwCTBQknawjxgRERGRStgiRkRE9A5s2aPPjUGMiIgoA2OY1DYGMSIiItK8jBoo2UeMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCrJcEEsMTERY8aMgZubG6ysrFCgQAFMmDABIqKUEREEBQXB2dkZVlZW8Pb2xo0bN4ye5+nTp2jXrh3s7Oxgb28Pf39/vHz50qjMhQsXUK1aNVhaWiJPnjwIDg5OUZ8NGzagaNGisLS0RMmSJbFz587Ps+FERESU7mS4IDZ16lTMmzcP33//Pa5cuYKpU6ciODgYs2fPVsoEBwdj1qxZmD9/Pk6ePAkbGxv4+PjgzZs3Spl27dohPDwce/bswfbt23H48GF0795dWR8dHY06derA1dUVZ8+exbRp0zBu3DgsXLhQKXPs2DG0adMG/v7+OHfuHHx9feHr64tLly6lzZtBREREmpbhgtixY8fQuHFjNGjQAPny5UPz5s1Rp04dnDp1CkBSa1hoaChGjx6Nxo0bo1SpUli5ciXu37+PrVu3AgCuXLmCXbt2YfHixfDw8EDVqlUxe/ZsrFu3Dvfv3wcArF69GnFxcVi6dCm++uortG7dGv369UNISIhSl5kzZ6Ju3boYMmQIihUrhgkTJqBcuXL4/vvv0/x9ISIiIu3JcEGscuXK2LdvH65fvw4A+P3333H06FHUq1cPAHDz5k1ERETA29tbeUyWLFng4eGB48ePAwCOHz8Oe3t7uLu7K2W8vb1hYmKCkydPKmWqV68Oc3NzpYyPjw+uXbuGZ8+eKWWSv46hjOF1UhMbG4vo6GijHyIiIsqYMqldgU9t+PDhiI6ORtGiRWFqaorExERMmjQJ7dq1AwBEREQAABwdHY0e5+joqKyLiIhAzpw5jdZnypQJ2bJlMyrj5uaW4jkM67JmzYqIiIj3vk5qpkyZgm+//fZjN5uIiIjSoQzXIvbjjz9i9erVWLNmDX777TesWLEC3333HVasWKF21T7IiBEj8Pz5c+Xnzp07aleJiIiIPpMM1yI2ZM
gQDB8+HK1btwYAlCxZEn///TemTJkCPz8/ODk5AQAePnwIZ2dn5XEPHz5EmTJlAABOTk6IjIw0et6EhAQ8ffpUebyTkxMePnxoVMbw+z+VMaxPjYWFBSwsLD52s4mIiCgdynAtYq9evYKJifFmmZqaQq/XAwDc3Nzg5OSEffv2Keujo6Nx8uRJeHp6AgA8PT0RFRWFs2fPKmX2798PvV4PDw8Ppczhw4cRHx+vlNmzZw+KFCmCrFmzKmWSv46hjOF1iIiI6MuW4YLYN998g0mTJmHHjh24desWtmzZgpCQEDRp0gQAoNPpMGDAAEycOBE///wzLl68iI4dO8LFxQW+vr4AgGLFiqFu3bro1q0bTp06hbCwMAQEBKB169ZwcXEBALRt2xbm5ubw9/dHeHg41q9fj5kzZyIwMFCpS//+/bFr1y5Mnz4dV69exbhx43DmzBkEBASk+ftCRERE2pPhLk3Onj0bY8aMQe/evREZGQkXFxf06NEDQUFBSpmhQ4ciJiYG3bt3R1RUFKpWrYpdu3bB0tJSKbN69WoEBASgdu3aMDExQbNmzTBr1ixlfZYsWbB792706dMH5cuXh4ODA4KCgozGGqtcuTLWrFmD0aNHY+TIkShUqBC2bt2KEiVKpM2bQURERJqW4YKYra0tQkNDERoa+s4yOp0O48ePx/jx499ZJlu2bFizZs17X6tUqVI4cuTIe8u0aNECLVq0eG8ZIiIi+jJluEuTREREROkFgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpJI0C2K1atVCVFRUiuXR0dGoVatWWlWDiIiISDPSLIgdPHgQcXFxKZa/efMGR44cSatqEBEREWlGps/9AhcuXFD+f/nyZURERCi/JyYmYteuXciVK9fnrgYRERGR5nz2IFamTBnodDrodLpUL0FaWVlh9uzZn7saRERERJrz2YPYzZs3ISLInz8/Tp06hRw5cijrzM3NkTNnTpiamn7uahARERFpzmcPYq6urgAAvV7/uV+KiIiIKF357EEsuRs3buDAgQOIjIxMEcyCgoLSsipEREREqkuzILZo0SL06tULDg4OcHJygk6nU9bpdDoGMSIiIvripNnwFRMnTsSkSZMQERGB8+fP49y5c8rPb7/99klf6969e2jfvj2yZ88OKysrlCxZEmfOnFHWiwiCgoLg7OwMKysreHt748aNG0bP8fTpU7Rr1w52dnawt7eHv78/Xr58aVTmwoULqFatGiwtLZEnTx4EBwenqMuGDRtQtGhRWFpaomTJkti5c+cn3VYiIiJKv9IsiD179gwtWrRIk9epUqUKzMzM8Msvv+Dy5cuYPn06smbNqpQJDg7GrFmzMH/+fJw8eRI2Njbw8fHBmzdvlDLt2rVDeHg49uzZg+3bt+Pw4cPo3r27sj46Ohp16tSBq6srzp49i2nTpmHcuHFYuHChUubYsWNo06YN/P39ce7cOfj6+sLX1xeXLl367O8DERERaV+aBbEWLVpg9+7dn/11pk6dijx58mDZsmWoWLEi3NzcUKdOHRQoUABAUmtYaGgoRo8ejcaNG6NUqVJYuXIl7t+/j61btwIArly5gl27dmHx4sXw8P
BA1apVMXv2bKxbtw73798HAKxevRpxcXFYunQpvvrqK7Ru3Rr9+vVDSEiIUpeZM2eibt26GDJkCIoVK4YJEyagXLly+P777z/7+0BERETal2ZBrGDBghgzZgw6deqE6dOnY9asWUY/n8rPP/8Md3d3tGjRAjlz5kTZsmWxaNEiZf3NmzcREREBb29vZVmWLFng4eGB48ePAwCOHz8Oe3t7uLu7K2W8vb1hYmKCkydPKmWqV68Oc3NzpYyPjw+uXbuGZ8+eKWWSv46hjOF1UhMbG4vo6GijHyIiIsqY0qyz/sKFC5E5c2YcOnQIhw4dMlqn0+nQr1+/T/I6f/31F+bNm4fAwECMHDkSp0+fRr9+/WBubg4/Pz9lZH9HR0ejxzk6OirrIiIikDNnTqP1mTJlQrZs2YzKuLm5pXgOw7qsWbMiIiLiva+TmilTpuDbb7/9F1tORERE6U2aBbGbN2+myevo9Xq4u7tj8uTJAICyZcvi0qVLmD9/Pvz8/NKkDv/FiBEjEBgYqPweHR2NPHnyqFgjIiIi+lzS7NJkWnF2dkbx4sWNlhUrVgy3b98GADg5OQEAHj58aFTm4cOHyjonJydERkYarU9ISMDTp0+NyqT2HMlf411lDOtTY2FhATs7O6MfIiIiypjSrEWsS5cu712/dOnST/I6VapUwbVr14yWXb9+XRnh383NDU5OTti3bx/KlCkDIKnV6eTJk+jVqxcAwNPTE1FRUTh79izKly8PANi/fz/0ej08PDyUMqNGjUJ8fDzMzMwAAHv27EGRIkWUOzQ9PT2xb98+DBgwQKnLnj174Onp+Um2lYiIiNK3NB2+IvlPZGQk9u/fj82bNyMqKuqTvc7AgQNx4sQJTJ48GX/88QfWrFmDhQsXok+fPgCS+qMNGDAAEydOxM8//4yLFy+iY8eOcHFxga+vL4CkFrS6deuiW7duOHXqFMLCwhAQEIDWrVvDxcUFANC2bVuYm5vD398f4eHhWL9+PWbOnGl0WbF///7YtWsXpk+fjqtXr2LcuHE4c+YMAgICPtn2EhERUfqVZi1iW7ZsSbFMr9ejV69eytASn0KFChWwZcsWjBgxAuPHj4ebmxtCQ0PRrl07pczQoUMRExOD7t27IyoqClWrVsWuXbtgaWmplFm9ejUCAgJQu3ZtmJiYoFmzZkZ3d2bJkgW7d+9Gnz59UL58eTg4OCAoKMhorLHKlStjzZo1GD16NEaOHIlChQph69atKFGixCfbXiIiIkq/0nSuybeZmJggMDAQXl5eGDp06Cd73oYNG6Jhw4bvXK/T6TB+/HiMHz/+nWWyZcuGNWvWvPd1SpUqhSNHjry3TIsWLdJkIFsiIiJKf1TvrP/nn38iISFB7WoQERERpbk0axFL3ncKSBrh/sGDB9ixY0e6GFaCiIiI6FNLsyB27tw5o99NTEyQI0cOTJ8+/R/vqCQiIiLKiNIsiB04cCCtXoqIiIgoXUjzzvqPHj1SxvkqUqQIcuTIkdZVICIiItKENOusHxMTgy5dusDZ2RnVq1dH9erV4eLiAn9/f7x69SqtqkFERESkGWkWxAIDA3Ho0CFs27YNUVFRiIqKwk8//YRDhw5h0KBBaVUNIiIiIs1Is0uTmzZtwsaNG+Hl5aUsq1+/PqysrNCyZUvMmzcvrapCREREpAlp1iL26tUrODo6plieM2dOXpokIiKiL1KaBTFPT0+MHTsWb968UZa9fv0a3377LSfBJiIioi9Sml2aDA0NRd26dZE7d26ULl0aAPD777/DwsICu3fvTqtqEBEREWlGmgWxkiVL4saNG1i9ejWuXr0KAGjTpg3atWsHKyurtKoGERERkWakWRCbMmUKHB0d0a1bN6PlS5cuxaNHjzBs2LC0qgoRERGRJqRZH7EFCxagaNGiKZZ/9dVXmD9/flpVg4iIiEgz0iyIRUREwNnZOcXyHDly4MGDB2lVDSIiIiLNSLMglidPHoSFhaVYHhYWBhcXl7
SqBhEREZFmpFkfsW7dumHAgAGIj49HrVq1AAD79u3D0KFDObI+ERERfZHSLIgNGTIET548Qe/evREXFwcAsLS0xLBhwzBixIi0qgYRERGRZqRZENPpdJg6dSrGjBmDK1euwMrKCoUKFYKFhUVaVYGIiIhIU9IsiBlkzpwZFSpUSOuXJSIiItKcNOusT0RERETGGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSSYYPYv/73/+g0+kwYMAAZdmbN2/Qp08fZM+eHZkzZ0azZs3w8OFDo8fdvn0bDRo0gLW1NXLmzIkhQ4YgISHBqMzBgwdRrlw5WFhYoGDBgli+fHmK158zZw7y5csHS0tLeHh44NSpU59jM4mIiCgdytBB7PTp01iwYAFKlSpltHzgwIHYtm0bNmzYgEOHDuH+/fto2rSpsj4xMRENGjRAXFwcjh07hhUrVmD58uUICgpSyty8eRMNGjRAzZo1cf78eQwYMABdu3bFr7/+qpRZv349AgMDMXbsWPz2228oXbo0fHx8EBkZ+fk3noiIiDQvwwaxly9fol27dli0aBGyZs2qLH/+/DmWLFmCkJAQ1KpVC+XLl8eyZctw7NgxnDhxAgCwe/duXL58GT/88APKlCmDevXqYcKECZgzZw7i4uIAAPPnz4ebmxumT5+OYsWKISAgAM2bN8eMGTOU1woJCUG3bt3QuXNnFC9eHPPnz4e1tTWWLl2atm8GERERaVKGDWJ9+vRBgwYN4O3tbbT87NmziI+PN1petGhR5M2bF8ePHwcAHD9+HCVLloSjo6NSxsfHB9HR0QgPD1fKvP3cPj4+ynPExcXh7NmzRmVMTEzg7e2tlElNbGwsoqOjjX6IiIgoY8qkdgU+h3Xr1uG3337D6dOnU6yLiIiAubk57O3tjZY7OjoiIiJCKZM8hBnWG9a9r0x0dDRev36NZ8+eITExMdUyV69efWfdp0yZgm+//fbDNpSIiIjStQzXInbnzh30798fq1evhqWlpdrV+WgjRozA8+fPlZ87d+6oXSUiIiL6TDJcEDt79iwiIyNRrlw5ZMqUCZkyZcKhQ4cwa9YsZMqUCY6OjoiLi0NUVJTR4x4+fAgnJycAgJOTU4q7KA2//1MZOzs7WFlZwcHBAaampqmWMTxHaiwsLGBnZ2f0Q0RERBlThgtitWvXxsWLF3H+/Hnlx93dHe3atVP+b2Zmhn379imPuXbtGm7fvg1PT08AgKenJy5evGh0d+OePXtgZ2eH4sWLK2WSP4ehjOE5zM3NUb58eaMyer0e+/btU8oQERHRly3D9RGztbVFiRIljJbZ2Ngge/bsynJ/f38EBgYiW7ZssLOzQ9++feHp6YlKlSoBAOrUqYPixYujQ4cOCA4ORkREBEaPHo0+ffrAwsICANCzZ098//33GDp0KLp06YL9+/fjxx9/xI4dO5TXDQwMhJ+fH9zd3VGxYkWEhoYiJiYGnTt3TqN3g4iIiLQswwWxDzFjxgyYmJigWbNmiI2NhY+PD+bOnausNzU1xfbt29GrVy94enrCxsYGfn5+GD9+vFLGzc0NO3bswMCBAzFz5kzkzp0bixcvho+Pj1KmVatWePToEYKCghAREYEyZcpg165dKTrwExER0ZfpiwhiBw8eNPrd0tISc+bMwZw5c975GFdXV+zcufO9z+vl5YVz5869t0xAQAACAgI+uK5ERET05chwfcSIiIiI0gsGMS
IiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKslwQWzKlCmoUKECbG1tkTNnTvj6+uLatWtGZd68eYM+ffoge/bsyJw5M5o1a4aHDx8albl9+zYaNGgAa2tr5MyZE0OGDEFCQoJRmYMHD6JcuXKwsLBAwYIFsXz58hT1mTNnDvLlywdLS0t4eHjg1KlTn3ybiYiIKH3KcEHs0KFD6NOnD06cOIE9e/YgPj4ederUQUxMjFJm4MCB2LZtGzZs2IBDhw7h/v37aNq0qbI+MTERDRo0QFxcHI4dO4YVK1Zg+fLlCAoKUsrcvHkTDRo0QM2aNXH+/HkMGDAAXbt2xa+//qqUWb9+PQIDAzF27Fj89ttvKF26NHx8fBAZGZk2bwYRERFpWia1K/Cp7dq1y+j35cuXI2fOnDh79iyqV6+O58+fY8mSJVizZg1q1aoFAFi2bBmKFSuGEydOoFKlSti9ezcuX76MvXv3wtHREWXKlMGECRMwbNgwjBs3Dubm5pg/fz7c3Nwwffp0AECxYsVw9OhRzJgxAz4+PgCAkJAQdOvWDZ07dwYAzJ8/Hzt27MDSpUsxfPjwNHxXiIiISIsyXIvY254/fw4AyJYtGwDg7NmziI+Ph7e3t1KmaNGiyJs3L44fPw4AOH78OEqWLAlHR0eljI+PD6KjoxEeHq6USf4chjKG54iLi8PZs2eNypiYmMDb21spk5rY2FhER0cb/RAREVHGlKGDmF6vx4ABA1ClShWUKFECABAREQFzc3PY29sblXV0dERERIRSJnkIM6w3rHtfmejoaLx+/RqPHz9GYmJiqmUMz5GaKVOmIEuWLMpPnjx5Pn7DiYiIKF3I0EGsT58+uHTpEtatW6d2VT7YiBEj8Pz5c+Xnzp07aleJiIiIPpMM10fMICAgANu3b8fhw4eRO3duZbmTkxPi4uIQFRVl1Cr28OFDODk5KWXevrvRcFdl8jJv32n58OFD2NnZwcrKCqampjA1NU21jOE5UmNhYQELC4uP32AiIiJKdzJci5iIICAgAFu2bMH+/fvh5uZmtL58+fIwMzPDvn37lGXXrl3D7du34enpCQDw9PTExYsXje5u3LNnD+zs7FC8eHGlTPLnMJQxPIe5uTnKly9vVEav12Pfvn1KGSIiIvqyZbgWsT59+mDNmjX46aefYGtrq/THypIlC6ysrJAlSxb4+/sjMDAQ2bJlg52dHfr27QtPT09UqlQJAFCnTh0UL14cHTp0QHBwMCIiIjB69Gj06dNHaa3q2bMnvv/+ewwdOhRdunTB/v378eOPP2LHjh1KXQIDA+Hn5wd3d3dUrFgRoaGhiImJUe6iJCIioi9bhgti8+bNAwB4eXkZLV+2bBk6deoEAJgxYwZMTEzQrFkzxMbGwsfHB3PnzlXKmpqaYvv27ejVqxc8PT1hY2MDPz8/jB8/Xinj5uaGHTt2YODAgZg5cyZy586NxYsXK0NXAECrVq3w6NEjBAUFISIiAmXKlMGuXbtSdOAnIiKiL1OGC2Ii8o9lLC
0tMWfOHMyZM+edZVxdXbFz5873Po+XlxfOnTv33jIBAQEICAj4xzoRERHRlyfD9REjIiIiSi8YxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVMIgRkRERKQSBjEiIiIilTCIEREREamEQYyIiIhIJQxiRERERCphECMiIiJSCYMYERERkUoYxIiIiIhUwiBGREREpBIGMSIiIiKVMIilgTlz5iBfvnywtLSEh4cHTp06pXaViIiISAMYxD6z9evXIzAwEGPHjsVvv/2G0qVLw8fHB5GRkWpXjYiIiFTGIPaZhYSEoFu3bujcuTOKFy+O+fPnw9raGkuXLlW7akRERKSyTGpXICOLi4vD2bNnMWLECGWZiYkJvL29cfz48VQfExsbi9jYWOX358+fAwCio6ONyiXGvv4MNf403q7ru2h5G4AvazsywjYA3I60kBG2AfiytiMjbAOQ/rbD8LuIvP+BQp/NvXv3BIAcO3bMaPmQIUOkYsWKqT5m7NixAoA//OEPf/jDH/5kgJ87d+68NyuwRUxjRowYgcDAQOV3vV6Pp0+fInv27NDpdJ/lNaOjo5EnTx7cuXMHdnZ2n+U1PreMsA1AxtiOjLANALdDSzLCNgAZYzsywjYAabMdIoIXL17AxcXlveUYxD4jBwcHmJqa4uHDh0bLHz58CCcnp1QfY2FhAQsLC6Nl9vb2n6uKRuzs7NL1FwvIGNsAZIztyAjbAHA7tCQjbAOQMbYjI2wD8Pm3I0uWLP9Yhp31PyNzc3OUL18e+/btU5bp9Xrs27cPnp6eKtaMiIiItIAtYp9ZYGAg/Pz84O7ujooVKyI0NBQxMTHo3Lmz2lUjIiIilTGIfWatWrXCo0ePEBQUhIiICJQpUwa7du2Co6Oj2lVTWFhYYOzYsSkuiaYnGWEbgIyxHRlhGwBuh5ZkhG0AMsZ2ZIRtALS1HTqRf7qvkoiIiIg+B/YRIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiKif2nr1q24efOm2tX4z/R6vdpV+GIxiGVQGe1LxZt7iUhLRAQRERFo2rQphg4dijt37qhdpX8tPj4eJiZJceBDJ+CmT4dBLAM6d+6c8qWaNm0a1q5dq3KN/j1DAIuMjFS5JvQ2hmN1vOskK72dfCX//KTXz5KTkxNOnjyJX3/9FUOGDMGtW7fUrtJH27t3L2bPng0A6NmzJ9q0aYOEhASVa/Vl4YCuGcytW7dQvnx5DBo0CCKCRYsW4dSpU2pX61/T6XTYsGED+vfvj2PHjiFfvnxqV+mjiMhnm6w9LRm24++//8abN29gamqKAgUKqF2tL45er1dOsnbs2IH79+/D2toatWrVgrOzs8q1+ziG70VwcDCcnJzQrl07mJqaqlyrjxMfH48KFSpg3759qFatGuzt7TF06FDkz59f7ap9kISEBKxcuRJXrlzBzp07ce7cORw9ehSZMqXfaJB8n5v8+6JpQhnCjRs3REQkMTFRduzYIWZmZmJra6ssT0hIULN6/9rdu3elWbNmMn/+fLWr8q/o9XoREblw4YLs3btXjhw5Ii9fvkyxXssMddyyZYsULFhQSpUqJdmzZ5fevXvL8ePHVa7dl2nQoEGSK1cuKV68uB
QpUkTs7Oxkz549IpI+PlPJde7cWTJlyiQ//vhjutpPGd7nzZs3y3fffSfFixcXnU4n7du3l7///lvl2v2z5O91lSpVRKfTydChQ5VliYmJalTrXzP8PWJjYyUuLi7VdVrFIJYB9OvXT3x8fJTff/31V9HpdGJmZiZDhgxRlmv9w/i2M2fOSMuWLaVWrVpy586ddLOTnjx5skyZMkV5vzdt2iQ2NjZSpEgR0el0Ur9+fVm9erXKtfw4Bw4cEFtbW5kzZ46IiMyfP190Op388MMPKtfs30lv34Xk1q5dK9mzZ5fTp09LdHS03L59W7p37y42NjZKMNbq9r3r4N6/f3+xsrKSdevWpZvvuYjInj17xNzcXObMmSMbN26UBQsWiKWlpbRu3Vpu3bqldvU+yHfffSc9e/aUFi1aSOXKlSUkJETevHkjIto/gTd8zg3/7ty5Uxo1aiSVKlWSpk2byrFjx5Rt0TIGsQzg1atXyhlARESEiIjcv39ftm7dKhYWFtK/f38Va/fvTZs2TQoXLizZs2eX+/fvi0j6OEubOHGi6HQ6mTlzpty/f1+KFSsmCxYskIcPH8rZs2elcePGUrNmTVm3bp3aVf1Hhh3csGHDxM/PT0REbt68KQULFpTu3bsr5ZK38mnN26HEsGN+/fq1REZGaja0vMuUKVOkQYMGRsvi4uKkbdu2UqxYMXn+/LlKNftwd+7cSbGsb9++YmVlJevXr5f4+HgVavXx+vbtm+JvceTIEbG0tJQOHTrIX3/9pVLN3i35PnTq1KliYmIi4eHhIpLUOunh4SEhISHy+vVrpdyTJ0/SvJ4f6+effxYbGxsZMWKE7NmzR0qVKiVFihSR33//Xe2q/SMGsXQu+RnLhg0bxMLCQs6dOyciIvHx8bJmzRqxsLCQQYMGKeX69+8vmzdvTuuq/ivz58+XfPnySdOmTeXBgwcikj7CWGhoqJiYmEhQUJC0bdtWoqOjlXWXL1+W+vXrS+PGjTV7xvl2OPHz85PZs2dLXFycuLi4SI8ePZQy69evl23btqlRzQ927NgxGTt2rPJ+37x5Uxo1aiSlSpWSBg0ayOHDh1WuYeqeP3+unFwZTJgwQZydnZVtMfz7008/iaurq9y8eTOtq/lRNm3aJDqdTo4cOZJiXbdu3SRr1qyyefNmiY2NVaF2H8bw2e/YsaPUr19fWWao84wZM8TExERatWolt2/fVq2e73P8+HEZPXq0/Prrr8qyV69eSZcuXaRy5coyZcoUefjwodSsWVPatWunYk1TGjJkiIwbN05Eko4Hz549Ey8vL5k8ebKIiLx48UJcXV2lT58+albzgzGIpWPJD5a//PKL3L9/X7y9vcXNzU3Onz8vIkk7aUMY8/b2lipVqkihQoU0d8Zp2JZr167J+fPnZd++fcq6uXPniqenp3Tp0kUePnwoIukjjIWEhIhOpxN7e3v5448/ROT/6h0WFiY6nU7OnDmjZhXf69ChQ0pLpOHg7+TkJP369VNaYBMTE6V9+/YSGBioqQOnXq83+n4MGTJEihUrJqGhoXLr1i3Jly+ftGvXTpYtWyYlS5aUr7/+WnPfiS1btkjbtm2lVKlScuLECWX5mTNnpFSpUjJmzBiJiopSlp88eVKKFCkily9fVqO6Hyw2NlYaNWokTk5OcvToURH5v+//yZMnxdTUVHQ6nezdu1fNan6QtWvXioWFhRJmDNuxfPlyqVChgjg5Ocndu3fVrGKqdu/eLc7OzuLo6CinT58WEVG+v69evZJevXpJiRIlJE+ePFK+fHlNfbdDQ0MlW7Zscv36dWXZmzdvpFy5cnLr1i25f/++ODs7G7XYb9++3ehkWGsYxNKp5AeZsWPHSpEiReT69evy8uVL8fb2ljx58ihhTCTpwN+mTRsZMGCAchDVSmtM8r5U+fPnl7Jly0qWLFmkYcOGyk4iNDRUKleuLN26dVNaxtKDBQsWiE6nk2+//dZoZ/bnn39KoUKF5NSpUy
rW7t0SEhKkbNmy0rBhQxERuXfvnjRs2FCcnJyUy0pv3ryRESNGiIuLi1y7dk3N6hpJHsK2bdsmmzdvlqioKBkyZIiUL19eLC0tpWfPnkr5Q4cOScGCBTXVcrF48WJxcXGRxYsXGwWSqKgoSUxMlBEjRkjlypWlT58+8scff0h4eLjUq1dPatasqamTlPfVpXHjxuLg4KCEMRGRc+fOyahRo2TGjBmaCsaGz9PVq1flwIEDcu/ePYmNjZVXr16Jn5+fFClSxKhladiwYTJnzhx59eqVWlV+rwsXLkjv3r3F0tJSpk+friw3HBtiY2Pl6NGjsnXrVuU4oZW/R9++faVFixYiknRc27Vrl4iIlC1bVoYPHy4FChSQHj16KNvy4MED8fHxkU2bNqlW53/CIJYOJQ9hly5dkiZNmsj+/fuVZe8KY8l3ilr5UhkcPnxYsmTJIkuWLBGRpIOjTqeTZcuWKWVmzZolxYsXl4CAAE0dbET+729y7949ozM1EZHp06eLTqeToKAguXTpkkRGRsqIESMkZ86ccu/ePTWq+0GWL18u1apVU1pYtmzZItWqVZNs2bKJj4+P1K5dWxwdHeW3335TuabGDH+LtWvXiomJiaxdu1ZERB4/fixDhgyRQoUKyZgxY5Tyo0aNEk9PT6PWJTVt2bJFbG1tZf369UbLu3TpIu3bt5dnz55JfHy8TJw4USpUqCA6nU5KlCghHh4eRi2VatLr9UYnemvXrpWxY8dKcHCw/PLLL8ryxo0bS9asWWXJkiVy8OBBadSokXTs2FFZr6X91IYNG8TZ2VkcHBykePHiEhwcLK9evZIbN25I586dxczMTKpVqyaVK1eWzJkzG+171fSuz8LVq1elZ8+ekidPHlm8eLGy/O07DkW0c9KekJAgQUFB4uHhIf379zdqOQ0NDRUHBwepWrWq0WNGjhwpX331labvZGUQS0cWL15sdLCYN2+eVKhQQdzd3ZWzecOX7uXLl/L111+Lm5ub0qpkoMXOyVOnTpW2bduKSNLlyYIFC0rXrl1TlJs7d65m+8AYWvScnJykbt26cvjwYWUHZghj1tbW0rFjRyldurTmAszb7t+/L3nz5pXRo0cry27evCnTp0+XAQMGyMyZM+XPP/9UsYbGkreE7dixQ0xMTJS+kIaDy5MnTyQwMFA8PDwkNDRUpk6dKlZWVpr5W8TExIivr68MHjzYaHmdOnUkV65cYmFhIa1atZJnz56JSFLLxYEDB+T8+fPKd18L4eXx48fK/wcPHiwODg7SsGFDKVy4sBQpUkT69eunrO/atas4ODhIvnz5pFKlSqkGAbUYPk9//vmnVKpUSebPny/Xrl2TPn36SIUKFWTEiBESExMjiYmJsmXLFhk4cKCMGTNGrly5onLNkyQPYUePHpVffvlFDh06pCy7dOmSBAQESNGiRZWTYBHtHSMuXLhgdEWhTJkyYmVlZXSMuHHjhnTq1EmKFCkigYGBMnPmTPH39xc7Ozul37RWMYilE3PmzJFWrVoZfbHOnj0rxYoVE3Nzc6Oz5+RhrEyZMtK0adM0r+8/Sf5FT0xMlI4dO8qgQYNEr9dLrly5pHv37kqZZcuWyfLly9Wq6gf5448/pHDhwvLdd9/J9u3bpWzZslKxYkXZunWrcmBcuHCh6HQ6mTFjhibvbLtw4UKKPmtLly4VNzc3OXnypEq1+nDJL3HrdDpxdnZW6q3X65W/g6FlzNnZWczNzVOcqKjpwYMHkjNnTlm1apWy7LfffpPatWvL69ev5cyZM2JjYyMtWrRI9YRE7ZYwkaT+R9WrV5fExETZvn27ODs7K5cfHz16JLNnz5b8+fMbjVl18eJFuXz5suphMvn7Z/g8/fbbbzJ8+HDp0qWL0Z2Eo0ePlgoVKsiwYcPk6dOnaV7Xf5J8HztixAgpUqSI5MqVSzw9PaVNmzbKugsXLkjfvn2lePHiMnv2bDWq+k56vV5WrlwpLi4u8vz5c4mLi5MnT5
6ITqeTr776SqpVqyarVq1SPi+XL1+WkJAQ+eqrr6Ry5crSunVruXTpkspb8c8YxNIRQ+vKoUOHlH5SV65ckeLFi8vXX39tdBeSYYfy+vVrTeycU7N7927lIPjjjz9K/vz5xd7eXgICAox2Il27dpWuXbtqqr/F253BHz9+LP369VN2CC9fvpSaNWumCGOzZ8/WXGdqvV4vz549k3z58knZsmWlbdu2cvfuXYmJiZFHjx5JpUqVZNasWSJifNlCa2fNIkmXwHQ6nYwaNUp69eollStXNupjZfgOPXr0SIKCguTq1atqVTVVN2/eFAcHB2V8Nr1eL4mJiUYBYP/+/aLT6WTLli0q1fL9ZsyYIW5ubiIismjRIilWrJhRa8aTJ09kwoQJUqlSpVQvF6l1Gcywnzx//rxs375dRJLefz8/P7G1tZVSpUoZ1U2v18vo0aOlcuXK0qtXL81c2n7blClTxNHRUcLCwiQuLk5GjhwpOp1O6tWrp5S5ePGitG/fXtq0aaPJ77VhTDbDHcSPHz+WxMREqV+/vlSuXNkojIn8399SS62r78Mglg4kD1L79u0TNzc3GT16tHIH4e+//y5FixaVb775xqjj69utTloSHx8vLVq0kJo1a0pMTIz8+eef0qpVK3Fzc1PumIyKipKRI0eKk5OT5g6Yhvf2119/le7du0udOnWkSZMmRmWio6PFy8tLKleunC5GDb969aps3rxZSpYsKcWKFRM/Pz/566+/ZPz48eLq6qrJVrzk7t+/L7Vr11b6FR4/flw6deokVapUMQpjhh221r4TIknDVTg7OyuX6ZMzfObCw8PF29vb6E5KLTDU7/Lly1KwYEG5e/eubNu2TQoUKJDi0u+pU6fExMQk1SEs1JA8hOl0Ohk5cqSy7vXr19KvXz/JkyePTJkyRWJiYpR1er1eAgMDxdvbW9kfa8mNGzekTp06smPHDhFJurs+c+bM0rt3b8mbN69yM45I0uVXw/uglTBm2GcmJibK+fPnlXHmXrx4ISJJod4Qxn744Qej8iLa2Y5/wiCWDg0cOFAqVqwoQUFBRmGsePHi0rhxY6OO+1q2YsUKKVOmjNKfwjAqctasWcXd3V2qVKkiuXLl0kz/nbcdPHhQdDqdtGzZUvLmzSvZs2eXmTNnGp2ZRUdHS9myZaV27drKzkMLDDuov//+W8LDw1Nc5po9e7a0bNlSLCwspH379solVa0xbMelS5fkjz/+SNFn7cSJE0oY0/r3wnDwmDNnjuh0OpkwYYLRcpGkoQUaNGggjRo10mSQFEm6YcXe3l527Nghf//9t+TPn1/69etndFfqn3/+KaVLl9bEJe/kIczKysqoT6TBq1evxN/fP9XBTvV6vURGRqZZfT/W8uXL5cGDBxIWFia5cuWSBQsWiIhI7969RafTibu7u1F5rX6uRER8fX3FwcFBNm/erATiJ0+eSIMGDaR69eqyZMkSTdf/XRjENOx9H6hBgwZJuXLlUoSxbNmyGfW90Ip3nZmUK1dOmjdvrvz+xx9/yJYtW2T48OGycuVKzXbMv3HjhkycOFHpU/H8+XNp3bq1VKtWTebPn2/0t3v58qWmpjtJ3pcqX758UqBAAbGwsJAuXboo8xUabNiwQZo0aSIFChRIcTeo2pJvh6Ojo0yePFk5ICb/vJ04cUL8/f3lq6++MuqorFW3b9+Wbt26iU6nk8DAQPntt9/k8ePH8ssvv8jXX38txYsX18zdkSJJ7++ePXvk9evXSveBb775RhYtWiQiSZ8hOzs76dy5s6xcuVJOnjwpderUkYoVK6pef8Pn5Pz582JjYyPDhw83Wr9y5Uql5fH169fi7+8vFStWlJkzZxqFMS34p/dy9OjR0rFjR6Xe3333nTRp0kS6du2qmdb65F1qkktev7Zt20qWLFmMwtjTp0+latWq4uPjo/mW+9QwiGlU8gPJ3LlzpVOnTjJw4EBZuXKlsnzIkCFSrlw5GTt2rBLG/vjjD8
18qd524sQJOXfunNF0OD///LOULl1aOUCmh6bkK1euSPXq1cXV1VU2bNigLH/06JG0bt1aKleuLAsXLlT9IPM+R44cERsbG5k1a5aEh4fL2rVrpUaNGtKwYUM5cOCAUdknT55otv/Lvn37xMbGRhYvXpxiBPrkn6WjR49Knz59NBvs3/bnn39KUFCQWFpaiq2trWTKlElKly4tDRs2VEKYFu6O/Pvvv5WBQfPnzy9FixaVIUOGiKWlpfj6+irTSW3dulVq164tDg4OUqJECfHy8tJMmIyMjBQHBwdp1KiRiPzf52by5MmSI0cOOXnypLLs9evX0r17dylcuLDMnTtXtTq/Lfl7uGHDBhk/frxMmzbNaKiQVq1aKa1fcXFx0rRpU6NWbq0cN+7evSstWrRI0YL9T2Hs2bNnmhoL8GMwiGlQ8gPI6NGjxdbWVlq0aCE+Pj5iamoqvXr1UtYPGjRIKlSoIAMHDjS6c0crXyqRpO2Ji4uTEiVKyFdffSX169eX8+fPS1xcnDx79kzKli0rI0aMUMpqUfJ63b9/X3r16iUODg7SpUsXo3JPnjyR9u3bS/HixY3GQNMKw3aMGTNG6tata7Ru//79UqVKFendu7eIaOsz9C4BAQHSoUMHo2XvOrBrpQXjYz7jN2/elM2bN8uaNWvk0qVLqt9VmJqIiAiJioqSnTt3yowZM2TgwIFSqlQpcXd3l++//1553x8/fiy3b9+Wa9euaWo7rl+/Lu3atZOcOXPK7t27RSQphGXPnt1okFbD9yEmJkYCAgI0OY/kkCFDJFeuXNKiRQtp2bKl2NvbKzfabNu2TQoVKiSlS5cWd3d3KV68uPL+a2m/++eff4qnp6c0aNDAqM+zSMow5uDgIOvWrdP0XLcfgkFMw06fPi3t27dX5sGLjY2Vn376STJnziwDBw5UyvXs2VM6d+6sqS9Tal6/fi2bN28WX19fsbW1FT8/P9m/f79s375d7O3t5eLFi2pX8b2OHDmiDO8QGRkpgwYNktKlS8v48eONyj1+/Fj8/f013foSFBQkVapUkdjYWKPPzdKlS8Xa2lqTHY/fFh8fL1WqVFFGyX/7Tta7d+9qLkwmD4mxsbEppl35kBts1G5BevTo0T+WefHihXTp0kU8PDxk5syZqU6Ro/Z2JPfHH39Ily5dJFu2bNKxY0dxdHRURmxPTgt92t5ly5Ytkjt3bjl27JiIJA37Y25urpwQvnjxQrZv3y59+vSR4cOHKyFMa98RkaRwXLduXfHx8UlxA1ry+pYoUUJKlCihqf63/waDmEZt2LBBypcvL0WLFk1xi/cPP/wg9vb2RnccGXbgWgljhnqcO3dOfv75Z1myZInRpaN169ZJjx49xMrKSqpWrSo6nU6mTp2qqZ1zclFRUfLNN9+Ii4uLnD17VkSSWgIGDBggFStWTBHGtLodBqtWrRIzM7MUd60dPXpUihUrpkxjpHUDBw6UMmXKKN8Rw+fu1q1bMmrUKE2F4eSficmTJ4uvr6+4uLjImDFjjC4haVnDhg1lxowZqe5nDMsMB8ro6Gjx8/OTKlWqyOTJkzXR+vU+N27ckB49eoiJiYlMnDhRRJK2xbBdw4cPF51OJ48ePdLMflbk/1oVp02bpowZuWnTJrG1tVU65j9//jzV8bS0/Dd5VxgTSWqVHDlypPTo0UNT06v9WwxiGnH69GnlFmORpD4VNWvWFDMzM9m2bZtR2WvXromLi4sy1o2BlnYOIkk7AwcHB6lTp464urpKtWrVJDg4WFkfGxsrFy5ckGbNmknJkiU1N0SFiPF7euTIEWnWrJkULlxYaRkzhLEqVarIsGHD1Kpmqv5pktv27duLg4ODHDx4UOngOmjQIClZsqTmBqg0/B1evXpl1Bl3165dUrp0aenfv79RGBszZowULFhQE1NIvf29HDlypOTIkUOWLVsmq1evlmLFiknVqlU1OTl0cv369RNXV1fl9/ftbwyh88WLF9KoUSOjAZ
q17OrVq9KtWzfJmjWr0WXJMWPGSObMmTUzN+yvv/6aYuaRuXPnSp8+fWTz5s2SOXNmmT9/vrJu06ZNMnLkyA9qzdSS1MJYbGysBAQEiE6n08w0Uv8Vg5gGrFq1SkqUKCGdOnUy+mAdOnRIvLy8xNPT0+hutqdPn0r+/PmVOfS0IvmO9uzZs+Lo6CgLFy4UkaSgaWj1eltMTIwm7nRJ3mJh6Ej8dr+iY8eOSePGjaVw4cJGLWNdu3YVb29vzezoHj16JE5OTkZzyBkY/k5Pnz6Vjh07ioWFhZQqVUoqV64sWbNm1dxwIckn8Pb29paiRYtKw4YNZd26dSKSNLG6h4eHFC1aVFq2bCl169YVe3t7zUxrYghYiYmJcvHiRSlZsqTS3eDo0aNibm6uzByhxctEIkkTvHfo0EHpEjFt2jQ5fvz4ex9j+D69evUqXY3rdOPGDenSpYvY29tLWFiYzJgxQywtLVPMOqGW+Ph4CQ4OlgIFCkjfvn2V5Zs3bxYbGxsxNTWVefPmKctfvnwpPj4+RmXTk+Rh7MCBAzJ06FBNTUv2KTCIqWzZsmVia2srixYtUi4HJd9Z7dy5U3x8fKRgwYISEhIiS5YskYYNG0qxYsU0s9PevXt3itaXVatWiZeXl4gk7djc3NykW7duynotzVGY3K1bt5T3PywsTKpWrZqiU+6xY8ekTp06UrRoUaW5/+HDhynu2lNbv379xNraWhml/V02b94sM2bMkJCQEPnjjz/SqHYfZ+fOnWJubi7Dhw+X+fPnS506daR8+fLKJaSjR4/KxIkTpUmTJjJixAjNzPVnOAEx9NsJDw+XkiVLikhS94PMmTMrB82YmBjZvHmz5j5HBpMmTRIzMzNp0qSJ6HS6D/qsJD+50crl+uT713cFwxs3bihDiJiYmGgmhBk8e/ZMQkJCpFSpUkofSRGRiRMnik6nkwULFsjx48fl7Nmz8vXXX0uZMmU02TH/Q12/fl0aNmwoWbNmFXNzc+UkOKNgEFPRxYsXpWDBgqkeKJN3lj548KBUrVpVLCwsxMfHR0JCQpQWG7XD2KZNm6RatWopOnfPnz9fOnToIDExMZI7d27p3r27siP+5ZdfZOrUqZobEuHNmzdSqVIl5fLL4cOHpVy5clKrVq0UfY2WL18uOp1OHB0dNX1mNmLECDEzM0v1M2aYf1HLfSz0er3ExMRIw4YNjcZ4MkzVUq5cOdm6dauKNXy/K1euSNWqVaV79+7y9OlTuXTpkuTNm1dCQ0PF3t5evv/+e6Xs0aNHpUmTJpppyTNIfuAuVKiQWFhYyHfffadijf6d5Je3RSTFKOxvu3z5sgwbNkzCw8PTpoIfKPn0PXPnzpUcOXJI//79lfWDBw+WPHnyiK2trVSsWFG8vb01c7z4L65evSqNGjVKF3NHfiwGMRXt2LFDvvrqK6NRmTdv3izdunUTZ2dnqVKlijKm0969e6V+/fri6+srBw8eFBHtdLQ0jN3yxx9/KJcYjxw5IjqdTiwtLWXIkCFG5Xv16iXNmzf/xz5MaU2v18uRI0ekRIkSUqlSJRFJahWrVq2aVK9e3ahl7Pjx4+Lj4yM9evSQGzduqFXlD/KuMBYbGyudO3cWe3t7iYqK0vSZspeXlwQGBorI/x1MEhISpHr16tKyZUs1q/aPgoODxcnJSZm6q0uXLqLT6Yxu8Hj16pU0bNhQsyPm6/V6uXLlipQuXVqaN28u1tbWsn79+nRzYE8+JZlhKKAmTZr8Y7+81O72VMPVq1eVk93k8yd+88034uTkJEWLFpUePXooy8PDw+Xs2bNy/fp1TQ0V8l+ll7kjPxaDmIp27dolhQoVks2bN4tI0jAUlSpVEh8fH5kxY4ZUrVpV3NzclMCyY8cOqV+/vtStW1cZ70Yta9asMbq54NKlS1K2bFkZM2aMPHv2TESSJpu1tLSUZcuWyevXr+XOnTsyfPhwyZ49uybOMlM74CUmJsrx48elUKFCShg7cuSIVK
9eXapUqSI3btyQhIQECQoKEn9/f82FyXd5O4zFxcVJQECA2NnZaaYDsoHhoGkYqPHNmzfSuHFjo3HPDAFg0qRJUrlyZc0cMEWSWrrv379vtMzb21u++uorEUmaAqhBgwaSOXNmCQ4OlqCgIGW9VgY5fVcdDMMEGO54Tg9zqBps3bpVrK2tZfTo0bJixQrx8PAQR0fHVCce15KLFy9Kjhw5ZNKkSUafq6ZNm0rp0qXl999/l5kzZ0rJkiWNwlhyWvg80bsxiKno1q1bUr9+fcmXL584ODiIm5ub/PDDD/LgwQMRSRo41MzMzOjSy+7du6VKlSrStGlTpYk9rd28eVOqVq0qNWvWNBqFvWvXruLh4SETJ06UFy9eyNOnT2XIkCFiYmIiBQoUkNKlS0uhQoU0cSnPsGN68OBBik7HcXFxcvLkScmfP794eHiISFILWK1atUSn00mFChXExsZGLly4kOb1fpcP6fdiCGMrVqyQ/v37i5WVlWb7Wpw6dUp8fX3l999/F5Gk6busrKykf//+RtvXoUMHad68uWbOlA8cOKDM3xcWFqaElHPnzkmePHmUeQwjIiJk6NCh4u7uLvXq1ZN+/fopLRZaaLlIfuA+fPiw7Ny5Uw4ePGi0z+nZs6dYWVnJhg0bNB/Gnj9/LtWrV1fu2r5z547ky5dPunfvblROq4Fl8ODBkj9/fgkNDZXXr19Lq1atpESJEkpfW0OfsTJlykibNm1Uri19LAYxld24cUN27doly5YtSxGsTpw4IaVLl5Zz584ZHXz27t2r+lQOv/76qzRq1Ei8vb2NbvPu27evlCtXTiZPnqyMdnz69GlZvXq17N27VxPDCRjcvn1bsmfPLjqdTry8vGTEiBGyb98+5fLqqVOnlFGoRZJaYRYtWiTz58/XTKf25CNKHzx48B+HABk9erTodDrJlCmTJgLxu6xatUrc3d2lRYsWSuDdtGmTWFlZSa1ataRTp07i5+cnmTNn1lQg/uuvv8Td3V1KlCghTk5OMn78eOVkZdCgQeLu7q6ESxFJMRClFkJYcoMHDxZnZ2cpWLCgmJiYSNOmTY3GPOvdu7fY2trKihUrNBtiRJL63Lq5ucmdO3fk4cOHkitXLqMQtmbNGmU6Ji1JHnBHjhwprq6uUqJECSlWrJhyU4fhfY+KipIJEyZIhw4dNP23oJQYxDTq1atX0qhRI6lfv77ypdLClyt5INyzZ480aNDgnWFs4sSJmhuPKrlbt25JmTJlpEiRIuLu7i5+fn5iaWkpZcqUkQ4dOsj69evlxx9/lEKFCknt2rU114fq/v37kj9/fjl37pz8+OOPYmFh8UGXrGfNmqX5WQxEkm7qqFy5sjRt2lQJmJcvX5aOHTtKkyZNpGPHjprajoSEBHn9+rWMHTtWJk2aJBs3bpRevXqJl5eX/O9//5P79++Lk5OT0awYyYOX1j5fixYtkpw5c8qJEyfk0aNHcvbsWalUqZI0bNjQaCDgdu3aSa1atVSs6Yfx8fGRKVOmSN68eaVnz55KK+qDBw/E19dXNm3apHINU5d8vz9hwgSxtLSU4cOHK11Akpd5+fKl8jnSwvGCPgyDmMZER0fLL7/8IvXq1ZOSJUtqqs+IQfKztF9++UXq16+fahjz8PCQkSNHarof1Y0bN6RJkybSuHFjOXHihPz999+ydu1aqVKlilSsWFGsra2lZMmSotPpxNfXV0S0c8CMj4+X9u3bS9asWcXU1FRWrFihdpX+k7t37xp9ViIiIsTOzk7y5csnTZo0UVqSDN8JrVyOfLu/4+XLl8XV1VX5e+zevVty584tXbt2lTZt2ohOp5Off/5Zjap+lICAAGnRooWI/N/+5+LFi1KkSBGjIROSr9cCQ10SEhKUoGsYBDRz5sxSv359o/LDhw+XkiVLano2ieTv7+jRoyVv3rwydepUo7vVP6R7AmkTg9hn9jE7qPj4eBk+fLg0aNBAWrVqpak+IyLGnagNHalFkvrF1K
tXL0UY69y5s9SsWVMeP36c5nX9GFevXhUfHx/5+uuvjTquP3v2TFauXCkjR46UsmXLavJS3s6dO0Wn00nmzJk1PQ/eP7l06ZK4urrKzJkzRSSptS937twSEBAgq1atkpo1a0qLFi2MBjzWwsHm3LlzkjNnTvnmm2/kr7/+UroXbNy4UXLlyqWMBh4TEyN9+vQRHx8f0el00qtXLzWr/V6G99XPz0+++eYbEUnajxmC76pVq8Te3l7u379vdFKmZhhLfrlXJOnGpnbt2sk333wjW7ZsEZGkOWBr1aolFSpUkGHDhsnixYvF399fsmTJopkR2t9+D9/1/o4aNUpcXV1l2rRpSp9iSr8YxD6j5F+cM2fOyPXr143OulI7kNy6dUtOnTpldFanBYa67tixQ+rUqSPu7u5So0YN2bt3r4gkhTHDZcrkswBodXDKt12/fl18fHzEx8dHGR4kOa2E4bc9f/5cNm/erAxDYfh7vE0LoSW558+fy8OHD+X48ePK5evevXtLsWLFZMqUKeLi4iJ9+vRRyq9Zs0bKli0rHTp00NQdkvfu3ZNVq1ZJkSJFpGDBgjJ27FhlSITAwEAZNGiQ8h2Ij4+XkydPyqRJkzT7eUpu06ZNotPpUkyxtn79eilfvrwmZsMQSeobqdPpZNGiRSKS1GXCxsZG2rRpIw0aNBATExP59ttvRSRpf9S/f3+pUKGClCtXTlq0aKGpy9sG69atS3WcxeTHlDFjxoi5ubmsXr06LatGnwGDWBoYOnSouLi4iLOzs9SqVcto2IcPma9NK7Zv3y7W1tYybtw4OXHihNSoUUOcnZ2VO+/27NkjjRo1kgoVKsj+/ftVru3HSz6VRlhYmNrVSZXh8/L69Wt58uSJ0brWrVuLvb290Z2sq1at0tyArZcuXRJvb28pVKiQ6HQ6yZ49u7Rt21ZevHgh/fv3F51OJzVq1DDqAyMi8uOPP8qtW7fUqfQ/SEhIkF69eom7u7sULlxYjh49KqtXr5YaNWq8s6VS62EsISFB+vXrJ1ZWVrJ27Vp58OCBREZGSt26daV+/fqqh/vk0yZNmzZNLCws5IcffpBZs2bJ7NmzlXKLFi0SnU4nQUFBRo999eqVpkK9QXh4uNGwRm8fB5L/vmDBAs2crNO/xyD2GSTfQYWFhUm+fPnkyJEjsmrVKunSpYvkzZtX+ZK9XV6LEhISlPnKDGeWhvku377EsmPHDmnVqpXmx+Z5F8NUGpUqVfrHufTSWvI5Fxs2bCiFCxeWrl27yrJly5QyrVu3lixZssjcuXOlX79+YmtrK9evX1epxildvHhR7OzsZMCAAfLzzz9LeHi49O/fXxwdHaVUqVJy9+5dGTlypOTLl08WLVqk6f6FBskPhAcPHpTWrVuLmZmZjBkzRooUKSJVqlTRTH+2j3Xv3j0ZMWKEmJubi6urqxQuXFjKlSunet9Vw+v+/vvv4ufnJ1FRUTJmzBjJlCmTFChQQJYsWWJU3hDGJkyYkOJOVa2Jj4+XGjVqKP3zUvN2+GIYS98YxD6jZcuWSf/+/WXChAnKsvDwcOnevbvkzp1b6bugJcnPMpMHxJiYGKlQoYJcvnxZIiMjxdnZ2ej2740bNyotNMmHVEiPrly5Is2bN9dkmNy+fbuYm5vL0KFDZdKkSVK3bl0pV66cjB07VinTs2dPKVKkiJQrV05T44Q9fvxYPDw8Usy0EBsbKz///LPky5dPKlasKCJJHcWLFSsms2bN0vyBUyRlIFm8eLHUrFlTihUrJjqdLtXJ19X0sQHqzJkzsnXrVtm2bZty0FerRc9Q9/Pnz4uJiYmMGzdOWTdt2jTR6XQyZsyYFPuwpUuXik6nk+DgYM2c/BreS0N9DL8fP35c8uTJY3T1hDIuBrHP5O+//5b69euLra2tDBo0yGhdeHi49OjRQ1xdXWXNmjUq1TAlww7u2rVrEhAQIE2aNDGaU65q1arSo0cPyZ8/v/Ts2VNp1n/8+LHUq1dPli
9frkq9PwetXbLQ6/Xy4sULadCggdEllgcPHsi3334r5cqVk7Vr1yrLb9++rbm5PK9cuSJFixaV48ePp5jnLz4+XlauXClWVlZKaGnatKmUL18+xSXK9OLChQsyffp0adSokaYuQyYPYQcPHpTVq1fL/v37U0xub5BaaFGrBcZQ98uXL4uVlZXRCYjB+PHjxdTUVJYuXZpi3cqVK+Xy5cufu5ofbd++fZKQkKBs3927d+Xrr7+WUaNGiYj2uqnQp8Ug9omktrM6cOCANGrUSLJnzy6HDh0yWnf58mVp2bKlMiSC2pKfZebIkUN8fX2VSyxTpkwRkaSzfGdnZ6lQoYLRY0eOHClFixbVbP+djCIxMVHc3d1l6NChRssfPnwo1atXl379+qlUsw+zZcsW0el0yi33hu+M4d/Y2FgpWrSodOjQQXnM21MFqeVjDoTJ9wXJH6elMCaS1HfVzc1NypUrJ56enuLu7p5iP6UlhvfywoUL4uDgIE5OTsq6t4Ph2LFj3xnGtODAgQPKnZrnzp0TnU4n1atXl0GDBilzD//4449ibW0tV65cUbOqlAZMQP+ZXq+HTqcDADx69Ah//fUXAMDLywvjx49HtWrV0L9/fxw9elR5TLFixTB16lRs2rRJlTonp9frYWJiggsXLsDT0xPdunXDli1bsHr1avTo0QMREREAgEaNGqFJkyaIi4tDhw4dMGXKFPj5+WHOnDlYu3YtXF1dVd6SjOP+/fvYtm0bZs+ejVevXkFEEB8fj3z58uHevXt4+fIlRAQAkDNnTnh4eOD06dOIjY1VuebG3rx5o/zf2dkZFhYW+Omnn5CQkKB8Z3Q6HUQE5ubmKFCgABISEoweozYRgYlJ0q5yzZo1mDp1KlauXPnO99qwXQCUxwFApkyZPm9FP8KSJUuwcuVK/PDDDzh79izq16+Pixcv4tmzZ2pXLVWGfdTvv/+OSpUqoWbNmjA1NUWTJk0AAKamptDr9Ur5cePGYfTo0ejTpw/mzZunVrVTtXfvXtSqVQvdu3fHuXPnUKZMGdy+fRteXl44ffo0ihcvjlGjRsHMzAxNmjTBxo0bAcBo+yiDUTcHpn/Jz37HjRsnFStWlJw5c0rt2rVl6dKlkpiYKMeOHZPmzZtLmTJllHGFktNCs/Pt27fFwcEhRQfRVq1aSenSpaVw4cLSqlUrGT9+vCxcuFCqVasmNWrUkE6dOmliAu+M5NKlS+Lh4SF+fn7K3IQG27ZtExMTExk/frzR8AEdOnSQ9u3ba6rV5e7du9KiRQvlDtr4+HipUKGCVKlSxWgqJsN36M2bN1KnTh2ZNm2aKvVNTfLv98iRI8XKykpq1KghOp1OWrVqJZcuXVKxdh/PsD29e/dW+upt2bJFbG1tZcGCBSKS1B9US1ORGfz+++9iYWGhfCcOHDggOXPmlMaNGytl3t6XDhkyRBwcHDRzmT4xMVEWLFggVlZWUrduXfHy8lLu0DbcABEaGiodOnQQOzs70el0UqZMmRStx5SxMIh9IuPHj5ecOXPKpk2bJDIyUsqWLSvFihVThg44cuSItGjRQpydnVMMPqgFN2/elAoVKkijRo2UsDhlyhSxtraWCRMmyKJFi6RIkSJSsmRJo+DFu3U+rUuXLknWrFllzJgxRpflNm3apFzSW7JkiZiYmEjz5s2lW7du0rVrV83NuSgi8ueff4qnp6fUr19fGZtt+PDhotPppGbNmkY3Euj1ehk9erQ4OzsrExlryfXr16VOnTpy+vRpEUkKBU5OTtKkSROjcai0fqA01K9nz56ycOFC+fXXXyVz5swyb948EUn6Pi9fvlyWLFmiuX6SS5cuTTEExYeEsUePHqVVFT/I48ePpUCBAvLNN99Iu3btxMvLy2ggaZGkKe5Onz4tXbt2FScnJ/nf//6nUm0pLTCI/Ud6vV4iIiKkUqVKsnHjRhFJOlOzsbGRhQsXGpXdv3+/jBo1SrPhxTCOVqNGjaRr166SM2dOo5
Hyb926JTqdTubMmaMs0/qBJz159OiRVKxYUXr06GG0fOrUqaLT6aRw4cLK4KC//PKL+Pv7S+3ataV9+/aaHJRS5P8+U40bNxZ/f39xcnKSFStWSNGiRaVgwYJSq1Yt8ff3l6ZNm4qjo6Om7vI0mDx5snh7e0uTJk2M7uA8d+6cODk5SdOmTTX7/r+rtX3UqFFibW0tNjY2Rv2onjx5IrVr1za601uLkrcQfUgY0wpDi/XChQulc+fOsmrVKqVlzBDyRf7vBPf58+fSv3//9w5lQekfg9gn8PDhQyldurS8fv1atm/fbnSGGRMTI8uWLUvR1K/VMHbt2jX5+uuvxcrKSrljUq/XS1xcnNy9e1dKly4tGzZsULmWGdPhw4elZMmSRmfHK1asEHt7ewkJCREvLy8pXry4MqXJ69evRUR7d3i+LflnynDZMTY2VmbPni0BAQHSvn17mTZtmty4cUPlmiZ5++Ri+/btotPpxNHRUbnjzlDm/Pnzkjt3bqlRo4bmWvKSh5GwsDA5fvy40cG+RYsWkjVrVrl8+bLcu3dP/v77b/Hx8ZGKFStq6hL3P0kexpo1a6Z2dVI4cuRIii4pe/fulWLFisnFixflxIkT4uPjI15eXnLmzBkRSfrbGY4Re/fuFUdHR00Op0OfBoPYR0qtBejNmzdStGhRadasmWTJkkXpayGS1CLg5eWVYpoQLfvjjz+kTp06Uq9ePTl8+LCyfMyYMeLm5ia3b99WsXYZ16xZs8TR0dFoENONGzcqrUSnTp2S6tWrS548eSQ6OjpdtUYm/0yll1kXpk2bptzZdujQIcmUKZN06tRJuWRseP9Pnz4tjRo10lQrTPLPxsCBA8XJyUmyZ88uuXPnllatWklsbKzcvHlTqlevLtmzZ5dcuXJJhQoVpFKlSkpfJa2eLKZGr9fLwYMHxdTUVNq1a6d2dRS7du0SnU4nOp1Oxo4dazTQ7IABA6Rhw4ai1+tl586d0qBBA6lVq5YcO3bM6DkmTpwobm5ump+zl/49BrGPkHzHdOfOHXn48KEyr9yyZcskZ86cRsNRvHr1Spl/MT3t1ESMp/v57bffZOrUqWJpaanJia8zilWrVom5ufl7b34IDg6WChUqaKbz8cdI/pk6cuSI0TqthcrY2FipW7eu1KpVSxmoeM+ePZIpUybx9/dPEcYMtBDGktfp4MGDUrhwYQkLC1MGZXV2dpYGDRooZbZs2SI//vij7N27V/XBWv8LvV4vR44c0cyUXnq9XpYuXSqVKlWSvHnzSseOHaVBgwZStmxZ2bJliyxbtkw6duwoN2/eFJGkWUk8PDwkICBAeXxsbKx0796d+90MjkHsAyU/8H377bdSuXJlKViwoFSpUkU2bNggsbGxMnz4cMmaNas0btxYOnbsKDVq1JCSJUumyzNMkf+b7idnzpxiZmamNJvT53Hp0iUpUKCANGnSROkL9vZUMn379pX27dvLq1evVKvnf6HVKaRSC1Dbt2+XWrVqyQ8//GB0mcjMzEy6d+8ud+7cSetqfpSNGzdKmzZtjCZPF0m6nJotWzYZMGBAqo/Twn7qXcFca4H9n8TGxsqyZcukbt26UrduXbl3756MGTNGWrZsKc7OzqLT6YzujD527JjyWXz7X8q4GMQ+wLJly6Rr164iknR5Lnv27LJz5045c+aMNGjQQHQ6nTx69EgiIiJkx44d8s0330jXrl1l/PjxypllejzDFBG5evWqNGrUKN3dpp9eDRo0SLJnzy7du3dXBnYUSToRGDZsmDg4OGhyZPCPoeUppKZPn67MEKHX66VLly7i7u5uFAD27dsnOp1O03ey3b9/X+rVqyfZsmUz6jdlCFnffvutVKtWTZOXuA31OXHihEyfPl22bNli1B1Ca/V9W/Jp4kSSwtjSpUulQoUK0r59e4mLi5O4uDhZvXq11KtXT7n8ndpz0JeBQewfzJ8/X3Q6nWzbtk1evXolNWvWlJ07d4pI0phO9vb2Mn
fuXBF595dHC2eY/0V6nbBY65J/XpL/v0uXLuLg4CDFihWT0NBQGTBggLRo0UJy5syZYS5RaPEGg/DwcKU/z5gxY2T37t0SGxsrxYsXl969exuVPXPmjKZOrlILJ2fOnJHmzZtLtmzZZMWKFUbrvv/+eylZsqTRWHRa8tNPP4mlpaW4u7uLtbW1tG7dWvbu3aus12IYa9++vdHJk8j/1TMuLk6WL18u7u7u0rRpU+XuW8O8vAxeXzYGsfdYtWqVZMqUSelof+vWLcmWLZtcv35dfvnlF6O7I1+9eiXBwcFG01FocWdB6vr7779l5syZyu/Jd8DJA/uCBQukUaNGkjt3bilbtqz079/faBBU+u9SO/iNGzdOsmXLJl26dJE2bdpI+/btZcmSJVK5cmXZtWuXiBh/r7UQxpJvx9snTWfPnpUWLVqIh4eHLF68WBITE+X+/ftSu3ZtpaO4VhjqcufOHenQoYMy/M8vv/witWvXlgYNGsju3btTlNcCX19fKV68uLx580ZZ9vYgrIYwVrFiRfH19VVCWHo/Uaf/jkHsHZYuXSo6nU4qV66sLIuJiZHmzZtLQECA0UjUIkm36Ddq1Ei2b9+uRnUpHUhISJBhw4ZJ4cKFJTg4WFn+rjAmIsogrlo44GdUO3fuVAZZjoqKkh49esjEiRPl1KlT0rBhQ3FwcJCsWbNKx44dJSYmRuXaGkv+2Zk7d660b99eWrduLXPnzlU+MydOnJBmzZqJqampFClSRFq1aiW1atVShj9RqzUmtdc9ceKEdOrUSb7++mulE7tI0uVgb29vadCggezZsycNa/nP/vzzT8mfP7/SYvfTTz8Z3fkskjKMeXp6So0aNZS/AX3ZONdkKhYuXIhu3bph6NChuH79Ovz8/AAA1tbWKFCgAObMmYM2bdqgW7duAIDo6GgMHDgQr169Qt26ddWsOmmYqakp+vbti3r16mHTpk2YOnUqgKT5CA3zyJmamho9xsHBIdXl9N+JCO7du4fWrVtj2LBhmDhxIrJkyYKSJUvixo0bKF68OLZt24YxY8Ygd+7cuHXrFqysrNSuthHDXJbDhw/H2LFj4erqCmtrayxcuBB9+vRBfHw8PDw8MHLkSPj6+sLGxgbly5fHvn37YGlpidjYWKP5MNO67rdv38asWbOUZdevX8ehQ4dw8uRJ3Lx5U1leq1YtjBw5Enq9Ht9++y0OHDigRpVTZWNjg/z582PdunXo1KkThg8fjhcvXhiVMcynamZmhrZt26JDhw4oXLgwzM3NVao1aYraSVBrFi5cKCYmJvLTTz+JiMjPP/8sdnZ20r59e6VMhw4dxNHRUZo2bSpdunSRatWqSalSpVLc4UaUmgcPHkhAQIB4eHgYdfjm5+bzS+09vnz5skyePFkKFy4sderUkcOHD0uhQoVk4MCBSpkbN26k6IStFStXrpTChQsrg7Vu2rRJzM3Nxc3NTdq0aaPsl06cOCGtW7eWatWqyaZNm9Sssoi8u4V469atUrJkSWnevHmKO7V//fVXadKkiebGMvzxxx8lT548Ym5urnRlSe2zZvjsxMfHK//n954YxN4yffp02bp1q/J7YmKibNu2Tezs7KRt27bK8pkzZ0rPnj2lXbt2MmnSpHR/dySlLYaxtJf8vb148aKcOnXK6M7Nv/76S6pVqya1atWSBg0aSLZs2VJ0NdDi32fRokXKBN5bt26VrFmzyowZM2Tq1KmSJUsW6dq1q3JzxJkzZ6Rt27ZSsmRJ2bx5s5rVFpGkieH79+8vHh4eMmnSJGX52rVrxd3dXTp06JBi2istXB5+e2iJ0NBQ0el0Urp0aenVq5f88ccfIpJ6aE++TGuhntTBIPYOyb8g7wpjb2OnS/oYDGNpJ/n3efjw4eLq6iouLi5iZWUlQ4YMMZowPSQkRJo1ayY6nU5GjRqlRnXf6V0H7jt37khkZKSULVtWpk6dKiJJN4bkzp1b7OzsZOTIkUrZEydOSOfOneXWrV
tpUud/kvx7kDyMrVmzRtzd3aVz585y8uRJFWuY0osXL+TFixfKaPeRkZFy7949WbBggVSqVEm6du2qTHnFsEX/hEHsAxnCmL29vfj5+aldHcogDAehKlWqSFBQkNrVyfBmzpwpDg4OsmfPHrlx44YsXrxYihcvLl27djUan+3+/fuyfPlyTbVwJw/oT58+TTG7wtGjRyVv3rzKdoSHh0vLli1lw4YNKcJ98rv7tOBdYWzdunVSoEAB6dmzp2bqvHHjRmncuLHkzZtX7Ozs5Ouvv5bFixcr60NDQ6VSpUri7+/PMEYfhEHsIyQmJioTAE+cOFHt6lAG8eDBA+nUqZN4e3tzPrnPSK/XS7NmzYz6fokkHVhdXFxk1qxZqT5O7TC2adMmZZolkaRBpatUqSL58uWT77//XrmzNjw8XAoXLixDhgyR8PBwqVu3rrRu3VoJAVpvsX9XGNu4caP89ddfKtbs/yxevFhsbGxk8uTJsnjxYvn+++/F3d1dLCwsZPDgwUq5mTNniqenp3Tr1k0zUy6RdjGIfaTExEQJCwtTfedMGUtERIQyrRH9d6m1QMTGxkqdOnWkX79+yu8GgwcPlsKFC8ubN2801XphOPGbMmWKvHnzRubNmydOTk4yY8YMGTBggJiZmcmAAQPkzp07EhsbK2PHjhU3NzfJlSuX0QTeWtqm90neQqy1y8JHjhwRFxeXFDc6XLlyRbp37678nQzmzp0rBQsWNFpGlJovNoi9rx/Oh/bRYRgj0qbo6Gh59OiRnDt3Th49eqQsHzFihNjZ2SnzRBpaiUJCQqRWrVqa7J8XGhoqJiYmMnPmTAkKCjK6mWjdunViZ2cnffv2ladPn0psbKzcunVLwsLClG1Jb/sprbYQz5s3T77++muJiYlR3lNDwL1x44Y0bNhQSpUqpXTUF0lqzdR6SySpTyciovYQGmlNr9crY+esWbMGFy5cgKmpKSpUqABfX191K0dE/8n27duxZs0a7N+/H5GRkShatCjq1q2LkJAQ6PV61KpVC3///Td27doFFxcXmJmZoWHDhnBwcMC6devUrr4iJiYGNjY2AIAZM2Zg0KBBsLa2xqJFi9CmTRul3Pr169GjRw906tQJAwYMQL58+ZR1iYmJ6XIMuocPHwIAHB0dVa7J/+nQoQOuXLmCM2fOpLp+27ZtaNy4MY4fPw4PDw+jden170Bp44sc0NUQwoYOHYphw4bh3r17ePr0KZo2bYrZs2erXDsi+reWLFmCrl27okiRIggJCcGhQ4dQokQJLF68GI0bN4aJiQmWL1+OokWLoly5cqhSpQoqVqyIhw8fYtWqVQCSBnpV2+7duzF37lwcP34cADBw4EAsWbIEr169wokTJ/Ds2TOlbKtWrbBo0SLMmjULP/30k9HzpNeDv6Ojo6ZCGAC4uroiMjISf/75p9Fyw+elXLlyyJw5Mx4/fpzisen170BpRN0GOfXs2LFD8uTJI8eOHRORpAH5dDqdLFq0SOWaEdG/MX/+fDE3N5d169YZLX/8+LFMnTpVrK2tpUePHsrytWvXyty5c2XRokXK5SMtXMZbunSp5MqVS3r16iWnTp0yWjd79mylL9Lbd03u27dPE/XPqPbt2ycmJiZGNxKI/N/l7ZMnT0rp0qXl4sWLalSP0rEvJoi93fdj7ty50qhRIxFJuo6fOXNmZe7I58+fpxhEkIi0a9euXaLT6ZSO1IZO6oaD5NOnTyUgIECcnZ2Vk6+3aaEvz9q1a8Xa2lrWr18vz58/T7XM9OnTlTCWWhmGsU/P0BcsMDBQTE1NZdq0aUb91+Li4qRevXpSt25dTfYzJG3LpHaLXFoQEeVy5I0bN1CoUCHY2trCysoKq1evRs+ePTFt2jR0794dAHDgwAHs3r0befLkQY4cOdSsOhH9A71er3yvd+/ejcaNG8PMzEzplyMiyJo1K/r164eFCxfizz//hKenZ4rnUfvy0aNHj7BgwQIEBwejZcuWyvKXL1
/i8uXLiI+PR5UqVRAYGAgAGDZsGF68eIGRI0cqfckAIFOmL2K3nqZ0Oh0AoG/fvnj9+jWGDh2Kbdu2oWLFirCwsMCxY8cQGRmJc+fOKXPHqjWHJ6U/X8QnxfAlmj9/vhK2XFxccPr0afj7+2PChAno2bMngKQOsgsWLICIKBMuE5F2mZiYoHPnzhg0aBBOnTqFzp07Q6/Xw9TUFImJicr3P0eOHMicObMywboWRUZGIleuXMrv8+bNQ+fOnVGpUiW0atUKVapUgYggMDAQ48aNw8GDB2Ftba1ijb8s+fLlQ3BwMFatWgW9Xo9Nmzbh999/R7ly5XD+/HmYmZkhISGBIYw+Sob9tLRu3Rpr1641Wnbr1i0UKFAAAFCrVi307NkTcXFxeP36NQ4cOIBjx46hSZMmuH//PmbNmgWdTqeJjrtE9G4iAhsbG7Rv3x7du3fHpUuX0KlTJ4gITE1NkZCQAAA4d+4cihQpgjJlyqhb4feIjo7Gjh07sH//fjRv3hzz5s1Djhw58OuvvyI0NBQRERGYMGECAGDUqFE4evQo91OfyIcG9MyZM6Ndu3bYt28fLl26hG3btuG7775DpkyZkJiYyBZJ+mgZ9hOTKVMmdO3aFVZWVmjUqBFMTEwQFRUFW1tbpcyQIUPw4sULbN68GWPHjoW7uzuyZMmC06dPK18qtS9XENH7GYKItbU1OnbsCABYuHAh/Pz8sGzZMmTKlAmvXr3C9OnT4erqihIlSqhc49TlyJEDy5cvR7NmzbB//37Y2toiNDQUpUuXRvbs2fHs2TPY2dkZBQbDthta/ejfSX4p8dChQ7h37x6cnZ2RL18+uLm5pfoYMzMzmJubK78bgj/Rx8pwQezBgwdwdnbGDz/8gP79+6Nt27b44Ycf0LRpU7x+/Vppxo+Pj4eZmRnGjx+PgIAAREZGImvWrHBxcYFOp0NCQgLPbIg0KLXg8a4w1rVrVyxZsgTt27fH7du38dtvv2m6D0/t2rVx48YNvHz5MtUAYGtrCxcXF6NlDGH/neGzMGzYMGzYsAFZs2aFhYUF4uPjMX36dFSvXj3FY1L7DBL9GxkqaXh7e6NUqVIICQkBAMycOROJiYlo27Ytdu/eDRsbG7x+/RoRERGIiIiAlZUVsmfPjvDwcNSoUUP5Mur1eoYwIo0yHPAMJ1OGUJVaGFuyZAnMzc1RoEABXLp0KV20dOfIkSPFTUKPHj1C586dERcXB39/f5VqlrEtWbIEK1euxKZNm1C5cmVMnDgREydONBqzjehzyFAj69+4cQN58+aFhYUFoqKiYG9vDwDo2bMnli1bBltbW2TKlAnZs2fHnTt3YG1tDSsrKzg7OyMsLIxnNETpxPTp03Hs2DFs2rQpxTpDi9mrV68wb948nD9/HkuXLlU6Uqenk6zHjx9j8eLFOHr0KCIjIxEWFmZ0Ryj9d4bPS58+fWBjY4Pg4GBs3boVHTt2xHfffYfu3bvj1atXiIqKStEaSfQppJ890j/Q6/UoVKgQACA4OBi7d+/GwoULkT9/fsyfPx8ODg6YPHkypk+fjrZt2+LNmzcwNTWFXq9H7ty52deCKB3Jnz8/Fi1ahLNnz6J8+fJG65K3jAUEBMDc3Dzddje4e/cuwsLCULBgQWzduhWZMmVKl9uRHhiOIbt370aHDh2UIY0SExOxYcMGJCYmon379kb9wog+hQzzbU7e38Pb2xtBQUEYNmwYgoOD4ebmhokTJ+L58+cICgpCgQIF0KhRI6PHa7XPCNGXLrUTpBIlSsDMzAzHjh1D+fLlU3x/DeUtLCyU50iP4aVMmTJYtWoVsmTJAp1Ox7vyPoF3fVayZ8+OAQMGQKfTYfbs2ejcuTMA4Pnz51i1ahW8vLwYwuizyBCXJpPvqA1N9pcuXYKnpye8vb0REhKidHzt168fvv/+exw8eDDVDphEpE0vX75E5syZld+Dg4MREhKCU6dOIW/evCrWLG2wxf6/Sx7Cjh07BhMTE2TKlA
nu7u4AgJYtW2Lv3r0ICwtDlixZkJCQgO7du+PZs2cICwtjCKbPIl0HMcMdkkDqYezixYuoXLlyijA2Y8YM9O3bl18qonQiJCQEZ86cQf369dG+fXuICCIiItCsWTP06NEDfn5+bNWm90p+jAgMDMTatWsRHx8PKysrVKlSBStXrsT9+/fh5+eH8PBwWFpawsXFBaampjh8+DD75tFnk26D2LFjxzBo0CAMHToUTZo0AfDuMFa1alV8/fXXmDJlitKPDAD7WhClE0uWLEFYWBg2bdqE6tWrw9vbG/369UO3bt1w9epVHD16VO0qkoYlPzYcOnQI3bt3x7Jly2BhYYG7d++iV69eKFeuHLZv3w4A2Lp1K+Lj45EtWzZ4eXkpAwPzeEGfQ7oNYkePHsXkyZMRFxeHfv36KX2+Ugtj4eHhKFmyJIYMGYKpU6eqWW0i+gfvatlKTEzEzZs3lcuRb968QaNGjfC///0Pq1evRps2bVSoLaUnmzZtwqZNm5AtWzZ8//33yvLff/8dtWrVQseOHTFjxowUj2NLGH1O6TaIAcDx48cxbdo0PH36FIGBgUoYS74jf/z4MfR6PRITE5EjRw6e0RBpWPLv7pYtWxAZGYmXL19i4MCByvL4+HjExsYiODgYZ8+exS+//IJ27dph1apValadNO7Bgwfw9/fHyZMnUbNmTWzcuBHA/4Ws8ePHY+/evdixYwcyZ87M/niUZtJlhwpDdvT09MTgwYORLVs2hISE4Oeffwbwf3dQPnz4EM2aNcPgwYPh7Oys3PpNRNojIsp3d/jw4RgwYABWrlyJ5cuXo0yZMrh69SqApKllMmfOjPHjx2PNmjX48ccfsX79ehw8eFDF2pPWvN3G4OzsjAkTJqBWrVo4cOAAVq5cCQBKS1f27NkRFRXFmyIozaXLIJb8S1K5cmWjMLZt2zYASSNRt2zZEvfv38eSJUuU8mwRI9Kep0+fKt/rmTNnYuXKldiyZQvCwsIwatQoXLp0Cc2aNcPFixcB/N9B1s7ODo0aNUKtWrVw7tw51epP2qLX641mYDAoX748RowYgdq1a2Pu3LlYsmQJ9Ho9Hjx4gC1btsDV1dVoPmKitJAug9jbkoexGTNmYOXKlWjbti0ePXqEy5cvKyNqE5H2HD58GIULF8aTJ0+QkJCA69evY/r06ShXrhx+/vln9OjRA9OnT4e9vT3atGmDixcvKgdZnU4Hc3NzPH/+HLdu3VJ3Q0gTkl/enjdvHrp06YI2bdpg3rx5SEhIQLly5TBo0CDkzp0bPXr0QPHixTFw4ECICDZs2ACdTmc0sTrR56b5IJb8C5H8zOZtycNY586dce/ePfz+++/pcloToi+Jk5MTsmXLhqCgIJiamqJFixaoXr06fv/9dwwcOBCTJ0/GwIED0atXL1y+fBm1atXCX3/9pTz+5MmT+Pvvv9GlSxcVt4K0Ivnl7bFjx8LV1RXW1tZYuHAh+vTpg/j4eHh4eGDkyJHw9fWFjY0Nypcvj3379sHS0hKxsbEcBoXSlKbTSfIzm+XLl0NE0KRJE2UOybdVrlwZiYmJKFiwICZOnMjpQIjSgfz586NNmzbYsmULjh8/Di8vLwDA7t274erqipYtWwIAbGxs0KdPH5iZmcHV1VV5vJubG86dOwdHR0c1qk8atGrVKmzZsgU7d+6Eu7s7Nm/ejB9++AHPnz+Hn58fVqxYgXLlymHIkCEIDQ3Ftm3bUKBAATRt2lSZjYEorWg29ifvuDt06FCMGDECJiYmiImJee/jqlWrhv/9738MYUQaZuh4DyT12xw4cCDevHmDadOmKctv3ryJ8PBwZMqUCU+fPsXy5cthZ2eHkJAQZVwnAMiZMydDGBmJjY1F48aN4e7ujp9++gldu3bF1KlT0bNnT+zcuRO9e/dGXFwcPDw8MHjwYOTJkwfjxo3Dli1b1K46fYlE42bPni3Ozs5y+vRpo+UvXrwQEZHExEQ1qkVE/9LPP/8sOp1O6t
evL7du3ZKoqCgRETlw4IBYWVnJzJkzRUTkyZMnUqRIEbG1tZWCBQtKiRIlJC4uTs2qkwbp9fpUl9+5c0ciIyOlbNmyMnXqVBER+fvvvyV37txiZ2cnI0eOVMqeOHFCOnfuLLdu3UqTOhMlp6nmom7dusHf3x+VKlVSlp0+fRrNmzeHu7s7/vjjD5w+fRqLFi2CTqfDpEmTjMoSkfa5uroiV65cOHLkCHr37o0qVaqgfv368PLygp+fHzZt2oRq1aqhbNmyOHHiBNasWQMbGxu0a9eOLd1kJHn3lWfPnsHExARZsmQBAOTOnRthYWF48uQJvvnmGwBJ85VWrlwZLVq0QNOmTZXn8fDwQJkyZXhZklShmQFd4+Pj4enpicjISGzduhXlypUDAPTu3RvXrl1DjRo1sH//ftjZ2SFbtmyIjo7GjRs3cPToUeWLR0TaJP9/bKaEhAQkJiZi5syZiI6ORpYsWXD79m3s27cPwcHBsLCwQLdu3dC3b18MGjQoxfNwhHMCgM2bN8PLywvZsmUDAAQFBWH//v24d+8eBg8ejBYtWiBnzpy4fPkymjRpgsaNG6NTp04YNGgQ7O3tsWbNGuh0On6eSBM00UcsOjoaZmZmOHr0KL766is0atQIZ8+eBQA0a9YMjo6OWLFiBerXr4/x48dj+fLlqFevHvLmzQtra2uVa09E/+Tu3bsAkvqDWVhYoEyZMjh69CgqVKiA2bNnY8CAAejatSvOnz8PJycnTJ482agfmQEPmrRjxw40b94cCxcuRGxsLObPn49FixahefPm8PX1xcCBAzFlyhTcvXsXBQsWRJs2bbBx40bUqVMHUVFRWLlyJXQ6HUSEnyfSBnWvjIo0bdpU/P39JTIyUkREXr9+LT4+PuLi4iJnzpwREZGYmBh58uSJ8hi9Xi/169eX1q1bv7N/ABFpw6lTp0Sn08ngwYPl6tWryvLRo0eLk5OT3L9/X0SS+un069dPvLy8RKfTSf/+/VWqMWldaGiomJiYyMyZMyUoKEi2bt2qrFu3bp3Y2dlJ37595enTpxIbGyu3bt2SsLAwpU9xfHy8WlUnSkH1ILZixQoxMTGRQYMGpQhjzs7OShgTEXn+/Lns2LFDfHx8pGTJkkrHXYYxIu169uyZzJo1SxwcHKR69eoyadIkZZ2fn5/07t1boqOjRSSpg/6pU6ckICCAB0tK4eXLl8r/Q0JCRKfTiY2NjaxZs8ao3Lp16yRLlizSv39/uXnzptG6hISEtKgq0QdT9dKkiKBjx45Yv349QkJC8L///Q+PHj2CpaUltm7dilKlSqFx48b47bffAAC3b9/G+vXrkSNHDvz222/KYK2cF4xIu+zt7dG3b1+EhYUhf/78WLx4MSpXroyzZ8+iWrVqiImJQXh4OAAga9asyuVKzg1Lye3evRtz587F8ePHAQADBw7EkiVL8OrVK5w4cQLPnj1TyrZq1QqLFi3CrFmz8NNPPxk9Dy9HkuaomQL1er3SmvXjjz+KTqeTwMDAFC1juXPnlrNnz4qIyL1795TH8IyZKH2JioqSgwcPSqVKlaRAgQLSt29fKVSokPTq1UvtqpGGLV26VHLlyiW9evWSU6dOGa2bPXu26HQ6mTJlijIUisG+fft4nCDNUyWIvWvsrzVr1qQaxurVqycmJiZG/Ut4OZIofRs5cqQ0atRI7O3tRafTyZYtW9SuEmnQ2rVrxdraWtavXy/Pnz9Ptcz06dOVMJZaGYYx0rI0H4wn+bgvBw8exKNHj2Bqaoqvv/4abdq0AQC0a9cOQNJcYTly5MCmTZswatQoFCxYUHkeXo4kSp8M+4BJkybh1KlTKF26NPbs2YOGDRuqXTXSmEePHmHBggUIDg5WproCksYDu3z5MuLj41GlShUEBgYCAIYNG4YXL15g5MiRsLGxUcpz3DnSMtXGERs2bBi2bt0Kc3NzZM+eHX/++SdOnDiBXLlyYcOGDWjVqhUGDRqEQYMGwcnJSXkcx3
0hSv/k/48r9jYO1krJPXr0CF5eXpg0aRJ8fX0BAPPmzcP+/fuxadMmuLi4wNXVFUePHlUG+d65c6fyO1F6oEpn/fnz52PZsmX44YcfcPHiRbRo0QL37t3D6dOnAQAtWrTAunXrMH36dGzcuNHosQxhROlfagdJEWEIoxSio6OxY8cO7N+/H82bN8e8efOQI0cO/PrrrwgNDUVERAQmTJgAABg1apQSwlRqYyD6aKrs9a5cuYJBgwahQoUK2Lp1K0aMGIEFCxbA19dXGdy1ZcuWyJ49O2rUqKFGFYkojbEFg96WI0cOLF++HM2aNcP+/ftha2uL0NBQlC5dGtmzZ8ezZ89gZ2cHvV6vPMYQwvh5ovRClSAWEREBBwcH7NixAx06dEBwcDC6desGvV6PVatWIT4+HgEBAahduzYAXq4gIvpS1a5dGzdu3MDLly/h5uaWYr2trS1cXFyMljGEUXryWS9NJj9LSa5cuXL46aef0KZNG0ydOhW9evUCkDRp686dO/HmzRuj4MUQRkT05cqRI0eKEPbo0SN06NABcXFx8Pf3V6lmRP/dZ+usn/zuyEOHDsHCwgLZsmVD4cKF8fDhQzRo0ACPHz/GqlWrULp0aTx58gR9+vTBkydPEBYWxvBFREQpPH78GIsXL8bRo0cRGRmJsLAwmJmZ8UYuSrc++12Tw4cPx7x585A9e3a8fv0a8+bNg6+vL+7du4c6derAxMQE9+7dQ9GiRSEiOHz4ML9URESUqvPnz2PMmDEoUKAAvvvuO2UGBp68U3r1yYNY8k6Sly5dQseOHbFgwQLExsZiy5YtCA0NxfLly9GhQwc8ffoUFy9exF9//YWCBQuicuXKMDU15ZeKiIjeKSoqClmyZIFOp+NJO6V7nzTtJL8cGRcXh9jYWHh7e6NChQoAgFKlSsHMzAydOnWCiYkJ2rVrhxo1ahjdGZmYmMgQRkRE72Rvbw8g6cSfIYzSu0+aeAwhbPz48Th27Jhyd+SLFy9ga2sLOzs7jBo1CjqdDp07d0ZcXBw6d+5s9Bz8UhER0Yfg3ZGUEXySuyaT3x05e/ZszJ07FyVKlECJEiWwf/9+rFy5Ullva2uLkSNHomvXrli6dOmneHkiIiKidOmTtIgZWsLOnz+P+/fvY8mSJWjQoAEAoHjx4ujfvz8yZcqEHj16AEgKY9999x2srKw+xcsTERERpUv/KYjFxsbCwsICAHDy5El4enrC3NwcpUqVUsqMHDkSABAQEAATExN069YNAGBtbQ3g3XPOEREREWV0//rS5O7duzFr1iycOnUKAODh4YFFixYhLi4OJ06cwNOnT5WyI0eOxIQJE9CjRw/89NNPRs/DEEZERERfqn/VIrZs2TKMGTMGjRo1gpeXl7Lc398fr1+/Rr9+/eDs7IyePXsqd7cMHz4cuXLlUi5ZEhEREX3pPjqIrVu3DgEBAVi2bBnq1q0LOzs7o/UBAQGIi4vD4MGDAQC9evVClixZAAAdOnQAwLkjiYiIiICPDGKPHj3CggULEBwcjJYtWyrLX758icuXLyM+Ph5VqlRBYGAgAGDYsGF48eIFRo4cCRsbm/97UYYwIiIioo9vEYuMjESuXLmU3+fNm4f9+/dj06ZNcHFxgaurK44ePYrAwEC8fv0aO3fuxMSJEz9ppYmIiIgygo+a4ujRo0coV64c6tatizZt2mDu3Lm4fv06qlatiiZNmuD58+cYNmwY/Pz8EBQUBOD/7ork3ZFERERExj6qRSxHjhxYvnw5mjVrhv3798PW1hahoaEoXbo0smfPjmfPnsHOzs5ogFeGMCIiIqLUffSlydq1a+PGjRt4+fIl3NzcUqy3tbWFi4uL0TKGMCIiIqKUPurS5Ps8evQInTt3xuPHjxEWFsY5I4mIiIj+wX++ffHx48dYvHgxjh49isjISCWEJSYmMowRERERvcd/nvT77t27CAsLQ8GCBXHs2DGYmZkhISGBIYyIiIjoH3ySS5NRUVHIki
ULdDodW8KIiIiIPtAn6yMGcAJvIiIioo/xny9NJscQRkRERPThPmkQIyIiIqIPxyBGREREpBIGMSIiIiKVMIgRERERqYRBjIiIiEglDGJEREREKmEQIyIiIlIJgxgRERGRShjEiIg+kpeXF/r164ehQ4ciW7ZscHJywrhx45T1ISEhKFmyJGxsbJAnTx707t0bL1++VNYvX74c9vb22L59O4oUKQJra2s0b94cr169wooVK5AvXz5kzZoV/fr1Q2JiovK42NhYDB48GLly5YKNjQ08PDxw8ODBNNxyIvrUGMSIiP6FFStWwMbGBidPnkRwcDDGjx+PPXv2AABMTEwwa9YshIeHY8WKFdi/fz+GDh1q9PhXr15h1qxZWLduHXbt2oWDBw+iSZMm2LlzJ3bu3IlVq1ZhwYIF2Lhxo/KYgIAAHD9+HOvWrcOFCxfQokUL1K1bFzdu3EjTbSeiT+eTzjVJRPQl8PLyQmJiIo4cOaIsq1ixImrVqoX//e9/Kcpv3LgRPXv2xOPHjwEktYh17twZf/zxBwoUKAAA6NmzJ1atWoWHDx8ic+bMAIC6desiX758mD9/Pm7fvo38+fPj9u3bcHFxUZ7b29sbFStWxOTJkz/nJhPRZ5JJ7QoQEaVHpUqVMvrd2dkZkZGRAIC9e/diypQpuHr1KqKjo5GQkIA3b97g1atXsLa2BgBYW1srIQwAHB0dkS9fPiWEGZYZnvPixYtITExE4cKFjV43NjYW2bNn/yzbSESfH4MYEdG/YGZmZvS7TqeDXq/HrVu30LBhQ/Tq1QuTJk1CtmzZcPToUfj7+yMuLk4JYqk9/l3PCQAvX76Eqakpzp49C1NTU6NyycMbEaUvDGJERJ/Q2bNnodfrMX36dJiYJHXD/fHHH//z85YtWxaJiYmIjIxEtWrV/vPzEZE2sLM+EdEnVLBgQcTHx2P27Nn466+/sGrVKsyfP/8/P2/hwoXRrl07dOzYEZs3b8bNmzdx6tQpTJkyBTt27PgENSciNTCIERF9QqVLl0ZISAimTp2KEiVKYPXq1ZgyZconee5ly5ahY8eOGDRoEIoUKQJfX1+cPn0aefPm/STPT0Rpj3dNEhEREamELWJEREREKmEQIyIiIlIJgxgRERGRShjEiIiIiFTCIEZERESkEgYxIiIiIpUwiBERERGphEGMiIiISCUMYkREREQqYRAjIiIiUgmDGBEREZFKGMSIiIiIVPL/AF1bCMzTH5AaAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = sns.barplot(catalog_tracks.sort_values('count', ascending=False).head(10),x='name', y='count')\n",
"p.set_xticklabels(\n",
" p.get_xticklabels(), \n",
" rotation=45, \n",
" horizontalalignment='right'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "3b0bff35",
"metadata": {},
"outputs": [],
"source": [
"catalog_tracks.to_parquet('../data/catalog_tracks.parquet')"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "16e6863f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([ 148201., 192358., 172134., 268240., 311700., 262459.,\n",
" 387518., 431634., 351026., 505503., 549948., 442020.,\n",
" 624725., 498827., 702108., 747713., 592265., 826998.,\n",
" 870964., 683504., 950577., 996923., 777570., 1074763.,\n",
" 839507., 1157846., 1207839., 936417., 1289020., 1340665.,\n",
" 1036658., 1425276., 1475006., 1139436., 1561318., 1614665.,\n",
" 1241160., 1698673., 1311020., 1790139., 1842043., 1414538.,\n",
" 1933496., 1989027., 1527155., 2081109., 2138155., 1637404.,\n",
" 2233059., 1708755., 2324778., 2387956., 1824282., 2481403.,\n",
" 2542194., 1945960., 2644429., 2704845., 2068454., 2809762.,\n",
" 2873336., 2195813., 2983642., 2278917., 3093772., 3158010.,\n",
" 2408102., 3269353., 3334345., 2545551., 3454690., 3520903.,\n",
" 2685108., 3640694., 2771240., 3751363., 3817447., 2906555.,\n",
" 3933520., 3998960., 3035922., 4105882., 4164647., 3159655.,\n",
" 4261388., 4319254., 3273782., 4400208., 3328987., 4468841.,\n",
" 4495965., 3374974., 4495049., 4464815., 3306909., 4312803.,\n",
" 4140545., 2908108., 3418731., 1983502.]),\n",
" array([18993. , 18996.64, 19000.28, 19003.92, 19007.56, 19011.2 ,\n",
" 19014.84, 19018.48, 19022.12, 19025.76, 19029.4 , 19033.04,\n",
" 19036.68, 19040.32, 19043.96, 19047.6 , 19051.24, 19054.88,\n",
" 19058.52, 19062.16, 19065.8 , 19069.44, 19073.08, 19076.72,\n",
" 19080.36, 19084. , 19087.64, 19091.28, 19094.92, 19098.56,\n",
" 19102.2 , 19105.84, 19109.48, 19113.12, 19116.76, 19120.4 ,\n",
" 19124.04, 19127.68, 19131.32, 19134.96, 19138.6 , 19142.24,\n",
" 19145.88, 19149.52, 19153.16, 19156.8 , 19160.44, 19164.08,\n",
" 19167.72, 19171.36, 19175. , 19178.64, 19182.28, 19185.92,\n",
" 19189.56, 19193.2 , 19196.84, 19200.48, 19204.12, 19207.76,\n",
" 19211.4 , 19215.04, 19218.68, 19222.32, 19225.96, 19229.6 ,\n",
" 19233.24, 19236.88, 19240.52, 19244.16, 19247.8 , 19251.44,\n",
" 19255.08, 19258.72, 19262.36, 19266. , 19269.64, 19273.28,\n",
" 19276.92, 19280.56, 19284.2 , 19287.84, 19291.48, 19295.12,\n",
" 19298.76, 19302.4 , 19306.04, 19309.68, 19313.32, 19316.96,\n",
" 19320.6 , 19324.24, 19327.88, 19331.52, 19335.16, 19338.8 ,\n",
" 19342.44, 19346.08, 19349.72, 19353.36, 19357. ]),\n",
" <BarContainer object of 100 artists>)"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAh4AAAGsCAYAAACSKa6pAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdPElEQVR4nO3df5TVZZ0H8M+AMkAwoxCgLCBkx/wVrrmm4FZQ/ljzmPZzT2dN1tbdQ02ph91OzXZWdtx1h46Kmpa5RbiUaLkntfRsulFEBa4yDCmtgpkaJYQrCwNYV2We/cNlZGBmnDtz7zNz77xe59w/7uU73+9zP8fTffd5nu/zrUkppQAAyGDYQA8AABg6BA8AIBvBAwDIRvAAALIRPACAbAQPACAbwQMAyEbwAACyETwAgGwEDwAgmwELHqtWrYrzzz8/Jk+eHDU1NXHPPfcUfY6UUlx77bVxzDHHRG1tbfzRH/1RXH311aUfLABQEocM1IX37NkTJ510Unz84x+PD3zgA306x+WXXx4PPvhgXHvttfHWt741tm/fHtu3by/xSAGAUqkZDA+Jq6mpibvvvjsuvPDCjs8KhUJ8/vOfjzvuuCN27NgRJ554YnzhC1+IOXPmRETE448/HjNnzowNGzbEW97yloEZOABQlEG7xuNTn/pUrFmzJu6888549NFH48Mf/nD82Z/9WTz55JMREfG9730v3vSmN8V9990XM2bMiOnTp8ell16q4wEAg9igDB6//vWvY+nSpXHXXXfFO97xjjj66KPj7/7u7+JP//RPY+nSpRER8atf/SqeffbZuOuuu2LZsmVx2223RUtLS3zoQx8a4NEDAN0ZsDUePXnsscdi7969ccwxx3T6vFAoxPjx4yMior29PQqFQixbtqzjuCVLlsQpp5wSGzduNP0CAIPQoAweu3fvjuHDh0dLS0sMHz6807+NGTMmIiKOPPLIOOSQQzqFk+OOOy4iXu2YCB4AMPgMyuBx8sknx969e2Pbtm3xjne8o8tjzjjjjHjllVfiqaeeiqOPPjoiIjZt2hQREUcddVS2sQIAvTdgd7Xs3r07fvnLX0bEq0Fj8eLFMXfu3Bg3blxMmzYtLrroovjZz34W1113XZx88snx/PPPx4oVK2LmzJlx3nnnRXt7e5x66qkxZsyYuOGGG6K9vT0aGhqirq4uHnzwwYH4SgDA6xiw4LFy5cqYO3fuQZ/Pmzcvbrvttnj55Zfjn//5n2PZsmXx29/+Nt74xjfG6aefHk1NTfHWt741IiKee+65+PSnPx0PPvhgvOENb4hzzz03rrvuuhg3blzurwMA9MKg2McDABgaBuXttABAdRI8AIBsst/V0t7eHs8991yMHTs2ampqcl8eAOiDlFLs2rUrJk+eHMOG9b1vkT14PPfcczF16tTclwUASmDz5s0xZcqUPv999uAxduzYiHh14HV1dbkvDwD0QVtbW0ydOrXjd7yvsgePfdMrdXV1ggcAVJj+LpOwuBQAyEbwAACyETwAgGwEDwAgG8EDAMhG8AAAshE8AIBsBA8AIBvBAwDIRvAAALIRPACAbAQPACAbwQMAyEbwAACyOWSgBwAAg9X0z93f6f0zi87r0zG8RscDAMhGxwMA4uDOBeUheABACXUVYEy/vMZUCwCQjeABAGQjeAAA2QgeAEA2FpcCUPUs+Bw8dDwAgGwEDwAgG8EDAMhG8AAAshE8AIBs3NUCQEVzx0plETwAYAAcGJiGSlgy1QIAZCN4AADZCB4AQDbWeABQUbpaTErl0PEAALIRPACAbAQPACAbwQMAyEbwAACyETwAgGzcTgvAoDFUtxEfSnQ8AIBsdDwAYBAYKk/Z1fEAALIRPACAbEy1ADAgPHNlaNLxAACyETwAgGwEDwAgm34Fj0WLFkVNTU1cccUVJRoOAFDN+hw8Hnnkkbj11ltj5syZpRwPAFDF+hQ8du/eHX/xF38RX/3qV+Pwww8v9Z
gAgCrVp+DR0NAQ5513Xpx55pmve2yhUIi2trZOLwBgaCp6H48777wz1q1bF4888kivjm9ubo6mpqaiBwZA5Roq239TvKI6Hps3b47LL788br/99hg5cmSv/qaxsTF27tzZ8dq8eXOfBgoAVL6iOh4tLS2xbdu2eNvb3tbx2d69e2PVqlVx8803R6FQiOHDh3f6m9ra2qitrS3NaAGAilZU8HjPe94Tjz32WKfPLrnkkjj22GPjs5/97EGhAwBgf0UFj7Fjx8aJJ57Y6bM3vOENMX78+IM+BwA4kIfEAdBvHvhGb/U7eKxcubIEwwAAhgLPagEAshE8AIBsBA8AIBvBAwDIRvAAALIRPACAbAQPACAbG4gB0KMDNwfzlFn6Q8cDAMhG8AAAshE8AIBsBA8AIBvBAwDIxl0tAHTweHvKTccDAMhG8AAAshE8AIBsBA8AIBvBAwDIRvAAALIRPACAbOzjATBEdLVHhyfNkpuOBwCQjeABAGQjeAAA2QgeAEA2ggcAkI3gAQBkI3gAANkIHgBANoIHAJCNnUsBqoBdSakUOh4AQDaCBwCQjeABAGQjeAAA2VhcClCBulpMCpVAxwMAyEbwAACyMdUCABWiGvZr0fEAALIRPACAbEy1AEAvuZuo/3Q8AIBsBA8AIBtTLQCDzIHt/Eq7awF6IngAMCRZrzEwBA8AKDMh5zXWeAAA2QgeAEA2ploABpAWfHmo6+Cl4wEAZCN4AADZCB4AQDaCBwCQjeABAGQjeAAA2QgeAEA2ggcAkI0NxADKpKtNrDxptjg2Aqs+Oh4AQDaCBwCQjeABAGQjeAAA2VhcClAiFkLC69PxAACyETwAgGyKCh633HJLzJw5M+rq6qKuri5mzZoV//Ef/1GusQEAVaao4DFlypRYtGhRtLS0xNq1a+Pd7353XHDBBfGLX/yiXOMDAKpIUYtLzz///E7vr7766rjlllvioYceihNOOKGkAwOgulh8S0Q/7mrZu3dv3HXXXbFnz56YNWtWt8cVCoUoFAod79va2vp6SQCgwhW9uPSxxx6LMWPGRG1tbcyfPz/uvvvuOP7447s9vrm5Oerr6zteU6dO7deAAYDKVXTweMtb3hLr16+P//qv/4pPfOITMW/evPjv//7vbo9vbGyMnTt3drw2b97crwEDAJWr6KmWESNGxJvf/OaIiDjllFPikUceiRtvvDFuvfXWLo+vra2N2tra/o0SYIAduD7BU2ahb/q9j0d7e3unNRwAAN0pquPR2NgY5557bkybNi127doVy5cvj5UrV8YDDzxQrvEBAFWkqOCxbdu2uPjii2PLli1RX18fM2fOjAceeCDOOuusco0PAKgiRQWPJUuWlGscAMAQ4FktAEA2fd5ADIChyQ6k+VRjrXU8AIBsdDyAIc8eHZCPjgcAkI3gAQBkI3gAANkIHgBANhaXAkNKNd6eCJVExwMAyEbwAACyMdUCMISZeiI3HQ8AIBvBAwDIRvAAALIRPACAbAQPACAbd7UAVaOrOzQ8aRYGF8EDoEq5VZbByFQLAJCN4AEAZGOqBahYphKg8uh4AADZCB4AQDaCBwCQjeABAGQjeAAA2QgeAEA2ggcAkI19PIBB6cA9OobyM1fsV0I10fEAALIRPACAbAQPACAbwQMAyMbiUmDAWTwJQ4eOBwCQjeABAGQjeAAA2QgeAEA2ggcAkI27WgAycgcPQ53gAZRVVz+0Q/m5KzDUmWoBALIRPACAbAQPACAbwQMAyMbiUqCk3LUB9ETHAwDIRvAAALIRPACAbKzxAOgDa1mgb3Q8AIBsBA8AIBtTLUCvHTi94JkrQLF0PACAbAQPACAbUy0AB3DHCpSPjgcAkI3gAQBkI3gAANkIHgBANhaXAhFhjw4gDx0PACAbwQMAyEbwAACyscYDhiAbZAEDRfAAqpqQBYOLqRYAIJuigkdzc3OceuqpMXbs2Jg4cWJceOGFsXHjxnKNDQCoMkUFjx//+MfR0N
AQDz30UPznf/5nvPzyy3H22WfHnj17yjU+AKCKFLXG4/vf/36n97fddltMnDgxWlpa4p3vfGdJBwYAVJ9+LS7duXNnRESMGzeu22MKhUIUCoWO921tbf25JABQwfq8uLS9vT2uuOKKOOOMM+LEE0/s9rjm5uaor6/veE2dOrWvlwQAKlyfOx4NDQ2xYcOG+OlPf9rjcY2NjbFgwYKO921tbcIHlFFXt4967gpUr0p7zlKfgsenPvWpuO+++2LVqlUxZcqUHo+tra2N2traPg0OAKguRQWPlFJ8+tOfjrvvvjtWrlwZM2bMKNe4AIAqVFTwaGhoiOXLl8e9994bY8eOja1bt0ZERH19fYwaNaosAwSIsAMpVIuigsctt9wSERFz5szp9PnSpUvjL//yL0s1JqAIfpCBSlL0VAsAQF95VgsAkI3gAQBkI3gAANkIHgBANoIHAJBNvx4SB5RXpW2FDPB6BA8gO3uPwNBlqgUAyEbwAACyMdUCg4TpB2Ao0PEAALIRPACAbAQPACAbwQMAyEbwAACycVcL0GfuxAGKJXhABrY+B3iVqRYAIBvBAwDIRvAAALIRPACAbCwuBbrkjhWgHHQ8AIBsdDygn9wqC9B7Oh4AQDaCBwCQjeABAGRjjQcUwZ0eAP2j4wEAZCN4AADZmGqBIcAUETBY6HgAANkIHgBANoIHAJCNNR7w/7paB2H7c4DS0vEAALIRPACAbAQPACAbazwYsuxtAZCf4AEVRFgCKp2pFgAgG8EDAMhG8AAAshE8AIBsLC6lKh24CNMOpACDg44HAJCN4AEAZGOqBQaA/TiAoUrwoOL5EQeoHKZaAIBsBA8AIBvBAwDIxhoPKDFrTgC6p+MBAGQjeAAA2ZhqYVDratrC9ucAlUvHAwDIRvAAALIRPACAbKzxYFBxKypAdRM8oAeCEEBpmWoBALIRPACAbAQPACAbwQMAyMbiUrI5cKGmHUgBhh4dDwAgm6KDx6pVq+L888+PyZMnR01NTdxzzz1lGBYAUI2KnmrZs2dPnHTSSfHxj388PvCBD5RjTFBy9uMAGByKDh7nnntunHvuueUYCwBQ5cq+uLRQKEShUOh439bWVu5LAgCDVNmDR3NzczQ1NZX7Mgwwd6wA0Btlv6ulsbExdu7c2fHavHlzuS8JAAxSZe941NbWRm1tbbkvAwBUAPt4AADZFN3x2L17d/zyl7/seP/000/H+vXrY9y4cTFt2rSSDo7Bya2pAPRV0cFj7dq1MXfu3I73CxYsiIiIefPmxW233VaygUF3BB+AylV08JgzZ06klMoxFgCgylnjAQBkI3gAANkIHgBANmXfx4PK0tXCTbuQAlAqOh4AQDaCBwCQjakWBoz9OACGHsFjiPPjD0BOploAgGwEDwAgG1MtlIUpHAC6ouMBAGQjeAAA2ZhqqWIHTnfYgRSAgabjAQBkI3gAANmYaqkS7iIBoBLoeAAA2eh48Lp0UwAoFR0PACAbwQMAyEbwAACyscajAnS1xsJmYABUIh0PACAbwQMAyMZUyxDitlgABpqOBwCQjeABAGRjqmWAuWMFgKFExwMAyEbwAACyETwAgGys8cisVLe0ujUWgEqk4wEAZCN4AADZCB4AQDaCBwCQjcWlJXTggs++bgRm4SgA1UrHAwDIRvAAALIRPACAbKzx6CPrMACgeDoeAEA2ggcAkI2pljIyHQMAnel4AADZCB4AQDamWrrQ1RRJX3chBQBeo+MBAGQjeAAA2ZhqCXefAEAugkcvCScA0H+mWgCAbAQPACAbwQMAyEbwAACyqfrFpQcuCrURGAAMHB0PACCbqu94HMhtsQAwcHQ8AIBsqqrjoZsBAIObjgcAkI3gAQBkI3gAANkIHgBANoIHAJCN4AEAZNOn4PGlL30ppk+fHiNHjozTTjstHn744VKPCwCoQkUHj29961uxYMGCWLhwYaxbty5OOumkOOecc2Lbtm3lGB8AUEWKDh6LFy+Ov/7rv4
5LLrkkjj/++PjKV74So0ePjq9//evlGB8AUEWK2rn0pZdeipaWlmhsbOz4bNiwYXHmmWfGmjVruvybQqEQhUKh4/3OnTsjIqKtra0v4+1Re+HFkp8TACpJOX5f9z9vSqlf5ykqePzP//xP7N27NyZNmtTp80mTJsUTTzzR5d80NzdHU1PTQZ9PnTq1mEsDAL1Qf0N5z79r166or6/v89+X/VktjY2NsWDBgo737e3tsX379hg/fnzU1NSU7DptbW0xderU2Lx5c9TV1ZXsvNVEjYqjXr2nVsVTs+KoV++Vq1Yppdi1a1dMnjy5X+cpKni88Y1vjOHDh8fvfve7Tp//7ne/iyOOOKLLv6mtrY3a2tpOnx122GHFjbIIdXV1/qN8HWpUHPXqPbUqnpoVR716rxy16k+nY5+iFpeOGDEiTjnllFixYkXHZ+3t7bFixYqYNWtWvwcDAFS3oqdaFixYEPPmzYs/+ZM/ibe//e1xww03xJ49e+KSSy4px/gAgCpSdPD48z//83j++efjyiuvjK1bt8Yf//Efx/e///2DFpzmVltbGwsXLjxoWofXqFFx1Kv31Kp4alYc9eq9wV6rmtTf+2IAAHrJs1oAgGwEDwAgG8EDAMhG8AAAsilr8Ghubo5TTz01xo4dGxMnTowLL7wwNm7c2OmYP/zhD9HQ0BDjx4+PMWPGxAc/+MFOG5T9/Oc/j49+9KMxderUGDVqVBx33HFx4403djrHd77znTjrrLNiwoQJUVdXF7NmzYoHHnjgdceXUoorr7wyjjzyyBg1alSceeaZ8eSTT3Y65uqrr47Zs2fH6NGjy7bxWTXU6X3ve19MmzYtRo4cGUceeWR87GMfi+eee64fVelaNdRq+vTpUVNT0+m1aNGiflSle5Ver5UrVx5Uq32vRx55pJ/VOVil1ysiYt26dXHWWWfFYYcdFuPHj4+/+Zu/id27d/ejKt0b7PX6zne+E2effXbHTtnr168/6Jh//dd/jTlz5kRdXV3U1NTEjh07+lSL3shVr5/+9KdxxhlnxPjx42PUqFFx7LHHxvXXX/+648v2m5jK6JxzzklLly5NGzZsSOvXr0/vfe9707Rp09Lu3bs7jpk/f36aOnVqWrFiRVq7dm06/fTT0+zZszv+fcmSJemyyy5LK1euTE899VT6xje+kUaNGpVuuummjmMuv/zy9IUvfCE9/PDDadOmTamxsTEdeuihad26dT2Ob9GiRam+vj7dc8896ec//3l63/vel2bMmJF+//vfdxxz5ZVXpsWLF6cFCxak+vr60hVnP9VQp8WLF6c1a9akZ555Jv3sZz9Ls2bNSrNmzSphlV5VDbU66qij0lVXXZW2bNnS8dp//KVU6fUqFAqd6rRly5Z06aWXphkzZqT29vYSV6vy6/Xb3/42HX744Wn+/PnpiSeeSA8//HCaPXt2+uAHP1jiSr1qsNdr2bJlqampKX31q19NEZFaW1sPOub6669Pzc3Nqbm5OUVE+t///d9+16U7ueq1bt26tHz58rRhw4b09NNPp2984xtp9OjR6dZbb+1xfLl+E8saPA60bdu2FBHpxz/+cUoppR07dqRDDz003XXXXR3HPP744yki0po1a7o9zyc/+ck0d+7cHq91/PHHp6ampm7/vb29PR1xxBHpmmuu6fhsx44dqba2Nt1xxx0HHb906dKyBY8DVXKd9rn33ntTTU1Neumll3q8fn9VYq2OOuqodP3117/eVyuLSqzX/l566aU0YcKEdNVVV/V47VKptHrdeuutaeLEiWnv3r0dxzz66KMpItKTTz7Z85ctgcFUr/09/fTT3QaPfX70ox+VPXgcKGe93v/+96eLLrqo23/P+ZuYdY3Hzp07IyJi3LhxERHR0tISL7/8cpx55pkdxxx77LExbdq0WLNmTY/n2XeOrrS3t8euXbt6PObpp5+OrVu3drp2fX19nHbaaT1eO4dKr9P27dvj9ttvj9mzZ8ehhx7a7b
lLoVJrtWjRohg/fnycfPLJcc0118Qrr7zS8xctkUqt1z7f/e5344UXXsi2U3Kl1atQKMSIESNi2LDX/qd91KhREfFq+73cBlO9KkGuerW2tsbq1avjXe96V7fH5PxNLPvTafdpb2+PK664Is4444w48cQTIyJi69atMWLEiIPmiSZNmhRbt27t8jyrV6+Ob33rW3H//fd3e61rr702du/eHR/5yEe6PWbf+Q/ccbWna+dQyXX67Gc/GzfffHO8+OKLcfrpp8d9993X7XlLoVJrddlll8Xb3va2GDduXKxevToaGxtjy5YtsXjx4h6/b39Var32t2TJkjjnnHNiypQp3Z63VCqxXu9+97tjwYIFcc0118Tll18ee/bsic997nMREbFly5aev3A/DbZ6DXY56jVlypR4/vnn45VXXol//Md/jEsvvbTb8eT8TczW8WhoaIgNGzbEnXfe2edzbNiwIS644IJYuHBhnH322V0es3z58mhqaopvf/vbMXHixIiIuP3222PMmDEdr5/85Cd9HkO5VXKdPvOZz0Rra2s8+OCDMXz48Lj44osjlXFj3Eqt1YIFC2LOnDkxc+bMmD9/flx33XVx0003RaFQ6PP36I1Krdc+v/nNb+KBBx6Iv/qrv+rz+ItRifU64YQT4t/+7d/iuuuui9GjR8cRRxwRM2bMiEmTJnXqgpRDJdZrIOWo109+8pNYu3ZtfOUrX4kbbrgh7rjjjogYBPXq0wRNkRoaGtKUKVPSr371q06fr1ixoss5tWnTpqXFixd3+uwXv/hFmjhxYvr7v//7bq9zxx13pFGjRqX77ruv0+dtbW3pySef7Hi9+OKL6amnnupyzu+d73xnuuyyyw46d441HtVQp302b96cIiKtXr26h2/cd9VUqw0bNqSISE888UQP37h/qqFeV111VZowYULZ1w2lVB312rp1a9q1a1favXt3GjZsWPr2t7/di2/eN4OxXvsbbGs8ctVrf//0T/+UjjnmmJTSwP8mljV4tLe3p4aGhjR58uS0adOmg/5930Kaf//3f+/47IknnjhoIc2GDRvSxIkT02c+85lur7V8+fI0cuTIdM899/R6bEcccUS69tprOz7buXPngCwuraY67fPss8+miEg/+tGPenWd3qrGWn3zm99Mw4YNS9u3b+/VdYpRLfVqb29PM2bMSH/7t3/bq3P3VbXUa39LlixJo0ePLssP6mCu1/4GS/DIWa8DNTU1paOOOqrHseX6TSxr8PjEJz6R6uvr08qVKzvdDrd/Gp0/f36aNm1a+uEPf5jWrl170G2Yjz32WJowYUK66KKLOp1j27ZtHcfcfvvt6ZBDDklf+tKXOh2zY8eOHse3aNGidNhhh6V77703Pfroo+mCCy446NahZ599NrW2tqampqY0ZsyY1NramlpbW9OuXbvU6f/r9NBDD6Wbbroptba2pmeeeSatWLEizZ49Ox199NHpD3/4Q8nqVA21Wr16dbr++uvT+vXr01NPPZW++c1vpgkTJqSLL764pHXap9Lrtc8PfvCDFBHp8ccfL1FlulYN9brppptSS0tL2rhxY7r55pvTqFGj0o033ljCKr1msNfrhRdeSK2tren+++9PEZHuvPPO1NramrZs2dJxzJYtW1Jra2vHLberVq1Kra2t6YUXXihhpV6Vq14333xz+u53v5s2bdqUNm3alL72ta+lsWPHps9//vM9ji/Xb2JZg0dEdPlaunRpxzG///3v0yc/+cl0+OGHp9GjR6f3v//9nf6jWLhwYZfn2D+5vetd7+rymHnz5vU4vvb29vQP//APadKkSam2tja95z3vSRs3bux0zLx587o8dyn/n3yl1+nRRx9Nc+fOTePGjUu1tbVp+vTpaf78+ek3v/lNqUrUodJr1dLSkk477bRUX1+fRo4cmY477rj0L//yLyUPaPtUer32+ehHP9ppL4NyqYZ6fexjH0vjxo1LI0aMSDNnzkzLli0rRWm6NN
jrtXTp0i7/buHCha97/f2/Q6nkqtcXv/jFdMIJJ6TRo0enurq6dPLJJ6cvf/nLnW6z7kqu38Sa/y8GAEDZeVYLAJCN4AEAZCN4AADZCB4AQDaCBwCQjeABAGQjeAAA2QgeAEA2ggcAkI3gAQBkI3gAANkIHgBANv8HjikQDoSO+1sAAAAASUVORK5CYII=",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.hist(interactions['started_at'], bins=100)"
]
},
{
"cell_type": "markdown",
"id": "b1c32a5a-d3be-4f96-8dd9-f7860951020c",
"metadata": {},
"source": [
"Наиболее популярные жанры"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "1bc50491-9235-4d3c-a6c2-297f7c05a959",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>pop</td>\n",
" <td>166109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>75</td>\n",
" <td>rap</td>\n",
" <td>128206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>102</td>\n",
" <td>allrock</td>\n",
" <td>118461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>68</td>\n",
" <td>electronics</td>\n",
" <td>106478</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>rusrap</td>\n",
" <td>65958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>44</td>\n",
" <td>foreignrap</td>\n",
" <td>59772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>14</td>\n",
" <td>rock</td>\n",
" <td>55148</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>16</td>\n",
" <td>dance</td>\n",
" <td>51595</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>20</td>\n",
" <td>ruspop</td>\n",
" <td>46706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>13</td>\n",
" <td>alternative</td>\n",
" <td>42894</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name count\n",
"11 11 pop 166109\n",
"75 75 rap 128206\n",
"102 102 allrock 118461\n",
"68 68 electronics 106478\n",
"3 3 rusrap 65958\n",
"44 44 foreignrap 59772\n",
"14 14 rock 55148\n",
"16 16 dance 51595\n",
"20 20 ruspop 46706\n",
"13 13 alternative 42894"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog_genres = pd.read_parquet('../data/catalog_genres.parquet')\n",
"catalog_genres.sort_values('count', ascending=False).head(10)"
]
},
{
"cell_type": "markdown",
"id": "0d850a07-ef1e-462f-891a-1cf89f2e24ef",
"metadata": {},
"source": [
"# Преобразование данных"
]
},
{
"cell_type": "markdown",
"id": "fabcf8d2-1192-4df5-b20b-fbb84689f57a",
"metadata": {},
"source": [
"Преобразуем данные в формат, более пригодный для дальнейшего использования в расчётах рекомендаций."
]
},
{
"cell_type": "markdown",
"id": "72ecbbed-c560-44d9-9c14-86c7dc76f399",
"metadata": {},
"source": [
"# Очистка памяти"
]
},
{
"cell_type": "markdown",
"id": "b5358ede-ba6e-4c4f-bd73-5b9344f0ba79",
"metadata": {},
"source": [
"Здесь, может понадобится очистка памяти для высвобождения ресурсов для выполнения кода ниже. \n",
"\n",
"Приведите соответствующие код, комментарии, например:\n",
"- код для удаление более ненужных переменных,\n",
"- комментарий, что следует перезапустить kernel, выполнить такие-то начальные секции и продолжить с этапа 3."
]
},
{
"cell_type": "markdown",
"id": "a0c64390",
"metadata": {},
"source": [
"Следует перезапустить kernel, выполнять действия начиная с 3го этапа"
]
},
{
"cell_type": "markdown",
"id": "5b4e87cc",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"id": "075fd983",
"metadata": {},
"source": [
"### ВЫВОДЫ:\n",
"\n",
"Больше всего треков, которые слушают менее 20 раз. Вероятно, их можно убрать из выборки для существенного сокращения размера (делать мы это конечно же не будем)\n",
"\n",
"Треки из топ-10 находятся в моем сердечке :)\n",
"\n",
"Топ жанров возглавляет поп, рэп, рок и электроника, остальные идут с большим отрывом в полтора-два раза по количеству треков"
]
},
{
"cell_type": "markdown",
"id": "708503df-ee89-4cf3-8489-093dc478e2a8",
"metadata": {},
"source": [
"# === ЭТАП 3 ==="
]
},
{
"cell_type": "markdown",
"id": "fd77de22-e10f-4b42-85c1-8fb6f805fe68",
"metadata": {},
"source": [
"# Загрузка данных"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b255df91",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import random\n",
"import scipy\n",
"import sklearn.preprocessing\n",
"from catboost import CatBoostClassifier, Pool"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f19fc8a5-bd2c-40d7-864a-ee75aca6d512",
"metadata": {},
"outputs": [],
"source": [
"items = pd.read_parquet('../data/items.parquet')\n",
"interactions = pd.read_parquet('../data/events.parquet')\n",
"#catalog = pd.read_parquet('data/catalog_names.parquet')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9b4ef95a",
"metadata": {},
"outputs": [],
"source": [
"catalog_genres = pd.read_parquet('../data/catalog_genres.parquet')\n",
"catalog_albums = pd.read_parquet('../data/catalog_albums.parquet')\n",
"catalog_artists = pd.read_parquet('../data/catalog_artists.parquet')\n",
"catalog_tracks = pd.read_parquet('../data/catalog_tracks.parquet')"
]
},
{
"cell_type": "markdown",
"id": "a694c023-6477-490b-939d-1cfa6f5f1b72",
"metadata": {},
"source": [
"# Разбиение данных"
]
},
{
"cell_type": "markdown",
"id": "fbd5f6e0-54e7-4428-8678-eabce505d82c",
"metadata": {},
"source": [
"Разбиваем данные на тренировочную, тестовую выборки."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "00c2dfa5-d8a2-47d1-922e-6eefee2c62d1",
"metadata": {},
"outputs": [],
"source": [
"train_test_global_time_split_date = pd.to_datetime(\"2022-12-16\")\n",
"interactions_train = interactions.loc[interactions[\"started_at\"] < train_test_global_time_split_date]\n",
"interactions_test = interactions.loc[interactions[\"started_at\"] >= train_test_global_time_split_date]\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6bb261d5",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# В учебных целях берем 25% от всего, потому что иначе все выполняется бессовестно долго. \n",
"interactions_train = interactions_train.sample(frac=0.25, random_state = 42)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "781f047e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(50299870, 4)\n",
"(13514936, 4)\n"
]
}
],
"source": [
"print(interactions_train.shape)\n",
"print(interactions_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5e73cc82",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(13438773, 4)\n"
]
}
],
"source": [
"# Drop items in test that are not in train\n",
"interactions_test = interactions_test.loc[~interactions_test['track_id'].isin(items_diff)]\n",
"print(interactions_test.shape)"
]
},
{
"cell_type": "markdown",
"id": "9131c7e6-8852-4556-b510-51f7253cc299",
"metadata": {},
"source": [
"# Топ популярных"
]
},
{
"cell_type": "markdown",
"id": "dd70d43a-88cc-4719-b291-feaed7136f30",
"metadata": {},
"source": [
"Рассчитаем рекомендации как топ популярных."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ee45e200-b7d6-4f56-9077-aad431689b96",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>track_id</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7920</th>\n",
" <td>53404</td>\n",
" <td>27471</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23491</th>\n",
" <td>178529</td>\n",
" <td>25393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>427095</th>\n",
" <td>33311009</td>\n",
" <td>25232</td>\n",
" </tr>\n",
" <tr>\n",
" <th>452692</th>\n",
" <td>35505245</td>\n",
" <td>23657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>323047</th>\n",
" <td>24692821</td>\n",
" <td>21052</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>747958</th>\n",
" <td>68562711</td>\n",
" <td>11949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>793610</th>\n",
" <td>75642961</td>\n",
" <td>11884</td>\n",
" </tr>\n",
" <tr>\n",
" <th>794783</th>\n",
" <td>75944934</td>\n",
" <td>11797</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8394</th>\n",
" <td>57730</td>\n",
" <td>11691</td>\n",
" </tr>\n",
" <tr>\n",
" <th>746648</th>\n",
" <td>68348389</td>\n",
" <td>11651</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" track_id count\n",
"7920 53404 27471\n",
"23491 178529 25393\n",
"427095 33311009 25232\n",
"452692 35505245 23657\n",
"323047 24692821 21052\n",
"... ... ...\n",
"747958 68562711 11949\n",
"793610 75642961 11884\n",
"794783 75944934 11797\n",
"8394 57730 11691\n",
"746648 68348389 11651\n",
"\n",
"[100 rows x 2 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"item_popularity = interactions_train.groupby('track_id')[['user_id']].count().reset_index().sort_values(by='user_id', ascending=False)[:100]\n",
"item_popularity.columns = ['track_id', 'count']\n",
"item_popularity"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e39319f4",
"metadata": {},
"outputs": [],
"source": [
"item_popularity['rank'] = item_popularity['count'].rank(ascending=False)\n",
"item_popularity['rank'] = item_popularity['rank'].astype(int)\n",
"item_popularity = item_popularity.rename(columns= {'track_id': 'item_id'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bacefc16",
"metadata": {},
"outputs": [],
"source": [
"item_popularity.to_parquet('../recommendations/top_popular.parquet')\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "4cc77f15",
"metadata": {},
"outputs": [],
"source": [
"item_popularity = pd.read_parquet('../recommendations/top_popular.parquet')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "705c40a6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>item_id</th>\n",
" <th>count</th>\n",
" <th>rank</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7920</th>\n",
" <td>53404</td>\n",
" <td>27471</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23491</th>\n",
" <td>178529</td>\n",
" <td>25393</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>427095</th>\n",
" <td>33311009</td>\n",
" <td>25232</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>452692</th>\n",
" <td>35505245</td>\n",
" <td>23657</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>323047</th>\n",
" <td>24692821</td>\n",
" <td>21052</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>747958</th>\n",
" <td>68562711</td>\n",
" <td>11949</td>\n",
" <td>96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>793610</th>\n",
" <td>75642961</td>\n",
" <td>11884</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>794783</th>\n",
" <td>75944934</td>\n",
" <td>11797</td>\n",
" <td>98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8394</th>\n",
" <td>57730</td>\n",
" <td>11691</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>746648</th>\n",
" <td>68348389</td>\n",
" <td>11651</td>\n",
" <td>100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" item_id count rank\n",
"7920 53404 27471 1\n",
"23491 178529 25393 2\n",
"427095 33311009 25232 3\n",
"452692 35505245 23657 4\n",
"323047 24692821 21052 5\n",
"... ... ... ...\n",
"747958 68562711 11949 96\n",
"793610 75642961 11884 97\n",
"794783 75944934 11797 98\n",
"8394 57730 11691 99\n",
"746648 68348389 11651 100\n",
"\n",
"[100 rows x 3 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"item_popularity"
]
},
{
"cell_type": "markdown",
"id": "2ad231f2-6158-421a-b7fa-01d8bc3ed572",
"metadata": {},
"source": [
"# Персональные"
]
},
{
"cell_type": "markdown",
"id": "86159460-cd9d-4b63-8248-604ea3c9aebf",
"metadata": {},
"source": [
"Рассчитаем персональные рекомендации."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "25f19ff2",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_155402/3672207653.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" interactions_test['target'] = 1 # Факт прослушивания\n"
]
}
],
"source": [
"# Implicit-feedback target: every logged interaction counts as one listen.\n",
"# assign() returns a new frame instead of writing a column into an object\n",
"# that pandas treats as a slice of another DataFrame, which is what raised\n",
"# SettingWithCopyWarning for the in-place version of this cell.\n",
"interactions_train = interactions_train.assign(target=1)\n",
"interactions_test = interactions_test.assign(target=1)"
]
},
{
"cell_type": "markdown",
"id": "af7a2e3c",
"metadata": {},
"source": [
"## ALS"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "51edbd36",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_155402/3892860961.py:7: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" interactions_test[\"user_id_enc\"] = user_encoder.transform(interactions_test[\"user_id\"])\n",
"/tmp/ipykernel_155402/3892860961.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" interactions_test[\"track_id_enc\"] = item_encoder.transform(interactions_test[\"track_id\"])\n"
]
}
],
"source": [
"# Re-encode raw identifiers as contiguous indices 0, 1, 2, ... so they can\n",
"# serve as row / column positions of the user-item matrix.\n",
"\n",
"# Users: fitted on `interactions` (not only on the train part).\n",
"user_encoder = sklearn.preprocessing.LabelEncoder()\n",
"user_encoder.fit(interactions[\"user_id\"])\n",
"\n",
"# Tracks: fitted on `items`.\n",
"item_encoder = sklearn.preprocessing.LabelEncoder()\n",
"item_encoder.fit(items[\"track_id\"])\n",
"items[\"track_id_enc\"] = item_encoder.transform(items[\"track_id\"])\n",
"\n",
"# assign() builds new frames instead of writing columns into objects that\n",
"# pandas treats as slices; the in-place version of these assignments raised\n",
"# SettingWithCopyWarning. Column order is unchanged: user_id_enc, track_id_enc.\n",
"interactions_train = interactions_train.assign(\n",
"    user_id_enc=user_encoder.transform(interactions_train[\"user_id\"]),\n",
"    track_id_enc=item_encoder.transform(interactions_train[\"track_id\"]),\n",
")\n",
"interactions_test = interactions_test.assign(\n",
"    user_id_enc=user_encoder.transform(interactions_test[\"user_id\"]),\n",
"    track_id_enc=item_encoder.transform(interactions_test[\"track_id\"]),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "d2dd9c63",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>track_id</th>\n",
" <th>track_seq</th>\n",
" <th>started_at</th>\n",
" <th>target</th>\n",
" <th>user_id_enc</th>\n",
" <th>track_id_enc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>273</th>\n",
" <td>1311288</td>\n",
" <td>65891271</td>\n",
" <td>274</td>\n",
" <td>2022-12-03</td>\n",
" <td>1</td>\n",
" <td>1305663</td>\n",
" <td>774484</td>\n",
" </tr>\n",
" <tr>\n",
" <th>241</th>\n",
" <td>1182663</td>\n",
" <td>35112957</td>\n",
" <td>242</td>\n",
" <td>2022-10-16</td>\n",
" <td>1</td>\n",
" <td>1177607</td>\n",
" <td>473510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>785243</td>\n",
" <td>89084998</td>\n",
" <td>135</td>\n",
" <td>2022-12-14</td>\n",
" <td>1</td>\n",
" <td>781869</td>\n",
" <td>899346</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>172914</td>\n",
" <td>43117</td>\n",
" <td>15</td>\n",
" <td>2022-06-08</td>\n",
" <td>1</td>\n",
" <td>172180</td>\n",
" <td>6832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>572275</td>\n",
" <td>20003416</td>\n",
" <td>21</td>\n",
" <td>2022-12-08</td>\n",
" <td>1</td>\n",
" <td>569812</td>\n",
" <td>285162</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>621</th>\n",
" <td>1056259</td>\n",
" <td>2189518</td>\n",
" <td>622</td>\n",
" <td>2022-03-15</td>\n",
" <td>1</td>\n",
" <td>1051720</td>\n",
" <td>111587</td>\n",
" </tr>\n",
" <tr>\n",
" <th>382</th>\n",
" <td>278471</td>\n",
" <td>23344068</td>\n",
" <td>383</td>\n",
" <td>2022-04-02</td>\n",
" <td>1</td>\n",
" <td>277280</td>\n",
" <td>321471</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>682319</td>\n",
" <td>59373715</td>\n",
" <td>137</td>\n",
" <td>2022-11-11</td>\n",
" <td>1</td>\n",
" <td>679358</td>\n",
" <td>721033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>955372</td>\n",
" <td>9548591</td>\n",
" <td>6</td>\n",
" <td>2022-08-05</td>\n",
" <td>1</td>\n",
" <td>951241</td>\n",
" <td>196517</td>\n",
" </tr>\n",
" <tr>\n",
" <th>896</th>\n",
" <td>689124</td>\n",
" <td>27759885</td>\n",
" <td>897</td>\n",
" <td>2022-06-25</td>\n",
" <td>1</td>\n",
" <td>686139</td>\n",
" <td>376419</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>50299870 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" user_id track_id track_seq started_at target user_id_enc \\\n",
"273 1311288 65891271 274 2022-12-03 1 1305663 \n",
"241 1182663 35112957 242 2022-10-16 1 1177607 \n",
"134 785243 89084998 135 2022-12-14 1 781869 \n",
"14 172914 43117 15 2022-06-08 1 172180 \n",
"20 572275 20003416 21 2022-12-08 1 569812 \n",
".. ... ... ... ... ... ... \n",
"621 1056259 2189518 622 2022-03-15 1 1051720 \n",
"382 278471 23344068 383 2022-04-02 1 277280 \n",
"136 682319 59373715 137 2022-11-11 1 679358 \n",
"5 955372 9548591 6 2022-08-05 1 951241 \n",
"896 689124 27759885 897 2022-06-25 1 686139 \n",
"\n",
" track_id_enc \n",
"273 774484 \n",
"241 473510 \n",
"134 899346 \n",
"14 6832 \n",
"20 285162 \n",
".. ... \n",
"621 111587 \n",
"382 321471 \n",
"136 721033 \n",
"5 196517 \n",
"896 376419 \n",
"\n",
"[50299870 rows x 7 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the train interactions, including the target and encoded id columns.\n",
"interactions_train\n"
]
},
{
"cell_type": "markdown",
"id": "aaf9dca7",
"metadata": {},
"source": [
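"Строим разреженную матрицу взаимодействий «пользователь × трек» в формате CSR (Compressed Sparse Row).\n",
"\n",
"Иллюстрация формата: https://matteding.github.io/images/csr.gif"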
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "75313c81",
"metadata": {},
"outputs": [],
"source": [
"# Sparse user x track matrix of listens (rows: user_id_enc, columns: track_id_enc).\n",
"# The shape is pinned to the encoder sizes. Without it scipy infers the shape\n",
"# from the largest index present in the train data, so a user or track that\n",
"# is missing from train would have no row / column and its encoded id could\n",
"# not be used to index the matrix.\n",
"user_item_matrix_train = scipy.sparse.csr_matrix(\n",
"    (\n",
"        interactions_train[\"target\"],\n",
"        (interactions_train[\"user_id_enc\"], interactions_train[\"track_id_enc\"]),\n",
"    ),\n",
"    shape=(len(user_encoder.classes_), len(item_encoder.classes_)),\n",
"    dtype=np.int8,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "ac801f85",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrey/work/institute/MLE/assets/recsys/.venv_recsys/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"/home/andrey/work/institute/MLE/assets/recsys/.venv_recsys/lib/python3.10/site-packages/implicit/cpu/als.py:95: RuntimeWarning: OpenBLAS is configured to use 12 threads. It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. Having OpenBLAS use a threadpool can lead to severe performance issues here.\n",
" check_blas_config()\n",
"100%|██████████| 30/30 [02:48<00:00, 5.61s/it]\n"
]
}
],
"source": [
"from implicit.als import AlternatingLeastSquares\n",
"\n",
"# ALS model fitted on the train user-item matrix; random_state pins the seed.\n",
"als_model = AlternatingLeastSquares(\n",
"    factors=30,\n",
"    iterations=30,\n",
"    regularization=0.05,\n",
"    random_state=0,\n",
")\n",
"als_model.fit(user_item_matrix_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98ce57e3",
"metadata": {},
"outputs": [],
"source": [
"def get_recommendations_als(user_item_matrix, model, user_id, user_encoder, item_encoder, include_seen=False, n=5):\n",
"    \"\"\"Return the top-``n`` ranked recommendations of ``model`` for one user.\n",
"\n",
"    ``user_id`` is the raw identifier. It is mapped to its encoded index with\n",
"    ``user_encoder``; that index is passed to ``model.recommend`` together\n",
"    with the matching row of ``user_item_matrix``. Already-liked items are\n",
"    filtered out unless ``include_seen`` is true.\n",
"\n",
"    Returns a DataFrame with the columns ``item_id_enc``, ``score`` and\n",
"    ``item_id`` (the encoded ids decoded with ``item_encoder``).\n",
"    \"\"\"\n",
"    encoded_user = user_encoder.transform([user_id])[0]\n",
"    item_ids_enc, scores = model.recommend(\n",
"        encoded_user,\n",
"        user_item_matrix[encoded_user],\n",
"        filter_already_liked_items=not include_seen,\n",
"        N=n,\n",
"    )\n",
"    ranked = pd.DataFrame({\"item_id_enc\": item_ids_enc, \"score\": scores})\n",
"    return ranked.assign(item_id=item_encoder.inverse_transform(ranked[\"item_id_enc\"]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e39e5237",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 83,
"id": "887bf440",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"user_id: 39211\n",
"История (последние события)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>track_id</th>\n",
" <th>track_seq</th>\n",
" <th>started_at</th>\n",
" <th>target</th>\n",
" <th>user_id_enc</th>\n",
" <th>track_id_enc</th>\n",
" <th>name</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>39211</td>\n",
" <td>33977439</td>\n",
" <td>143</td>\n",
" <td>2022-11-14</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>459580</td>\n",
" <td>Feel It Still</td>\n",
" <td>[70]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>39211</td>\n",
" <td>15410295</td>\n",
" <td>89</td>\n",
" <td>2022-11-03</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>229667</td>\n",
" <td>Танцуй, пока молодая</td>\n",
" <td>[11, 20]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>39211</td>\n",
" <td>14293633</td>\n",
" <td>85</td>\n",
" <td>2022-11-03</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>219139</td>\n",
" <td>Trebles 2013</td>\n",
" <td>[16]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>39211</td>\n",
" <td>37100370</td>\n",
" <td>156</td>\n",
" <td>2022-11-16</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>502447</td>\n",
" <td>Белая ночь</td>\n",
" <td>[11, 20]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>39211</td>\n",
" <td>32043093</td>\n",
" <td>133</td>\n",
" <td>2022-11-11</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>428825</td>\n",
" <td>Mr. Vain Recall</td>\n",
" <td>[16]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>39211</td>\n",
" <td>878664</td>\n",
" <td>37</td>\n",
" <td>2022-10-24</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>89718</td>\n",
" <td>Y Si No Existieras (Y Si No Has De Volver) (Et...</td>\n",
" <td>[325]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>39211</td>\n",
" <td>67464959</td>\n",
" <td>282</td>\n",
" <td>2022-12-11</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>784874</td>\n",
" <td>Побуяним</td>\n",
" <td>[11, 20]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>39211</td>\n",
" <td>21998692</td>\n",
" <td>108</td>\n",
" <td>2022-11-07</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>313896</td>\n",
" <td>Don't Think I Will Forgive You</td>\n",
" <td>[68]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>39211</td>\n",
" <td>62793881</td>\n",
" <td>267</td>\n",
" <td>2022-12-07</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>748758</td>\n",
" <td>Kanyelele</td>\n",
" <td>[68]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>39211</td>\n",
" <td>1710808</td>\n",
" <td>47</td>\n",
" <td>2022-10-26</td>\n",
" <td>1</td>\n",
" <td>39046</td>\n",
" <td>103882</td>\n",
" <td>Bohemian Rhapsody</td>\n",
" <td>[14, 17, 25, 102]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id track_id track_seq started_at target user_id_enc \\\n",
"56 39211 33977439 143 2022-11-14 1 39046 \n",
"57 39211 15410295 89 2022-11-03 1 39046 \n",
"58 39211 14293633 85 2022-11-03 1 39046 \n",
"59 39211 37100370 156 2022-11-16 1 39046 \n",
"60 39211 32043093 133 2022-11-11 1 39046 \n",
"61 39211 878664 37 2022-10-24 1 39046 \n",
"62 39211 67464959 282 2022-12-11 1 39046 \n",
"63 39211 21998692 108 2022-11-07 1 39046 \n",
"64 39211 62793881 267 2022-12-07 1 39046 \n",
"65 39211 1710808 47 2022-10-26 1 39046 \n",
"\n",
" track_id_enc name \\\n",
"56 459580 Feel It Still \n",
"57 229667 Танцуй, пока молодая \n",
"58 219139 Trebles 2013 \n",
"59 502447 Белая ночь \n",
"60 428825 Mr. Vain Recall \n",
"61 89718 Y Si No Existieras (Y Si No Has De Volver) (Et... \n",
"62 784874 Побуяним \n",
"63 313896 Don't Think I Will Forgive You \n",
"64 748758 Kanyelele \n",
"65 103882 Bohemian Rhapsody \n",
"\n",
" genres \n",
"56 [70] \n",
"57 [11, 20] \n",
"58 [16] \n",
"59 [11, 20] \n",
"60 [16] \n",
"61 [325] \n",
"62 [11, 20] \n",
"63 [68] \n",
"64 [68] \n",
"65 [14, 17, 25, 102] "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Pick one user to inspect; random_state makes the pick repeatable between runs.\n",
"user_id = interactions_train[\"user_id\"].sample(random_state=0).iat[0]\n",
"\n",
"print(f\"user_id: {user_id}\")\n",
"\n",
"print(\"История (последние события)\")\n",
"# All train interactions of that user, joined with track name and genres.\n",
"user_history = (\n",
"    interactions_train\n",
"    .query(\"user_id == @user_id\")\n",
"    .merge(items.set_index(\"track_id\")[[\"name\", \"genres\"]], on=\"track_id\")\n",
")\n",
"# The rows are not in chronological order, so tail(10) alone does not give\n",
"# the latest events. Sort first; track_seq presumably numbers a user's\n",
"# listens in order (it grows with started_at in the data shown) - confirm.\n",
"user_history_to_print = user_history.sort_values(\"track_seq\").tail(10)\n",
"display(user_history_to_print)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "13e662c5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Рекомендации\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>item_id_enc</th>\n",
" <th>score</th>\n",
" <th>item_id</th>\n",
" <th>name</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>774169</td>\n",
" <td>0.511178</td>\n",
" <td>65851540</td>\n",
" <td>Юность</td>\n",
" <td>[11, 20]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>534847</td>\n",
" <td>0.254975</td>\n",
" <td>39257277</td>\n",
" <td>In My Mind</td>\n",
" <td>[16]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>443347</td>\n",
" <td>0.238578</td>\n",
" <td>32947997</td>\n",
" <td>Shape of You</td>\n",
" <td>[11]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>663491</td>\n",
" <td>0.184026</td>\n",
" <td>52380688</td>\n",
" <td>Любимка</td>\n",
" <td>[11, 20]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>728275</td>\n",
" <td>0.179992</td>\n",
" <td>60292250</td>\n",
" <td>Blinding Lights</td>\n",
" <td>[74]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" item_id_enc score item_id name genres\n",
"0 774169 0.511178 65851540 Юность [11, 20]\n",
"1 534847 0.254975 39257277 In My Mind [16]\n",
"2 443347 0.238578 32947997 Shape of You [11]\n",
"3 663491 0.184026 52380688 Любимка [11, 20]\n",
"4 728275 0.179992 60292250 Blinding Lights [74]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print(\"Рекомендации\")\n",
"# ALS recommendations for the sampled user (include_seen=True), joined with\n",
"# track name and genres from `items`.\n",
"user_recommendations_als = get_recommendations_als(\n",
"    user_item_matrix_train,\n",
"    als_model,\n",
"    user_id,\n",
"    user_encoder,\n",
"    item_encoder,\n",
"    include_seen=True,\n",
").merge(\n",
"    items.set_index(\"track_id\")[[\"name\", \"genres\"]],\n",
"    left_on=\"item_id\",\n",
"    right_on=\"track_id\",\n",
")\n",
"\n",
"display(user_recommendations_als)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ab3670e",
"metadata": {},
"outputs": [],
"source": [
"# !!! Slow: about 45 minutes according to the original note.\n",
"# To skip the wait, jump to the cell that reads the saved parquet file.\n",
"\n",
"# Encoded ids of every user that has a row in the train matrix.\n",
"# The previous range(len(user_encoder.classes_) - 1) left out the last user.\n",
"user_ids_encoded = np.arange(user_item_matrix_train.shape[0])\n",
"\n",
"# Top-30 recommendations for all users in one batch call; already listened\n",
"# tracks are not filtered out. The ids cover every row, so the matrix is\n",
"# passed as is instead of being copied through fancy indexing.\n",
"als_recommendations = als_model.recommend(\n",
"    user_ids_encoded,\n",
"    user_item_matrix_train,\n",
"    filter_already_liked_items=False,\n",
"    N=30,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c32a23c0",
"metadata": {},
"outputs": [],
"source": [
"# преобразуем полученные рекомендации в табличный формат\n",
"item_ids_enc = als_recommendations[0]\n",
"als_scores = als_recommendations[1]\n",
"\n",
"als_recommendations = pd.DataFrame({\n",
" \"user_id_enc\": user_ids_encoded,\n",
" \"item_id_enc\": item_ids_enc.tolist(), \n",
" \"score\": als_scores.tolist()})\n",
"als_recommendations = als_recommendations.explode([\"item_id_enc\", \"score\"], ignore_index=True)\n",
"\n",
"# приводим типы данных\n",
"als_recommendations[\"item_id_enc\"] = als_recommendations[\"item_id_enc\"].astype(\"int\")\n",
"als_recommendations[\"score\"] = als_recommendations[\"score\"].astype(\"float\")\n",
"\n",
"# получаем изначальные идентификаторы\n",
"als_recommendations[\"user_id\"] = user_encoder.inverse_transform(als_recommendations[\"user_id_enc\"])\n",
"als_recommendations[\"item_id\"] = item_encoder.inverse_transform(als_recommendations[\"item_id_enc\"])\n",
"als_recommendations = als_recommendations.drop(columns=[\"user_id_enc\", \"item_id_enc\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "625dd757",
"metadata": {},
"outputs": [],
"source": [
"# Fix the column order: user, item, score.\n",
"als_recommendations = als_recommendations.loc[:, [\"user_id\", \"item_id\", \"score\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15923526",
"metadata": {},
"outputs": [],
"source": [
"# Order the rows from the highest to the lowest score and persist them.\n",
"# The file is written to the path this notebook reads it back from; the\n",
"# previous target (\"recsys/recomendations/...\") was a different, misspelled\n",
"# directory, so the reload did not pick up a freshly computed result.\n",
"als_recommendations = als_recommendations.sort_values(by=\"score\", ascending=False)\n",
"als_recommendations.to_parquet(\"../recommendations/personal_als.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "ff125847",
"metadata": {},
"outputs": [],
"source": [
"# Load the saved ALS recommendations (entry point when the slow cells above are skipped).\n",
"als_recommendations = pd.read_parquet(\"../recommendations/personal_als.parquet\")"
]
},
{
"cell_type": "markdown",
"id": "5f09dc7e-7c91-4355-860a-b9cfb9f33f15",
"metadata": {},
"source": [
"# Похожие"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfc5d8ba",
"metadata": {},
"outputs": [],
"source": [
"# !!!! Slow: about 40 minutes according to the original note.\n",
"# To skip the wait, jump to the cell that reads the saved parquet file.\n",
"\n",
"# Encoded ids of every track that occurs in interactions_train.\n",
"train_item_ids_enc = interactions_train[\"track_id_enc\"].unique()\n",
"\n",
"max_similar_items = 10\n",
"\n",
"# Neighbour lookup with the ALS model fitted earlier. Per the original note,\n",
"# similar_items also returns the query track itself as a top match, so one\n",
"# extra neighbour is requested; that self-match is not removed in this cell.\n",
"sim_item_item_ids_enc, sim_item_scores = als_model.similar_items(\n",
"    train_item_ids_enc, N=max_similar_items + 1\n",
")\n",
"\n",
"# One row per query track; neighbour ids and scores stay as per-row lists.\n",
"similar_items = pd.DataFrame(\n",
"    {\n",
"        \"track_id_enc\": train_item_ids_enc,\n",
"        \"sim_item_id_enc\": sim_item_item_ids_enc.tolist(),\n",
"        \"score\": sim_item_scores.tolist(),\n",
"    }\n",
").sort_values(by=\"score\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62881cf9",
"metadata": {},
"outputs": [],
"source": [
"# TODO: the ids in similar_items are still encoder indices at this point.\n",
"# The row-by-row inverse transform below is disabled: per the original note it\n",
"# was still running after several hours.\n",
"# similar_items[\"item_id\"] = similar_items['track_id_enc'].apply(lambda x: item_encoder.inverse_transform([x])[0])\n",
"# similar_items[\"sim_item_id\"] = similar_items['sim_item_id_enc'].apply(lambda x: item_encoder.inverse_transform(x[1:]))\n"
]
},
{
"cell_type": "markdown",
"id": "1dfcb683-b440-40a8-9975-894156a53872",
"metadata": {},
"source": [
"Рассчитаем похожие, они позже пригодятся для онлайн-рекомендаций."
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "a75d07ee-4b12-4ce5-aa85-e45cb7a7a4f0",
"metadata": {},
"outputs": [],
"source": [
"# Decode the encoder indices to the original track ids before saving: the\n",
"# columns are named item_id / sim_item_id, but a plain rename stored the\n",
"# encoded values under those names. inverse_transform is called once on a\n",
"# flat array for all rows instead of once per row.\n",
"# The guard keeps the cell re-runnable after the columns were already decoded\n",
"# (for example when similar_items was reloaded from the parquet file).\n",
"# NOTE(review): readers of similar_items.parquet that assumed encoded ids\n",
"# need to be checked.\n",
"if \"track_id_enc\" in similar_items.columns:\n",
"    sim_ids_enc = np.array(similar_items[\"sim_item_id_enc\"].tolist())\n",
"    similar_items = pd.DataFrame(\n",
"        {\n",
"            \"item_id\": item_encoder.inverse_transform(similar_items[\"track_id_enc\"]),\n",
"            \"sim_item_id\": item_encoder.inverse_transform(sim_ids_enc.ravel())\n",
"            .reshape(sim_ids_enc.shape)\n",
"            .tolist(),\n",
"            \"score\": similar_items[\"score\"],\n",
"        },\n",
"        index=similar_items.index,\n",
"    )\n",
"similar_items.to_parquet(\"../recommendations/similar_items.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "ce370904-4c49-4152-8706-416074ea9b9a",
"metadata": {},
"outputs": [],
"source": [
"# Read the similar-items table back from the parquet file written above.\n",
"similar_items = pd.read_parquet(\"../recommendations/similar_items.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "be54154c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>item_id</th>\n",
" <th>sim_item_id</th>\n",
" <th>score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>787744</th>\n",
" <td>440450</td>\n",
" <td>[440450, 794964, 662906, 789388]</td>\n",
" <td>[0.9999995231628418, 0.8756284713745117, 0.841...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89069</th>\n",
" <td>478024</td>\n",
" <td>[478024, 542933, 200580, 501613]</td>\n",
" <td>[0.9999995231628418, 0.950843870639801, 0.9462...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142892</th>\n",
" <td>170948</td>\n",
" <td>[170948, 357991, 184016, 56267]</td>\n",
" <td>[0.9999995231628418, 0.9587958455085754, 0.956...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>636868</th>\n",
" <td>178504</td>\n",
" <td>[178504, 92743, 206733, 337648]</td>\n",
" <td>[0.9999995231628418, 0.9872869849205017, 0.986...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>694565</th>\n",
" <td>429070</td>\n",
" <td>[429070, 428326, 289784, 428323]</td>\n",
" <td>[0.9999995827674866, 0.8366211652755737, 0.836...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>750870</th>\n",
" <td>752554</td>\n",
" <td>[752554, 511740, 397649, 718404]</td>\n",
" <td>[1.0000004768371582, 1.0000003576278687, 0.972...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>393268</th>\n",
" <td>346630</td>\n",
" <td>[346651, 346630, 346622, 346616]</td>\n",
" <td>[1.0000004768371582, 1.0000004768371582, 1.000...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>559345</th>\n",
" <td>346651</td>\n",
" <td>[346651, 346630, 346622, 346616]</td>\n",
" <td>[1.0000004768371582, 1.0000004768371582, 1.000...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>476378</th>\n",
" <td>346616</td>\n",
" <td>[346651, 346630, 346622, 346616]</td>\n",
" <td>[1.0000004768371582, 1.0000004768371582, 1.000...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>500411</th>\n",
" <td>346622</td>\n",
" <td>[346651, 346630, 346622, 346616]</td>\n",
" <td>[1.0000004768371582, 1.0000004768371582, 1.000...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>873606 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" item_id sim_item_id \\\n",
"787744 440450 [440450, 794964, 662906, 789388] \n",
"89069 478024 [478024, 542933, 200580, 501613] \n",
"142892 170948 [170948, 357991, 184016, 56267] \n",
"636868 178504 [178504, 92743, 206733, 337648] \n",
"694565 429070 [429070, 428326, 289784, 428323] \n",
"... ... ... \n",
"750870 752554 [752554, 511740, 397649, 718404] \n",
"393268 346630 [346651, 346630, 346622, 346616] \n",
"559345 346651 [346651, 346630, 346622, 346616] \n",
"476378 346616 [346651, 346630, 346622, 346616] \n",
"500411 346622 [346651, 346630, 346622, 346616] \n",
"\n",
" score \n",
"787744 [0.9999995231628418, 0.8756284713745117, 0.841... \n",
"89069 [0.9999995231628418, 0.950843870639801, 0.9462... \n",
"142892 [0.9999995231628418, 0.9587958455085754, 0.956... \n",
"636868 [0.9999995231628418, 0.9872869849205017, 0.986... \n",
"694565 [0.9999995827674866, 0.8366211652755737, 0.836... \n",
"... ... \n",
"750870 [1.0000004768371582, 1.0000003576278687, 0.972... \n",
"393268 [1.0000004768371582, 1.0000004768371582, 1.000... \n",
"559345 [1.0000004768371582, 1.0000004768371582, 1.000... \n",
"476378 [1.0000004768371582, 1.0000004768371582, 1.000... \n",
"500411 [1.0000004768371582, 1.0000004768371582, 1.000... \n",
"\n",
"[873606 rows x 3 columns]"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the similar-items table (pandas truncates the display).\n",
"similar_items"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1011f608",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"ExecuteTimeLog": [
{
"duration": 66,
"start_time": "2024-09-02T18:34:52.193Z"
},
{
"duration": 46,
"start_time": "2024-09-02T18:36:43.983Z"
},
{
"duration": 55,
"start_time": "2024-09-02T18:40:09.918Z"
},
{
"duration": 53,
"start_time": "2024-09-02T18:43:19.919Z"
},
{
"duration": 54,
"start_time": "2024-09-02T18:44:22.195Z"
},
{
"duration": 53,
"start_time": "2024-09-02T18:45:24.351Z"
},
{
"duration": 61,
"start_time": "2024-09-02T19:00:18.348Z"
}
],
"kernelspec": {
"display_name": ".venv_recsys",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}