{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# Визуальный анализ данных"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Подключаем необходимые библиотеки."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import numpy as np\n",
"import scipy as sp\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Считываем датасет."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"data = pd.read_csv(\"telecom-churn.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Проверяем, всё ли правильно считалось и \"распарсилось\"."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" account length | \n",
" area code | \n",
" phone number | \n",
" international plan | \n",
" voice mail plan | \n",
" number vmail messages | \n",
" total day minutes | \n",
" total day calls | \n",
" total day charge | \n",
" ... | \n",
" total eve calls | \n",
" total eve charge | \n",
" total night minutes | \n",
" total night calls | \n",
" total night charge | \n",
" total intl minutes | \n",
" total intl calls | \n",
" total intl charge | \n",
" customer service calls | \n",
" churn | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" KS | \n",
" 128 | \n",
" 415 | \n",
" 382-4657 | \n",
" no | \n",
" yes | \n",
" 25 | \n",
" 265.1 | \n",
" 110 | \n",
" 45.07 | \n",
" ... | \n",
" 99 | \n",
" 16.78 | \n",
" 244.7 | \n",
" 91 | \n",
" 11.01 | \n",
" 10.0 | \n",
" 3 | \n",
" 2.70 | \n",
" 1 | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" OH | \n",
" 107 | \n",
" 415 | \n",
" 371-7191 | \n",
" no | \n",
" yes | \n",
" 26 | \n",
" 161.6 | \n",
" 123 | \n",
" 27.47 | \n",
" ... | \n",
" 103 | \n",
" 16.62 | \n",
" 254.4 | \n",
" 103 | \n",
" 11.45 | \n",
" 13.7 | \n",
" 3 | \n",
" 3.70 | \n",
" 1 | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" NJ | \n",
" 137 | \n",
" 415 | \n",
" 358-1921 | \n",
" no | \n",
" no | \n",
" 0 | \n",
" 243.4 | \n",
" 114 | \n",
" 41.38 | \n",
" ... | \n",
" 110 | \n",
" 10.30 | \n",
" 162.6 | \n",
" 104 | \n",
" 7.32 | \n",
" 12.2 | \n",
" 5 | \n",
" 3.29 | \n",
" 0 | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" OH | \n",
" 84 | \n",
" 408 | \n",
" 375-9999 | \n",
" yes | \n",
" no | \n",
" 0 | \n",
" 299.4 | \n",
" 71 | \n",
" 50.90 | \n",
" ... | \n",
" 88 | \n",
" 5.26 | \n",
" 196.9 | \n",
" 89 | \n",
" 8.86 | \n",
" 6.6 | \n",
" 7 | \n",
" 1.78 | \n",
" 2 | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" OK | \n",
" 75 | \n",
" 415 | \n",
" 330-6626 | \n",
" yes | \n",
" no | \n",
" 0 | \n",
" 166.7 | \n",
" 113 | \n",
" 28.34 | \n",
" ... | \n",
" 122 | \n",
" 12.61 | \n",
" 186.9 | \n",
" 121 | \n",
" 8.41 | \n",
" 10.1 | \n",
" 3 | \n",
" 2.73 | \n",
" 3 | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 21 columns
\n",
"
"
]
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Можно получить сводку и общее представление о типах данных."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 3333 entries, 0 to 3332\n",
"Data columns (total 21 columns):\n",
"state 3333 non-null object\n",
"account length 3333 non-null int64\n",
"area code 3333 non-null int64\n",
"phone number 3333 non-null object\n",
"international plan 3333 non-null object\n",
"voice mail plan 3333 non-null object\n",
"number vmail messages 3333 non-null int64\n",
"total day minutes 3333 non-null float64\n",
"total day calls 3333 non-null int64\n",
"total day charge 3333 non-null float64\n",
"total eve minutes 3333 non-null float64\n",
"total eve calls 3333 non-null int64\n",
"total eve charge 3333 non-null float64\n",
"total night minutes 3333 non-null float64\n",
"total night calls 3333 non-null int64\n",
"total night charge 3333 non-null float64\n",
"total intl minutes 3333 non-null float64\n",
"total intl calls 3333 non-null int64\n",
"total intl charge 3333 non-null float64\n",
"customer service calls 3333 non-null int64\n",
"churn 3333 non-null bool\n",
"dtypes: bool(1), float64(8), int64(8), object(4)\n",
"memory usage: 524.1+ KB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Целевая переменная: churn (лояльность абонента). Это категориальный (более конкретно — бинарный) признак. Попробуем узнать, как распределены его значения."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False 2850\n",
"True 483\n",
"Name: churn, dtype: int64"
]
},
"execution_count": 5,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data['churn'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Видим, что 2850 из 3333 абонентов — лояльные. А сколько это в процентах?.."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False 0.855086\n",
"True 0.144914\n",
"Name: churn, dtype: float64"
]
},
"execution_count": 6,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data['churn'].value_counts(normalize=True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Визуализируем это."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "eb58c1303680f70e0c840b8024b3cd7c2294f3a2"
},
"output_type": "display_data"
}
],
"source": [
"data['churn'].value_counts(normalize=True).plot(kind='bar', \n",
" title='Признак churn');"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Нам также может быть интересно, у скольких наших клиентов подключён роуминг."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "1b30217f75cfba5c7c0f37dd809752b3e089062b"
},
"output_type": "display_data"
}
],
"source": [
"data['international plan'].value_counts(normalize=True).plot(kind='bar');"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"А как обстоят дела у нелояльных пользователей (churn=1)?"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "bf924362d8de0d0b916bc5413e923970540953ce"
},
"output_type": "display_data"
}
],
"source": [
"churn_users = data[data['churn'] == True]\n",
"churn_users['international plan'].value_counts(normalize=True).plot(kind='bar');"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Видим, что процент клиентов с роумингом выше, чем в общей выборке. \n",
"\n",
"Можем предположить, что бинарные признаки **international plan** и **churn** коррелируют. Нарисуем теперь их вместе."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" international plan | \n",
" no | \n",
" yes | \n",
" All | \n",
"
\n",
" \n",
" churn | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" False | \n",
" 2664 | \n",
" 186 | \n",
" 2850 | \n",
"
\n",
" \n",
" True | \n",
" 346 | \n",
" 137 | \n",
" 483 | \n",
"
\n",
" \n",
" All | \n",
" 3010 | \n",
" 323 | \n",
" 3333 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 19,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"pd.crosstab(data['churn'], data['international plan'], margins=True)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "5739f5954f8d9545ddac940d39ad0f001ea9422f"
},
"output_type": "display_data"
}
],
"source": [
"sns.countplot(x='international plan', hue='churn', data=data);"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Большинство клиентов, у которых был подключён роуминг, от нас ушли!"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"international plan\n",
"no 3010\n",
"yes 323\n",
"Name: churn, dtype: int64"
]
},
"execution_count": 25,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.groupby('international plan')['churn'].count()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Посмотрим на распределение признака **account length**."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "52816f2fd37519b286aea91df17b0cdcad6892c3"
},
"output_type": "display_data"
}
],
"source": [
"sns.distplot(data['account length']);"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Похоже на нормальное распределение!"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Что можно сказать о связи между **account length** и лояльностью?"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"churn\n",
"False 100.793684\n",
"True 102.664596\n",
"Name: account length, dtype: float64"
]
},
"execution_count": 28,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.groupby('churn')['account length'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"churn\n",
"False 39.88235\n",
"True 39.46782\n",
"Name: account length, dtype: float64"
]
},
"execution_count": 29,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.groupby('churn')['account length'].std()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"churn\n",
"False 100\n",
"True 103\n",
"Name: account length, dtype: int64"
]
},
"execution_count": 30,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.groupby('churn')['account length'].median()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"На первый взгляд, никак не связаны."
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "963212841239ba8b9f8b019138180325d0ed8199"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(1, 2, sharey=True)\n",
"sns.distplot(data[data['churn'] == False]['account length'], \n",
" ax=ax[0]).set_title('Лояльные');\n",
"sns.distplot(churn_users['account length'], \n",
" ax=ax[1]).set_title('Ушедшие');"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"На второй взгляд тоже."
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Теперь посмотрим, связаны ли длительности дневных и ночных звонков."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "8fa7b65953d6716500a10aa52d0d816da03e9b9b"
},
"output_type": "display_data"
}
],
"source": [
"sns.regplot(data['total day minutes'], data['total night minutes']);"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"А как насчёт количества звонков?"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "7f444c1709be3b358ed668456f53846126753f2d"
},
"output_type": "display_data"
}
],
"source": [
"sns.regplot(data['total day calls'], data['total night calls']);"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Пока никакой связи не видно."
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"Построим корреляционную матрицу для числовых признаков."
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"numeric_data = data.select_dtypes(['int64', 'float64'])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" account length | \n",
" area code | \n",
" number vmail messages | \n",
" total day minutes | \n",
" total day calls | \n",
" total day charge | \n",
" total eve minutes | \n",
" total eve calls | \n",
" total eve charge | \n",
" total night minutes | \n",
" total night calls | \n",
" total night charge | \n",
" total intl minutes | \n",
" total intl calls | \n",
" total intl charge | \n",
" customer service calls | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 128 | \n",
" 415 | \n",
" 25 | \n",
" 265.1 | \n",
" 110 | \n",
" 45.07 | \n",
" 197.4 | \n",
" 99 | \n",
" 16.78 | \n",
" 244.7 | \n",
" 91 | \n",
" 11.01 | \n",
" 10.0 | \n",
" 3 | \n",
" 2.70 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 107 | \n",
" 415 | \n",
" 26 | \n",
" 161.6 | \n",
" 123 | \n",
" 27.47 | \n",
" 195.5 | \n",
" 103 | \n",
" 16.62 | \n",
" 254.4 | \n",
" 103 | \n",
" 11.45 | \n",
" 13.7 | \n",
" 3 | \n",
" 3.70 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 137 | \n",
" 415 | \n",
" 0 | \n",
" 243.4 | \n",
" 114 | \n",
" 41.38 | \n",
" 121.2 | \n",
" 110 | \n",
" 10.30 | \n",
" 162.6 | \n",
" 104 | \n",
" 7.32 | \n",
" 12.2 | \n",
" 5 | \n",
" 3.29 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 84 | \n",
" 408 | \n",
" 0 | \n",
" 299.4 | \n",
" 71 | \n",
" 50.90 | \n",
" 61.9 | \n",
" 88 | \n",
" 5.26 | \n",
" 196.9 | \n",
" 89 | \n",
" 8.86 | \n",
" 6.6 | \n",
" 7 | \n",
" 1.78 | \n",
" 2 | \n",
"
\n",
" \n",
" 4 | \n",
" 75 | \n",
" 415 | \n",
" 0 | \n",
" 166.7 | \n",
" 113 | \n",
" 28.34 | \n",
" 148.3 | \n",
" 122 | \n",
" 12.61 | \n",
" 186.9 | \n",
" 121 | \n",
" 8.41 | \n",
" 10.1 | \n",
" 3 | \n",
" 2.73 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 45,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"numeric_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" account length | \n",
" number vmail messages | \n",
" total day minutes | \n",
" total day calls | \n",
" total day charge | \n",
" total eve minutes | \n",
" total eve calls | \n",
" total eve charge | \n",
" total night minutes | \n",
" total night calls | \n",
" total night charge | \n",
" total intl minutes | \n",
" total intl calls | \n",
" total intl charge | \n",
" customer service calls | \n",
"
\n",
" \n",
" \n",
" \n",
" account length | \n",
" 1.000000 | \n",
" -0.004628 | \n",
" 0.006216 | \n",
" 0.038470 | \n",
" 0.006214 | \n",
" -0.006757 | \n",
" 0.019260 | \n",
" -0.006745 | \n",
" -0.008955 | \n",
" -0.013176 | \n",
" -0.008960 | \n",
" 0.009514 | \n",
" 0.020661 | \n",
" 0.009546 | \n",
" -0.003796 | \n",
"
\n",
" \n",
" number vmail messages | \n",
" -0.004628 | \n",
" 1.000000 | \n",
" 0.000778 | \n",
" -0.009548 | \n",
" 0.000776 | \n",
" 0.017562 | \n",
" -0.005864 | \n",
" 0.017578 | \n",
" 0.007681 | \n",
" 0.007123 | \n",
" 0.007663 | \n",
" 0.002856 | \n",
" 0.013957 | \n",
" 0.002884 | \n",
" -0.013263 | \n",
"
\n",
" \n",
" total day minutes | \n",
" 0.006216 | \n",
" 0.000778 | \n",
" 1.000000 | \n",
" 0.006750 | \n",
" 1.000000 | \n",
" 0.007043 | \n",
" 0.015769 | \n",
" 0.007029 | \n",
" 0.004323 | \n",
" 0.022972 | \n",
" 0.004300 | \n",
" -0.010155 | \n",
" 0.008033 | \n",
" -0.010092 | \n",
" -0.013423 | \n",
"
\n",
" \n",
" total day calls | \n",
" 0.038470 | \n",
" -0.009548 | \n",
" 0.006750 | \n",
" 1.000000 | \n",
" 0.006753 | \n",
" -0.021451 | \n",
" 0.006462 | \n",
" -0.021449 | \n",
" 0.022938 | \n",
" -0.019557 | \n",
" 0.022927 | \n",
" 0.021565 | \n",
" 0.004574 | \n",
" 0.021666 | \n",
" -0.018942 | \n",
"
\n",
" \n",
" total day charge | \n",
" 0.006214 | \n",
" 0.000776 | \n",
" 1.000000 | \n",
" 0.006753 | \n",
" 1.000000 | \n",
" 0.007050 | \n",
" 0.015769 | \n",
" 0.007036 | \n",
" 0.004324 | \n",
" 0.022972 | \n",
" 0.004301 | \n",
" -0.010157 | \n",
" 0.008032 | \n",
" -0.010094 | \n",
" -0.013427 | \n",
"
\n",
" \n",
" total eve minutes | \n",
" -0.006757 | \n",
" 0.017562 | \n",
" 0.007043 | \n",
" -0.021451 | \n",
" 0.007050 | \n",
" 1.000000 | \n",
" -0.011430 | \n",
" 1.000000 | \n",
" -0.012584 | \n",
" 0.007586 | \n",
" -0.012593 | \n",
" -0.011035 | \n",
" 0.002541 | \n",
" -0.011067 | \n",
" -0.012985 | \n",
"
\n",
" \n",
" total eve calls | \n",
" 0.019260 | \n",
" -0.005864 | \n",
" 0.015769 | \n",
" 0.006462 | \n",
" 0.015769 | \n",
" -0.011430 | \n",
" 1.000000 | \n",
" -0.011423 | \n",
" -0.002093 | \n",
" 0.007710 | \n",
" -0.002056 | \n",
" 0.008703 | \n",
" 0.017434 | \n",
" 0.008674 | \n",
" 0.002423 | \n",
"
\n",
" \n",
" total eve charge | \n",
" -0.006745 | \n",
" 0.017578 | \n",
" 0.007029 | \n",
" -0.021449 | \n",
" 0.007036 | \n",
" 1.000000 | \n",
" -0.011423 | \n",
" 1.000000 | \n",
" -0.012592 | \n",
" 0.007596 | \n",
" -0.012601 | \n",
" -0.011043 | \n",
" 0.002541 | \n",
" -0.011074 | \n",
" -0.012987 | \n",
"
\n",
" \n",
" total night minutes | \n",
" -0.008955 | \n",
" 0.007681 | \n",
" 0.004323 | \n",
" 0.022938 | \n",
" 0.004324 | \n",
" -0.012584 | \n",
" -0.002093 | \n",
" -0.012592 | \n",
" 1.000000 | \n",
" 0.011204 | \n",
" 0.999999 | \n",
" -0.015207 | \n",
" -0.012353 | \n",
" -0.015180 | \n",
" -0.009288 | \n",
"
\n",
" \n",
" total night calls | \n",
" -0.013176 | \n",
" 0.007123 | \n",
" 0.022972 | \n",
" -0.019557 | \n",
" 0.022972 | \n",
" 0.007586 | \n",
" 0.007710 | \n",
" 0.007596 | \n",
" 0.011204 | \n",
" 1.000000 | \n",
" 0.011188 | \n",
" -0.013605 | \n",
" 0.000305 | \n",
" -0.013630 | \n",
" -0.012802 | \n",
"
\n",
" \n",
" total night charge | \n",
" -0.008960 | \n",
" 0.007663 | \n",
" 0.004300 | \n",
" 0.022927 | \n",
" 0.004301 | \n",
" -0.012593 | \n",
" -0.002056 | \n",
" -0.012601 | \n",
" 0.999999 | \n",
" 0.011188 | \n",
" 1.000000 | \n",
" -0.015214 | \n",
" -0.012329 | \n",
" -0.015186 | \n",
" -0.009277 | \n",
"
\n",
" \n",
" total intl minutes | \n",
" 0.009514 | \n",
" 0.002856 | \n",
" -0.010155 | \n",
" 0.021565 | \n",
" -0.010157 | \n",
" -0.011035 | \n",
" 0.008703 | \n",
" -0.011043 | \n",
" -0.015207 | \n",
" -0.013605 | \n",
" -0.015214 | \n",
" 1.000000 | \n",
" 0.032304 | \n",
" 0.999993 | \n",
" -0.009640 | \n",
"
\n",
" \n",
" total intl calls | \n",
" 0.020661 | \n",
" 0.013957 | \n",
" 0.008033 | \n",
" 0.004574 | \n",
" 0.008032 | \n",
" 0.002541 | \n",
" 0.017434 | \n",
" 0.002541 | \n",
" -0.012353 | \n",
" 0.000305 | \n",
" -0.012329 | \n",
" 0.032304 | \n",
" 1.000000 | \n",
" 0.032372 | \n",
" -0.017561 | \n",
"
\n",
" \n",
" total intl charge | \n",
" 0.009546 | \n",
" 0.002884 | \n",
" -0.010092 | \n",
" 0.021666 | \n",
" -0.010094 | \n",
" -0.011067 | \n",
" 0.008674 | \n",
" -0.011074 | \n",
" -0.015180 | \n",
" -0.013630 | \n",
" -0.015186 | \n",
" 0.999993 | \n",
" 0.032372 | \n",
" 1.000000 | \n",
" -0.009675 | \n",
"
\n",
" \n",
" customer service calls | \n",
" -0.003796 | \n",
" -0.013263 | \n",
" -0.013423 | \n",
" -0.018942 | \n",
" -0.013427 | \n",
" -0.012985 | \n",
" 0.002423 | \n",
" -0.012987 | \n",
" -0.009288 | \n",
" -0.012802 | \n",
" -0.009277 | \n",
" -0.009640 | \n",
" -0.017561 | \n",
" -0.009675 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 60,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"corr_matrix = numeric_data.drop('area code', axis=1).corr()\n",
"corr_matrix"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "c6aa458fc6fdd72653818dc98b9a432ae6bd0cb6"
},
"output_type": "display_data"
}
],
"source": [
"sns.heatmap(corr_matrix);"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "2601088f3e858ef8830a07e5b3e531d36c72eeb4"
},
"output_type": "display_data"
}
],
"source": [
"sns.pairplot(numeric_data[['total day minutes', \n",
" 'total day calls', \n",
" 'total day charge']]);"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (Ubuntu Linux)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}