Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG5/Grupo_3_Python
2714 views
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting pyreadstat\n",
      "  Downloading pyreadstat-1.1.9-cp38-cp38-win_amd64.whl (2.4 MB)\n",
      "     ---------------------------------------- 2.4/2.4 MB 9.5 MB/s eta 0:00:00\n",
      "Requirement already satisfied: pandas>=1.2.0 in c:\\users\\aldai\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pyreadstat) (1.4.4)\n",
      "Requirement already satisfied: pytz>=2020.1 in c:\\users\\aldai\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas>=1.2.0->pyreadstat) (2022.2.1)\n",
      "Requirement already satisfied: numpy>=1.18.5 in c:\\users\\aldai\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas>=1.2.0->pyreadstat) (1.23.2)\n",
      "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\aldai\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas>=1.2.0->pyreadstat) (2.8.2)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\aldai\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from python-dateutil>=2.8.1->pandas>=1.2.0->pyreadstat) (1.16.0)\n",
      "Installing collected packages: pyreadstat\n",
      "Successfully installed pyreadstat-1.1.9\n"
     ]
    }
   ],
   "source": [
    "# En caso no contar con el paquete pyreadstat, lo instalamos con el siguiente comando\n",
    "!pip install pyreadstat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Importamos el paquete pyreadstat\n",
    "import pyreadstat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>MES</th>\n",
       "      <th>CONGLOME</th>\n",
       "      <th>VIVIENDA</th>\n",
       "      <th>HOGAR</th>\n",
       "      <th>CODPERSO</th>\n",
       "      <th>UBIGEO</th>\n",
       "      <th>DOMINIO</th>\n",
       "      <th>ESTRATO</th>\n",
       "      <th>P201P</th>\n",
       "      <th>P203</th>\n",
       "      <th>P203A</th>\n",
       "      <th>P203B</th>\n",
       "      <th>P204</th>\n",
       "      <th>P205</th>\n",
       "      <th>P206</th>\n",
       "      <th>P207</th>\n",
       "      <th>P208A</th>\n",
       "      <th>P208B</th>\n",
       "      <th>P209</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020</td>\n",
       "      <td>06</td>\n",
       "      <td>010108</td>\n",
       "      <td>136</td>\n",
       "      <td>11</td>\n",
       "      <td>02</td>\n",
       "      <td>100111</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20200101081361102</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2020</td>\n",
       "      <td>06</td>\n",
       "      <td>020257</td>\n",
       "      <td>093</td>\n",
       "      <td>11</td>\n",
       "      <td>04</td>\n",
       "      <td>250107</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20200202570931104</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2020</td>\n",
       "      <td>06</td>\n",
       "      <td>015537</td>\n",
       "      <td>065</td>\n",
       "      <td>11</td>\n",
       "      <td>04</td>\n",
       "      <td>040126</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>20200155370651104</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2019</td>\n",
       "      <td>10</td>\n",
       "      <td>010614</td>\n",
       "      <td>043</td>\n",
       "      <td>11</td>\n",
       "      <td>07</td>\n",
       "      <td>130901</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20190106140431107</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2020</td>\n",
       "      <td>06</td>\n",
       "      <td>009390</td>\n",
       "      <td>096</td>\n",
       "      <td>11</td>\n",
       "      <td>04</td>\n",
       "      <td>230110</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20180093900961104</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85030</th>\n",
       "      <td>2020</td>\n",
       "      <td>12</td>\n",
       "      <td>007337</td>\n",
       "      <td>004</td>\n",
       "      <td>11</td>\n",
       "      <td>01</td>\n",
       "      <td>140105</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20200073370041101</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85031</th>\n",
       "      <td>2019</td>\n",
       "      <td>04</td>\n",
       "      <td>008226</td>\n",
       "      <td>084</td>\n",
       "      <td>13</td>\n",
       "      <td>02</td>\n",
       "      <td>160112</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20190082260841302</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85032</th>\n",
       "      <td>2019</td>\n",
       "      <td>05</td>\n",
       "      <td>008965</td>\n",
       "      <td>060</td>\n",
       "      <td>11</td>\n",
       "      <td>03</td>\n",
       "      <td>210101</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20190089650601103</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85033</th>\n",
       "      <td>2020</td>\n",
       "      <td>05</td>\n",
       "      <td>010311</td>\n",
       "      <td>022</td>\n",
       "      <td>11</td>\n",
       "      <td>02</td>\n",
       "      <td>200501</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>20200103110221102</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85034</th>\n",
       "      <td>2020</td>\n",
       "      <td>11</td>\n",
       "      <td>018778</td>\n",
       "      <td>155</td>\n",
       "      <td>11</td>\n",
       "      <td>06</td>\n",
       "      <td>160404</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20200187781551106</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>85035 rows × 20 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       year MES CONGLOME VIVIENDA HOGAR CODPERSO  UBIGEO  DOMINIO  ESTRATO  \\\n",
       "0      2020  06   010108      136    11       02  100111      5.0      2.0   \n",
       "1      2020  06   020257      093    11       04  250107      7.0      2.0   \n",
       "2      2020  06   015537      065    11       04  040126      6.0      1.0   \n",
       "3      2019  10   010614      043    11       07  130901      4.0      4.0   \n",
       "4      2020  06   009390      096    11       04  230110      3.0      2.0   \n",
       "...     ...  ..      ...      ...   ...      ...     ...      ...      ...   \n",
       "85030  2020  12   007337      004    11       01  140105      1.0      2.0   \n",
       "85031  2019  04   008226      084    13       02  160112      7.0      2.0   \n",
       "85032  2019  05   008965      060    11       03  210101      6.0      2.0   \n",
       "85033  2020  05   010311      022    11       02  200501      1.0      3.0   \n",
       "85034  2020  11   018778      155    11       06  160404      7.0      7.0   \n",
       "\n",
       "                   P201P  P203  P203A  P203B  P204  P205  P206  P207  P208A  \\\n",
       "0      20200101081361102   2.0    NaN    NaN   1.0   2.0   NaN   2.0   43.0   \n",
       "1      20200202570931104   3.0    NaN    NaN   1.0   2.0   NaN   1.0   17.0   \n",
       "2      20200155370651104   3.0    NaN    NaN   1.0   2.0   NaN   2.0    9.0   \n",
       "3      20190106140431107   7.0    2.0    3.0   2.0   NaN   2.0   2.0    4.0   \n",
       "4      20180093900961104   0.0    NaN    NaN   NaN   NaN   NaN   NaN    NaN   \n",
       "...                  ...   ...    ...    ...   ...   ...   ...   ...    ...   \n",
       "85030  20200073370041101   1.0    1.0    1.0   1.0   2.0   NaN   1.0   65.0   \n",
       "85031  20190082260841302  10.0    0.0    1.0   1.0   2.0   NaN   2.0   89.0   \n",
       "85032  20190089650601103   3.0    1.0    3.0   1.0   2.0   NaN   1.0   20.0   \n",
       "85033  20200103110221102   2.0    NaN    NaN   1.0   2.0   NaN   2.0   48.0   \n",
       "85034  20200187781551106   3.0    2.0    1.0   1.0   2.0   NaN   1.0   22.0   \n",
       "\n",
       "       P208B  P209  \n",
       "0        NaN   2.0  \n",
       "1        NaN   6.0  \n",
       "2        NaN   NaN  \n",
       "3        NaN   NaN  \n",
       "4        NaN   NaN  \n",
       "...      ...   ...  \n",
       "85030    NaN   2.0  \n",
       "85031    NaN   3.0  \n",
       "85032    NaN   6.0  \n",
       "85033    NaN   2.0  \n",
       "85034    NaN   1.0  \n",
       "\n",
       "[85035 rows x 20 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Leemos el archivo .sav y guardamos el dataframe en la variable df\n",
    "df, meta = pyreadstat.read_sav(\"../data_administrativa.sav\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Mostrar las variables que presentan missing values (NA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['P203A', 'P203B', 'P204', 'P205', 'P206', 'P207', 'P208A', 'P208B', 'P209']\n"
     ]
    }
   ],
   "source": [
    "vars_with_na = [var for var in df.columns if df[var].isnull().sum() > 0]\n",
    "print (vars_with_na)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Mostrar las etiquetas de dos variables (var labels) y las etiquetas de los valores en dos variables (value's labels)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CODPERSO</th>\n",
       "      <th>UBIGEO</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>02</td>\n",
       "      <td>100111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>04</td>\n",
       "      <td>250107</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  CODPERSO  UBIGEO\n",
       "0       02  100111\n",
       "1       04  250107"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[['CODPERSO', 'UBIGEO']][0:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Detectar personas que fueron entrevistadas en ambos años a partir de las variables: conglome, vivienda, hogar y codperso"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CONGLOME</th>\n",
       "      <th>VIVIENDA</th>\n",
       "      <th>HOGAR</th>\n",
       "      <th>CODPERSO</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2632</th>\n",
       "      <td>006571</td>\n",
       "      <td>174</td>\n",
       "      <td>11</td>\n",
       "      <td>02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2724</th>\n",
       "      <td>005278</td>\n",
       "      <td>048</td>\n",
       "      <td>11</td>\n",
       "      <td>04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2773</th>\n",
       "      <td>005190</td>\n",
       "      <td>037</td>\n",
       "      <td>11</td>\n",
       "      <td>01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3135</th>\n",
       "      <td>006886</td>\n",
       "      <td>168</td>\n",
       "      <td>11</td>\n",
       "      <td>02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3663</th>\n",
       "      <td>008611</td>\n",
       "      <td>048</td>\n",
       "      <td>11</td>\n",
       "      <td>04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84940</th>\n",
       "      <td>008253</td>\n",
       "      <td>047</td>\n",
       "      <td>11</td>\n",
       "      <td>01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84945</th>\n",
       "      <td>007982</td>\n",
       "      <td>008</td>\n",
       "      <td>11</td>\n",
       "      <td>02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84975</th>\n",
       "      <td>007221</td>\n",
       "      <td>043</td>\n",
       "      <td>11</td>\n",
       "      <td>03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84982</th>\n",
       "      <td>008806</td>\n",
       "      <td>114</td>\n",
       "      <td>11</td>\n",
       "      <td>04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85000</th>\n",
       "      <td>009338</td>\n",
       "      <td>017</td>\n",
       "      <td>11</td>\n",
       "      <td>03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4135 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      CONGLOME VIVIENDA HOGAR CODPERSO\n",
       "2632    006571      174    11       02\n",
       "2724    005278      048    11       04\n",
       "2773    005190      037    11       01\n",
       "3135    006886      168    11       02\n",
       "3663    008611      048    11       04\n",
       "...        ...      ...   ...      ...\n",
       "84940   008253      047    11       01\n",
       "84945   007982      008    11       02\n",
       "84975   007221      043    11       03\n",
       "84982   008806      114    11       04\n",
       "85000   009338      017    11       03\n",
       "\n",
       "[4135 rows x 4 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_filter = df[['CONGLOME', 'VIVIENDA', 'HOGAR', 'CODPERSO']]\n",
    "df_duplicados = df_filter[df_filter.duplicated()]\n",
    "df_duplicados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Limpiamos la base de datos de las personas duplicadas\n",
    "df = df.drop_duplicates(subset=['CONGLOME', 'VIVIENDA', 'HOGAR', 'CODPERSO'], keep='first')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Ordenar la base de datos a partir de las variables que identifican cada miembro y la variable año (year)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>MES</th>\n",
       "      <th>CONGLOME</th>\n",
       "      <th>VIVIENDA</th>\n",
       "      <th>HOGAR</th>\n",
       "      <th>CODPERSO</th>\n",
       "      <th>UBIGEO</th>\n",
       "      <th>DOMINIO</th>\n",
       "      <th>ESTRATO</th>\n",
       "      <th>P201P</th>\n",
       "      <th>P203</th>\n",
       "      <th>P203A</th>\n",
       "      <th>P203B</th>\n",
       "      <th>P204</th>\n",
       "      <th>P205</th>\n",
       "      <th>P206</th>\n",
       "      <th>P207</th>\n",
       "      <th>P208A</th>\n",
       "      <th>P208B</th>\n",
       "      <th>P209</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>41445</th>\n",
       "      <td>2019</td>\n",
       "      <td>07</td>\n",
       "      <td>007513</td>\n",
       "      <td>033</td>\n",
       "      <td>11</td>\n",
       "      <td>05</td>\n",
       "      <td>150135</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>20190075130331105</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66886</th>\n",
       "      <td>2019</td>\n",
       "      <td>12</td>\n",
       "      <td>006717</td>\n",
       "      <td>038</td>\n",
       "      <td>11</td>\n",
       "      <td>02</td>\n",
       "      <td>110101</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20170067170381102</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33971</th>\n",
       "      <td>2019</td>\n",
       "      <td>03</td>\n",
       "      <td>005250</td>\n",
       "      <td>169</td>\n",
       "      <td>11</td>\n",
       "      <td>01</td>\n",
       "      <td>020105</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>20190052501691101</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33972</th>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>008425</td>\n",
       "      <td>012</td>\n",
       "      <td>11</td>\n",
       "      <td>05</td>\n",
       "      <td>170103</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20190084250121105</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33973</th>\n",
       "      <td>2019</td>\n",
       "      <td>12</td>\n",
       "      <td>008847</td>\n",
       "      <td>084</td>\n",
       "      <td>11</td>\n",
       "      <td>01</td>\n",
       "      <td>200601</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20190088470841101</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36959</th>\n",
       "      <td>2020</td>\n",
       "      <td>12</td>\n",
       "      <td>017723</td>\n",
       "      <td>018</td>\n",
       "      <td>11</td>\n",
       "      <td>05</td>\n",
       "      <td>140112</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20200177230181105</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36960</th>\n",
       "      <td>2020</td>\n",
       "      <td>05</td>\n",
       "      <td>015946</td>\n",
       "      <td>495</td>\n",
       "      <td>11</td>\n",
       "      <td>04</td>\n",
       "      <td>050510</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20200159464951104</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36962</th>\n",
       "      <td>2020</td>\n",
       "      <td>03</td>\n",
       "      <td>006530</td>\n",
       "      <td>009</td>\n",
       "      <td>11</td>\n",
       "      <td>05</td>\n",
       "      <td>100111</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20200065300091105</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36910</th>\n",
       "      <td>2020</td>\n",
       "      <td>09</td>\n",
       "      <td>008503</td>\n",
       "      <td>045</td>\n",
       "      <td>11</td>\n",
       "      <td>06</td>\n",
       "      <td>180101</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>20180085030451103</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85034</th>\n",
       "      <td>2020</td>\n",
       "      <td>11</td>\n",
       "      <td>018778</td>\n",
       "      <td>155</td>\n",
       "      <td>11</td>\n",
       "      <td>06</td>\n",
       "      <td>160404</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20200187781551106</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>80900 rows × 20 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       year MES CONGLOME VIVIENDA HOGAR CODPERSO  UBIGEO  DOMINIO  ESTRATO  \\\n",
       "41445  2019  07   007513      033    11       05  150135      8.0      1.0   \n",
       "66886  2019  12   006717      038    11       02  110101      2.0      2.0   \n",
       "33971  2019  03   005250      169    11       01  020105      5.0      3.0   \n",
       "33972  2019  08   008425      012    11       05  170103      7.0      5.0   \n",
       "33973  2019  12   008847      084    11       01  200601      1.0      2.0   \n",
       "...     ...  ..      ...      ...   ...      ...     ...      ...      ...   \n",
       "36959  2020  12   017723      018    11       05  140112      1.0      4.0   \n",
       "36960  2020  05   015946      495    11       04  050510      7.0      7.0   \n",
       "36962  2020  03   006530      009    11       05  100111      5.0      2.0   \n",
       "36910  2020  09   008503      045    11       06  180101      3.0      3.0   \n",
       "85034  2020  11   018778      155    11       06  160404      7.0      7.0   \n",
       "\n",
       "                   P201P  P203  P203A  P203B  P204  P205  P206  P207  P208A  \\\n",
       "41445  20190075130331105   7.0    2.0    3.0   1.0   2.0   NaN   2.0    0.0   \n",
       "66886  20170067170381102   2.0    1.0    2.0   1.0   2.0   NaN   2.0   43.0   \n",
       "33971  20190052501691101   1.0    1.0    1.0   1.0   2.0   NaN   1.0   44.0   \n",
       "33972  20190084250121105   3.0    1.0    3.0   1.0   2.0   NaN   1.0    0.0   \n",
       "33973  20190088470841101   1.0    1.0    1.0   1.0   2.0   NaN   1.0   48.0   \n",
       "...                  ...   ...    ...    ...   ...   ...   ...   ...    ...   \n",
       "36959  20200177230181105   3.0    2.0    1.0   1.0   2.0   NaN   2.0   32.0   \n",
       "36960  20200159464951104   3.0    NaN    NaN   1.0   2.0   NaN   2.0   11.0   \n",
       "36962  20200065300091105   3.0    2.0    1.0   1.0   2.0   NaN   1.0   32.0   \n",
       "36910  20180085030451103   0.0    NaN    NaN   NaN   NaN   NaN   NaN    NaN   \n",
       "85034  20200187781551106   3.0    2.0    1.0   1.0   2.0   NaN   1.0   22.0   \n",
       "\n",
       "       P208B  P209  \n",
       "41445    1.0   NaN  \n",
       "66886    NaN   2.0  \n",
       "33971    NaN   2.0  \n",
       "33972    4.0   NaN  \n",
       "33973    NaN   1.0  \n",
       "...      ...   ...  \n",
       "36959    NaN   1.0  \n",
       "36960    NaN   NaN  \n",
       "36962    NaN   1.0  \n",
       "36910    NaN   NaN  \n",
       "85034    NaN   1.0  \n",
       "\n",
       "[80900 rows x 20 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_neat = df.sort_values('year')\n",
    "df_neat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Crear una base de datos para cada año"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Filtramos la base de datos en función al año\n",
    "df_2019 = df[df['year'] == '2019']\n",
    "df_2020 = df[df['year'] == '2020']\n",
    "\n",
    "# Exportamos los dataframes generados\n",
    "df_2019.to_csv('data_2019_(3).csv')\n",
    "df_2020.to_csv('data_2020_(3).csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.0 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "1529b77671c723740968c4c73aa794f3b3bf15c122a394072452cb98e28121b2"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}