CoCalc -- Geopandas

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab12/Geopandas_2.ipynb
²⁷¹⁴ views

Kernel: Python 3 (ipykernel)

In [84]:

from IPython.display import display, HTML

display(HTML(data="""
<style>
    div#notebook-container    { width: 85%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 85%; }a
</style>
"""))

Out[84]:

In [85]:

import matplotlib.pyplot as plt 

import numpy as np
import pandas as pd
import geopandas as gpd
from textwrap import wrap

In [86]:

# Get data of labor
sex_work = pd.read_stata( r'../data/geopandas_data/8_trab_sex_20.dta' )

# creamos ubigeo por deparatamento a dos digitos y a provincia a 4 digitos

sex_work[ 'dpt_code' ] = sex_work.cod_ubigeo.str[ :2 ]
sex_work[ 'prov_code' ] = sex_work.cod_ubigeo.str[ :4 ]

# Sex work
women_work = sex_work[ sex_work.sex == 'Mujer' ].copy().reset_index( drop = True )

In [87]:

sex_work

Out[87]:

In [88]:

women_work

Out[88]:

In [89]:

# get data from lima
women_work[ 'month' ] = pd.to_datetime( women_work.month , format = '%B' ) \
                                    .dt.strftime( '%m' ) \
                                    .astype( int )
# '%B'  full month name

women_work

Out[89]:

Tipo de fecha

Revisar para más información

Link

In [90]:

# Sort by department and month
women_work.sort_values([ 'dpt_code', 'month'], inplace = True )

# Get the total number of women workers by dpt
dpt_women_work = women_work.groupby( [ 'dpt_code', 'month'], as_index = False )[['empl']] \
                            .sum() \
                            .rename( columns = {'empl' :'women_empl'})

# Sort by dpt code and month
dpt_women_work.sort_values([ 'dpt_code', 'month'], inplace = True )

In [91]:

df2 = dpt_women_work.groupby( ['dpt_code'],as_index = False )[['women_empl']].mean()

In [92]:

df2

Out[92]:

Shapefile

In [93]:

# Load shapefile, limite departamental 

dpt_shp = gpd.read_file( r'../data/geopandas_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp' )

In [94]:

dpt_shp

Out[94]:

In [95]:

# Merge shapefile e información de empleo 

df3 = pd.merge( dpt_shp, df2, left_on = 'CCDD', right_on = 'dpt_code', validate = "1:1")

In [96]:

# Plot mapa de calor 

fig, ax = plt.subplots(figsize=(20, 20))

# women_empl, se crea el mapa de calor a partir del dato de empleo 

df3.plot( column='women_empl', cmap='Reds', linestyle='--',
                      edgecolor='black', 
                      legend = True, ax = ax)  # sin ax = ax obtendriamos dos graficos separados

Out[96]:

<AxesSubplot:>

In [112]:

# Mapa de calor con tramos definidos manualmente en el dato de empleo

fig, ax = plt.subplots(figsize=(10, 10))

df3.plot( ax = ax, 
        column='women_empl', 
         cmap= 'viridis', 
         linestyle='--',
         edgecolor='black', 
         legend = True,  
         scheme = "User_Defined", 
         classification_kwds = dict( bins = [ 20000, 40000, 60000, 100000  ] ),  # bins: cortes 
         legend_kwds = dict(  loc='upper left',
                            bbox_to_anchor=(1.01, 1),
                            fontsize='x-large',
                            title= "Number of Employers", 
                            frameon= True,
                           prop= {'size': 8})  # frameon: cuadro en al leyenda 
        )

plt.axis("off")  # sin el marco en el mapa

Out[112]:

(-81.96202805930743,
 -68.01848153333607,
 -19.266543824587817,
 0.8770101193715647)

Colors and pallete

Link

In [111]:

# Sin tomar en cuenta a Lima metropolitana y Lima provincia 

df3[ df3.CCDD != "15" ].plot( column='women_empl', cmap='viridis', figsize=(20, 20), linestyle='--',
                      edgecolor='black', 
                      legend = True)

Out[111]:

<AxesSubplot:>

In [100]:

# cantidad  PEA por departamento , genero y mes 
df4 = sex_work.groupby( ['dpt_code', 'month', 'sex'], as_index = False )[['empl']].sum() \
        .pivot( index = [ 'dpt_code', 'month' ] , columns = 'sex',values='empl') \
        .reset_index()
 # reshape a partir de las categorias sex
df4

Out[100]:

In [101]:

# proporción de muejeres respecto al total PEA

df4[ 'prop_wom' ] = ( df4.Mujer * 100 / (df4.Hombre+df4.Mujer+df4['S.I.']) )

In [102]:

# merge con shapefile 

df5 = dpt_shp.merge( df4, left_on = 'CCDD', right_on = 'dpt_code'  )

In [103]:

fig, axis = plt.subplots( nrows = 4, ncols= 3, figsize = ( 15, 15 ) )

idx = 0
for i in range( 4 ): # loop por fila 
    for j in range ( 3 ):  # loop por columna
        
        
        ax = axis[ i ][ j ]  # permite definir en que posición se ubica el gráficos 
        
        month = df5.month.unique()[ idx ]  # se filtra el mes 
        
        df6 = df5[ df5.month == month ]  # filtra mes 
        
        df6.plot( column='prop_wom', 
                  cmap='Reds', 
                  linestyle='--',
                  edgecolor='black', 
                  legend = True, 
                  ax = ax 
                )
        
        ax.set_title( month ) # añadir titulo
        
        idx = idx + 1

Out[103]:

In [104]:

# Inverting colour map
cmap = plt.cm.OrRd

In [105]:

# Se remplaza missing en el dato de Lima 

df5.loc[ (df5.NOMBDEP == 'LIMA'), 'prop_wom' ] = np.nan

In [115]:

fig, ax = plt.subplots(figsize=(20, 20))
df5.plot( ax = ax, 
        column='prop_wom', 
         cmap= cmap, 
         linestyle='--',
         edgecolor='black', 
         legend = True,  
         scheme = "User_Defined", 
         classification_kwds = dict( bins = [ 20, 30, 40, 50,  100 ] ), # bins
         legend_kwds=dict(  loc='upper left',
                            bbox_to_anchor=(1.01, 1),
                            fontsize='x-large',
                            title= "Women Proportion", 
                            title_fontsize = 'x-large', 
                            frameon= True )
        )

Out[115]:

<AxesSubplot:>

In [107]:

fig, ax = plt.subplots(figsize=(20, 20))
df5.plot( ax = ax, 
        column='prop_wom', 
         cmap= cmap, 
         linestyle='--',
         edgecolor='black', 
         legend = True,  
         scheme = "User_Defined", 
         missing_kwds= dict(color = "#DADADB", label='No Data'),   # color en missing y etiqueta 
         classification_kwds = dict( bins = [ 20, 30, 40, 50,  100 ] ), 
         legend_kwds=dict(  loc='upper left',
                            bbox_to_anchor=(1.01, 1),
                            fontsize='x-large',
                            title= "Women Proportion", 
                            title_fontsize = 'x-large', 
                            frameon= True)
        )

Out[107]:

<AxesSubplot:>

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Tipo de fecha

Revisar para más información

Shapefile

Colors and pallete

Product

Resources

Company