Kernel: Python 3 (ipykernel)
In [84]:
from IPython.display import display, HTML display(HTML(data=""" <style> div#notebook-container { width: 85%; } div#menubar-container { width: 65%; } div#maintoolbar-container { width: 85%; }a </style> """))
Out[84]:
In [85]:
import matplotlib.pyplot as plt import numpy as np import pandas as pd import geopandas as gpd from textwrap import wrap
In [86]:
# Get data of labor sex_work = pd.read_stata( r'../data/geopandas_data/8_trab_sex_20.dta' ) # creamos ubigeo por deparatamento a dos digitos y a provincia a 4 digitos sex_work[ 'dpt_code' ] = sex_work.cod_ubigeo.str[ :2 ] sex_work[ 'prov_code' ] = sex_work.cod_ubigeo.str[ :4 ] # Sex work women_work = sex_work[ sex_work.sex == 'Mujer' ].copy().reset_index( drop = True )
In [87]:
sex_work
Out[87]:
In [88]:
women_work
Out[88]:
In [89]:
# get data from lima women_work[ 'month' ] = pd.to_datetime( women_work.month , format = '%B' ) \ .dt.strftime( '%m' ) \ .astype( int ) # '%B' full month name women_work
Out[89]:
In [90]:
# Sort by department and month women_work.sort_values([ 'dpt_code', 'month'], inplace = True ) # Get the total number of women workers by dpt dpt_women_work = women_work.groupby( [ 'dpt_code', 'month'], as_index = False )[['empl']] \ .sum() \ .rename( columns = {'empl' :'women_empl'}) # Sort by dpt code and month dpt_women_work.sort_values([ 'dpt_code', 'month'], inplace = True )
In [91]:
df2 = dpt_women_work.groupby( ['dpt_code'],as_index = False )[['women_empl']].mean()
In [92]:
df2
Out[92]:
Shapefile
In [93]:
# Load shapefile, limite departamental dpt_shp = gpd.read_file( r'../data/geopandas_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp' )
In [94]:
dpt_shp
Out[94]:
In [95]:
# Merge shapefile e información de empleo df3 = pd.merge( dpt_shp, df2, left_on = 'CCDD', right_on = 'dpt_code', validate = "1:1")
In [96]:
# Plot mapa de calor fig, ax = plt.subplots(figsize=(20, 20)) # women_empl, se crea el mapa de calor a partir del dato de empleo df3.plot( column='women_empl', cmap='Reds', linestyle='--', edgecolor='black', legend = True, ax = ax) # sin ax = ax obtendriamos dos graficos separados
Out[96]:
<AxesSubplot:>
In [112]:
# Mapa de calor con tramos definidos manualmente en el dato de empleo fig, ax = plt.subplots(figsize=(10, 10)) df3.plot( ax = ax, column='women_empl', cmap= 'viridis', linestyle='--', edgecolor='black', legend = True, scheme = "User_Defined", classification_kwds = dict( bins = [ 20000, 40000, 60000, 100000 ] ), # bins: cortes legend_kwds = dict( loc='upper left', bbox_to_anchor=(1.01, 1), fontsize='x-large', title= "Number of Employers", frameon= True, prop= {'size': 8}) # frameon: cuadro en al leyenda ) plt.axis("off") # sin el marco en el mapa
Out[112]:
(-81.96202805930743,
-68.01848153333607,
-19.266543824587817,
0.8770101193715647)
Colors and pallete
In [111]:
# Sin tomar en cuenta a Lima metropolitana y Lima provincia df3[ df3.CCDD != "15" ].plot( column='women_empl', cmap='viridis', figsize=(20, 20), linestyle='--', edgecolor='black', legend = True)
Out[111]:
<AxesSubplot:>
In [100]:
# cantidad PEA por departamento , genero y mes df4 = sex_work.groupby( ['dpt_code', 'month', 'sex'], as_index = False )[['empl']].sum() \ .pivot( index = [ 'dpt_code', 'month' ] , columns = 'sex',values='empl') \ .reset_index() # reshape a partir de las categorias sex df4
Out[100]:
In [101]:
# proporción de muejeres respecto al total PEA df4[ 'prop_wom' ] = ( df4.Mujer * 100 / (df4.Hombre+df4.Mujer+df4['S.I.']) )
In [102]:
# merge con shapefile df5 = dpt_shp.merge( df4, left_on = 'CCDD', right_on = 'dpt_code' )
In [103]:
fig, axis = plt.subplots( nrows = 4, ncols= 3, figsize = ( 15, 15 ) ) idx = 0 for i in range( 4 ): # loop por fila for j in range ( 3 ): # loop por columna ax = axis[ i ][ j ] # permite definir en que posición se ubica el gráficos month = df5.month.unique()[ idx ] # se filtra el mes df6 = df5[ df5.month == month ] # filtra mes df6.plot( column='prop_wom', cmap='Reds', linestyle='--', edgecolor='black', legend = True, ax = ax ) ax.set_title( month ) # añadir titulo idx = idx + 1
Out[103]:
In [104]:
# Inverting colour map cmap = plt.cm.OrRd
In [105]:
# Se remplaza missing en el dato de Lima df5.loc[ (df5.NOMBDEP == 'LIMA'), 'prop_wom' ] = np.nan
In [115]:
fig, ax = plt.subplots(figsize=(20, 20)) df5.plot( ax = ax, column='prop_wom', cmap= cmap, linestyle='--', edgecolor='black', legend = True, scheme = "User_Defined", classification_kwds = dict( bins = [ 20, 30, 40, 50, 100 ] ), # bins legend_kwds=dict( loc='upper left', bbox_to_anchor=(1.01, 1), fontsize='x-large', title= "Women Proportion", title_fontsize = 'x-large', frameon= True ) )
Out[115]:
<AxesSubplot:>
In [107]:
fig, ax = plt.subplots(figsize=(20, 20)) df5.plot( ax = ax, column='prop_wom', cmap= cmap, linestyle='--', edgecolor='black', legend = True, scheme = "User_Defined", missing_kwds= dict(color = "#DADADB", label='No Data'), # color en missing y etiqueta classification_kwds = dict( bins = [ 20, 30, 40, 50, 100 ] ), legend_kwds=dict( loc='upper left', bbox_to_anchor=(1.01, 1), fontsize='x-large', title= "Women Proportion", title_fontsize = 'x-large', frameon= True) )
Out[107]:
<AxesSubplot:>
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: