Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab12/Geopandas_2.ipynb
2714 views
Kernel: Python 3 (ipykernel)
from IPython.display import display, HTML

# Widen the classic-notebook containers so wide tables and maps fit on screen.
# The stylesheet is injected into the page through an HTML display object.
# (A stray "a" that preceded </style> and made the CSS malformed was removed.)
display(HTML(data="""
<style>
    div#notebook-container    { width: 85%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 85%; }
</style>
"""))
import matplotlib.pyplot as plt import numpy as np import pandas as pd import geopandas as gpd from textwrap import wrap
# Load the labour data set (Stata file)
labor_path = "../data/geopandas_data/8_trab_sex_20.dta"
sex_work = pd.read_stata(labor_path)

# Build geographic codes from the ubigeo string:
# first 2 characters -> department code, first 4 characters -> province code
ubigeo = sex_work["cod_ubigeo"]
sex_work["dpt_code"] = ubigeo.str[:2]
sex_work["prov_code"] = ubigeo.str[:4]

# Keep only the rows recorded for women ('Mujer'), as an independent copy
# with a fresh 0..n-1 index
is_woman = sex_work["sex"] == "Mujer"
women_work = sex_work.loc[is_woman].copy().reset_index(drop=True)

# Display the full table
sex_work

# Display the women-only table
women_work
# Convert the month column from month names to integers:
# parse with '%B' (full month name), format as a two-digit month ('%m'),
# then cast the resulting strings to int.
month_names = women_work["month"]
parsed_months = pd.to_datetime(month_names, format="%B")
women_work["month"] = parsed_months.dt.strftime("%m").astype(int)

women_work

Tipo de fecha

Revisar para más información

Link

# Keys shared by the sorting and grouping steps below
dpt_month_keys = ["dpt_code", "month"]

# Order the women-only table by department and month (in place)
women_work.sort_values(by=dpt_month_keys, inplace=True)

# Total women employment for every (department, month) pair
monthly_totals = women_work.groupby(dpt_month_keys, as_index=False)[["empl"]].sum()
dpt_women_work = monthly_totals.rename(columns={"empl": "women_empl"})

# Keep the aggregated table ordered by department and month as well
dpt_women_work.sort_values(by=dpt_month_keys, inplace=True)

# Average of the monthly totals for each department
grouped_by_dpt = dpt_women_work.groupby(["dpt_code"], as_index=False)
df2 = grouped_by_dpt[["women_empl"]].mean()

df2

Shapefile

# Read the shapefile with the department boundaries
shapefile_path = "../data/geopandas_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp"
dpt_shp = gpd.read_file(shapefile_path)

dpt_shp
# Join the department shapes with the employment averages.
# validate="1:1" makes pandas raise if either side repeats a merge key.
merge_options = {
    "left_on": "CCDD",
    "right_on": "dpt_code",
    "validate": "1:1",
}
df3 = pd.merge(dpt_shp, df2, **merge_options)
# Choropleth ("heat map") coloured by the women employment figure
fig, ax = plt.subplots(figsize=(20, 20))

heatmap_style = {
    "column": "women_empl",   # value that drives the colour of each polygon
    "cmap": "Reds",
    "linestyle": "--",
    "edgecolor": "black",
    "legend": True,
}

# Passing ax draws the map on the figure created above; without it the
# plot call would produce a second, separate figure.
df3.plot(ax=ax, **heatmap_style)
<AxesSubplot:>
Image in a Jupyter notebook
# Choropleth with manually defined class breaks on the employment figure
fig, ax = plt.subplots(figsize=(10, 10))

df3.plot(
    ax=ax,
    column='women_empl',
    cmap='viridis',
    linestyle='--',
    edgecolor='black',
    legend=True,
    scheme="User_Defined",
    classification_kwds=dict(bins=[20000, 40000, 60000, 100000]),  # bins: class break points
    legend_kwds=dict(
        loc='upper left',
        bbox_to_anchor=(1.01, 1),   # place the legend just outside the right edge of the axes
        title="Number of Employees",
        frameon=True,               # draw a box around the legend
        # Entry font size. The former fontsize='x-large' was dropped:
        # matplotlib only honours fontsize when prop is not given, so it
        # never had any effect next to prop.
        prop={'size': 8},
    ),
)

plt.axis("off")  # hide the axes frame and ticks around the map
(-81.96202805930743, -68.01848153333607, -19.266543824587817, 0.8770101193715647)
Image in a Jupyter notebook

Colors and palette

Link

# Same map leaving out department code "15" (Lima), so the remaining
# departments are not compressed into the low end of the colour scale
without_lima = df3.loc[df3["CCDD"] != "15"]

without_lima.plot(
    column="women_empl",
    cmap="viridis",
    figsize=(20, 20),
    linestyle="--",
    edgecolor="black",
    legend=True,
)
<AxesSubplot:>
Image in a Jupyter notebook
# Employment totals by department, month and sex
totals_by_sex = sex_work.groupby(
    ["dpt_code", "month", "sex"], as_index=False
)[["empl"]].sum()

# Reshape long -> wide: one column per category of `sex`,
# one row per (department, month)
wide_by_sex = totals_by_sex.pivot(
    index=["dpt_code", "month"],
    columns="sex",
    values="empl",
)

# Bring dpt_code and month back as ordinary columns
df4 = wide_by_sex.reset_index()

df4
# Share of women (in percent) over the total of the three sex categories
total_empl = df4["Hombre"] + df4["Mujer"] + df4["S.I."]
df4["prop_wom"] = df4["Mujer"] * 100 / total_empl

# Attach the department shapes; the result has one row per row of df4
# whose dpt_code matches a CCDD in the shapefile
df5 = dpt_shp.merge(df4, left_on="CCDD", right_on="dpt_code")
# Small multiples: one map of the women proportion per month on a 4 x 3 grid
fig, axis = plt.subplots(nrows=4, ncols=3, figsize=(15, 15))

# Distinct months in order of appearance; computed once instead of on
# every loop iteration
months = df5.month.unique()

# axis.flat walks the grid row by row, so the panels are filled left to
# right, top to bottom. zip stops at the shorter sequence: at most 12
# months are drawn, and fewer than 12 months no longer raises IndexError.
for ax, month in zip(axis.flat, months):
    df6 = df5[df5.month == month]   # rows of the current month only
    df6.plot(
        column='prop_wom',
        cmap='Reds',
        linestyle='--',
        edgecolor='black',
        legend=True,
        ax=ax,
    )
    ax.set_title(month)

# Hide any panel that did not receive a month
for ax in axis.flat[len(months):]:
    ax.set_axis_off()
Image in a Jupyter notebook
# Colour map used by the following maps. This is the plain OrRd map,
# not an inverted one (matplotlib names reversed maps with an "_r" suffix).
cmap = plt.cm.OrRd

# Blank out the proportion for Lima so it is treated as missing data
is_lima = df5["NOMBDEP"] == "LIMA"
df5.loc[is_lima, "prop_wom"] = np.nan
# Women proportion with manual class breaks; no styling is given for
# missing values in this version.
# NOTE(review): df5 holds one row per department and month, so every
# month's polygon is drawn here - confirm whether a single month or an
# average was intended.
fig, ax = plt.subplots(figsize=(20, 20))

class_breaks = {"bins": [20, 30, 40, 50, 100]}   # bins: class break points

legend_style = {
    "loc": "upper left",
    "bbox_to_anchor": (1.01, 1),
    "fontsize": "x-large",
    "title": "Women Proportion",
    "title_fontsize": "x-large",
    "frameon": True,
}

df5.plot(
    ax=ax,
    column="prop_wom",
    cmap=cmap,
    linestyle="--",
    edgecolor="black",
    legend=True,
    scheme="User_Defined",
    classification_kwds=class_breaks,
    legend_kwds=legend_style,
)
<AxesSubplot:>
Image in a Jupyter notebook
# Women proportion with manual class breaks; rows with a missing value
# get their own colour and a 'No Data' legend entry.
# NOTE(review): df5 holds one row per department and month, so every
# month's polygon is drawn here - confirm whether a single month or an
# average was intended.
fig, ax = plt.subplots(figsize=(20, 20))

missing_style = {"color": "#DADADB", "label": "No Data"}   # colour and label for missing values
class_breaks = {"bins": [20, 30, 40, 50, 100]}

legend_style = {
    "loc": "upper left",
    "bbox_to_anchor": (1.01, 1),
    "fontsize": "x-large",
    "title": "Women Proportion",
    "title_fontsize": "x-large",
    "frameon": True,
}

df5.plot(
    ax=ax,
    column="prop_wom",
    cmap=cmap,
    linestyle="--",
    edgecolor="black",
    legend=True,
    scheme="User_Defined",
    missing_kwds=missing_style,
    classification_kwds=class_breaks,
    legend_kwds=legend_style,
)
<AxesSubplot:>
Image in a Jupyter notebook