Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab12/Geopandas.ipynb
2714 views
Kernel: Python 3 (ipykernel)

Geopandas y operaciones ARCGIS

@author: Roberto mendoza

import pandas as pd from pandas import Series, DataFrame import numpy as np import matplotlib.pyplot as plt import geopandas as gpd # manejo de datos georefereciados from geopandas import GeoSeries # series de datos georerenciados from shapely.geometry import Point, LineString, Polygon, MultiLineString # objetos geométricos from shapely.ops import nearest_points # operaciones entre objetods geometricos import contextily as cx # Fondo Goole maps, fondo de mapa from pyproj import CRS, Geod # proyecciones a sistemas planares import googlemaps #google request import matplotlib.patches as mpatches import haversine as hs # distancia de grat-cricle entre puntos from geopy import distance # distancia entre puntos from tqdm import tqdm # contador de tiempo en un loop from matplotlib.lines import Line2D import pyreadstat # import spss files import swifter # parallel procesing import unidecode import warnings warnings.filterwarnings('ignore') # eliminar warning messages

no module name, solo instalelo usando !pip install

1.0 Polygonos

# Load .gdb dataset. # La particuladridad es que almacena basese de datos por capas. Podemos acceder a cada una de ellas con layer # El layer 5 contiene el shapefile de polygono de cada distrito dist_mita = gpd.read_file(r'..\data\Mita\mita.gdb', layer=5 ) dist_mita
dist_mita.crs # sistema de coordenadas proyectadas en un plano cartesiano (unidad de medida en metros) bajo el método Transverse Mercator # compatible para países de Argentina, Brazil, Chile, Colombia, Ecuadro y Peru
<Derived Projected CRS: EPSG:32718> Name: WGS 84 / UTM zone 18S Axis Info [cartesian]: - E[east]: Easting (metre) - N[north]: Northing (metre) Area of Use: - name: Between 78°W and 72°W, southern hemisphere between 80°S and equator, onshore and offshore. Argentina. Brazil. Chile. Colombia. Ecuador. Peru. - bounds: (-78.0, -80.0, -72.0, 0.0) Coordinate Operation: - name: UTM zone 18S - method: Transverse Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich

Sistema de coordendas proyectada usada en la Mita

link

# Conversión de sistemas # del sistema EPSG:32718 hacia el sistema de coordenadas estándar EPSG:4326 (WGS84) # El WGS 84 es un sistema geodésico de coordenadas geográficas usado mundialmente, # que permite localizar cualquier punto de la Tierra por medio de tres unidades dadas. dist_mita.to_crs(epsg=4326,inplace=True) dist_mita
dist_mita.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World. - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# Proyección cilindrica equidistante mita_proy_cil = dist_mita.to_crs(epsg=4087) mita_proy_cil
mita_proy_cil.crs
<Derived Projected CRS: EPSG:4087> Name: WGS 84 / World Equidistant Cylindrical Axis Info [cartesian]: - X[east]: Easting (metre) - Y[north]: Northing (metre) Area of Use: - name: World. - bounds: (-180.0, -90.0, 180.0, 90.0) Coordinate Operation: - name: World Equidistant Cylindrical - method: Equidistant Cylindrical Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# Se retorna al sistema de coordenadas iniciales mita_cartesiano = dist_mita.to_crs(epsg=32718) mita_cartesiano
mita_cartesiano.plot()
<AxesSubplot:>
Image in a Jupyter notebook

2.0 Point

# Coordenad de las capitales de los distritos dentro de la MITA minera centroids = gpd.read_file(r'..\data\Mita\mita.gdb', layer=0 ) centroids
centroids.crs
<Derived Projected CRS: PROJCS["mita_equi",GEOGCS["WGS 84",DATUM["WGS_1984 ...> Name: mita_equi Axis Info [cartesian]: - [east]: Easting (metre) - [north]: Northing (metre) Area of Use: - undefined Coordinate Operation: - name: unnamed - method: Equidistant Cylindrical Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# De columnas de latitud longitud al objeto geometry (Point) centroids['Point'] = gpd.points_from_xy(centroids.LON, centroids.LAT, crs="EPSG:4326") centroids
centroids.plot()
<AxesSubplot:>
Image in a Jupyter notebook
# Del objeto Geometry (proyección cilindrico equidistante) a columnas latitud y longitud por separado centroids["longitude"] = centroids.geometry.map(lambda p: p.x) centroids["latitude"] = centroids.geometry.map(lambda p: p.y)
centroids

3.0 Linestring

# Load a shapefile in Python mita_boundary = gpd.read_file(r'..\data\Mita\MitaBoundary.shp')
mita_boundary
mita_boundary.plot()
<AxesSubplot:>
Image in a Jupyter notebook
mita_boundary.crs
<Derived Projected CRS: EPSG:32718> Name: WGS 84 / UTM zone 18S Axis Info [cartesian]: - E[east]: Easting (metre) - N[north]: Northing (metre) Area of Use: - name: Between 78°W and 72°W, southern hemisphere between 80°S and equator, onshore and offshore. Argentina. Brazil. Chile. Colombia. Ecuador. Peru. - bounds: (-78.0, -80.0, -72.0, 0.0) Coordinate Operation: - name: UTM zone 18S - method: Transverse Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# load shapefile a nivel distrital distritos = gpd.read_file(r'..\data\geopandas\LIMITE_DISTRITO\LIMITE_DIST.shp') distritos
# centroide de polygonos. En este caso del poligono a nivel distrital distritos['centroid'] = distritos.geometry.centroid distritos

3.0 Merge Geospatial information

# select columns and then convert to geodataframe. gdf = gpd.GeoDataFrame( centroids[['Point','UBIGEO']], geometry= centroids.Point)
centroids
gdf
gdf.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 305 entries, 0 to 304 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Point 305 non-null geometry 1 UBIGEO 305 non-null float64 2 geometry 305 non-null geometry dtypes: float64(1), geometry(2) memory usage: 7.3 KB

3.1 Join - Contains

# Merge: el poligono del distrito que contenga (contains) el centroide de los distritos ubicadas en la MITA minera merge = gpd.sjoin(distritos, gdf , how="inner", op="contains") #merge[merge.Point != None ] merge

3.2 Join - whitin

# el centroide de los distritos ubicadas en la MITA minera está al interior del polygono del distrito (whitin) merge = gpd.sjoin( gdf, distritos , how="inner", op="within") merge

3.3 Join - Intersects

# Merge: el poligono del distrito que intersecte (intersects) el centroide de los distritos ubicadas en la MITA minera data_geo= gpd.sjoin(distritos, gdf , how="inner", op="intersects").reset_index() data_geo
f, ax = plt.subplots(figsize=(8,8)) data_geo['geometry'].plot(color='none', edgecolor='black', zorder=0.5, ax = ax) data_geo['centroid'].plot(color = 'r', markersize=10, ax = ax)
<AxesSubplot:>
Image in a Jupyter notebook
f, ax = plt.subplots(figsize=(8,8)) data_geo['geometry'].plot(color='none', edgecolor='black', zorder=0.5, ax = ax) data_geo['Point'].plot(color = 'r', markersize=10, ax = ax) mita_boundary.plot(color = 'b', markersize=10, ax = ax)
<AxesSubplot:>
Image in a Jupyter notebook
mita_boundary.crs
<Derived Projected CRS: EPSG:32718> Name: WGS 84 / UTM zone 18S Axis Info [cartesian]: - E[east]: Easting (metre) - N[north]: Northing (metre) Area of Use: - name: Between 78°W and 72°W, southern hemisphere between 80°S and equator, onshore and offshore. Argentina. Brazil. Chile. Colombia. Ecuador. Peru. - bounds: (-78.0, -80.0, -72.0, 0.0) Coordinate Operation: - name: UTM zone 18S - method: Transverse Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# Se proyecta al sistema convencional de coordenadas EPSG:4326 # D esta manera sea compatible con los otros mapas mita_boundary.to_crs(epsg=4326,inplace=True)
f, ax = plt.subplots(figsize=(15,15)) # capa de limite distrital data_geo['geometry'].plot(color='none', linewidth=0.8,edgecolor='black', zorder=0.5, ax = ax) # capital de cada distrito data_geo['Point'].plot(color = '#DC143C', markersize=14, ax = ax, marker = "o", label="Distric's capital") # Una parte de los limites de la MITA mita_boundary.plot(color = '#834333' ,linewidth=4, ax = ax, label='Mita Boundary target') # añade un mapa de fondo # contextuly cx.add_basemap(ax, crs="EPSG:4326", attribution = False, zoom = 11) # sin valores en los ejes plt.xticks([]) plt.yticks([]) # Añadir texto f.text(0.65,0.7,'Mita Boundaries',color = 'black', size = 20, bbox=dict(facecolor='none', edgecolor='none', pad=5.0)) # bbox: añade una caja al texto, pad : largo y ancho de la caja # Costumize legend plt.legend(loc='upper left', title = "",frameon=True, bbox_to_anchor=(0, 0.15), prop={'size': 15})
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Input In [33], in <cell line: 16>() 11 mita_boundary.plot(color = '#834333' ,linewidth=4, ax = ax, label='Mita Boundary target') 13 # añade un mapa de fondo 14 15 # contextuly ---> 16 cx.add_basemap(ax, crs="EPSG:4326", attribution = False, zoom = 11) 18 # sin valores en los ejes 20 plt.xticks([])
File ~\anaconda3\envs\env_geopandas\lib\site-packages\contextily\plotting.py:121, in add_basemap(ax, zoom, source, interpolation, attribution, attribution_size, reset_extent, crs, resampling, **extra_imshow_args) 117 left, right, bottom, top = _reproj_bb( 118 left, right, bottom, top, crs, {"init": "epsg:3857"} 119 ) 120 # Download image --> 121 image, extent = bounds2img( 122 left, bottom, right, top, zoom=zoom, source=source, ll=False 123 ) 124 # Warping 125 if crs is not None:
File ~\anaconda3\envs\env_geopandas\lib\site-packages\contextily\tile.py:222, in bounds2img(w, s, e, n, zoom, source, ll, wait, max_retries) 220 x, y, z = t.x, t.y, t.z 221 tile_url = provider.build_url(x=x, y=y, z=z) --> 222 image = _fetch_tile(tile_url, wait, max_retries) 223 tiles.append(t) 224 arrays.append(image)
File ~\anaconda3\envs\env_geopandas\lib\site-packages\joblib\memory.py:594, in MemorizedFunc.__call__(self, *args, **kwargs) 593 def __call__(self, *args, **kwargs): --> 594 return self._cached_call(args, kwargs)[0]
File ~\anaconda3\envs\env_geopandas\lib\site-packages\joblib\memory.py:537, in MemorizedFunc._cached_call(self, args, kwargs, shelving) 534 must_call = True 536 if must_call: --> 537 out, metadata = self.call(*args, **kwargs) 538 if self.mmap_mode is not None: 539 # Memmap the output at the first call to be consistent with 540 # later calls 541 if self._verbose:
File ~\anaconda3\envs\env_geopandas\lib\site-packages\joblib\memory.py:779, in MemorizedFunc.call(self, *args, **kwargs) 777 if self._verbose > 0: 778 print(format_call(self.func, args, kwargs)) --> 779 output = self.func(*args, **kwargs) 780 self.store_backend.dump_item( 781 [func_id, args_id], output, verbose=self._verbose) 783 duration = time.time() - start_time
File ~\anaconda3\envs\env_geopandas\lib\site-packages\contextily\tile.py:252, in _fetch_tile(tile_url, wait, max_retries) 250 @memory.cache 251 def _fetch_tile(tile_url, wait, max_retries): --> 252 request = _retryer(tile_url, wait, max_retries) 253 with io.BytesIO(request.content) as image_stream: 254 image = Image.open(image_stream).convert("RGBA")
File ~\anaconda3\envs\env_geopandas\lib\site-packages\contextily\tile.py:395, in _retryer(tile_url, wait, max_retries) 375 """ 376 Retry a url many times in attempt to get a tile 377 (...) 392 request object containing the web response. 393 """ 394 try: --> 395 request = requests.get(tile_url, headers={"user-agent": USER_AGENT}) 396 request.raise_for_status() 397 except requests.HTTPError:
File ~\anaconda3\envs\env_geopandas\lib\site-packages\requests\api.py:75, in get(url, params, **kwargs) 64 def get(url, params=None, **kwargs): 65 r"""Sends a GET request. 66 67 :param url: URL for the new :class:`Request` object. (...) 72 :rtype: requests.Response 73 """ ---> 75 return request('get', url, params=params, **kwargs)
File ~\anaconda3\envs\env_geopandas\lib\site-packages\requests\api.py:61, in request(method, url, **kwargs) 57 # By using the 'with' statement we are sure the session is closed, thus we 58 # avoid leaving sockets open which can trigger a ResourceWarning in some 59 # cases, and look like a memory leak in others. 60 with sessions.Session() as session: ---> 61 return session.request(method=method, url=url, **kwargs)
File ~\anaconda3\envs\env_geopandas\lib\site-packages\requests\sessions.py:529, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json) 524 send_kwargs = { 525 'timeout': timeout, 526 'allow_redirects': allow_redirects, 527 } 528 send_kwargs.update(settings) --> 529 resp = self.send(prep, **send_kwargs) 531 return resp
File ~\anaconda3\envs\env_geopandas\lib\site-packages\requests\sessions.py:645, in Session.send(self, request, **kwargs) 642 start = preferred_clock() 644 # Send the request --> 645 r = adapter.send(request, **kwargs) 647 # Total elapsed time of the request (approximately) 648 elapsed = preferred_clock() - start
File ~\anaconda3\envs\env_geopandas\lib\site-packages\requests\adapters.py:440, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies) 438 try: 439 if not chunked: --> 440 resp = conn.urlopen( 441 method=request.method, 442 url=url, 443 body=request.body, 444 headers=request.headers, 445 redirect=False, 446 assert_same_host=False, 447 preload_content=False, 448 decode_content=False, 449 retries=self.max_retries, 450 timeout=timeout 451 ) 453 # Send the request. 454 else: 455 if hasattr(conn, 'proxy_pool'):
File ~\anaconda3\envs\env_geopandas\lib\site-packages\urllib3\connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 700 self._prepare_proxy(conn) 702 # Make the request on the httplib connection object. --> 703 httplib_response = self._make_request( 704 conn, 705 method, 706 url, 707 timeout=timeout_obj, 708 body=body, 709 headers=headers, 710 chunked=chunked, 711 ) 713 # If we're going to release the connection in ``finally:``, then 714 # the response doesn't need to know about the connection. Otherwise 715 # it will also try to release it and we'll have a double-release 716 # mess. 717 response_conn = conn if not release_conn else None
File ~\anaconda3\envs\env_geopandas\lib\site-packages\urllib3\connectionpool.py:386, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 384 # Trigger any extra validation we need to do. 385 try: --> 386 self._validate_conn(conn) 387 except (SocketTimeout, BaseSSLError) as e: 388 # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. 389 self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
File ~\anaconda3\envs\env_geopandas\lib\site-packages\urllib3\connectionpool.py:1040, in HTTPSConnectionPool._validate_conn(self, conn) 1038 # Force connect early to allow us to validate the connection. 1039 if not getattr(conn, "sock", None): # AppEngine might not have `.sock` -> 1040 conn.connect() 1042 if not conn.is_verified: 1043 warnings.warn( 1044 ( 1045 "Unverified HTTPS request is being made to host '%s'. " (...) 1050 InsecureRequestWarning, 1051 )
File ~\anaconda3\envs\env_geopandas\lib\site-packages\urllib3\connection.py:358, in HTTPSConnection.connect(self) 356 def connect(self): 357 # Add certificate verification --> 358 self.sock = conn = self._new_conn() 359 hostname = self.host 360 tls_in_tls = False
File ~\anaconda3\envs\env_geopandas\lib\site-packages\urllib3\connection.py:174, in HTTPConnection._new_conn(self) 171 extra_kw["socket_options"] = self.socket_options 173 try: --> 174 conn = connection.create_connection( 175 (self._dns_host, self.port), self.timeout, **extra_kw 176 ) 178 except SocketTimeout: 179 raise ConnectTimeoutError( 180 self, 181 "Connection to %s timed out. (connect timeout=%s)" 182 % (self.host, self.timeout), 183 )
File ~\anaconda3\envs\env_geopandas\lib\site-packages\urllib3\util\connection.py:85, in create_connection(address, timeout, source_address, socket_options) 83 if source_address: 84 sock.bind(source_address) ---> 85 sock.connect(sa) 86 return sock 88 except socket.error as e:
KeyboardInterrupt:
Image in a Jupyter notebook
f, ax = plt.subplots(figsize=(15,15)) data_geo['geometry'].plot(color='none', edgecolor='gray', zorder=0.5, ax = ax) data_geo['Point'].plot(color = '#DC143C', markersize=10, ax = ax, marker = "o", label="Distric's capital") mita_boundary.plot(color = '#FFBF00' ,linewidth=2, ax = ax, label='Mita Boundary target') cx.add_basemap(ax, crs="EPSG:4326", source=cx.providers.NASAGIBS.ViirsEarthAtNight2012, attribution = False, zoom = 8) plt.xticks([]) plt.yticks([]) # Añadir texto f.text(0.65,0.7,'Mita Boundaries',color = 'black', size = 15, bbox=dict(facecolor='none', edgecolor='none', pad=5.0)) # bbox: añade una caja al texto, pad : largo y ancho de la caja # Costumize legend plt.legend(loc='upper left', title = "",frameon=True, bbox_to_anchor=(0, 0.15), prop={'size': 15})

4.0 Distances

data_geo
# selección de un point data_geo.loc[0,"Point"]
Image in a Jupyter notebook
mita_boundary
# Plot de uno de los boundaries mita_boundary.loc[0,"geometry"]
Image in a Jupyter notebook
print( data_geo.loc[0,"Point"].distance(mita_boundary.loc[0,"geometry"] ) ) data_geo.loc[0,"Point"].distance(mita_boundary.loc[1,"geometry"] ) # Se buene observar que la primera capital está cercana al boundary de index : 0 # distance permite hallar la minima distancia geodésica # Las unidades están en grados
0.3780424516996454
0.7991220422560418
# Coordenadas de Mina Potosi y Huancavelica #potosi = (-19.571598207409757, -65.75495755044265) #huanca = (-12.808993982792076, -74.97525207494975) potosi = (-65.75495755044265, -19.571598207409757) huanca = (-74.97525207494975, -12.808993982792076) point = (data_geo.loc[0,"Point"].x, data_geo.loc[0,"Point"].y)
point
(-73.185, -13.95583333)

Referencia

Haversine

# Haversine distance asume que la tierra es una esfera. A esta medida tambien se llama Great-Circle print("Distancia en kilometros: ",hs.haversine( huanca , point, unit = 'km')) print("Distancia en metros: ", hs.haversine( huanca , point, unit = 'm')) print("Distancia en millas: ", hs.haversine( huanca , point, unit = 'mi'))
Distancia en kilometros: 202.10753203071803 Distancia en metros: 202107.53203071802 Distancia en millas: 125.58379809010545
# Usando geopy (from geopy import distance as ) print("Distancia geodésica en kilometros : ", distance.geodesic(huanca , point).km ) print("Distancia great-circle en kilometros : ", distance.great_circle(huanca , point).km )
Distancia geodésica en kilometros : 202.86294742689847 Distancia great-circle en kilometros : 202.1075383753187

Referencia de Geopy

Geo-py

# longitud print(mita_boundary.loc[1,"geometry"].length) # area data_geo.loc[0,"geometry"].area # la unidad de medidas esta en grados (redianes)
6.399277433462688
0.011908123743330926
# Se declara el sistema geodésico de coordenadas: ellipsoide WGS84 ( EPSG 4326 ) geod = Geod(ellps="WGS84") # minima distancia geodesica en metros # nearest_point halla los extremos de la linea mas corta entre boundary y point # LineString usas esos extremos para crear una linea # luego se calcula la distancia de esa linea distance = geod.geometry_length(LineString(nearest_points(mita_boundary.loc[1,"geometry"], data_geo.loc[0,"Point"] )))
distance # medida en metros
87884.59488352262

5.0 Proyecciones a un plano cartesiano (Distancias Euclideanas)

data_geo.to_crs(epsg=32718, inplace = True) # proyeccion que permite calcular distancias precisas en el Perú
data_geo
# Loop para proyectar cada columna del tipo de objeto "geometry" for column in ['geometry','centroid','Point']: data_geo = data_geo.set_geometry(column).to_crs(epsg=32718) data_geo
# Proyeccion de la mita mita_boundary.to_crs(epsg=32718, inplace=True) mita_boundary
print("Distancia minima euclideana en metros al Boundary : ", data_geo.loc[0,"Point"].distance(mita_boundary.loc[1,"geometry"] ) )
Distancia minima euclideana en metros al Boundary : 87900.81102564473
# longitud print(mita_boundary.loc[1,"geometry"].length/1000) # distancia en kilometros del boundary # area data_geo.loc[0,"geometry"].area/1000**2 # 142.4 KM^2
698.6938671485821
142.42277155133257
# Distancia euclideana distance = LineString(nearest_points(mita_boundary.loc[1,"geometry"], data_geo.loc[0,"Point"] )).length distance # 87884.59488352262 cercano a la distancia geodésica , diferencia de 16 metros
87900.81102564473

7.0 Buffers

# Buffer de linestring f, ax = plt.subplots(1, figsize=(8,8)) base1 = mita_boundary.loc[1,"geometry"].buffer(10000) # Linestring (10 kilometros) gpd.GeoSeries(base1).plot(ax=ax) # Se convierte a geoseriies para graficarlo base2 = mita_boundary.loc[0,"geometry"].buffer(3000) # 3 kilometros a la redonda gpd.GeoSeries(base2).plot(ax=ax, color = 'red') mita_boundary.plot(color='black', ax=ax) plt.xticks([]) plt.yticks([])
([], [])
Image in a Jupyter notebook
# Buffer de un polígono f, ax = plt.subplots(1, figsize=(8,8)) base = data_geo.loc[0,"geometry"].buffer(2000) # poligono del distrito con bordes extendido en 2 km gpd.GeoSeries(base).plot(ax=ax) # Se convierte a geoseriies para graficarlo gpd.GeoSeries(data_geo.loc[0,"geometry"]).plot(color="darkgray",ax=ax, linewidth= 1) # poligono del distrito original plt.xticks([]) plt.yticks([])
([], [])
Image in a Jupyter notebook

8.0 Geocoding

1.0 Free geocoding and Google API

data2017, meta = pyreadstat.read_sav(r"../data/crime_data/2017/611-Modulo1312/sample_Denuncia_de_Delitos_2017.sav", encoding="latin1") data2017
# NOMBREDI : nombre del distrito # IH205_DD: codigo de 2 digitos del departamento (callao : 07) # H206: TIPO DE VIA DONDE OCURRIÓ EL DELITO # IH207_B: NOMBRE DE LA DE VÍA DONDE OCURRIÓ EL DELITO # IH207_A: Nº CUADRA DONDE OCURRIÓ EL DELITO data2017[['NOMBREDI','H206','IH207_B','IH207_A']]
data2017.IH207_A.unique()
array(['SN', '1', '2', '6', 'NULL', '7', '8', '4', '13', '3', '21', '5', '11', '20', '10', '55', '9', '33', '14', '28', '49', '41', '38', '40', '31', '16', '15', '39', '3,', '2,', '5,', '1,', '33,', '6,', '31,', '4,', '20,', '8,', '18,', '16,', '40,', '7,', '9,', '19,', '30,'], dtype=object)
# filtro callaro y selecciono las primeras 20 observaciones callao = data2017.iloc[0:20] callao[['NOMBREDI','H206','IH207_B','IH207_A']]
#COMISARÃA BELLAVISTA : erro por no reconocer tilde. Se aplica unicode para corregirlo callao['IH207_B'] = callao['IH207_B'].apply( lambda x : unidecode.unidecode( x ) ) # unicode para corregir tildes no detectados callao
# Free geocoding coordinates = gpd.tools.geocode("PUCP Peru, Avenida Universitaria, 15032, San Miguel, Lima") coordinates # Point(x,y): x: longitud, y:latitud
coordinates['address'][0]
'PUCP Peru, Avenida Universitaria, 15032, Avenida Universitaria, San Miguel, Lima, Perú'
# Google API gmaps = googlemaps.Client( key = 'AIzaSyA-Cj5npW0l2P7bZKP6WmQSq4uXSMvfHCM' ) # se crea el objeto gmaps con el token de permiso result_api = gmaps.geocode( "PUCP, Avenida Universitaria, 15032, San Miguel, Lima" , region = 'PE' ) # region = PE result_api # Google devuelve las solicitudes en formato Json
[{'address_components': [{'long_name': '1801', 'short_name': '1801', 'types': ['street_number']}, {'long_name': 'Avenida Universitaria', 'short_name': 'Av. Universitaria', 'types': ['route']}, {'long_name': 'Fund Pando', 'short_name': 'Fund Pando', 'types': ['political', 'sublocality', 'sublocality_level_1']}, {'long_name': 'San Miguel', 'short_name': 'San Miguel', 'types': ['locality', 'political']}, {'long_name': 'Provincia de Lima', 'short_name': 'Provincia de Lima', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'Provincia de Lima', 'short_name': 'Provincia de Lima', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'Peru', 'short_name': 'PE', 'types': ['country', 'political']}, {'long_name': '15088', 'short_name': '15088', 'types': ['postal_code']}], 'formatted_address': 'Av. Universitaria 1801, San Miguel 15088, Peru', 'geometry': {'location': {'lat': -12.0689502, 'lng': -77.0780608}, 'location_type': 'ROOFTOP', 'viewport': {'northeast': {'lat': -12.0676218697085, 'lng': -77.07669776970849}, 'southwest': {'lat': -12.0703198302915, 'lng': -77.0793957302915}}}, 'partial_match': True, 'place_id': 'ChIJpUAI1BLJBZERLobll7e_oNc', 'plus_code': {'compound_code': 'WWJC+CQ San Miguel', 'global_code': '57V4WWJC+CQ'}, 'types': ['establishment', 'point_of_interest', 'university']}]
result_api[0]
{'address_components': [{'long_name': '1801', 'short_name': '1801', 'types': ['street_number']}, {'long_name': 'Avenida Universitaria', 'short_name': 'Av. Universitaria', 'types': ['route']}, {'long_name': 'Fund Pando', 'short_name': 'Fund Pando', 'types': ['political', 'sublocality', 'sublocality_level_1']}, {'long_name': 'San Miguel', 'short_name': 'San Miguel', 'types': ['locality', 'political']}, {'long_name': 'Provincia de Lima', 'short_name': 'Provincia de Lima', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'Provincia de Lima', 'short_name': 'Provincia de Lima', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'Peru', 'short_name': 'PE', 'types': ['country', 'political']}, {'long_name': '15088', 'short_name': '15088', 'types': ['postal_code']}], 'formatted_address': 'Av. Universitaria 1801, San Miguel 15088, Peru', 'geometry': {'location': {'lat': -12.0689502, 'lng': -77.0780608}, 'location_type': 'ROOFTOP', 'viewport': {'northeast': {'lat': -12.0676218697085, 'lng': -77.07669776970849}, 'southwest': {'lat': -12.0703198302915, 'lng': -77.0793957302915}}}, 'partial_match': True, 'place_id': 'ChIJpUAI1BLJBZERLobll7e_oNc', 'plus_code': {'compound_code': 'WWJC+CQ San Miguel', 'global_code': '57V4WWJC+CQ'}, 'types': ['establishment', 'point_of_interest', 'university']}
result_api[0].keys()
dict_keys(['address_components', 'formatted_address', 'geometry', 'partial_match', 'place_id', 'plus_code', 'types'])
result_api[0]['geometry']['location']
{'lat': -12.0689502, 'lng': -77.0780608}
lat = result_api[0]['geometry']['location']['lat'] lon = result_api[0]['geometry']['location']['lng'] (lon,lat)
(-77.0780608, -12.0689502)
result_api[0]['types']
['establishment', 'point_of_interest', 'university']
# componentes de la dirección result_api[0]['address_components'][4]
{'long_name': 'Provincia de Lima', 'short_name': 'Provincia de Lima', 'types': ['administrative_area_level_2', 'political']}
callao[['NOMBREDI','H206','IH207_B','IH207_A']]
# crear el tipo de calle # condiciones filt1 = callao['H206'] == 1 filt2 = callao['H206'] == 2 filt3 = callao['H206'] == 3 filt4 = callao['H206'] == 4 filt5 = callao['H206'] == 5 filt6 = callao['H206'] == 6 filt7 = callao['H206'] == 7 # se filtra, se crea la vafriable tipo_calle y se asigno valores callao.loc[filt1, 'tipo_calle'] = "Avenida" callao.loc[filt2, 'tipo_calle'] = "Jiron" callao.loc[filt3, 'tipo_calle'] = "Calle" callao.loc[filt4, 'tipo_calle'] = "Pasaje" callao.loc[filt5, 'tipo_calle'] = "Carretera" callao.loc[filt6, 'tipo_calle'] = "Prolongación" callao.loc[filt7, 'tipo_calle'] = " " callao.replace({'IH207_B': "SN", 'IH207_A': "SN"}, " ", inplace = True)
callao[['NOMBREDI','H206','tipo_calle','IH207_B','IH207_A']]
# function to geocode def georef_geocode( row ): address = f"{row['tipo_calle']} {row['IH207_B']} {row['IH207_A']}, {row['NOMBREDI']}, Callao, Perú" geo = gpd.tools.geocode(address) # Information try: lat = geo['geometry'].y[0] lon = geo['geometry'].x[0] except Exception as e: lat = np.nan lon = np.nan return ( lat, lon ) # function to Geocoding Google API def georef_google_api( row ): address = f"{row['tipo_calle']} {row['IH207_B']} {row['IH207_A']}, {row['NOMBREDI']}, Callao, Perú" # Set Geolocation gmaps = googlemaps.Client( key = 'AIzaSyCJoBnbPDYdUMLngeRKsLLMAsUFdFF28do' ) result_api = gmaps.geocode( address , region = 'PE' ) # Information try: lat = result_api[0]['geometry']['location']['lat'] lon = result_api[0]['geometry']['location']['lng'] except Exception : lat = np.nan lon = np.nan return ( lat, lon )
callao['coordinates_geo'] = callao.swifter.apply( lambda x: georef_geocode( x ) , axis = 1 ) # apply function by each row callao['coordinates_google'] = callao.swifter.apply( lambda x: georef_google_api( x ) , axis = 1 ) # apply function by each row
Pandas Apply: 0%| | 0/20 [00:00<?, ?it/s]
Pandas Apply: 0%| | 0/20 [00:00<?, ?it/s]
callao[['lat_geo', 'lng_geo']] = pd.DataFrame(callao.coordinates_geo.tolist() , index = callao.index ) callao[['lat_google', 'lng_google']] = pd.DataFrame(callao.coordinates_google.tolist() , index = callao.index )
callao[['tipo_calle','IH207_B','IH207_A','lat_geo', 'lng_geo','lat_google', 'lng_google']]