In [1]:
!pip install fiona
Collecting fiona Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/56.6 kB ? eta -:--:-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.6/56.6 kB 1.6 MB/s eta 0:00:00 Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.11/dist-packages (from fiona) (25.3.0) Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from fiona) (2025.4.26) Requirement already satisfied: click~=8.0 in /usr/local/lib/python3.11/dist-packages (from fiona) (8.2.1) Collecting click-plugins>=1.0 (from fiona) Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB) Collecting cligj>=0.5 (from fiona) Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB) Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.3/17.3 MB 73.1 MB/s eta 0:00:00 Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB) Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB) Installing collected packages: cligj, click-plugins, fiona Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.10.1
In [2]:
linkWorldMap="https://github.com/CienciaDeDatosEspacial/intro_geodataframe/raw/main/maps/worldMaps.gpkg"
import geopandas as gpd
from fiona import listlayers
listlayers(linkWorldMap)
Out[2]:
['indicators', 'countries', 'rivers', 'cities']
In [3]:
countries=gpd.read_file(linkWorldMap,layer='countries')
rivers=gpd.read_file(linkWorldMap,layer='rivers')
cities=gpd.read_file(linkWorldMap,layer='cities')
indicators=gpd.read_file(linkWorldMap,layer='indicators')
Subsetting¶
In [4]:
countries.head()
Out[4]:
COUNTRY | geometry | |
---|---|---|
0 | Aruba (Netherlands) | MULTIPOLYGON (((-69.88223 12.41111, -69.94695 ... |
1 | Antigua and Barbuda | MULTIPOLYGON (((-61.73889 17.54055, -61.75195 ... |
2 | Afghanistan | MULTIPOLYGON (((61.27656 35.60725, 61.29638 35... |
3 | Algeria | MULTIPOLYGON (((-5.15213 30.18047, -5.13917 30... |
4 | Azerbaijan | MULTIPOLYGON (((46.54037 38.87559, 46.49554 38... |
In [5]:
# as DF
countries.iloc[50:,]
Out[5]:
COUNTRY | geometry | |
---|---|---|
50 | Central African Republic | MULTIPOLYGON (((20.4533 4.52379, 20.45798 4.61... |
51 | Cuba | MULTIPOLYGON (((-76.94608 21.45221, -76.8839 2... |
52 | Cape Verde | MULTIPOLYGON (((-25.28139 16.91333, -25.29861 ... |
53 | Cook Islands (New Zealand) | MULTIPOLYGON (((-165.84167 -10.89084, -165.848... |
54 | Cyprus | MULTIPOLYGON (((33.27229 34.70955, 33.21722 34... |
... | ... | ... |
247 | South Sudan | MULTIPOLYGON (((34.21807 9.96458, 34.20722 9.9... |
248 | Indonesia | MULTIPOLYGON (((123.21846 -10.80917, 123.19832... |
249 | East Timor | MULTIPOLYGON (((124.41824 -9.3001, 124.40446 -... |
250 | Curacao (Netherlands) | MULTIPOLYGON (((-68.96556 12.19889, -68.91196 ... |
251 | Bonaire (Netherlands) | MULTIPOLYGON (((-68.19736 12.22264, -68.19292 ... |
202 rows × 2 columns
In [6]:
# as DF
countries.loc[50:,'geometry']
Out[6]:
geometry | |
---|---|
50 | MULTIPOLYGON (((20.4533 4.52379, 20.45798 4.61... |
51 | MULTIPOLYGON (((-76.94608 21.45221, -76.8839 2... |
52 | MULTIPOLYGON (((-25.28139 16.91333, -25.29861 ... |
53 | MULTIPOLYGON (((-165.84167 -10.89084, -165.848... |
54 | MULTIPOLYGON (((33.27229 34.70955, 33.21722 34... |
... | ... |
247 | MULTIPOLYGON (((34.21807 9.96458, 34.20722 9.9... |
248 | MULTIPOLYGON (((123.21846 -10.80917, 123.19832... |
249 | MULTIPOLYGON (((124.41824 -9.3001, 124.40446 -... |
250 | MULTIPOLYGON (((-68.96556 12.19889, -68.91196 ... |
251 | MULTIPOLYGON (((-68.19736 12.22264, -68.19292 ... |
202 rows × 1 columns
In [7]:
countries.total_bounds
Out[7]:
array([-180. , -90. , 180. , 83.62359619])
In [8]:
countries.cx[:0,:0]
Out[8]:
COUNTRY | geometry | |
---|---|---|
9 | American Samoa (US) | MULTIPOLYGON (((-170.7439 -14.37556, -170.7494... |
10 | Argentina | MULTIPOLYGON (((-71.01648 -36.47591, -70.98195... |
14 | Antarctica | MULTIPOLYGON (((-45.14528 -60.76611, -45.15639... |
24 | Bolivia | MULTIPOLYGON (((-62.19884 -20.47139, -62.26945... |
29 | Brazil | MULTIPOLYGON (((-70.62862 -9.94849, -70.62889 ... |
42 | Chile | MULTIPOLYGON (((-73.61806 -51.6339, -73.60494 ... |
47 | Colombia | MULTIPOLYGON (((-81.71306 12.49028, -81.72014 ... |
53 | Cook Islands (New Zealand) | MULTIPOLYGON (((-165.84167 -10.89084, -165.848... |
58 | Jarvis Island (US) | MULTIPOLYGON (((-160.02115 -0.39806, -160.0281... |
60 | Ecuador | MULTIPOLYGON (((-78.70903 -4.58479, -78.72348 ... |
71 | Fiji | MULTIPOLYGON (((180 -16.17274, 179.98621 -16.1... |
72 | Falkland Islands (UK) | MULTIPOLYGON (((-59.79139 -51.24945, -59.73195... |
75 | French Polynesia (France) | MULTIPOLYGON (((-140.17783 -8.95639, -140.1894... |
158 | Niue (New Zealand) | MULTIPOLYGON (((-169.89389 -19.14556, -169.930... |
169 | New Zealand | MULTIPOLYGON (((177.91779 -38.9428, 177.9097 -... |
170 | Paraguay | MULTIPOLYGON (((-57.67267 -25.2943, -57.70639 ... |
171 | Pitcairn Islands (UK) | MULTIPOLYGON (((-128.33221 -24.32727, -128.326... |
172 | Peru | MULTIPOLYGON (((-69.5675 -10.95056, -69.56844 ... |
196 | St. Helena (UK) | MULTIPOLYGON (((-5.71298 -15.99286, -5.72917 -... |
208 | South Georgia and the South Sandwich Is (UK) | MULTIPOLYGON (((-36.99139 -54.35056, -36.99973... |
216 | Tokelau (New Zealand) | MULTIPOLYGON (((-171.84805 -9.21889, -171.8588... |
217 | Tonga | MULTIPOLYGON (((-173.93921 -18.56889, -173.933... |
231 | Uruguay | MULTIPOLYGON (((-58.38889 -33.4225, -58.4159 -... |
239 | Wallis and Futuna (France) | MULTIPOLYGON (((-176.16504 -13.35305, -176.169... |
242 | Western Samoa | MULTIPOLYGON (((-172.5965 -13.50911, -172.5519... |
In [9]:
#then
countries.cx[:0,:0].plot()
Out[9]:
<Axes: >
Clipping¶
In [10]:
usa = countries[countries.COUNTRY == 'United States']
In [11]:
citiesUSA_clipped = gpd.clip(gdf=cities, mask=usa)
riversUSA_clipped = gpd.clip(gdf=rivers, mask=usa)
In [12]:
base = usa.plot(facecolor="greenyellow", edgecolor='black', linewidth=0.4, figsize=(5,5))
citiesUSA_clipped.plot(marker='+', color='red', markersize=15, ax=base)
riversUSA_clipped.plot(edgecolor='blue', linewidth=0.5, ax=base)
Out[12]:
<Axes: >
In [13]:
print(usa.crs)
print(cities.crs)
print(rivers.crs)
EPSG:4326 EPSG:4326 EPSG:4326
In [14]:
import matplotlib.pyplot as plt
# Crear figura
fig, ax = plt.subplots(figsize=(12, 8))
# Graficar capas
usa.plot(ax=ax, facecolor="greenyellow", edgecolor='black', linewidth=0.4)
riversUSA_clipped.plot(ax=ax, edgecolor='blue', linewidth=0.5)
citiesUSA_clipped.plot(ax=ax, marker='+', color='red', markersize=15)
# Ajustar límites para los 48 estados continentales
ax.set_xlim(-130, -65)
ax.set_ylim(23, 50)
# Ajustar aspecto para que no se distorsione
ax.set_aspect('auto')
# Añadir etiquetas y título
ax.set_title("Ciudades y Ríos en EE.UU.", fontsize=14)
ax.set_xlabel("Longitud")
ax.set_ylabel("Latitud")
plt.show()
In [15]:
set(usa.geom_type), set(citiesUSA_clipped.geom_type), set(riversUSA_clipped.geom_type)
Out[15]:
({'MultiPolygon'}, {'Point'}, {'LineString', 'MultiLineString'})
Exercise 1¶
You will make two maps:
- Plot your country as the background. Use cx to plot some points (the cities, airports, etc.) below the centroid.
- Plot your country as the background. Select with cx all the first administrative divisions above the centroid. Then, use clip to show some lines (rivers, railroads, etc) that cross those divisions.
1. Plot your country as background and use cx to plot points below the centroid
In [16]:
cities_clean = citiesUSA_clipped[citiesUSA_clipped.geometry.notnull()]
# Centroide
usa_centroid = usa.geometry.centroid.iloc[0]
# Paso 3: Filtrar las ciudades que están por debajo del centroide (más al sur)
cities_below = cities_clean[cities_clean.geometry.y < usa_centroid.y]
<ipython-input-16-1919301299>:4: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. usa_centroid = usa.geometry.centroid.iloc[0]
In [17]:
fig, ax = plt.subplots(figsize=(8, 6))
usa.plot(ax=ax, facecolor="lightgray", edgecolor="black")
cities_below.plot(ax=ax, color='red', markersize=10)
plt.title("Ciudades al sur del centroide de EE.UU.")
plt.show()
In [19]:
import geopandas as gpd
import matplotlib.pyplot as plt
# 1. Cargar los estados de EE.UU.
url = "https://www2.census.gov/geo/tiger/GENZ2022/shp/cb_2022_us_state_20m.zip"
states = gpd.read_file(url)
In [20]:
# 2. Filtrar los 48 estados continentales
excluir = ['Alaska', 'Hawaii', 'Puerto Rico']
states_mainland = states[~states['NAME'].isin(excluir)]
In [21]:
# 3. Unificar en un solo polígono
usa_48 = states_mainland.dissolve()
# 4. Calcular centroide
usa_centroid = usa_48.geometry.centroid.iloc[0]
<ipython-input-21-2712555821>:5: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. usa_centroid = usa_48.geometry.centroid.iloc[0]
In [22]:
print("Coordenadas del centroide:")
print("Longitud:", usa_centroid.x)
print("Latitud:", usa_centroid.y)
Coordenadas del centroide: Longitud: -99.34462193188665 Latitud: 39.38547395898724
In [23]:
cities_url = "https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_50m_populated_places_simple.geojson"
cities = gpd.read_file(cities_url)
In [24]:
cities = cities.to_crs(usa_48.crs)
citiesUSA_clipped = gpd.clip(cities, usa_48)
In [25]:
cities_below = citiesUSA_clipped[citiesUSA_clipped.geometry.y < usa_centroid.y]
In [26]:
fig, ax = plt.subplots(figsize=(12, 8))
usa_48.plot(ax=ax, facecolor="greenyellow", edgecolor="black", linewidth=0.4)
cities_below.plot(ax=ax, marker='+', color='red', markersize=15)
ax.set_title("Ciudades al sur del centroide de EE.UU.", fontsize=14)
ax.set_xlabel("Longitud")
ax.set_ylabel("Latitud")
plt.show()
2. Plot your country as the background. Select with cx all the first administrative divisions above the centroid. Then, use clip to show some lines (rivers, railroads, etc) that cross those divisions.
In [27]:
# Filtrar estados al norte del centroide
states_north = states_mainland[states_mainland.geometry.centroid.y > usa_centroid.y]
<ipython-input-27-2262528969>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. states_north = states_mainland[states_mainland.geometry.centroid.y > usa_centroid.y]
In [28]:
# Cargando ríos
rivers_url = "https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_50m_rivers_lake_centerlines.geojson"
rivers = gpd.read_file(rivers_url)
In [29]:
# Unificar CRS y recortar por los estados al norte
rivers = rivers.to_crs(states_north.crs)
rivers_north = gpd.clip(rivers, states_north)
In [30]:
# Graficar
fig, ax = plt.subplots(figsize=(12, 8))
usa_48.plot(ax=ax, facecolor="whitesmoke", edgecolor="black", linewidth=0.4)
states_north.plot(ax=ax, facecolor="lightblue", edgecolor="black", linewidth=0.6)
rivers_north.plot(ax=ax, edgecolor='blue', linewidth=0.5)
ax.set_xlim(-130, -65)
ax.set_ylim(23, 50)
ax.set_aspect("auto")
ax.set_title("Ríos que cruzan los estados al norte del centroide", fontsize=14)
ax.set_xlabel("Longitud")
ax.set_ylabel("Latitud")
plt.show()
Exercise 2¶
- Create some subset of polygons with your country data at the municipal (or similar level). Use Unary UNION with those polygons, and create a geoDF with the result.
- Dissolve your municipalities by another higher level administrative level. Plot the result.
- If possible, color some areas of your country by aggregating; if not, plot the "median" values in the indicators map.
In [38]:
import geopandas as gpd
import matplotlib.pyplot as plt
# Cargar estados continentales
url_states = "https://www2.census.gov/geo/tiger/GENZ2022/shp/cb_2022_us_state_20m.zip"
states = gpd.read_file(url_states)
states_mainland = states[~states['NAME'].isin(['Alaska', 'Hawaii', 'Puerto Rico'])]
# Disolver todos los estados en un solo polígono (EE.UU. continental)
usa_48 = states_mainland.dissolve()
# Calcular centroide
centroid = usa_48.geometry.centroid.iloc[0]
centroidX, centroidY = centroid.x, centroid.y
<ipython-input-38-668106148>:13: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. centroid = usa_48.geometry.centroid.iloc[0]
In [32]:
# Cargar ciudades del mundo
cities_url = "https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_50m_populated_places_simple.geojson"
cities = gpd.read_file(cities_url)
# Reproyectar al CRS de EE.UU.
cities = cities.to_crs(usa_48.crs)
# Recortar ciudades dentro del área de EE.UU. continental
cities_usa = gpd.clip(cities, usa_48)
In [39]:
# Subdividir ciudades por cuadrantes usando el centroide
cities_TL = cities_usa.cx[:centroidX, centroidY:] # Top Left
cities_TR = cities_usa.cx[centroidX:, centroidY:] # Top Right
cities_BL = cities_usa.cx[:centroidX, :centroidY] # Bottom Left
cities_BR = cities_usa.cx[centroidX:, :centroidY] # Bottom Right
In [40]:
# Dissolver antes de aplicar convex_hull
hull_TL = cities_TL.dissolve().convex_hull
hull_TR = cities_TR.dissolve().convex_hull
hull_BL = cities_BL.dissolve().convex_hull
hull_BR = cities_BR.dissolve().convex_hull
# Convertir a GeoDataFrames
hulls = gpd.GeoDataFrame(geometry=[hull_TL.iloc[0], hull_TR.iloc[0], hull_BL.iloc[0], hull_BR.iloc[0]],
crs=cities_usa.crs)
hulls["quadrant"] = ["Top Left", "Top Right", "Bottom Left", "Bottom Right"]
In [41]:
# Graficar mapa final
base = usa_48.plot(facecolor='white', edgecolor='black', figsize=(10, 8))
cities_TL.plot(ax=base, color='gray', markersize=5)
cities_TR.plot(ax=base, color='orange', markersize=5)
cities_BL.plot(ax=base, color='green', markersize=5)
cities_BR.plot(ax=base, color='red', markersize=5)
# Dibujar las convex hulls con hatch
hulls.plot(ax=base, facecolor='none', edgecolor='blue', linewidth=1.5, linestyle='--', hatch='///', alpha=0.5)
plt.title("Convex Hull por cuadrantes de ciudades en EE.UU.")
plt.xlabel("Longitud")
plt.ylabel("Latitud")
plt.show()
Exercise 3¶
Select some points from your maps.
Create the convex hull for those points.
Turn the hull into a GDF.
Plot the hull on top of the country.
In [42]:
import geopandas as gpd
import matplotlib.pyplot as plt
# Leer datos de estados y provincias del mundo
url = "https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_1_states_provinces.zip"
gdf = gpd.read_file(url)
# Filtrar solo USA continental (excluyendo Alaska, Hawaii y Puerto Rico)
states = gdf[(gdf['admin'] == 'United States of America') &
(~gdf['name'].isin(['Alaska', 'Hawaii', 'Puerto Rico']))].copy()
In [43]:
# Centroides para dividir en 4 regiones
centroidX = states.unary_union.centroid.x
centroidY = states.unary_union.centroid.y
# División geográfica
statesN = states.cx[:, centroidY:]
statesS = states.cx[:, :centroidY]
statesW = states.cx[:centroidX, :]
statesE = states.cx[centroidX:, :]
# Graficar las divisiones
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
# Norte vs Sur
base = statesN.plot(facecolor='yellow', edgecolor='black', linewidth=0.3, alpha=0.6, ax=ax[0])
statesS.plot(facecolor='gray', edgecolor='black', linewidth=0.3, alpha=0.4, ax=base)
ax[0].set_title("North (yellow) & South (gray)")
# Este vs Oeste
base = statesE.plot(facecolor='yellow', edgecolor='black', linewidth=0.3, alpha=0.6, ax=ax[1])
statesW.plot(facecolor='gray', edgecolor='black', linewidth=0.3, alpha=0.4, ax=base)
ax[1].set_title("East (yellow) & West (gray)")
plt.show()
<ipython-input-43-2259675516>:2: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead. centroidX = states.unary_union.centroid.x <ipython-input-43-2259675516>:3: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead. centroidY = states.unary_union.centroid.y
In [44]:
# INTERSECTION: zona común entre N-S y E-O
statesNS = statesN.overlay(statesS, how='intersection', keep_geom_type=True)
statesWE = statesW.overlay(statesE, how='intersection', keep_geom_type=True)
# UNION de ambas intersecciones
mid_union = statesNS.overlay(statesWE, how='union', keep_geom_type=True).dissolve()
mid_union['zone'] = 'middle'
In [45]:
# DIFFERENCE: lo que está en USA pero no en la zona central
diff = states.overlay(mid_union, how='difference', keep_geom_type=True)
# SYMMETRIC DIFFERENCE: lo que está fuera de la intersección de N-S y E-O
sym_diff = statesN.overlay(statesS, how='symmetric_difference', keep_geom_type=True)
# GRAFICAR RESULTADOS
fig, axs = plt.subplots(1, 3, figsize=(18, 5))
# Zona media (intersección)
mid_union.plot(ax=axs[0], facecolor='orange', edgecolor='black')
axs[0].set_title("Zona media de USA (N ∩ S ∩ E ∩ O)")
axs[0].axis('off')
# Diferencia
diff.plot(ax=axs[1], facecolor='lightblue', edgecolor='black')
axs[1].set_title("Diferencia: USA - zona media")
axs[1].axis('off')
# Simétrica
sym_diff.plot(ax=axs[2], facecolor='purple', edgecolor='white', alpha=0.6)
axs[2].set_title("Diferencia simétrica N vs S")
axs[2].axis('off')
plt.tight_layout()
plt.show()
Exercise 4¶
Apply two spatial overlays to your maps. If possible. If not, try the codes below.
In [64]:
states.columns
Out[64]:
Index(['featurecla', 'scalerank', 'adm1_code', 'diss_me', 'iso_3166_2', 'wikipedia', 'iso_a2', 'adm0_sr', 'name', 'name_alt', ... 'FCLASS_ID', 'FCLASS_PL', 'FCLASS_GR', 'FCLASS_IT', 'FCLASS_NL', 'FCLASS_SE', 'FCLASS_BD', 'FCLASS_UA', 'FCLASS_TLC', 'geometry'], dtype='object', length=122)
In [65]:
import geopandas as gpd
import matplotlib.pyplot as plt
# Cargar datos
url = "https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_1_states_provinces.zip"
usa = gpd.read_file(url)
usa = usa[(usa['admin'] == 'United States of America') & (~usa['name'].isin(['Alaska', 'Hawaii', 'Puerto Rico']))]
In [66]:
usa['geometry'] = usa.geometry.buffer(0)
In [67]:
# Dividir por centroide
centroidX, centroidY = usa.unary_union.centroid.x, usa.unary_union.centroid.y
usa_N, usa_S = usa.cx[:, centroidY:], usa.cx[:, :centroidY]
usa_E, usa_W = usa.cx[centroidX:, :], usa.cx[:centroidX, :]
<ipython-input-67-2876454555>:2: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead. centroidX, centroidY = usa.unary_union.centroid.x, usa.unary_union.centroid.y
In [68]:
# Aplicar buffer(0) a las divisiones
for df in [usa_N, usa_S, usa_E, usa_W]:
df['geometry'] = df.geometry.buffer(0)
/usr/local/lib/python3.11/dist-packages/geopandas/geodataframe.py:1819: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value)
In [69]:
diff_NS = usa_N.overlay(usa_S, how='symmetric_difference', keep_geom_type=True)
diff_EW = usa_E.overlay(usa_W, how='symmetric_difference', keep_geom_type=True)
In [70]:
# Graficar
fig, axs = plt.subplots(1, 2, figsize=(16, 6))
diff_NS.plot(ax=axs[0], color='purple'); axs[0].set_title("Norte vs Sur"); axs[0].axis('off')
diff_EW.plot(ax=axs[1], color='orange'); axs[1].set_title("Este vs Oeste"); axs[1].axis('off')
plt.show()