In [ ]:
Copied!
import leafmap
import polars as pl
import geopandas as gpd
from shapely.geometry import Point, Polygon, box
from shapely import wkb
import leafmap
import polars as pl
import geopandas as gpd
from shapely.geometry import Point, Polygon, box
from shapely import wkb
Example 1: Simple Point Data
Create a Polars DataFrame with point geometries using WKT (Well-Known Text)
In [ ]:
Copied!
# Create sample data with WKT geometries.
# Each POINT is written as "x y"; for the coordinates below that means
# longitude first, then latitude.
data = {
    "city": ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"],
    "population": [8336817, 3979576, 2693976, 2320268, 1680992],
    "geometry": [
        "POINT(-74.0060 40.7128)",  # New York
        "POINT(-118.2437 34.0522)",  # Los Angeles
        "POINT(-87.6298 41.8781)",  # Chicago
        "POINT(-95.3698 29.7604)",  # Houston
        "POINT(-112.0740 33.4484)",  # Phoenix
    ],
}

# The geometry column is an ordinary string column at this point; it is only
# interpreted as geometry later, when the frame is handed to the map.
df_polars = pl.DataFrame(data)
print(df_polars)
In [ ]:
Copied!
# Visualize with leafmap.
# Note the axis order: `center` is [latitude, longitude], whereas the WKT
# points in `df_polars` are written longitude-then-latitude.
m = leafmap.Map(center=[37.0902, -95.7129], zoom=4)

# `geometry` names the column that holds the WKT strings; `crs` declares the
# coordinates as EPSG:4326 (WGS 84 longitude/latitude).
m.add_polars(
    df_polars,
    geometry="geometry",
    crs="EPSG:4326",
    layer_name="US Cities",
    zoom_to_layer=True,  # presumably refits the view to the layer; confirm in leafmap docs
)

# A bare trailing expression makes the notebook render the map.
m
Example 2: Converting from GeoDataFrame to Polars
Read GeoJSON/GeoParquet and work with it in Polars
In [ ]:
Copied!
# Start with a GeoDataFrame read from a remote GeoJSON file.
url = "https://github.com/opengeos/datasets/releases/download/vector/cables.geojson"
gdf = gpd.read_file(url)

# Convert to Polars with WKB geometry.
# `GeoSeries.to_wkb()` serializes the whole column to WKB bytes in one
# vectorized call and passes missing geometries through instead of raising,
# unlike a per-row `wkb.dumps`.
gdf["geometry_wkb"] = gdf["geometry"].to_wkb()

# Drop the shapely-object column before conversion so that only plain
# attribute columns plus the WKB bytes column are handed to Polars.
df_polars = pl.DataFrame(gdf.drop(columns=["geometry"]))
print(df_polars.head())
In [ ]:
Copied!
# Visualize the Polars DataFrame.
# No center/zoom is given here; the view is left to `zoom_to_layer` below.
m = leafmap.Map()

# `geometry` points at the WKB bytes column created during the conversion
# from the GeoDataFrame, not at a WKT string column as in Example 1.
m.add_polars(
    df_polars,
    geometry="geometry_wkb",
    crs="EPSG:4326",
    layer_name="Submarine Cables",
    zoom_to_layer=True,
    style={"color": "blue", "weight": 2},  # line color and stroke width
)

# A bare trailing expression makes the notebook render the map.
m
Example 3: Polars Data Processing Pipeline
Demonstrate a Polars-first workflow with filtering and aggregation
In [ ]:
Copied!
from shapely.geometry import box
import numpy as np

# Create sample polygon data: a 10 x 10 grid of rectangles.
# Each cell is anchored at integer (i, j) with i, j in -5..4 and is 0.8 units
# wide and tall, so neighbouring cells are separated by a 0.2 gap.
geometries = []
names = []
values = []
for i in range(-5, 5):
    for j in range(-5, 5):
        # Store the geometry as WKB bytes so it fits in a plain Polars column.
        geometries.append(wkb.dumps(box(i, j, i + 0.8, j + 0.8)))
        names.append(f"Cell_{i}_{j}")
        # Random integer in 0..99 (upper bound is exclusive). Unseeded, so
        # the values differ from run to run.
        values.append(np.random.randint(0, 100))

# All three lists are filled once per (i, j) pair, so they have equal length
# (100) as required to build the frame.
df_grid = pl.DataFrame({"name": names, "value": values, "geometry": geometries})
print(df_grid.head())
print(f"\nTotal cells: {len(df_grid)}")
In [ ]:
Copied!
# Filter high-value cells using Polars: keep rows whose value is strictly
# greater than 75. The WKB geometry column is carried along untouched.
df_filtered = df_grid.filter(pl.col("value") > 75)
print(f"High-value cells: {len(df_filtered)}")

# Visualize the surviving cells; the grid is centred on the origin.
m = leafmap.Map(center=[0, 0], zoom=6)
m.add_polars(
    df_filtered,
    geometry="geometry",
    crs="EPSG:4326",
    layer_name="High Value Cells",
    zoom_to_layer=True,
    style={"fillColor": "red", "fillOpacity": 0.5, "color": "darkred"},
)

# A bare trailing expression makes the notebook render the map.
m
Benefits of Polars Integration
- Performance: Polars is significantly faster than Pandas for large datasets
- Memory Efficiency: Better memory management with Apache Arrow backend
- Native Pipeline: Stay in Polars-native workflow end-to-end
- Modern API: Expressive and intuitive query syntax
- GeoParquet Support: Native support for reading/writing GeoParquet files