# Source code for hydromt.data_catalog.drivers.geodataframe.pyogrio_driver

"""Driver to read geodataframes using Pyogrio."""

import logging
from os.path import splitext
from pathlib import Path
from typing import Any, ClassVar

import geopandas as gpd
import pandas as pd
from pyogrio import read_dataframe, read_info, write_dataframe
from pyproj import CRS

from hydromt._utils.unused_kwargs import _warn_on_unused_kwargs
from hydromt.data_catalog.drivers.geodataframe.geodataframe_driver import (
    GeoDataFrameDriver,
)
from hydromt.error import NoDataStrategy, exec_nodata_strat
from hydromt.typing import Bbox, Geom, SourceMetadata

logger = logging.getLogger(__name__)


class PyogrioDriver(GeoDataFrameDriver):
    """
    Driver for GeoDataFrame using the pyogrio library: ``pyogrio``.

    Supports reading and writing files supported by the OGR library,
    including geopackage, shapefile, geojson and flatgeobuf.
    """

    name: ClassVar[str] = "pyogrio"
    supports_writing: ClassVar[bool] = True
    # Extensions that `write` accepts as-is; any other extension is replaced
    # by ".fgb" before writing.
    SUPPORTED_EXTENSIONS: ClassVar[set[str]] = {".gpkg", ".shp", ".geojson", ".fgb"}

    def read(
        self,
        uris: list[str],
        *,
        handle_nodata: NoDataStrategy = NoDataStrategy.RAISE,
        metadata: SourceMetadata | None = None,
        mask: Any = None,
        variables: str | list[str] | None = None,
    ) -> gpd.GeoDataFrame:
        """
        Read geospatial data using the pyogrio library into a GeoDataFrame.

        Supports formats such as GeoPackage, Shapefile, GeoJSON, and FlatGeobuf.
        Optionally applies spatial filtering through a bounding box derived from a
        provided mask.

        Parameters
        ----------
        uris : list[str]
            List of URIs to read data from. Only one file is supported per read
            operation. An empty list is treated as "no data" and handled
            according to ``handle_nodata``.
        handle_nodata : NoDataStrategy, optional
            Strategy passed to ``exec_nodata_strat`` when there is nothing to
            return (no URI given, or the file yields zero rows).
            Default is NoDataStrategy.RAISE.
        metadata : SourceMetadata | None, optional
            Not used by this driver; only forwarded to ``_warn_on_unused_kwargs``.
            Present for consistency with the GeoDataFrameDriver interface.
        mask : Any, optional
            Optional GeoDataFrame or GeoSeries used to spatially filter the data
            while reading. Only its bounding box (in the CRS of the file) is used.
        variables : str | list[str] | None, optional
            Column name, or list of column names, to load from the dataset.

        Returns
        -------
        gpd.GeoDataFrame
            The loaded geospatial data. ``None`` is returned when there is no
            data and ``exec_nodata_strat`` returns instead of raising.

        Raises
        ------
        ValueError
            If multiple URIs are provided.
        IOError
            If the source file contains no geometry column.
        """
        _warn_on_unused_kwargs(self.__class__.__name__, {"metadata": metadata})

        if len(uris) > 1:
            raise ValueError(
                "DataFrame: Reading multiple files with the "
                f"{self.__class__.__name__} driver is not supported."
            )
        if not uris:
            # Nothing to read: let the caller's nodata strategy decide instead
            # of failing with a bare IndexError on ``uris[0]``.
            exec_nodata_strat(
                f"No data from {self.name} driver: no file uris were provided.",
                strategy=handle_nodata,
            )
            return None  # handle_nodata == ignore

        _uri = uris[0]
        # The same driver options are used for reading the file info (to get
        # its CRS) and for reading the data itself.
        read_kwargs = self.options.get_kwargs()

        bbox = None
        if mask is not None:
            bbox = _bbox_from_file_and_mask(_uri, mask=mask, **read_kwargs)

        # pyogrio expects a list-like of column names; wrap a single name so
        # the string is not interpreted as a sequence of characters.
        columns = [variables] if isinstance(variables, str) else variables

        gdf: pd.DataFrame | gpd.GeoDataFrame = read_dataframe(
            _uri, bbox=bbox, columns=columns, **read_kwargs
        )
        if not isinstance(gdf, gpd.GeoDataFrame):
            raise IOError(f"DataFrame from uri: '{_uri}' contains no geometry column.")

        if gdf.index.size == 0:
            exec_nodata_strat(
                f"No data from {self.name} driver for file uris: {', '.join(uris)}.",
                strategy=handle_nodata,
            )
            return None  # handle_nodata == ignore
        return gdf

    def write(
        self,
        path: Path | str,
        data: gpd.GeoDataFrame,
        *,
        write_kwargs: dict[str, Any] | None = None,
    ) -> Path:
        """
        Write a GeoDataFrame to disk using the pyogrio library.

        The extension of ``path`` is compared (case-sensitively) against
        ``SUPPORTED_EXTENSIONS`` (`.gpkg`, `.shp`, `.geojson`, `.fgb`). If it is
        not in that set, a warning is logged and the extension is replaced by
        `.fgb` (FlatGeobuf) before writing.

        Parameters
        ----------
        path : Path | str
            Destination path or URI where the GeoDataFrame will be written.
        data : gpd.GeoDataFrame
            The GeoDataFrame to write.
        write_kwargs : dict[str, Any], optional
            Additional keyword arguments passed to `pyogrio.write_dataframe`.
            Default is None.

        Returns
        -------
        Path
            The path the GeoDataFrame was written to; this differs from the
            input ``path`` when the extension was replaced by `.fgb`.
        """
        no_ext, ext = splitext(path)
        write_kwargs = write_kwargs or {}
        if ext not in self.SUPPORTED_EXTENSIONS:
            logger.warning(
                f"driver {self.name} has no support for extension '{ext}', "
                "switching to .fgb."
            )
            path = no_ext + ".fgb"

        write_dataframe(data, path, **write_kwargs)
        return Path(path)
def _bbox_from_file_and_mask(
    uri: str,
    mask: Geom,
    **kwargs,
) -> Bbox | None:
    """Derive a bounding box for ``mask`` expressed in the CRS of the file at ``uri``.

    Pyogrio's ``bbox`` argument must be given in the same CRS as the data being
    read. This helper looks up the CRS of the file via ``read_info``, reprojects
    the mask to it when the two differ, and returns the mask's total bounds.

    Parameters
    ----------
    uri : str
        URI of the data.
    mask : Geom
        Mask used to filter the data while reading. It must expose ``crs``,
        ``to_crs`` and ``total_bounds`` (as GeoDataFrame and GeoSeries do).
    **kwargs
        Forwarded unchanged to ``pyogrio.read_info``.

    Returns
    -------
    Bbox
        ``mask.total_bounds`` as a tuple. When the file reports no CRS, the
        bounds are returned in the mask's own CRS and a warning is logged.
    """
    crs_definition = read_info(uri, **kwargs).get("crs")
    source_crs = CRS.from_user_input(crs_definition) if crs_definition else None

    if source_crs:
        # Only reproject when the mask is not already in the file's CRS.
        if mask.crs != source_crs:
            mask = mask.to_crs(source_crs)
    else:
        # Without a file CRS there is nothing to reproject to; the mask bounds
        # are used as they are.
        logger.warning(
            f"Reading from uri: '{uri}' without CRS definition. Filtering with crs:"
            f" {mask.crs}, cannot compare crs."
        )

    return tuple(mask.total_bounds)