Source code for hydromt.gis_utils

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""GIS related convenience functions."""
from __future__ import annotations

import glob
import logging
import os
from os.path import dirname
from typing import Optional, Tuple, Union

import geopandas as gpd
import numpy as np
import xarray as xr
from pyflwdir import gis_utils as gis
from pyogrio import read_info
from pyproj import CRS
from pyproj.transformer import Transformer
from rasterio.transform import Affine
from shapely.geometry import box
from shapely.geometry.base import BaseGeometry

from . import _compat

__all__ = ["spread2d", "nearest", "nearest_merge"]

logger = logging.getLogger(__name__)

# Mean earth radius used by the spherical cell-area computation in `cellarea`.
_R = 6371e3  # Radius of earth in m (approx. 3959 when working in miles)

# Mapping of file extension (without leading dot) -> GDAL raster driver short name.
# NOTE(review): "mpr/mpl" is a single combined key, so a lookup with "mpr" or
# "mpl" alone will not match — confirm this is intended.
GDAL_DRIVER_CODE_MAP = {
    "asc": "AAIGrid",
    "blx": "BLX",
    "bmp": "BMP",
    "bt": "BT",
    "dat": "ZMap",
    "dem": "USGSDEM",
    "gen": "ADRG",
    "gif": "GIF",
    "gpkg": "GPKG",
    "grd": "NWT_GRD",
    "gsb": "NTv2",
    "gtx": "GTX",
    "hdr": "MFF",
    "hf2": "HF2",
    "hgt": "SRTMHGT",
    "img": "HFA",
    "jpg": "JPEG",
    "kro": "KRO",
    "lcp": "LCP",
    "mbtiles": "MBTiles",
    "mpr/mpl": "ILWIS",
    "ntf": "NITF",
    "pix": "PCIDSK",
    "png": "PNG",
    "pnm": "PNM",
    "rda": "R",
    "rgb": "SGI",
    "rst": "RST",
    "rsw": "RMF",
    "sdat": "SAGA",
    "sqlite": "Rasterlite",
    "ter": "Terragen",
    "tif": "GTiff",
    "vrt": "VRT",
    "xpm": "XPM",
    "xyz": "XYZ",
}
# Inverse lookup: GDAL driver short name -> file extension.
GDAL_EXT_CODE_MAP = {v: k for k, v in GDAL_DRIVER_CODE_MAP.items()}

# Type aliases used in annotations throughout this module.
GPD_TYPES = Union[gpd.GeoDataFrame, gpd.GeoSeries]  # any geopandas container
GEOM_TYPES = Union[GPD_TYPES, BaseGeometry]  # geopandas container or shapely geometry

## GEOM functions


def nearest_merge(
    gdf1: gpd.GeoDataFrame,
    gdf2: gpd.GeoDataFrame,
    columns: Optional[list] = None,
    max_dist: Optional[float] = None,
    overwrite: bool = False,
    inplace: bool = False,
    logger=logger,
) -> gpd.GeoDataFrame:
    """Merge attributes of the nearest `gdf2` feature into `gdf1`.

    For every feature in `gdf1` the nearest feature in `gdf2` is looked up with
    :py:func:`nearest`. The merge can optionally be limited to matches closer
    than a maximum distance `max_dist`. Unless `overwrite = True`, `gdf2` values
    are only written where `gdf1` has missing values.

    Two bookkeeping columns are always written to the result: "distance_right"
    (distance to the nearest `gdf2` feature) and "index_right" (index label of
    the matched `gdf2` feature, or -1 where no match passed the distance
    threshold).

    Parameters
    ----------
    gdf1, gdf2: geopandas.GeoDataFrame
        `gdf1` receives the attributes, `gdf2` provides them.
    columns : list of str, optional
        Names of columns in `gdf2` to merge, by default None which selects all
        columns of `gdf2`. The "geometry" column is never merged.
    max_dist : float, optional
        Maximum distance threshold for merge (strictly smaller than),
        by default None, i.e.: no threshold.
    overwrite : bool, optional
        If False (default) gdf2 values are only merged where gdf1 has missing
        values, i.e. NaN values or empty strings for existing columns,
        or missing columns.
    inplace : bool,
        If True, apply the merge to gdf1, otherwise return a new object.
    logger:
        The logger to use.

    Returns
    -------
    gpd.GeoDataFrame
        Merged GeoDataFrames
    """
    nn_index, nn_dist = nearest(gdf1, gdf2)
    if not inplace:
        gdf1 = gdf1.copy()

    # boolean selection of gdf1 rows which have a match within the threshold
    if max_dist is None:
        within = np.ones_like(nn_index, dtype=bool)
    else:
        within = nn_dist < max_dist
    if columns is None:
        columns = gdf2.columns

    gdf1["distance_right"] = nn_dist
    gdf1["index_right"] = -1
    matched_labels = nn_index[within]
    gdf1.loc[within, "index_right"] = matched_labels

    excluded = ["geometry"]
    for col in columns:
        if col not in gdf2:
            logger.warning(f"Column {col} not found in gdf2 and skipped.")
            continue
        if col in excluded:
            continue
        merged = gdf2.loc[matched_labels, col].values
        if not overwrite and col in gdf1:
            # only fill cells which are currently empty (NaN or "")
            current = gdf1.loc[within, col]
            is_empty = np.logical_or(current.isnull(), current.eq(""))
            merged = np.where(is_empty, merged, current)
        gdf1.loc[within, col] = merged
    return gdf1
def nearest(
    gdf1: gpd.GeoDataFrame, gdf2: gpd.GeoDataFrame
) -> Tuple[np.ndarray, np.ndarray]:
    """Return the index of and distance to the nearest geometry.

    Every `gdf1` geometry is first reduced to a single point: points are used
    as-is, (multi)lines are represented by their mid point and (multi)polygons
    by their representative point. Mixed geometry types are not yet supported.

    If `gdf2` has a geographic CRS the distance is computed after reprojecting
    to Web Mercator (EPSG:3857) to obtain meters; otherwise it is computed in
    the units of the `gdf2` CRS.

    Note: geopandas >= 0.10.0 provides a `sjoin_nearest` method which offers
    very similar functionality.

    Parameters
    ----------
    gdf1, gdf2: geopandas.GeoDataFrame
        Source `gdf1` and destination `gdf2` geometries.

    Returns
    -------
    index: ndarray
        index of nearest `gdf2` geometry
    dst: ndarray of float
        distance to the nearest `gdf2` geometry

    Raises
    ------
    NotImplementedError
        If `gdf1` contains a mix of geometry types.
    ImportError
        If shapely < 2.0 is installed.
    """
    geom_types = gdf1.type
    line_types = ["LineString", "MultiLineString"]
    polygon_types = ["Polygon", "MultiPolygon"]
    if np.all(geom_types == "Point"):
        pnts = gdf1.geometry.copy()
    elif np.all(np.isin(geom_types, line_types)):
        # mid point along the line
        pnts = gdf1.geometry.interpolate(0.5, normalized=True)
    elif np.all(np.isin(geom_types, polygon_types)):
        # point guaranteed to lie inside the polygon
        pnts = gdf1.geometry.representative_point()
    else:
        raise NotImplementedError("Mixed geometry dataframes are not yet supported.")

    if gdf1.crs != gdf2.crs:
        pnts = pnts.to_crs(gdf2.crs)

    # find nearest
    # NOTE: requires shapely v2.0; changed in v0.6.1
    if not _compat.HAS_SHAPELY20:
        raise ImportError("Shapely >= 2.0.0 is required for execution")
    query_points = pnts.geometry.values
    # second row holds the positional indices into gdf2
    nn_pos = gdf2.sindex.nearest(query_points, return_all=False)[1]

    # get distance in meters
    matched = gdf2.iloc[nn_pos]
    if matched.crs.is_geographic:
        web_mercator = 3857
        pnts = pnts.to_crs(web_mercator)
        matched = matched.to_crs(web_mercator)
    dst = matched.distance(pnts, align=False).values
    return gdf2.index.values[nn_pos], dst
def filter_gdf(gdf, geom=None, bbox=None, crs=None, predicate="intersects"):
    """Filter GeoDataFrame geometries based on geometry mask or bounding box.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Geometries to filter; its spatial index is queried.
    geom : geopandas.GeoDataFrame/Series or shapely geometry, optional
        Geometry mask. Takes precedence over `bbox` if both are provided.
        Geopandas objects are reprojected to the CRS of `gdf` (or assigned that
        CRS if they have none) and merged into a single geometry.
    bbox : array-like of floats, optional
        Bounding box passed to :py:func:`shapely.geometry.box`,
        i.e. (xmin, ymin, xmax, ymax). Only used if `geom` is None.
    crs : optional
        CRS of `bbox`, by default the CRS of `gdf`.
    predicate : str, optional
        Spatial predicate passed to the spatial index query,
        by default "intersects".

    Returns
    -------
    idx : ndarray
        Sorted indices, as returned by the spatial index query, of the
        geometries in `gdf` which match the filter.

    Raises
    ------
    ValueError
        If neither `geom` nor `bbox` is provided, or if `geom` has an
        unsupported type.
    """
    gtypes = (gpd.GeoDataFrame, gpd.GeoSeries, BaseGeometry)
    if geom is None:
        if bbox is None:
            raise ValueError("Either geom or bbox is required.")
        bbox_crs = gdf.crs if crs is None else crs
        geom = gpd.GeoSeries([box(*bbox)], crs=bbox_crs)
    elif not isinstance(geom, gtypes):
        raise ValueError(
            f"Unknown geometry mask type {type(geom).__name__}. "
            "Provide geopandas GeoDataFrame, GeoSeries or shapely geometry."
        )

    if not isinstance(geom, BaseGeometry):
        # align the CRS of the mask with the data before querying
        if geom.crs is None and gdf.crs is not None:
            geom = geom.set_crs(gdf.crs)
        elif gdf.crs is not None and geom.crs != gdf.crs:
            geom = geom.to_crs(gdf.crs)
        # collapse the geopandas object to a single shapely geometry
        geom = geom.unary_union

    matches = gdf.sindex.query(geom, predicate=predicate)
    return np.sort(matches)
def parse_geom_bbox_buffer(geom=None, bbox=None, buffer=0):
    """Parse geom or bbox to a (buffered) geometry.

    Arguments
    ---------
    geom : geopandas.GeoDataFrame/Series, optional
        A geometry defining the area of interest. Takes precedence over `bbox`.
    bbox : array-like of floats, optional
        (xmin, ymin, xmax, ymax) bounding box of area of interest
        (in WGS84 coordinates). Only used if `geom` is None.
    buffer : float, optional
        Buffer around the `bbox` or `geom` area of interest in meters. By default 0.

    Returns
    -------
    geom: geometry
        The (buffered) geometry. If a buffer is applied to a geometry with a
        geographic CRS, the result is in Web Mercator (EPSG:3857).

    Raises
    ------
    ValueError
        If neither `geom` nor `bbox` is provided.
    """
    if geom is None:
        if bbox is None:
            raise ValueError("No geom or bbox provided.")
        # convert bbox to geom with crs EPSG:4326 to apply buffer later
        geom = gpd.GeoDataFrame(geometry=[box(*bbox)], crs=4326)

    if buffer > 0:
        # the buffer is given in meters, hence a projected CRS is required
        if geom.crs.is_geographic:
            geom = geom.to_crs(3857)
        geom = geom.buffer(buffer)
    return geom


# REPROJ
def utm_crs(bbox):
    """Return the CRS of the UTM zone nearest to the center of a bounding box.

    Parameters
    ----------
    bbox : array-like of floats
        (xmin, ymin, xmax, ymax) bounding box in latlon WGS84 (EPSG:4326) coordinates

    Returns
    -------
    crs: pyproj.CRS
        CRS of UTM projection
    """
    left, bottom, right, top = bbox
    x = (left + right) / 2
    y = (top + bottom) / 2
    zone = int(np.ceil((x + 180) / 6))
    # Wrap into the valid UTM zone range 1..60. Without this a center longitude
    # of exactly -180 yields zone 0 and longitudes beyond 180 (e.g. a bbox which
    # crosses the antimeridian) yield zone 61+, both of which are invalid.
    zone = (zone - 1) % 60 + 1
    kwargs = dict(zone=zone)
    # BUGFIX hydroMT v0.3.5: south=False doesn't work only add south=True if y<0
    if y < 0:
        kwargs.update(south=True)
    # BUGFIX hydroMT v0.4.6: add datum
    epsg = CRS(proj="utm", datum="WGS84", ellps="WGS84", **kwargs).to_epsg()
    return CRS.from_epsg(epsg)
def parse_crs(crs, bbox=None):
    """Parse user input to a pyproj CRS object.

    Parameters
    ----------
    crs : str, int, pyproj.CRS
        Any input accepted by :py:meth:`pyproj.CRS.from_user_input`, or the
        string "utm" to select the UTM zone at the center of `bbox`.
    bbox : array-like of floats, optional
        (xmin, ymin, xmax, ymax) bounding box in WGS84 coordinates.
        Required if `crs` is "utm".

    Returns
    -------
    crs: pyproj.CRS
        The parsed coordinate reference system.

    Raises
    ------
    ValueError
        If `crs` is "utm" and no `bbox` is provided.
    """
    if crs == "utm":
        if bbox is None:
            raise ValueError('CRS "utm" requires bbox')
        return utm_crs(bbox)
    return CRS.from_user_input(crs)
def axes_attrs(crs):
    """Provide CF-compliant variable names and metadata for axes.

    Parameters
    ----------
    crs: pyproj.CRS
        coordinate reference system; any other input is parsed with
        :py:meth:`pyproj.CRS.from_user_input`.

    Returns
    -------
    x_dim: str - variable name of x dimension ('longitude' or 'x')
    y_dim: str - variable name of y dimension ('latitude' or 'y')
    x_attr: dict - attributes of variable x
    y_attr: dict - attributes of variable y
    """
    # make sure we are dealing with a pyproj CRS object
    crs = crs if isinstance(crs, CRS) else CRS.from_user_input(crs)
    x_dim, y_dim = ("longitude", "latitude") if crs.is_geographic else ("x", "y")
    # CF attributes per axis of the coordinate system; pick the X and Y entries
    cf_axes = crs.cs_to_cf()
    x_attrs = [axis for axis in cf_axes if axis["axis"] == "X"][0]
    y_attrs = [axis for axis in cf_axes if axis["axis"] == "Y"][0]
    return x_dim, y_dim, x_attrs, y_attrs
def meridian_offset(ds, bbox=None):
    """Shift data along the x-axis of global datasets to avoid issues along the 180 meridian.

    Without a bbox the data is shifted to span 180W to 180E. With bbox the data is
    shifted to at least span the bbox west to east, also if the bbox crosses the
    180 meridian.

    Note that this method is only applicable to data that spans 360 degrees
    longitude and is set in a global geographic CRS (WGS84).

    Parameters
    ----------
    ds: xarray.Dataset
        input dataset
    bbox: tuple of float
        bounding box (west, south, east, north) in degrees

    Returns
    -------
    ds: xarray.Dataset
        dataset with x dim re-arranged if needed; the input object itself is
        returned if no shift is required.

    Raises
    ------
    ValueError
        If the dataset has no CRS, a projected CRS, or does not span 360 degrees.
    """
    w, _, e, _ = ds.raster.bounds
    crs = ds.raster.crs
    # grid should be geographic and span 360 degrees!
    is_global = crs is not None and not crs.is_projected and np.isclose(e - w, 360)
    if not is_global:
        raise ValueError(
            "This method is only applicable to data that spans 360 degrees "
            "longitude and is set in a global geographic CRS"
        )

    x_name = ds.raster.x_dim
    lons = np.copy(ds[x_name].values)
    # requested west and east; global extent in case of no bbox
    bbox_w, bbox_e = (-180, 180) if bbox is None else (bbox[0], bbox[2])

    if bbox_w < w:
        # shift lons east of x0 by 360 degrees west
        x0 = 180 if bbox_w >= -180 else 0
        threshold = max(bbox_e, x0)
        lons = np.where(lons > threshold, lons - 360, lons)
    elif bbox_e > e:
        # shift lons west of x0 by 360 degrees east
        x0 = -180 if bbox_e <= 180 else 0
        threshold = min(bbox_w, x0)
        lons = np.where(lons < threshold, lons + 360, lons)
    else:
        # data already covers the requested extent
        return ds

    shifted = ds.copy(deep=False)  # make sure not to overwrite original ds
    shifted[x_name] = xr.Variable(shifted[x_name].dims, lons)
    return shifted.sortby(x_name)
# TRANSFORM
def affine_to_coords(transform, shape, x_dim="x", y_dim="y"):
    """Return a raster axis with pixel center coordinates based on the transform.

    For a transform without rotation/shear terms 1D coordinates named `y_dim`
    and `x_dim` are returned. Otherwise 2D coordinate arrays named "yc" and "xc"
    with dimensions (`y_dim`, `x_dim`) are returned.

    Parameters
    ----------
    transform : affine transform
        Two dimensional affine transform for 2D linear mapping
    shape : tuple of int
        The height, width  of the raster.
    x_dim, y_dim: str
        The name of the x and y dimensions

    Returns
    -------
    x, y coordinate arrays : dict of tuple with dims and coords
    """
    if not isinstance(transform, Affine):
        transform = Affine(*transform)
    height, width = shape
    is_rectilinear = np.isclose(transform.b, 0) and np.isclose(transform.d, 0)

    if not is_rectilinear:
        # rotated/sheared grid: every cell needs its own x and y coordinate
        index_grid = np.meshgrid(np.arange(width), np.arange(height))
        x_coords, y_coords = (
            transform * transform.translation(0.5, 0.5) * index_grid
        )
        return {
            "yc": ((y_dim, x_dim), y_coords),
            "xc": ((y_dim, x_dim), x_coords),
        }

    # axis aligned grid: evaluate the cell centers along the first row / column
    half = 0.5
    x_coords, _ = transform * (np.arange(width) + half, np.zeros(width) + half)
    _, y_coords = transform * (np.zeros(height) + half, np.arange(height) + half)
    return {
        y_dim: (y_dim, y_coords),
        x_dim: (x_dim, x_coords),
    }
def affine_to_meshgrid(transform, shape):
    """Return a meshgrid of pixel center coordinates based on the transform.

    Parameters
    ----------
    transform : affine transform
        Two dimensional affine transform for 2D linear mapping
    shape : tuple of int
        The height, width  of the raster.

    Returns
    -------
    x_coords, y_coords: ndarray
        2D arrays of x and y coordinates
    """
    if not isinstance(transform, Affine):
        transform = Affine(*transform)
    height, width = shape
    # column/row index grids; the half cell translation moves from the cell
    # corner to the cell center
    index_grid = np.meshgrid(np.arange(width), np.arange(height))
    to_center = transform * transform.translation(0.5, 0.5)
    x_coords, y_coords = to_center * index_grid
    return x_coords, y_coords


## CELLAREAS
def reggrid_area(lats, lons):
    """Return the cell area [m2] for a regular grid.

    The grid resolution is derived from the mean spacing of the cell centre
    coordinates.

    Parameters
    ----------
    lats, lons : ndarray
        1D arrays with the cell centre latitudes and longitudes in degrees.

    Returns
    -------
    area : ndarray
        2D array with shape (lats.size, lons.size) of cell areas.
    """
    lon_step = np.abs(np.mean(np.diff(lons)))
    lat_step = np.abs(np.mean(np.diff(lats)))
    # the area only varies with latitude; broadcast the column over all longitudes
    template = np.ones((lats.size, lons.size), dtype=lats.dtype)
    area_per_lat = cellarea(lats, lon_step, lat_step)
    return area_per_lat[:, None] * template
def cellarea(lat, xres=1.0, yres=1.0):
    """Return the area [m2] of a cell on a sphere.

    Based on the cell center latitude and the cell resolution, both measured
    in degrees.

    Parameters
    ----------
    lat : float or ndarray
        Cell center latitude in degrees.
    xres, yres : float, optional
        Cell resolution in degrees in x and y direction, by default 1.0.
        The sign of the resolution is ignored.

    Returns
    -------
    area : float or ndarray
        Cell area.
    """
    half_height = np.abs(yres) / 2.0
    lat_lower = np.radians(lat - half_height)
    lat_upper = np.radians(lat + half_height)
    width_rad = np.radians(np.abs(xres))
    # area of a lat/lon bounded patch on a sphere with radius _R
    return _R**2 * width_rad * (np.sin(lat_upper) - np.sin(lat_lower))
def cellres(lat, xres=1.0, yres=1.0):
    """Return the cell (x, y) resolution [m].

    Based on cell center latitude and its resolution measured in degrees.

    Parameters
    ----------
    lat : float or ndarray
        Cell center latitude in degrees.
    xres, yres : float, optional
        Cell resolution in degrees in x and y direction, by default 1.0.

    Returns
    -------
    dx, dy : float or ndarray
        Cell resolution in x and y direction.
    """
    # series coefficients for the length of one degree of latitude / longitude
    lat_c1, lat_c2, lat_c3, lat_c4 = 111132.92, -559.82, 1.175, -0.0023
    lon_c1, lon_c2, lon_c3 = 111412.84, -93.5, 0.118

    phi = np.radians(lat)  # numpy cos works in radians!

    # length of a degree of latitude and longitude in meters
    meters_per_deg_lat = (
        lat_c1
        + lat_c2 * np.cos(2.0 * phi)
        + lat_c3 * np.cos(4.0 * phi)
        + lat_c4 * np.cos(6.0 * phi)
    )
    meters_per_deg_lon = (
        lon_c1 * np.cos(phi)
        + lon_c2 * np.cos(3.0 * phi)
        + lon_c3 * np.cos(5.0 * phi)
    )
    return meters_per_deg_lon * xres, meters_per_deg_lat * yres
## SPREAD
def spread2d(
    da_obs: xr.DataArray,
    da_mask: Optional[xr.DataArray] = None,
    da_friction: Optional[xr.DataArray] = None,
    nodata: Optional[float] = None,
) -> xr.Dataset:
    """Return values of `da_obs` spread to cells with `nodata` value within `da_mask`.

    powered by :py:meth:`pyflwdir.gis_utils.spread2d`.

    Parameters
    ----------
    da_obs : xarray.DataArray
        Input raster with observation values and background/nodata values which
        are filled by the spreading algorithm.
    da_mask : xarray.DataArray, optional
        Mask of cells to fill with the spreading algorithm, by default None.
        Must be on the same grid as `da_obs`.
    da_friction : xarray.DataArray, optional
        Friction values used by the spreading algorithm to calculate the friction
        distance, by default None. Must be on the same grid as `da_obs`.
    nodata : float, optional
        Nodata or background value. Must be finite numeric value. If not given the
        raster nodata value is used.

    Returns
    -------
    ds_out: xarray.Dataset
        Dataset with spread source values, linear index of the source cell
        "source_idx" and friction distance to the source cell "source_dst".

    Raises
    ------
    ValueError
        If no nodata value is available or if it is NaN.
    """
    if nodata is None:
        nodata = da_obs.raster.nodata
    if nodata is None or np.isnan(nodata):
        raise ValueError(f'"nodata" must be a finite value, not {nodata}')

    # optional inputs are passed on as plain arrays, they must share the obs grid
    msk = None
    if da_mask is not None:
        assert da_obs.raster.identical_grid(da_mask)
        msk = da_mask.values
    frc = None
    if da_friction is not None:
        assert da_obs.raster.identical_grid(da_friction)
        frc = da_friction.values

    out, src, dst = gis.spread2d(
        obs=da_obs.values,
        msk=msk,
        frc=frc,
        nodata=nodata,
        latlon=da_obs.raster.crs.is_geographic,
        transform=da_obs.raster.transform,
    )

    # combine outputs and return as dataset
    dims = da_obs.raster.dims
    coords = da_obs.raster.coords

    def _to_dataarray(data, name):
        return xr.DataArray(dims=dims, coords=coords, data=data, name=name)

    da_out = _to_dataarray(out, da_obs.name or "source_value")
    da_out.raster.attrs.update(**da_obs.attrs)  # keep attrs incl nodata and unit
    da_src = _to_dataarray(src, "source_idx")
    da_src.raster.set_nodata(-1)
    da_dst = _to_dataarray(dst, "source_dst")
    da_dst.raster.set_nodata(-1)
    da_dst.attrs.update(unit="m")

    ds_out = xr.merge([da_out, da_src, da_dst])
    ds_out.raster.set_crs(da_obs.raster.crs)
    return ds_out
def create_vrt(
    vrt_path: str,
    files: Optional[list] = None,
    files_path: Optional[str] = None,
):
    r"""Create a .vrt file from a list of raster datasets.

    Either a list of files (`files`) or a path containing wildcards
    (`files_path`) to infer the list of files is required. If both are given,
    `files` is used.

    Parameters
    ----------
    vrt_path : str
        Path of the output vrt. Missing parent directories are created.
    files : list, optional
        List of raster datasets filenames, by default None
    files_path : str, optional
        Unix style path containing a pattern using wildcards (*)
        n.b. this is without an extension
        e.g. c:\temp\*\*.tif for all tif files in subfolders of 'c:\temp'

    Raises
    ------
    ValueError
        If neither `files` nor `files_path` is provided.
    ImportError
        If the optional dependency rio-vrt is not installed.
    IOError
        If the list of files is empty.
    """
    if files is None and files_path is None:
        raise ValueError("Either 'files' or 'files_path' is required")
    if not _compat.HAS_RIO_VRT:
        raise ImportError(
            "rio-vrt is required for execution, install with 'pip install rio-vrt'"
        )
    # optional dependency, only imported after the availability check
    import rio_vrt

    if files is None:
        files = glob.glob(files_path)
    if len(files) == 0:
        raise IOError(f"No files found at {files_path}")

    # dirname returns "" for a bare filename (output in the current working
    # directory); os.makedirs("") would raise, so only create real directories.
    # exist_ok avoids a race between checking and creating the directory.
    outdir = dirname(vrt_path)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    rio_vrt.build_vrt(vrt_path, files=files, relative=True)
    return None
def to_geographic_bbox(bbox, source_crs):
    """Transform a bounding box to geographic WGS84 (EPSG:4326) coordinates.

    Parameters
    ----------
    bbox : array-like of floats
        (xmin, ymin, xmax, ymax) bounding box in `source_crs`.
    source_crs : pyproj.CRS or None
        CRS of `bbox`. If None a warning is logged and `bbox` is returned
        unchanged.

    Returns
    -------
    bbox : array-like of floats
        (xmin, ymin, xmax, ymax) bounding box in EPSG:4326, i.e. in
        (lon, lat) order.
    """
    target_crs = CRS.from_user_input(4326)
    if source_crs is None:
        logger.warning("No CRS was set. Skipping CRS conversion")
    elif source_crs != target_crs:
        # always_xy keeps the (x, y) / (lon, lat) order for input and output.
        # Without it pyproj follows the authority axis order, which is
        # (lat, lon) for EPSG:4326, and the result would be
        # (ymin, xmin, ymax, xmax).
        transformer = Transformer.from_crs(source_crs, target_crs, always_xy=True)
        bbox = transformer.transform_bounds(*bbox)
    return bbox


def bbox_from_file_and_filters(
    fn: str,
    bbox: Union[GEOM_TYPES, None] = None,
    mask: GEOM_TYPES | None = None,
    crs: CRS | None = None,
) -> Tuple[float, float, float, float] | None:
    """Create a bbox from the file metadata and filter options.

    Pyogrio does not accept a mask, and requires a bbox in the same CRS as the data.
    This function takes the possible bbox filter, mask filter and crs of the input data
    and returns a bbox in the same crs as the data based on the input filters.
    As pyogrio currently does not support filtering using a mask, the mask is converted
    to a bbox and the bbox is returned so that the data has some geospatial filtering.

    Parameters
    ----------
    fn: str,
        uri to the filename.
    bbox: GeoDataFrame | GeoSeries | BaseGeometry
        bounding box to filter the data while reading
    mask: GeoDataFrame | GeoSeries | BaseGeometry
        mask to filter the data while reading
    crs: pyproj.CRS
        coordinate reference system of the bounding box or geometry. If already set,
        this argument is ignored. It is also used as CRS of the data if the file
        metadata does not provide one.

    Returns
    -------
    tuple of float or None
        (xmin, ymin, xmax, ymax) total bounds of the filter in the CRS of the
        data, or None if neither `bbox` nor `mask` is provided.

    Raises
    ------
    ValueError
        If both `bbox` and `mask` are provided.
    """
    if bbox is not None and mask is not None:
        raise ValueError(
            "Both 'bbox' and 'mask' are provided. Please provide only one."
        )
    if bbox is None and mask is None:
        return None

    # CRS of the data: file metadata first, then the crs argument, then WGS84
    if source_crs_str := read_info(fn).get("crs"):
        source_crs = CRS(source_crs_str)
    elif crs:
        source_crs = crs
    else:  # assume WGS84
        source_crs = CRS("EPSG:4326")

    if mask is not None:
        bbox = mask

    # convert bbox to geom with input crs (assume WGS84 if not provided)
    crs = crs if crs is not None else CRS.from_user_input(4326)
    if isinstance(bbox, BaseGeometry):
        bbox = gpd.GeoSeries(bbox, crs=crs)
    # only assign the crs if the geometry does not carry one yet
    bbox = bbox if bbox.crs is not None else bbox.set_crs(crs)
    return tuple(bbox.to_crs(source_crs).total_bounds)