"""Grid Component."""
from logging import Logger, getLogger
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
from affine import Affine
from pyproj import CRS
from shapely.geometry import box
from hydromt._io.readers import _read_nc
from hydromt._io.writers import _write_nc
from hydromt._typing.error import NoDataStrategy, exec_nodata_strat
from hydromt._typing.type_def import DeferedFileClose, Number
from hydromt.model.components.base import ModelComponent
from hydromt.model.components.spatial import SpatialModelComponent
from hydromt.model.processes.grid import (
create_grid_from_region,
grid_from_constant,
grid_from_geodataframe,
grid_from_raster_reclass,
grid_from_rasterdataset,
)
from hydromt.model.steps import hydromt_step
if TYPE_CHECKING:
from hydromt.model.model import Model
__all__ = ["GridComponent"]
logger: Logger = getLogger(__name__)
[docs]
class GridComponent(SpatialModelComponent):
"""ModelComponent class for grid components.
This class is used for setting, creating, writing, and reading regular grid data for a
HydroMT model. The grid component data stored in the ``data`` property of this class is of the
hydromt.gis.raster.RasterDataset type which is an extension of xarray.Dataset for regular grid.
"""
[docs]
def __init__(
self,
model: "Model",
*,
filename: str = "grid/grid.nc",
region_component: Optional[str] = None,
region_filename: str = "grid/grid_region.geojson",
):
"""
Initialize a GridComponent.
Parameters
----------
model: Model
HydroMT model instance
filename: str
The path to use for reading and writing of component data by default.
By default "grid/grid.nc".
region_component: str, optional
The name of the region component to use as reference for this component's region.
If None, the region will be set to the grid extent. Note that the create
method only works if the region_component is None. For add_data_from_*
methods, the other region_component should be a reference to another
grid component for correct reprojection.
region_filename: str
The path to use for reading and writing of the region data by default.
By default "grid/grid_region.geojson".
"""
# region_component referencing is not possible for grids. The region should be passed via create().
super().__init__(
model=model,
region_component=region_component,
region_filename=region_filename,
)
self._data: Optional[xr.Dataset] = None
self._filename: str = filename
[docs]
def set(
self,
data: Union[xr.DataArray, xr.Dataset, np.ndarray],
name: Optional[str] = None,
):
"""Add data to grid.
All layers of grid must have identical spatial coordinates.
Parameters
----------
data: xarray.DataArray or xarray.Dataset
new map layer to add to grid
name: str, optional
Name of new map layer, this is used to overwrite the name of a DataArray
and ignored if data is a Dataset
"""
self._initialize_grid()
assert self._data is not None
name_required = isinstance(data, np.ndarray) or (
isinstance(data, xr.DataArray) and data.name is None
)
if name is None and name_required:
raise ValueError(f"Unable to set {type(data).__name__} data without a name")
if isinstance(data, np.ndarray):
if data.shape != self._data.raster.shape:
raise ValueError("Shape of data and grid maps do not match")
data = xr.DataArray(dims=self._data.raster.dims, data=data, name=name)
elif isinstance(data, xr.DataArray):
if name is not None:
data.name = name
data = data.to_dataset()
elif not isinstance(data, xr.Dataset):
raise ValueError(f"cannot set data of type {type(data).__name__}")
if len(self._data) == 0: # empty grid
self._data = data
else:
for dvar in data.data_vars:
if dvar in self._data:
if self.root.is_reading_mode():
logger.warning(f"Replacing grid map: {dvar}")
self._data[dvar] = data[dvar]
[docs]
@hydromt_step
def write(
self,
filename: Optional[str] = None,
*,
gdal_compliant: bool = False,
rename_dims: bool = False,
force_sn: bool = False,
region_options: Optional[Dict[str, Any]] = None,
**kwargs,
) -> Optional[DeferedFileClose]:
"""Write model grid data to netcdf file at <root>/<fn>.
key-word arguments are passed to :py:meth:`~hydromt.model.Model.write_nc`
Parameters
----------
filename : str, optional
filename relative to model root, by default 'grid/grid.nc'
gdal_compliant : bool, optional
If True, write grid data in a way that is compatible with GDAL,
by default False
rename_dims: bool, optional
If True and gdal_compliant, rename x_dim and y_dim to standard names
depending on the CRS (x/y for projected and lat/lon for geographic).
force_sn: bool, optional
If True and gdal_compliant, forces the dataset to have
South -> North orientation.
region_options : dict, optional
Options to pass to the write_region method.
Can contain `filename`, `to_wgs84`, and anything that will be passed to `GeoDataFrame.to_file`.
If `filename` is not provided, self.region_filename will be used.
**kwargs : dict
Additional keyword arguments to be passed to the `write_nc` method.
"""
self.root._assert_write_mode()
region_options = region_options or {}
self.write_region(**region_options)
if len(self.data) == 0:
exec_nodata_strat(
msg="No grid data found, skip writing.",
strategy=NoDataStrategy.WARN,
)
return None
self.write_region()
# write_nc requires dict - use dummy 'grid' key
return _write_nc(
{"grid": self.data},
filename or self._filename,
root=self.model.root.path,
gdal_compliant=gdal_compliant,
rename_dims=rename_dims,
force_overwrite=self.root.mode.is_override_mode(),
force_sn=force_sn,
**kwargs,
)
[docs]
@hydromt_step
def read(
self,
filename: Optional[str] = None,
*,
mask_and_scale: bool = False,
**kwargs,
) -> None:
"""Read model grid data at <root>/<fn> and add to grid property.
key-word arguments are passed to :py:meth:`~hydromt.model.Model.read_nc`
Parameters
----------
filename : str, optional
filename relative to model root, by default 'grid/grid.nc'
mask_and_scale : bool, optional
If True, replace array values equal to _FillValue with NA and scale values
according to the formula original_values * scale_factor + add_offset, where
_FillValue, scale_factor and add_offset are taken from variable attributes
(if they exist).
**kwargs : dict
Additional keyword arguments to be passed to the `read_nc` method.
"""
self.root._assert_read_mode()
self._initialize_grid(skip_read=True)
# Load grid data in r+ mode to allow overwriting netcdf files
if self.root.is_reading_mode() and self.root.is_writing_mode():
kwargs["load"] = True
loaded_nc_files = _read_nc(
filename or self._filename,
self.root.path,
single_var_as_array=False,
mask_and_scale=mask_and_scale,
**kwargs,
)
for ds in loaded_nc_files.values():
self.set(ds)
[docs]
@hydromt_step
def create_from_region(
self,
region: Dict[str, Any],
*,
res: Optional[Number] = None,
crs: Optional[int] = None,
region_crs: int = 4326,
rotated: bool = False,
hydrography_path: Optional[str] = None,
basin_index_path: Optional[str] = None,
add_mask: bool = True,
align: bool = True,
dec_origin: int = 0,
dec_rotation: int = 3,
) -> xr.DataArray:
"""HYDROMT CORE METHOD: Create a 2D regular grid or reads an existing grid.
A 2D regular grid will be created from a geometry (geom_fn) or bbox. If an
existing grid is given, then no new grid will be generated.
Adds/Updates model layers (if add_mask):
* **mask** grid mask: add grid mask to grid object
Parameters
----------
region : dict
Dictionary describing region of interest, e.g.:
* {'bbox': [xmin, ymin, xmax, ymax]}
* {'geom': 'path/to/polygon_geometry'}
* {'grid': 'path/to/grid_file'}
* {'basin': [x, y]}
Region must be of kind [grid, bbox, geom, basin, subbasin, interbasin].
res: float or int, optional
Resolution used to generate 2D grid [unit of the CRS], required if region
is not based on 'grid'.
crs : int, optional
EPSG code of the grid to create.
region_crs : int, optional
EPSG code of the region geometry, by default None. Only applies if region is
of kind 'bbox'or if geom crs is not defined in the file itself.
rotated : bool
if True, a minimum rotated rectangular grid is fitted around the region,
by default False. Only applies if region is of kind 'bbox', 'geom'
hydrography_fn : str, optional
Name of data source for hydrography data. Required if region is of kind
'basin', 'subbasin' or 'interbasin'.
* Required variables: ['flwdir'] and any other 'snapping' variable required
to define the region.
* Optional variables: ['basins'] if the `region` is based on a
(sub)(inter)basins without a 'bounds' argument.
basin_index_path: str, optional
Name of data source with basin (bounding box) geometries associated with
the 'basins' layer of `hydrography_fn`. Only required if the `region` is
based on a (sub)(inter)basins without a 'bounds' argument.
add_mask : bool
Add mask variable to grid object, by default True.
align : bool
If True (default), align target transform to resolution.
dec_origin : int, optional
number of decimals to round the origin coordinates, by default 0
dec_rotation : int, optional
number of decimals to round the rotation angle, by default 3
Returns
-------
grid : xr.DataArray
Generated grid mask.
"""
logger.info("Preparing 2D grid.")
# Check if this component's region is a reference to another component
if self._region_component is not None:
raise ValueError(
"Region is a reference to another component. Cannot create grid."
)
grid = create_grid_from_region(
region,
data_catalog=self.data_catalog,
res=res,
crs=crs,
region_crs=region_crs,
rotated=rotated,
hydrography_path=hydrography_path,
basin_index_path=basin_index_path,
add_mask=add_mask,
align=align,
dec_origin=dec_origin,
dec_rotation=dec_rotation,
)
self.set(grid)
return grid
@property
def res(self) -> Optional[Tuple[float, float]]:
"""Returns the resolution of the model grid."""
if len(self.data) > 0:
return self.data.raster.res
exec_nodata_strat(
msg="No grid data found for deriving resolution",
strategy=NoDataStrategy.WARN,
)
return None
@property
def transform(self) -> Optional[Affine]:
"""Returns spatial transform of the model grid."""
if len(self.data) > 0:
return self.data.raster.transform
exec_nodata_strat(
msg="No grid data found for deriving transform",
strategy=NoDataStrategy.WARN,
)
return None
@property
def crs(self) -> Optional[CRS]:
"""Returns coordinate reference system embedded in the model grid."""
if self.data.raster is None:
exec_nodata_strat(
msg="No grid data found for deriving crs",
strategy=NoDataStrategy.WARN,
)
return None
if self.data.raster.crs is None:
exec_nodata_strat(
msg="No crs found in grid data",
strategy=NoDataStrategy.WARN,
)
return None
return CRS(self.data.raster.crs)
@property
def bounds(self) -> Optional[Tuple[float, float, float, float]]:
"""Returns the bounding box of the model grid."""
if len(self.data) > 0:
return self.data.raster.bounds
exec_nodata_strat(
msg="No grid data found for deriving bounds",
strategy=NoDataStrategy.WARN,
)
return None
@property
def _region_data(self) -> Optional[gpd.GeoDataFrame]:
"""Returns the geometry of the model area of interest."""
if len(self.data) > 0:
crs: Optional[Union[int, CRS]] = self.crs
if crs is not None and hasattr(crs, "to_epsg"):
crs = crs.to_epsg() # not all CRS have an EPSG code
return gpd.GeoDataFrame(geometry=[box(*self.bounds)], crs=crs)
exec_nodata_strat(
msg="No grid data found for deriving region", strategy=NoDataStrategy.WARN
)
return None
@property
def data(self) -> xr.Dataset:
"""Model static gridded data as xarray.Dataset."""
if self._data is None:
self._initialize_grid()
assert self._data is not None
return self._data
def _initialize_grid(self, skip_read: bool = False) -> None:
"""Initialize grid object."""
if self._data is None:
self._data = xr.Dataset()
if self.root.is_reading_mode() and not skip_read:
self.read()
[docs]
@hydromt_step
def add_data_from_constant(
self,
constant: Union[int, float],
name: str,
dtype: Optional[str] = "float32",
nodata: Optional[Union[int, float]] = None,
mask_name: Optional[str] = "mask",
) -> List[str]:
"""HYDROMT CORE METHOD: Adds data to grid component based on a constant value.
Parameters
----------
constant: int, float
Constant value to fill grid with.
name: str
Name of grid.
dtype: str, optional
Data type of grid. By default 'float32'.
nodata: int, float, optional
Nodata value. By default inferred from dtype.
mask_name: str, optional
Name of mask in self.grid to use for masking raster_data. By default 'mask'.
Use None to disable masking.
Returns
-------
list
Names of added model grid layer.
"""
da = grid_from_constant(
grid_like=self._get_grid_data(),
constant=constant,
name=name,
dtype=dtype,
nodata=nodata,
mask_name=mask_name,
)
# Add to grid
self.set(da)
return [name]
[docs]
@hydromt_step
def add_data_from_rasterdataset(
self,
raster_data: Union[str, Path, xr.DataArray, xr.Dataset],
variables: Optional[List[str]] = None,
fill_method: Optional[str] = None,
reproject_method: Optional[Union[List[str], str]] = "nearest",
mask_name: Optional[str] = "mask",
rename: Optional[Dict[str, str]] = None,
) -> List[str]:
"""HYDROMT CORE METHOD: Add data variable(s) from ``raster_data`` to grid component.
If raster is a dataset, all variables will be added unless ``variables`` list
is specified.
Adds model layers:
* **raster.name** grid: data from raster_data
Parameters
----------
raster_data: str, Path, xr.DataArray, xr.Dataset
Data catalog key, path to raster file or raster xarray data object.
If a path to a raster file is provided it will be added
to the data_catalog with its name based on the file basename without
extension.
variables: list, optional
List of variables to add to grid from raster_data. By default all.
fill_method : str, optional
If specified, fills nodata values using fill_nodata method.
Available methods are {'linear', 'nearest', 'cubic', 'rio_idw'}.
reproject_method: list, str, optional
See rasterio.warp.reproject for existing methods, by default 'nearest'.
Can provide a list corresponding to ``variables``.
mask_name: str, optional
Name of mask in self.grid to use for masking raster_data. By default 'mask'.
Use None to disable masking.
rename: dict, optional
Dictionary to rename variable names in raster_data before adding to grid
{'name_in_raster_data': 'name_in_grid'}. By default empty.
Returns
-------
list
Names of added model map layers
"""
rename = rename or {}
logger.info(f"Preparing grid data from raster source {raster_data}")
# Read raster data and select variables
ds = self.data_catalog.get_rasterdataset(
raster_data,
geom=self.region,
buffer=2,
variables=variables,
single_var_as_array=False,
)
assert ds is not None
# Data resampling
ds_out = grid_from_rasterdataset(
grid_like=self._get_grid_data(),
ds=ds,
variables=variables,
fill_method=fill_method,
reproject_method=reproject_method,
mask_name=mask_name,
rename=rename,
)
# Add to grid
self.set(ds_out)
return list(ds_out.data_vars.keys())
[docs]
@hydromt_step
def add_data_from_raster_reclass(
self,
raster_data: Union[str, Path, xr.DataArray],
reclass_table_data: Union[str, Path, pd.DataFrame],
reclass_variables: List[str],
variable: Optional[str] = None,
fill_method: Optional[str] = None,
reproject_method: Optional[Union[List[str], str]] = "nearest",
mask_name: Optional[str] = "mask",
rename: Optional[Dict[str, str]] = None,
**kwargs,
) -> List[str]:
"""HYDROMT CORE METHOD: Add data variable(s) to grid component by reclassifying the data in ``raster_data`` based on ``reclass_table_data``.
Adds model layers:
* **reclass_variables** grid: reclassified raster data
Parameters
----------
raster_data: str, Path, xr.DataArray
Data catalog key, path to raster file or raster xarray data object.
Should be a DataArray. Else use `variable` argument for selection.
reclass_table_data: str, Path, pd.DataFrame
Data catalog key, path to tabular data file or tabular pandas dataframe
object for the reclassification table of `raster_data`.
reclass_variables: list
List of reclass_variables from reclass_table_data table to add to maps.
Index column should match values in `raster_data`.
variable: str, optional
Name of raster_data dataset variable to use. This is only required when
reading datasets with multiple variables.
By default None.
fill_method : str, optional
If specified, fills nodata values in `raster_data` using fill_nodata method
before reclassifying. Available methods are
{'linear', 'nearest', 'cubic', 'rio_idw'}.
reproject_method: str, optional
See rasterio.warp.reproject for existing methods, by default "nearest".
Can provide a list corresponding to ``reclass_variables``.
mask_name: str, optional
Name of mask in self.grid to use for masking raster_data. By default 'mask'.
Use None to disable masking.
rename: dict, optional
Dictionary to rename variable names in reclass_variables before adding to
grid {'name_in_reclass_table': 'name_in_grid'}. By default empty.
**kwargs : dict
Additional keyword arguments to be passed to `get_rasterdataset`
Returns
-------
list
Names of added model grid layers
""" # noqa: E501
rename = rename or dict()
logger.info(
f"Preparing grid data by reclassifying the data in {raster_data} based "
f"on {reclass_table_data}"
)
# Read raster data and remapping table
da = self.data_catalog.get_rasterdataset(
raster_data, geom=self.region, buffer=2, variables=variable, **kwargs
)
if not isinstance(da, xr.DataArray):
raise ValueError(
f"raster_data {raster_data} should be a single variable. "
"Please select one using the 'variable' argument"
)
df_vars = self.data_catalog.get_dataframe(
reclass_table_data, variables=reclass_variables
)
# Data resampling
ds_vars = grid_from_raster_reclass(
grid_like=self._get_grid_data(),
da=da,
reclass_table=df_vars,
reclass_variables=reclass_variables,
fill_method=fill_method,
reproject_method=reproject_method,
mask_name=mask_name,
rename=rename,
)
# Add to maps
self.set(ds_vars)
return list(ds_vars.data_vars.keys())
[docs]
@hydromt_step
def add_data_from_geodataframe(
self,
vector_data: Union[str, Path, gpd.GeoDataFrame],
variables: Optional[Union[List[str], str]] = None,
nodata: Optional[Union[List[Union[int, float]], int, float]] = -1,
rasterize_method: Optional[str] = "value",
mask_name: Optional[str] = "mask",
rename: Optional[Dict[str, str]] = None,
all_touched: Optional[bool] = True,
) -> Optional[List[str]]:
"""HYDROMT CORE METHOD: Add data variable(s) to grid component by rasterizing the data from ``vector_data``.
Several type of rasterization are possible:
* "fraction": the fraction of the grid cell covered by the vector
shape is returned.
* "area": the area of the grid cell covered by the vector shape is returned.
* "value": the value from the variables columns of vector_data are used.
If this is used, variables must be specified.
Parameters
----------
vector_data : str, Path, gpd.GeoDataFrame
Data catalog key, path to vector file or a vector geopandas object.
variables : List, str, optional
List of variables to add to grid from vector_data. Required if
rasterize_method is "value", by default None.
nodata : List, int, float, optional
No data value to use for rasterization, by default -1. If a list is
provided, it should have the same length has variables.
rasterize_method : str, optional
Method to rasterize the vector data. Either {"value", "fraction", "area"}.
If "value", the value from the variables columns in vector_data are used
directly in the raster. If "fraction", the fraction of the grid
cell covered by the vector file is returned. If "area", the area of the
grid cell covered by the vector file is returned.
mask_name: str, optional
Name of mask in self.grid to use for masking raster_data. By default 'mask'.
Use None to disable masking.
rename: dict, optional
Dictionary to rename variable names in variables before adding to grid
{'name_in_variables': 'name_in_grid'}. To rename with method fraction or
area use {'vector_data': 'name_in_grid'}. By default empty.
all_touched : bool, optional
If True (default), all pixels touched by geometries will be burned in.
If false, only pixels whose center is within the polygon or that are
selected by Bresenham's line algorithm will be burned in.
Returns
-------
list
Names of added model grid layers
""" # noqa: E501
rename = rename or dict()
logger.info(f"Preparing grid data from vector '{vector_data}'.")
gdf = self.data_catalog.get_geodataframe(
vector_data, geom=self.region, dst_crs=self.crs
)
if gdf is None or gdf.empty:
exec_nodata_strat(
f"No shapes of {vector_data} found within region, skipping {self.add_data_from_geodataframe.__name__}.",
NoDataStrategy.WARN,
)
return None
# Data resampling
# In case of choosing a new name with area or fraction method pass the name directly
renames = (
rename.get(vector_data, rename) if isinstance(vector_data, str) else rename
)
ds = grid_from_geodataframe(
grid_like=self._get_grid_data(),
gdf=gdf,
variables=variables,
nodata=nodata,
rasterize_method=rasterize_method,
mask_name=mask_name,
rename=renames,
all_touched=all_touched,
)
# Add to grid
self.set(ds)
return list(ds.data_vars.keys())
[docs]
def test_equal(self, other: ModelComponent) -> Tuple[bool, Dict[str, str]]:
"""Test if two components are equal.
Parameters
----------
other : ModelComponent
The component to compare against.
Returns
-------
Tuple[bool, Dict[str, str]]
True if the components are equal, and a dict with the associated errors per property checked.
"""
eq, errors = super().test_equal(other)
if not eq:
return eq, errors
other_grid = cast(GridComponent, other)
try:
xr.testing.assert_allclose(self.data, other_grid.data)
except AssertionError as e:
errors["data"] = str(e)
return len(errors) == 0, errors
def _get_grid_data(self) -> Union[xr.DataArray, xr.Dataset]:
"""Get grid data as xarray.DataArray from this component or the reference."""
if self._region_component is not None:
reference_component = self.model.get_component(self._region_component)
if not isinstance(reference_component, GridComponent):
raise ValueError(
f"Unable to find the referenced grid component: '{self._region_component}'."
)
if reference_component.data is None:
raise ValueError(
f"Unable to get grid from the referenced region component: '{self._region_component}'."
)
return reference_component.data
if self.data is None:
raise ValueError("Unable to get grid data from this component.")
return self.data