# -*- coding: utf-8 -*-
"""HydroMT VectorComponent class definition."""
import os
from logging import Logger, getLogger
from os.path import basename, dirname, isfile, join
from typing import TYPE_CHECKING, Optional, Union, cast
import geopandas as gpd
import numpy as np
import xarray as xr
from geopandas.testing import assert_geodataframe_equal
from pyproj import CRS
from shapely.geometry import box
from hydromt._io.readers import _read_nc
from hydromt._io.writers import _write_nc
from hydromt.gis.vector import GeoDataset
from hydromt.model.components.base import ModelComponent
from hydromt.model.components.spatial import SpatialModelComponent
from hydromt.model.steps import hydromt_step
if TYPE_CHECKING:
from hydromt.model.model import Model
__all__ = ["VectorComponent"]
logger: Logger = getLogger(__name__)
[docs]
class VectorComponent(SpatialModelComponent):
"""ModelComponent class for vector components.
This class is used to manage vector data in a model (e.g. for polygons of a semi
distributed model). The vector component data stored in the ``data`` property of
this class if of the hydromt.gis.vector.GeoDataset type which is an extension of
xarray.Dataset with a geometry coordinate.
"""
[docs]
def __init__(
self,
model: "Model",
*,
region_component: Optional[str] = None,
region_filename: str = "vector/vector_region.geojson",
) -> None:
"""Initialize a vector component.
Parameters
----------
model : Model
Parent model
region_component : str, optional
The name of the region component to use as reference for this component's region.
If None, the region will be set to the bounds of the geometry of this vector component.
region_filename : str
The path to use for writing the region data to a file. By default "vector/vector_region.geojson".
"""
super().__init__(
model, region_component=region_component, region_filename=region_filename
)
self._data: Optional[xr.Dataset] = None
@property
def data(self) -> xr.Dataset:
"""Model vector (polygon) data.
Returns xr.Dataset with a polygon geometry coordinate.
"""
if self._data is None:
self._initialize()
assert self._data is not None
return self._data
@property
def _region_data(self) -> Optional[gpd.GeoDataFrame]:
if len(self.data.vector) == 0:
return None
gdf = self.geometry.to_frame("geometry")
return gpd.GeoDataFrame(geometry=[box(*gdf.total_bounds)], crs=gdf.crs)
def _initialize(self, skip_read=False) -> None:
if self._data is None:
self._data = xr.Dataset()
if self.root.is_reading_mode() and not skip_read:
self.read()
[docs]
def set(
self,
data: Union[xr.DataArray, xr.Dataset, np.ndarray, gpd.GeoDataFrame] = None,
*,
name: Optional[str] = None,
overwrite_geom: bool = False,
) -> None:
"""Add data to vector.
All layers of data must have identical spatial index.
Only polygon geometry is supported.
If vector already contains a geometry layer different than data,
`overwrite_geom` can be set to True to overwrite the complete vector
object wit data (use with caution as previous data could be lost).
Parameters
----------
data: xarray.DataArray or xarray.Dataset or np.ndarray or gpd.GeoDataFrame
new data to add to vector
name: str, optional
Name of new data, this is used to overwrite the name of a DataArray
or to select a variable from a Dataset.
overwrite_geom: bool, optional
If True, overwrite the complete vector object with data, by default False
"""
self._initialize()
assert self._data is not None
name_required = isinstance(data, np.ndarray) or (
isinstance(data, xr.DataArray) and data.name is None
)
if name is None and name_required:
raise ValueError(f"Unable to set {type(data).__name__} data without a name")
# check the type of data
if isinstance(data, np.ndarray) and "geometry" in self._data:
index_dim = self.index_dim
index = self._data[index_dim]
if data.size != index.size:
if data.ndim == 1:
raise ValueError("Size of data and number of vector do not match")
else:
raise ValueError(
"set_vector with np.ndarray is only supported if data is 1D"
)
data = xr.DataArray(dims=[index_dim], data=data)
if isinstance(data, xr.DataArray):
if name is not None: # rename
data.name = name
data = data.to_dataset()
elif isinstance(data, gpd.GeoDataFrame):
data = GeoDataset.from_gdf(data, keep_cols=True, cols_as_data_vars=True)
elif not isinstance(data, xr.Dataset):
raise ValueError(f"cannot set data of type {type(data).__name__}")
# Add to vector
# 1. self.vector does not have a geometry yet or overwrite_geom
# then use data directly
if self.geometry is None or overwrite_geom:
if data.vector.geometry is None:
raise ValueError("Cannot instantiate vector without geometry in data")
else:
if overwrite_geom:
logger.warning("Overwriting vector object with data")
self._data = data
# 2. self.vector has a geometry
else:
# data has a geometry - check if it is the same as self.vector
if data.vector.geometry is not None:
if not np.all(
data.vector.geometry.geom_equals_exact(
self.geometry, tolerance=0.0001
)
):
raise ValueError("Geometry of data and vector do not match")
# add data (with check on index)
for dvar in data.data_vars:
if dvar in self._data:
logger.warning(f"Replacing vector variable: {dvar}")
# check on index coordinate before merging
dims = data[dvar].dims
if np.array_equal(
data[dims[0]].values, self._data[self.index_dim].values
):
self._data[dvar] = data[dvar]
else:
raise ValueError(
f"Index coordinate of data variable {dvar} "
"does not match vector index coordinate"
)
[docs]
@hydromt_step
def read(
self,
*,
filename: Optional[str] = "vector/vector.nc",
geometry_filename: Optional[str] = "vector/vector.geojson",
**kwargs,
) -> None:
"""Read model vector from combined netcdf and geojson file.
Files are read at <root>/<filename> and geojson file at <root>/<geometry_filename>.
Three options are possible:
* The netcdf file contains the attribute data and the geojson file the
geometry vector data.
* The netcdf file contains both the attribute and the geometry data.
(geometry_filename is ignored)
* The geojson file contains both the attribute and the geometry data.
(filename is ignored)
Key-word arguments are passed to :py:meth:`~hydromt.model.Model.read_nc`
Parameters
----------
filename : str, optional
netcdf filename relative to model root,
by default 'vector/vector.nc'
geometry_filename : str, optional
geojson filename relative to model root,
by default 'vector/vector.geojson'
kwargs:
Additional keyword arguments that are passed to the `read_nc`
function.
"""
self.root._assert_read_mode()
self._initialize(skip_read=True)
if filename is None and geometry_filename is None:
raise ValueError(
"Both filename and geometry_filename are None, no source file given."
)
if filename is not None:
# Disable lazy loading of data
# to avoid issues with reading object dtype data
if "chunks" not in kwargs:
kwargs["chunks"] = None
ds = xr.merge(_read_nc(filename, root=self.root.path, **kwargs).values())
# check if ds is empty (default filename has a value)
if len(ds.sizes) == 0:
filename = None
if geometry_filename is not None and isfile(
join(self.root.path, geometry_filename)
):
gdf = gpd.read_file(join(self.root.path, geometry_filename))
# geom + netcdf data
if filename is not None:
ds = GeoDataset.from_gdf(gdf, data_vars=ds)
# geom only
else:
ds = GeoDataset.from_gdf(gdf, keep_cols=True, cols_as_data_vars=True)
# netcdf only
elif filename is not None:
ds = GeoDataset.from_netcdf(ds)
else:
logger.info("No vector data found, skip reading.")
return
self.set(data=ds)
[docs]
@hydromt_step
def write(
self,
*,
filename: Optional[str] = "vector/vector.nc",
geometry_filename: Optional[str] = "vector/vector.geojson",
ogr_compliant: bool = False,
**kwargs,
) -> None:
"""Write model vector to combined netcdf and geojson files.
Files are written at <root>/<filename> and at <root>/<geometry_filename> respectively.
Three options are possible:
* The netcdf file contains the attribute data and the geojson file the
geometry vector data. Key-word arguments are passed to
:py:meth:`~hydromt.model.Model.write_nc`
* The netcdf file contains both the attribute and the geometry data
(geometry_filename set to None). Key-word arguments are passed to
:py:meth:`~hydromt.model.Model.write_nc`
* The geojson file contains both the attribute and the geometry data
(filename set to None). This option is possible only if all data variables
are 1D. Else the data will be written to netcdf. Key-word arguments are
passed to :py:meth:`~hydromt.vector.GeoDataset.to_gdf`
Parameters
----------
filename : str, optional
netcdf filename relative to model root,
by default 'vector/vector.nc'
geometry_filename : str, optional
geojson filename relative to model root,
by default 'vector/vector.geojson'
ogr_compliant : bool
If filename only, write the netCDF4 file in an ogr compliant format
This makes it readable as a vector file in e.g. QGIS
see :py:meth:`~hydromt.vector.GeoBase.ogr_compliant` for more details.
**kwargs:
Additional keyword arguments that are passed to the `write_nc`
function.
"""
ds = self.data
if len(ds) == 0:
logger.debug("No vector data found, skip writing.")
return
self.root._assert_write_mode()
if filename is None and geometry_filename is None:
raise ValueError(
"Both filename and geometry_filename are None, no destination file given. Please provide either filename or geometry_filename."
)
# If filename is None check if vector contains only 1D data
if filename is None:
assert geometry_filename is not None
# Check if 1D data only is present
snames = ["y_name", "x_name", "index_dim", "geom_name"]
sdims = [ds.vector.attrs.get(n) for n in snames if n in ds.vector.attrs]
if "spatial_ref" in ds:
sdims.append("spatial_ref")
for name in list(set(ds.vector._all_names) - set(sdims)):
dims = ds[name].dims
# check 1D variables with matching index_dim
if len(dims) > 1 or dims[0] != ds.vector.index_dim:
filename = join(
dirname(join(self.root.path, geometry_filename)),
f"{basename(geometry_filename).split('.')[0]}.nc",
)
logger.warning(
"2D data found in vector,"
f"will write data to {filename} instead."
)
break
# write to netcdf only
if geometry_filename is None:
assert filename is not None
os.makedirs(dirname(join(self.root.path, filename)), exist_ok=True)
# cannot call directly ds.vector.to_netcdf
# because of possible PermissionError
if ogr_compliant:
ds = ds.vector.ogr_compliant()
else:
ds = ds.vector.update_geometry(geom_format="wkt", geom_name="ogc_wkt")
# write_nc requires dict - use dummy key
_write_nc(
{"vector": ds},
filename,
engine="netcdf4",
root=self.root.path,
force_overwrite=self.root.mode.is_override_mode(),
**kwargs,
)
# write to geojson only
elif filename is None:
os.makedirs(dirname(join(self.root.path, geometry_filename)), exist_ok=True)
gdf = ds.vector.to_gdf(**kwargs)
gdf.to_file(join(self.root.path, geometry_filename))
# write data to netcdf and geometry to geojson
else:
os.makedirs(dirname(join(self.root.path, geometry_filename)), exist_ok=True)
# write geometry
gdf = ds.vector.geometry.to_frame("geometry")
gdf.to_file(join(self.root.path, geometry_filename))
# write_nc requires dict - use dummy key
_write_nc(
{"vector": ds.drop_vars("geometry")},
filename,
root=self.root.path,
**kwargs,
)
# Other vector properties
@property
def geometry(self) -> gpd.GeoSeries:
"""Returns the geometry of the model vector as gpd.GeoSeries."""
# check if vector is empty
if len(self.data.sizes) == 0:
return None
else:
return self.data.vector.geometry
@property
def index_dim(self) -> str:
"""Returns the index dimension of the vector."""
return self.data.vector.index_dim
@property
def crs(self) -> Optional[CRS]:
"""Returns coordinate reference system embedded in the vector."""
if self.data.vector.crs is not None:
return self.data.vector.crs
logger.warning("No CRS found in vector data.")
return None
[docs]
def test_equal(self, other: ModelComponent) -> tuple[bool, dict[str, str]]:
"""Test if two components are equal.
Parameters
----------
other : ModelComponent
The component to compare against.
Returns
-------
tuple[bool, dict[str, str]]
True if the components are equal, and a dict with the associated errors per property checked.
"""
eq, errors = super().test_equal(other)
if not eq:
return eq, errors
other_vector = cast(VectorComponent, other)
geods = self.data
other_geods = other_vector.data
gdf = self.geometry.to_frame("geometry")
gdf_other = other_vector.geometry.to_frame("geometry")
try:
assert_geodataframe_equal(
gdf, gdf_other, check_like=True, check_less_precise=True
)
except AssertionError as e:
errors["geometry"] = str(e)
drop_vars = (
[geods.vector.x_name, geods.vector.y_name]
if geods.vector.geom_format == "xy"
else [geods.vector.geom_name]
)
ds = geods.drop_vars(drop_vars)
ds_other = other_geods.drop_vars(drop_vars)
try:
xr.testing.assert_allclose(ds, ds_other)
except AssertionError as e:
errors["data"] = str(e)
return len(errors) == 0, errors