Source code for hydromt_wflow.workflows.landuse

"""Landuse workflows for Wflow plugin."""

import logging
import os
from typing import List, Optional, Tuple, Union

import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
from shapely.geometry import box

from .demand import create_grid_from_bbox

logger = logging.getLogger(__name__)


__all__ = [
    "landuse",
    "landuse_from_vector",
    "lai",
    "create_lulc_lai_mapping_table",
    "lai_from_lulc_mapping",
    "add_paddy_to_landuse",
    "add_planted_forest_to_landuse",
]


RESAMPLING = {"landuse": "nearest", "lai": "average", "alpha_h1": "mode"}
DTYPES = {"landuse": np.int16, "alpha_h1": np.int16}


def landuse(
    da: xr.DataArray,
    ds_like: xr.Dataset,
    df: pd.DataFrame,
    params: Optional[List] = None,
    logger=logger,
):
    """Return landuse map and related parameter maps.

    The parameter maps are prepared based on the landuse map and the mapping table
    as provided in the generic data folder of hydromt.

    Parameters
    ----------
    da : xarray.DataArray
        DataArray containing LULC classes.
    ds_like : xarray.Dataset
        Dataset at model resolution.
    df : pd.DataFrame
        Mapping table with parameter values per landuse class.
    params : list of str, optional
        List of parameters to derive, by default None (all columns except
        "description").
    logger : logging.Logger, optional
        Logger object.

    Returns
    -------
    ds_out : xarray.Dataset
        Dataset containing gridded landuse based maps
    """
    keys = df.index.values
    if params is None:
        params = [p for p in df.columns if p != "description"]
    elif not np.all(np.isin(params, df.columns)):
        missing = [p for p in params if p not in df.columns]
        raise ValueError(f"Parameter(s) missing in mapping file: {missing}")

    # setup ds out
    ds_out = xr.Dataset(coords=ds_like.raster.coords)

    # setup reclass method
    def reclass(x):
        return np.vectorize(d.get)(x, nodata)

    da = da.raster.interpolate_na(method="nearest")
    # apply for each parameter
    for param in params:
        method = RESAMPLING.get(param, "average")
        values = df[param].values
        nodata = values[-1]  # NOTE the nodata value is set in the last row
        d = dict(zip(keys, values))  # NOTE read as global in the reclass method
        logger.info(f"Deriving {param} using {method} resampling (nodata={nodata}).")
        da_param = xr.apply_ufunc(
            reclass, da, dask="parallelized", output_dtypes=[values.dtype]
        )
        da_param.attrs.update(_FillValue=nodata)  # first set new nodata values
        ds_out[param] = da_param.raster.reproject_like(
            ds_like, method=method
        )  # then resample

    return ds_out
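
# Example (illustrative sketch, not part of the module): reclassifying a LULC grid
# into parameter grids at model resolution. ``da_lulc`` and ``ds_model`` are assumed
# to be a LULC DataArray and the model grid Dataset; "lulc_mapping.csv" is a
# hypothetical mapping table whose index holds the LULC class IDs and whose last
# row holds the nodata class.
#
#     df_map = pd.read_csv("lulc_mapping.csv", index_col=0)
#     ds_params = landuse(da_lulc, ds_model, df_map)
#     # a subset of the mapping columns can be requested as well, assuming the
#     # table has e.g. a "landuse" and a "Kext" column
#     ds_params = landuse(da_lulc, ds_model, df_map, params=["landuse", "Kext"])
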
def landuse_from_vector(
    gdf: gpd.GeoDataFrame,
    ds_like: xr.Dataset,
    df: pd.DataFrame,
    params: Optional[List] = None,
    lulc_res: Optional[Union[float, int]] = None,
    all_touched: bool = False,
    buffer: int = 1000,
    lulc_out: Optional[str] = None,
    logger=logger,
):
    """
    Derive several wflow maps based on vector landuse-landcover (LULC) data.

    The vector LULC data is first rasterized, either at the model resolution or at
    a higher resolution specified in ``lulc_res``. The columns of the lookup table
    ``df`` map the LULC classes to model parameters based on literature. The data is
    remapped at its rasterized resolution and then resampled to the model resolution
    using the average value, unless noted differently.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing LULC classes.
    ds_like : xarray.Dataset
        Dataset at model resolution.
    df : pd.DataFrame
        Mapping table with landuse values.
    params : list of str, optional
        List of parameters to derive, by default None.
    lulc_res : float or int, optional
        Resolution of the rasterized LULC data, by default None (use model
        resolution).
    all_touched : bool, optional
        If True, all pixels touched by the polygon will be burned in, by default
        False.
    buffer : int, optional
        Buffer in meters to add around the bounding box of the vector data, by
        default 1000.
    lulc_out : str, optional
        Path to save the rasterized original landuse map to file, by default None.
    logger : logging.Logger, optional
        Logger object.

    Returns
    -------
    ds_out : xarray.Dataset
        Dataset containing gridded landuse based maps
    """
    # intersect with bbox
    bounds = gpd.GeoDataFrame(
        geometry=[box(*ds_like.raster.bounds)], crs=ds_like.raster.crs
    )
    bounds = bounds.to_crs(3857).buffer(buffer).to_crs(gdf.crs)
    gdf = gdf.overlay(gpd.GeoDataFrame(geometry=bounds), how="intersection")

    # rasterize the vector data
    logger.info("Rasterizing landuse map")
    if lulc_res is None:
        gdf_reproj = gdf.to_crs(ds_like.raster.crs)
        grid_like = create_grid_from_bbox(
            gdf_reproj.total_bounds,
            res=max(np.abs(ds_like.raster.res)),
            crs=ds_like.raster.crs,
            align=True,
        )
    else:
        grid_like = create_grid_from_bbox(
            gdf.total_bounds,
            res=lulc_res,
            crs=gdf.crs,
            align=True,
        )
    # get the nodata value of the landuse (last row in the df)
    nodata = df["landuse"].values[-1]
    da = grid_like.raster.rasterize(
        gdf,
        col_name="landuse",
        nodata=nodata,
        all_touched=all_touched,
        dtype="int32",
    )
    if lulc_out is not None:
        logger.info(f"Saving rasterized landuse map to {lulc_out}")
        os.makedirs(os.path.dirname(lulc_out), exist_ok=True)
        da.raster.to_raster(lulc_out)

    # derive the landuse maps
    ds_out = landuse(da, ds_like, df, params=params, logger=logger)

    return ds_out
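
# Example (illustrative sketch): rasterizing a vector LULC layer at a finer
# resolution before deriving the parameter grids. ``gdf_lulc``, ``ds_model`` and
# ``df_map`` are assumed inputs; the resolution and output path are hypothetical.
#
#     ds_params = landuse_from_vector(
#         gdf_lulc,
#         ds_model,
#         df_map,
#         lulc_res=10,  # resolution in the units of the vector CRS
#         all_touched=True,
#         lulc_out="work/lulc_rasterized.tif",
#     )
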
def lai(da: xr.DataArray, ds_like: xr.Dataset, logger=logger):
    """Return climatology of Leaf Area Index (LAI).

    The following vegetation maps are calculated:

    - LAI

    Parameters
    ----------
    da : xarray.DataArray or xarray.Dataset
        DataArray with LAI values or Dataset containing a LAI array.
    ds_like : xarray.Dataset
        Dataset at model resolution.
    logger : logging.Logger, optional
        Logger object.

    Returns
    -------
    da_out : xarray.DataArray
        DataArray containing the resampled LAI maps
    """
    if isinstance(da, xr.Dataset) and "LAI" in da:
        da = da["LAI"]
    elif not isinstance(da, xr.DataArray):
        raise ValueError("lai method requires a DataArray or Dataset with LAI array")
    method = RESAMPLING.get(da.name, "average")
    nodata = da.raster.nodata
    logger.info(f"Deriving {da.name} using {method} resampling (nodata={nodata}).")
    da = da.astype(np.float32)
    # Assume missing values correspond to bare soil, urban and snow (LAI=0.0)
    da = da.where(da.values != nodata).fillna(0.0)
    da_out = da.raster.reproject_like(ds_like, method=method)
    da_out.attrs.update(_FillValue=nodata)
    return da_out
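
# Example (illustrative sketch): resampling a monthly LAI climatology to the model
# grid. ``ds_lai_clim`` is an assumed Dataset with a 12-step "LAI" variable and
# ``ds_model`` the model grid Dataset.
#
#     da_lai_model = lai(ds_lai_clim, ds_model)
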
def create_lulc_lai_mapping_table(
    da_lulc: xr.DataArray,
    da_lai: xr.DataArray,
    sampling_method: str = "any",
    lulc_zero_classes: List[int] = [],
    logger=logger,
) -> pd.DataFrame:
    """
    Derive LAI values per landuse class.

    Parameters
    ----------
    da_lulc : xr.DataArray
        Landuse map.
    da_lai : xr.DataArray
        Cyclic LAI map.
    sampling_method : str, optional
        Resampling method for the LULC data to the LAI resolution. Three methods
        are supported:

        * 'any' (default): if any cell of the desired landuse class is present in
          the resampling window (even just one), it will be used to derive LAI
          values. This method is less exact but will provide LAI values for all
          landuse classes of the high resolution landuse map.
        * 'mode': the most frequent value in the resampling window is used. This
          method is less precise as for cells with many different landuse classes,
          the most frequent value might still cover only a small fraction of the
          cell. More landuse classes should however be covered and it can always be
          used with the landuse map of the wflow model instead of the original high
          resolution one.
        * 'q3': only cells whose most frequent value (mode) also covers 75% (q3) of
          the resampling window will be used. This method is more exact, but for
          small basins you may have few or no samples to derive LAI values for some
          classes.
    lulc_zero_classes : list of int, optional
        List of landuse classes that should have zero leaf area index values, for
        example waterbodies, open ocean etc. For very high resolution landuse maps,
        urban surfaces and bare areas can be included here as well. By default
        empty.
    logger : logging.Logger, optional
        Logger object.

    Returns
    -------
    df_lai_mapping : pd.DataFrame
        Mapping table with LAI values per landuse class. One column for each month
        and one line per landuse class. The number of samples used to derive the
        mapping values is also added to a `samples` column in the dataframe.
    """
    # check the method values
    if sampling_method not in ["any", "mode", "q3"]:
        raise ValueError(f"Unsupported resampling method: {sampling_method}")
    # process the lai da
    if "dim0" in da_lai.dims:
        da_lai = da_lai.rename({"dim0": "time"})
    da_lai = da_lai.raster.mask_nodata()
    # use zeros to better represent city and open water surfaces
    da_lai = da_lai.fillna(0)
    # landuse
    da_lulc.name = "landuse"
    lulc_classes = np.unique(da_lulc.values)

    # Initialise the outputs
    df_lai_mapping = None

    if sampling_method != "any":
        # The data can already be resampled to the LAI resolution
        da_lulc_mode = da_lulc.raster.reproject_like(da_lai, method="mode")
        if sampling_method == "q3":
            # Filter mode cells that cover less than 75% of the resampling window
            da_lulc_q3 = da_lulc.raster.reproject_like(da_lai, method="q3")
            da_lulc = da_lulc_mode.where(
                da_lulc_q3 == da_lulc_mode, da_lulc_mode.raster.nodata
            )
        else:
            da_lulc = da_lulc_mode

    # Loop over the landuse classes
    for lulc_id in lulc_classes:
        logger.info(f"Processing landuse class {lulc_id}")
        if lulc_id in lulc_zero_classes:
            logger.info(f"Using zeros for landuse class {lulc_id}")
            df_lai = pd.DataFrame(
                columns=da_lai.time.values,
                data=[[0] * 12],
                index=[lulc_id],
            )
            df_lai.index.name = "landuse"
            n_samples = 0
        else:
            # Select for a specific landuse class
            lu = da_lulc.where(da_lulc == lulc_id, da_lulc.raster.nodata)
            lu = lu.raster.mask_nodata()
            if sampling_method == "any":
                # Resample only now the landuse data to the LAI resolution
                lu = lu.raster.reproject_like(da_lai, method="mode")
            # Add lai
            lu = lu.to_dataset()
            lu["lai"] = da_lai
            # Stack and remove the nodata values
            lu = lu.stack(z=(lu.raster.y_dim, lu.raster.x_dim)).dropna(
                dim="z", how="all", subset=["landuse"]
            )
            # Count the number of samples
            n_samples = len(lu["z"])
            if n_samples == 0:
                logger.info(
                    f"No samples found for landuse class {lulc_id}. "
                    "Try using a different resampling method."
                )
                df_lai = pd.DataFrame(
                    columns=da_lai.time.values,
                    data=[[0] * 12],
                    index=[lulc_id],
                )
                df_lai.index.name = "landuse"
            else:
                # Compute the mean
                lai_mean_per_lu = np.round(lu["lai"].load().mean(dim="z"), 3)
                # Add the landuse id as an extra dimension
                lai_mean_per_lu = lai_mean_per_lu.expand_dims("landuse")
                lai_mean_per_lu["landuse"] = [lulc_id]
                # Convert to dataframe
                df_lai = lai_mean_per_lu.drop_vars(
                    "spatial_ref", errors="ignore"
                ).to_pandas()
        # Add number of samples in the first column
        df_lai.insert(0, "samples", n_samples)

        # Append to the output
        if df_lai_mapping is None:
            df_lai_mapping = df_lai
        else:
            df_lai_mapping = pd.concat([df_lai_mapping, df_lai])

    return df_lai_mapping
def lai_from_lulc_mapping(
    da: xr.DataArray,
    ds_like: xr.Dataset,
    df: pd.DataFrame,
    logger=logger,
) -> xr.DataArray:
    """
    Derive LAI values from a landuse map and a mapping table.

    Parameters
    ----------
    da : xr.DataArray
        Landuse map.
    ds_like : xr.Dataset
        Dataset at model resolution.
    df : pd.DataFrame
        Mapping table with LAI values per landuse class. One column for each month
        and one line per landuse class.
    logger : logging.Logger, optional
        Logger object.

    Returns
    -------
    da_lai : xr.DataArray
        DataArray with monthly LAI values along a time dimension.
    """
    months = np.arange(1, 13)
    # month columns may be read as strings; cast them to int
    df.columns = [int(col) if str(col).isdigit() else col for col in df.columns]
    # Map the monthly LAI values to the landuse map
    ds_lai = landuse(
        da=da,
        ds_like=ds_like,
        df=df,
        params=months,
        logger=logger,
    )
    # Re-organise the dataset to have a time dimension
    da_lai = ds_lai.to_array(dim="time", name="LAI")

    return da_lai
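
# Example (illustrative sketch): turning the mapping table produced by
# ``create_lulc_lai_mapping_table`` into a monthly LAI DataArray on the model grid.
# ``da_lulc_model`` and ``ds_model`` are assumed inputs; the csv path is
# hypothetical.
#
#     df_lai_map = pd.read_csv("lai_per_lulc.csv", index_col=0)
#     da_lai = lai_from_lulc_mapping(da_lulc_model, ds_model, df_lai_map)
#     # da_lai has a 12-step "time" dimension, one layer per month
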
def add_paddy_to_landuse(
    landuse: xr.DataArray,
    paddy: xr.DataArray,
    paddy_class: int,
    df_mapping: pd.DataFrame,
    df_paddy_mapping: pd.DataFrame,
    output_paddy_class: Optional[int] = None,
) -> Tuple[xr.DataArray, pd.DataFrame]:
    """
    Burn paddy fields into landuse map and update mapping table.

    The resulting paddy class in the landuse map will have ID ``output_paddy_class``
    if provided and ``paddy_class`` otherwise. The mapping table will be updated
    with the values from the ``df_paddy_mapping`` table.

    Parameters
    ----------
    landuse : xr.DataArray
        Landuse map.
    paddy : xr.DataArray
        Paddy fields map.
    paddy_class : int
        ID of the paddy class in the paddy map.
    df_mapping : pd.DataFrame
        Mapping table with landuse values.
    df_paddy_mapping : pd.DataFrame
        Mapping table with paddy values.
    output_paddy_class : int, optional
        ID of the paddy class in the output landuse map. If not provided, the
        paddy_class will be used.

    Returns
    -------
    landuse : xr.DataArray
        Updated landuse map.
    df_mapping : pd.DataFrame
        Updated mapping table.
    """
    # Get output paddy class
    if output_paddy_class is None:
        output_paddy_class = paddy_class

    # Reproject paddy map to landuse resolution
    # if paddy has lower res than landuse, use nearest resampling
    if abs(paddy.raster.res[0]) >= abs(landuse.raster.res[0]):
        paddy = paddy.raster.reproject_like(landuse, method="nearest")
    # else use mode resampling
    else:
        paddy = paddy.raster.reproject_like(landuse, method="mode")

    # Burn in the rice fields in the landuse map
    landuse = landuse.where(paddy != paddy_class, output_paddy_class)

    # Update the mapping table
    df_paddy_mapping.index = [output_paddy_class]
    df_paddy_mapping["landuse"] = output_paddy_class
    # Add the paddy class to the first line of the mapping table
    df_mapping = pd.concat([df_paddy_mapping, df_mapping])

    return landuse, df_mapping
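
# Example (illustrative sketch): burning paddies (class 1 in a hypothetical rice
# map) into the landuse map as a new class 14, and prepending its parameter values
# to the mapping table. All inputs (``da_lulc``, ``da_rice``, ``df_map``,
# ``df_paddy``) are assumed.
#
#     da_landuse_new, df_map_new = add_paddy_to_landuse(
#         landuse=da_lulc,
#         paddy=da_rice,
#         paddy_class=1,
#         df_mapping=df_map,
#         df_paddy_mapping=df_paddy,
#         output_paddy_class=14,
#     )
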
def add_planted_forest_to_landuse(
    planted_forest: gpd.GeoDataFrame,
    ds_like: xr.Dataset,
    planted_forest_c: float = 0.0881,
    orchard_name: str = "Orchard",
    orchard_c: float = 0.2188,
    logger=logger,
) -> xr.DataArray:
    """
    Update USLE C map with planted forest and orchard data.

    Default USLE C values for planted forest and orchards are derived from
    Panagos et al., 2015 (10.1016/j.landusepol.2015.05.021). For harvested forest
    at different regrowth stages see also Borrelli and Schutt, 2014
    (10.1016/j.geomorph.2013.08.022).

    Parameters
    ----------
    planted_forest : geopandas.GeoDataFrame
        GeoDataFrame containing planted forest data. Required columns are
        'geometry', and optionally 'forest_type' to find orchards.
    ds_like : xr.Dataset
        Dataset at model resolution. Required variables are 'USLE_C'.
    planted_forest_c : float, optional
        USLE C value for planted forest, by default 0.0881.
    orchard_name : str, optional
        Name of the orchard landuse class, by default "Orchard".
    orchard_c : float, optional
        USLE C value for orchards, by default 0.2188.
    logger : logging.Logger, optional
        Logger object.

    Returns
    -------
    usle_c : xr.DataArray
        Updated USLE C map.
    """
    # Add a USLE_C column with default value
    logger.info("Correcting USLE_C with planted forest and orchards.")
    planted_forest["USLE_C"] = planted_forest_c
    # If forest_type column is available, update USLE_C value for orchards
    if "forest_type" in planted_forest.columns:
        planted_forest.loc[
            planted_forest["forest_type"] == orchard_name, "USLE_C"
        ] = orchard_c
    # Rasterize forest data
    usle_c = ds_like.raster.rasterize(
        gdf=planted_forest,
        col_name="USLE_C",
        nodata=ds_like["USLE_C"].raster.nodata,
        all_touched=False,
    )
    # Cover nodata with the USLE_C map from all landuse classes
    usle_c = usle_c.where(
        usle_c != usle_c.raster.nodata,
        ds_like["USLE_C"],
    )
    return usle_c
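
# Example (illustrative sketch): correcting the USLE_C grid with a planted-forest
# polygon layer. ``gdf_forest`` is an assumed GeoDataFrame with a "forest_type"
# column; ``ds_model`` must contain a "USLE_C" variable.
#
#     da_usle_c = add_planted_forest_to_landuse(gdf_forest, ds_model)
#     ds_model["USLE_C"] = da_usle_c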