Source code for hydromt_wflow.workflows.landuse

"""Landuse workflows for Wflow plugin."""

import logging
from typing import List, Optional

import numpy as np
import pandas as pd
import xarray as xr

logger = logging.getLogger(__name__)


__all__ = ["landuse", "lai", "create_lulc_lai_mapping_table", "lai_from_lulc_mapping"]


RESAMPLING = {"landuse": "nearest", "lai": "average", "alpha_h1": "mode"}
DTYPES = {"landuse": np.int16, "alpha_h1": np.int16}


[docs] def landuse( da: xr.DataArray, ds_like: xr.Dataset, df: pd.DataFrame, params: Optional[List] = None, logger=logger, ): """Return landuse map and related parameter maps. The parameter maps are prepared based on landuse map and mapping table as provided in the generic data folder of hydromt. The following topography maps are calculated: - TODO Parameters ---------- da : xarray.DataArray DataArray containing LULC classes. ds_like : xarray.DataArray Dataset at model resolution. Returns ------- ds_out : xarray.Dataset Dataset containing gridded landuse based maps """ keys = df.index.values if params is None: params = [p for p in df.columns if p != "description"] elif not np.all(np.isin(params, df.columns)): missing = [p for p in params if p not in df.columns] raise ValueError(f"Parameter(s) missing in mapping file: {missing}") # setup ds out ds_out = xr.Dataset(coords=ds_like.raster.coords) # setup reclass method def reclass(x): return np.vectorize(d.get)(x, nodata) da = da.raster.interpolate_na(method="nearest") # apply for each parameter for param in params: method = RESAMPLING.get(param, "average") values = df[param].values nodata = values[-1] # NOTE values is set in last row d = dict(zip(keys, values)) # NOTE global param in reclass method logger.info(f"Deriving {param} using {method} resampling (nodata={nodata}).") da_param = xr.apply_ufunc( reclass, da, dask="parallelized", output_dtypes=[values.dtype] ) da_param.attrs.update(_FillValue=nodata) # first set new nodata values ds_out[param] = da_param.raster.reproject_like( ds_like, method=method ) # then resample return ds_out
[docs] def lai(da: xr.DataArray, ds_like: xr.Dataset, logger=logger): """Return climatology of Leaf Area Index (LAI). The following topography maps are calculated: - LAI Parameters ---------- da : xarray.DataArray or xarray.Dataset DataArray or Dataset with LAI array containing LAI values. ds_like : xarray.DataArray Dataset at model resolution. Returns ------- da_out : xarray.DataArray Dataset containing resampled LAI maps """ if isinstance(da, xr.Dataset) and "LAI" in da: da = da["LAI"] elif not isinstance(da, xr.DataArray): raise ValueError("lai method requires a DataArray or Dataset with LAI array") method = RESAMPLING.get(da.name, "average") nodata = da.raster.nodata logger.info(f"Deriving {da.name} using {method} resampling (nodata={nodata}).") da = da.astype(np.float32) da = da.where(da.values != nodata).fillna( 0.0 ) # Assuming missing values correspond to bare soil, urban and snow (LAI=0.0) da_out = da.raster.reproject_like(ds_like, method=method) da_out.attrs.update(_FillValue=nodata) return da_out
[docs] def create_lulc_lai_mapping_table( da_lulc: xr.DataArray, da_lai: xr.DataArray, sampling_method: str = "any", lulc_zero_classes: List[int] = [], logger=logger, ) -> pd.DataFrame: """ Derive LAI values per landuse class. Parameters ---------- da_lulc : xr.DataArray Landuse map. da_lai : xr.DataArray Cyclic LAI map. sampling_method : str, optional Resampling method for the LULC data to the LAI resolution. Two methods are supported: * 'any' (default): if any cell of the desired landuse class is present in the resampling window (even just one), it will be used to derive LAI values. This method is less exact but will provide LAI values for all landuse classes for the high resolution landuse map. * 'mode': the most frequent value in the resampling window is used. This method is less precise as for cells with a lot of different landuse classes, the most frequent value might still be only a small fraction of the cell. More landuse classes should however be covered and it can always be used with the landuse map of the wflow model instead of the original high resolution one. * 'q3': only cells with the most frequent value (mode) and that cover 75% (q3) of the resampling window will be used. This method is more exact but for small basins, you may have less or no samples to derive LAI values for some classes. lulc_zero_classes : list of int, optional List of landuse classes that should have zero for leaf area index values for example waterbodies, open ocean etc. For very high resolution landuse maps, urban surfaces and bare areas can be included here as well. By default empty. Returns ------- df_lai_mapping : pd.DataFrame Mapping table with LAI values per landuse class. One column for each month and one line per landuse class. The number of samples used to derive the mapping values is also added to a `samples` column in the dataframe. """ # check the method values if sampling_method not in ["any", "mode", "q3"]: raise ValueError(f"Unsupported resampling method: {sampling_method}") # process the lai da if "dim0" in da_lai.dims: da_lai = da_lai.rename({"dim0": "time"}) da_lai = da_lai.raster.mask_nodata() da_lai = da_lai.fillna( 0 ) # use zeros to better represent city and open water surfaces # landuse da_lulc.name = "landuse" lulc_classes = np.unique(da_lulc.values) # Initialise the outputs df_lai_mapping = None if sampling_method != "any": # The data can already be resampled to the LAI resolution da_lulc_mode = da_lulc.raster.reproject_like(da_lai, method="mode") if sampling_method == "q3": # Filter mode cells that cover less than 75% of the resampling window da_lulc_q3 = da_lulc.raster.reproject_like(da_lai, method="q3") da_lulc = da_lulc_mode.where( da_lulc_q3 == da_lulc_mode, da_lulc_mode.raster.nodata ) else: da_lulc = da_lulc_mode # Loop over the landuse classes for lulc_id in lulc_classes: logger.info(f"Processing landuse class {lulc_id}") if lulc_id in lulc_zero_classes: logger.info(f"Using zeros for landuse class {lulc_id}") df_lai = pd.DataFrame( columns=da_lai.time.values, data=[[0] * 12], index=[lulc_id], ) df_lai.index.name = "landuse" n_samples = 0 else: # Select for a specific landuse class lu = da_lulc.where(da_lulc == lulc_id, da_lulc.raster.nodata) lu = lu.raster.mask_nodata() if sampling_method == "any": # Resample only now the landuse data to the LAI resolution lu = lu.raster.reproject_like(da_lai, method="mode") # Add lai lu = lu.to_dataset() lu["lai"] = da_lai # Stack and remove the nodata values lu = lu.stack(z=("y", "x")).dropna(dim="z", how="all", subset=["landuse"]) # Count the number of samples n_samples = len(lu["z"]) if n_samples == 0: logger.info( f"No samples found for landuse class {lulc_id}. " "Try using a different resampling method." ) df_lai = pd.DataFrame( columns=da_lai.time.values, data=[[0] * 12], index=[lulc_id], ) df_lai.index.name = "landuse" else: # Compute the mean lai_mean_per_lu = np.round(lu["lai"].load().mean(dim="z") / 10, 3) # Add the landuse id as an extra dimension lai_mean_per_lu = lai_mean_per_lu.expand_dims("landuse") lai_mean_per_lu["landuse"] = [lulc_id] # Convert to dataframe df_lai = lai_mean_per_lu.drop_vars( "spatial_ref", errors="ignore" ).to_pandas() # Add number of samples in the first column df_lai.insert(0, "samples", n_samples) # Append to the output if df_lai_mapping is None: df_lai_mapping = df_lai else: df_lai_mapping = pd.concat([df_lai_mapping, df_lai]) return df_lai_mapping
[docs] def lai_from_lulc_mapping( da: xr.DataArray, ds_like: xr.Dataset, df: pd.DataFrame, logger=logger, ) -> xr.Dataset: """ Derive LAI values from a landuse map and a mapping table. Parameters ---------- da : xr.DataArray Landuse map. ds_like : xr.Dataset Dataset at model resolution. df : pd.DataFrame Mapping table with LAI values per landuse class. One column for each month and one line per landuse class. logger : logging.Logger, optional Logger object. Returns ------- ds_lai : xr.Dataset Dataset with LAI values for each month. """ months = np.arange(1, 13) # Map the monthly LAI values to the landuse map ds_lai = landuse( da=da, ds_like=ds_like, df=df, params=months, logger=logger, ) # Re-organise the dataset to have a time dimension da_lai = ds_lai.to_array(dim="time", name="LAI") return da_lai