Source code for imod.formats.rasterio

"""
Functions that make use of `rasterio
<https://rasterio.readthedocs.io/en/stable/>`_ for input and output to other
raster formats.

Currently only :func:`imod.rasterio.write` is implemented.
"""

import pathlib
import warnings
from typing import Dict

import numpy as np
import pandas as pd

import imod
from imod.formats import array_io
from imod.util.imports import MissingOptionalModule

# since rasterio is a big dependency that is sometimes hard to install
# and not always required, we made this an optional dependency
try:
    import rasterio
except ImportError:
    rasterio = MissingOptionalModule("rasterio")

f_open = open


# Based on this comment
# https://github.com/mapbox/rasterio/issues/265#issuecomment-367044836
def _create_ext_driver_code_map():
    from osgeo import gdal

    if hasattr(gdal, "DCAP_RASTER"):

        def _check_driver(drv):
            return drv.GetMetadataItem(gdal.DCAP_RASTER)

    else:

        def _check_driver(drv):
            return True

    output = {}
    for i in range(gdal.GetDriverCount()):
        drv = gdal.GetDriver(i)
        if _check_driver(drv):
            if drv.GetMetadataItem(gdal.DCAP_CREATE) or drv.GetMetadataItem(
                gdal.DCAP_CREATECOPY
            ):
                ext = drv.GetMetadataItem(gdal.DMD_EXTENSION)
                if ext is not None and len(ext) > 0:
                    output[drv.GetMetadataItem(gdal.DMD_EXTENSION)] = drv.ShortName
    sortedkeys = sorted(output.keys())
    output = {k: output[k] for k in sortedkeys}
    return output


# tiff and jpeg keys have been added manually.
EXTENSION_GDAL_DRIVER_CODE_MAP = {
    "asc": "AAIGrid",
    "bag": "BAG",
    "bil": "EHdr",
    "blx": "BLX",
    "bmp": "BMP",
    "bt": "BT",
    "dat": "ZMap",
    "dem": "USGSDEM",
    "ers": "ERS",
    "gen": "ADRG",
    "gif": "GIF",
    "gpkg": "GPKG",
    "grd": "NWT_GRD",
    "gsb": "NTv2",
    "gtx": "GTX",
    "hdr": "MFF",
    "hf2": "HF2",
    "hgt": "SRTMHGT",
    "img": "HFA",
    "jp2": "JP2OpenJPEG",
    "jpg": "JPEG",
    "jpeg": "JPEG",
    "kea": "KEA",
    "kro": "KRO",
    "lcp": "LCP",
    "map": "PCRaster",
    "mbtiles": "MBTiles",
    "mrf": "MRF",
    "nc": "netCDF",
    "ntf": "NITF",
    "pdf": "PDF",
    "pix": "PCIDSK",
    "png": "PNG",
    "rda": "R",
    "rgb": "SGI",
    "rst": "RST",
    "rsw": "RMF",
    "sigdem": "SIGDEM",
    "sqlite": "Rasterlite",
    "ter": "Terragen",
    "tif": "GTiff",
    "tiff": "GTiff",
    "vrt": "VRT",
    "xml": "PDS4",
    "xpm": "XPM",
    "xyz": "XYZ",
}


def _get_driver(path):
    ext = path.suffix.lower()[1:]  # skip the period
    try:
        return EXTENSION_GDAL_DRIVER_CODE_MAP[ext]
    except KeyError:
        raise ValueError(
            f'Unknown extension "{ext}", available extensions: '
            f'{", ".join(EXTENSION_GDAL_DRIVER_CODE_MAP.keys())}'
        )


def _limitations(riods, path):
    if riods.count != 1:
        raise NotImplementedError(
            f"Cannot open multi-band grid: {path}. Try rioxarray instead."
        )
    if not riods.transform.is_rectilinear:
        raise NotImplementedError(
            f"Cannot open non-rectilinear grid: {path}. Try rioxarray instead."
        )






def _read(path, headersize, nrow, ncol, nodata, dtype):
    with rasterio.open(path, "r") as dataset:
        a = dataset.read(1)

    # None signifies no replacement; skip if nodata already is nan
    if (nodata is None) or np.isnan(nodata):
        return a
    # Only set nodata to nan if the dtype supports it
    if (a.dtype == np.float64) or (a.dtype == np.float32):
        return array_io.reading._to_nan(a, nodata)
    else:
        return a


def read(path, use_cftime=False, pattern=None):
    warnings.warn(
        "The imod.rasterio.read() function is deprecated. Instead use imod.rasterio.open().",
        FutureWarning,
    )
    return open(path, use_cftime=False, pattern=None).load()


[docs] def open(path, use_cftime=False, pattern=None): r""" Open one or more GDAL supported raster files as an xarray.DataArray. In accordance with xarray's design, ``open`` loads the data of the files lazily. This means the data of the rasters are not loaded into memory until the data is needed. This allows for easier handling of large datasets, and more efficient computations. Parameters ---------- path : str, Path or list This can be a single file, 'head_l1.tif', a glob pattern expansion, 'head_l*.tif', or a list of files, ['head_l1.tif', 'head_l2.tif']. Note that each file needs to be of the same name (part before the first underscore) but have a different layer and/or timestamp, such that they can be combined in a single xarray.DataArray. use_cftime : bool, optional Use ``cftime.DatetimeProlepticGregorian`` instead of `np.datetime64[ns]` for the time axis. Dates are normally encoded as ``np.datetime64[ns]``; however, if dates fall before 1678 or after 2261, they are automatically encoded as ``cftime.DatetimeProlepticGregorian`` objects rather than ``np.datetime64[ns]``. pattern : str, regex pattern, optional If the filenames do match default naming conventions of {name}_{time}_l{layer}, a custom pattern can be defined here either as a string, or as a compiled regular expression pattern. See the examples below. Returns ------- xarray.DataArray A float32 xarray.DataArray of the values in the raster file(s). All metadata needed for writing the file to raster or other formats using imod.rasterio are included in the xarray.DataArray.attrs. Examples -------- Open a raster file: >>> da = imod.rasterio.open("example.tif") Open a raster file, relying on default naming conventions to identify layer: >>> da = imod.rasterio.open("example_l1.tif") Open an IDF file, relying on default naming conventions to identify layer and time: >>> head = imod.rasterio.open("head_20010101_l1.tif") Open multiple files, in this case files for the year 2001 for all layers, again relying on default conventions for naming: >>> head = imod.rasterio.open("head_2001*_l*.tif") The same, this time explicitly specifying ``name``, ``time``, and ``layer``: >>> head = imod.rasterio.open("head_2001*_l*.tif", pattern="{name}_{time}_l{layer}") The format string pattern will only work on tidy paths, where variables are separated by underscores. You can also pass a compiled regex pattern. Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered. >>> import re >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE) >>> head = imod.idf.open("headL11", pattern=pattern) However, this requires constructing regular expressions, which is generally a fiddly process. Regex notation is also impossible to remember. The website https://regex101.com is a nice help. Alternatively, the most pragmatic solution may be to just rename your files. """ return array_io.reading._open(path, use_cftime, pattern, header, _read)
def write_aaigrid(path: pathlib.Path, a: np.ndarray, profile: Dict) -> None: """ Fall-back function to write ESRII ASCII grids even if rasterio is not installed. This function takes care to mimick the idiosyncracies of the GDAL driver. Parameters ---------- path: str or Path path to the output raster a: np.ndarray The raster data. profile: dict The rasterio profile metadata. """ dtype = profile["dtype"] nodata = profile["nodata"] df = pd.DataFrame(a.astype(dtype)) # GDAL writes white space before every line. pandas doesn't support this, # but it will write nodata values -- encoded as NaN -- as white space. df.index = np.full(len(df), np.nan) if np.issubdtype(dtype, np.integer): is_float = False space = "" fmt = "%d" str_nodata = f"{int(nodata)}" elif np.issubdtype(dtype, np.floating): # For some reason, GDAL inserts a space before the nodata value if # dtype is float. is_float = True space = " " precision = profile.get("decimal_precision") digits = profile.get("significant_digits") # See: https://docs.python.org/3/library/string.html#formatspec if precision is not None: fmt = f"%.{precision}f" str_nodata = f"{nodata:.{precision}f}" elif digits is not None: fmt = f"%.{digits}g" str_nodata = f"{nodata:.{digits}g}" else: fmt = "%.20g" str_nodata = f"{nodata:.20g}" else: raise TypeError(f"invalid dtype: {dtype}") dx, _, xmin, _, dy, ymax = profile["transform"][:6] ymin = ymax + profile["height"] * dy header = ( f'ncols {profile["width"]}\n' f'nrows {profile["height"]}\n' f"xllcorner {xmin:.12f}\n" f"yllcorner {ymin:.12f}\n" f"cellsize {dx:.12f}\n" f"NODATA_value {space}{str_nodata}\n" ) # GDAL writes only a linefeed, not a carriage return. By default, Python # adds a carriage return as well, on Windows. This is disabled by # explicitly setting the newline argument. with f_open(path, "w", newline="") as f: f.write(header) first = df.iloc[0, 0] # is_float is the result of a typecheck above. # is_integer() checks whether a float has a decimal fraction: # (1.0).is_integer() -> True # (1.1).is_integer() -> False if is_float and first.is_integer(): # GDAL uses the "general" float (g) format by default. However, if # the first value is a float without decimals, it will write a # single trailing 0 for the first value, presumably to aid type # inference when reading values back in. All subsequent values are # written without decimals, however. precision = profile.get("decimal_precision", 1) # Write the first value, with trailing zero if needed. f.write(f" {first:.{precision}f} ") # Write remainder of first row. Since we've already written the # first value with a space in front of it, we skip writing the NaN # index value here. df.iloc[[0], 1:].to_csv( f, index=False, header=False, sep=" ", float_format=fmt ) # Write all other rows. df.iloc[1:].to_csv(f, index=True, header=False, sep=" ", float_format=fmt) else: df.to_csv(f, index=True, header=False, sep=" ", float_format=fmt) return def write(path, da, driver=None, nodata=np.nan, dtype=None): """Write ``xarray.DataArray`` to GDAL supported geospatial rasters using ``rasterio``. Parameters ---------- path: str or Path path to the output raster da: xarray DataArray The DataArray to be written. Should have only x and y dimensions. driver: str; optional Which GDAL format driver to use. The complete list is at https://gdal.org/drivers/raster/index.html. By default tries to guess from the file extension. nodata: float Nodata value to use. Should be convertible to the DataArray and GDAL dtype. Default value is np.nan Examples -------- Save ``xarray.DataArray`` in ASCII format: >>> imod.rasterio.write("example.asc", da) Save ``xarray.DataArray`` in ASCII format, with 6 significant digits: >>> da.attrs['SIGNIFICANT_DIGITS'] = 6 >>> imod.rasterio.write("example.asc", da) """ # Not directly related to iMOD, but provides a missing link, together # with xioxarray. # Note that this function can quickly become dstdated as # the xarray rasterio connection matures, see for instance: # https://github.com/pydata/xarray/issues/1736 # https://github.com/pydata/xarray/pull/1712 path = pathlib.Path(path) profile = da.attrs.copy() if driver is None: driver = _get_driver(path) flip = slice(None, None, -1) if not da.indexes["x"].is_monotonic_increasing: da = da.isel(x=flip) if not da.indexes["y"].is_monotonic_decreasing: da = da.isel(y=flip) # Only try to fill data that can contains nan's # Do this before casting to another type! ignore_nodata = (nodata is None) or np.isnan(nodata) if not ignore_nodata: if (da.dtype == np.float32) or (da.dtype == np.float64): # NaN is the default missing value in xarray # None is different in that the raster won't have a nodata value da = da.fillna(nodata) # Cast to dtype if dtype is given if dtype is not None: if da.dtype != dtype: da = da.astype(dtype) # Cast to supported pcraster dtypes if driver == "PCRaster": if da.dtype == "float64": da = da.astype("float32") elif da.dtype == "int64": da = da.astype("int32") elif da.dtype == "bool": da = da.astype("uint8") if "PCRASTER_VALUESCALE" not in profile: if da.dtype == "int32": profile["PCRASTER_VALUESCALE"] = "VS_NOMINAL" elif da.dtype == "uint8": profile["PCRASTER_VALUESCALE"] = "VS_BOOLEAN" else: profile["PCRASTER_VALUESCALE"] = "VS_SCALAR" extradims = list(filter(lambda dim: dim not in ("y", "x"), da.dims)) # TODO only equidistant IDFs are compatible with GDAL / rasterio # TODO try squeezing extradims here, such that 1 layer, 1 time, etc. is acccepted if extradims: raise ValueError(f"Only x and y dimensions supported, found {da.dims}") # transform will be affine object in next xarray profile["transform"] = imod.util.spatial.transform(da) profile["driver"] = driver profile["height"] = da.y.size profile["width"] = da.x.size profile["count"] = 1 profile["dtype"] = da.dtype profile["nodata"] = nodata # Allow writing ASCII grids even if rasterio isn't installed. This is # useful for e.g. MetaSWAP input. if isinstance(rasterio, MissingOptionalModule) and driver == "AAIGrid": write_aaigrid(path, da.values, profile) else: with rasterio.Env(): with rasterio.open(path, "w", **profile) as ds: ds.write(da.values, 1) return
[docs] def save(path, a, driver=None, nodata=np.nan, pattern=None, dtype=None): """ Write a xarray.DataArray to one or more rasterio supported files If the DataArray only has ``y`` and ``x`` dimensions, a single raster file is written, like the ``imod.rasterio.write`` function. This function is more general and also supports ``time`` and ``layer`` and other dimensions. It will split these up, give them their own filename according to the conventions in ``imod.util.path.compose``, and write them each. Parameters ---------- path : str or Path Path to the raster file to be written. This function decides on the actual filename(s) using conventions, so it only takes the directory and name from this parameter. a : xarray.DataArray DataArray to be written. It needs to have dimensions ('y', 'x'), and optionally ``layer`` and ``time``. driver: str, optional Which GDAL format driver to use. The complete list is at https://gdal.org/drivers/raster/index.html. By default tries to guess from the file extension. nodata : float, optional Nodata value in the saved raster files. Defaults to a value of nan. pattern : str Format string which defines how to create the filenames. See examples. Example ------- Consider a DataArray ``da`` that has dimensions 'layer', 'y' and 'x', with the 'layer' dimension consisting of layer 1 and 2:: save('path/to/head', da) This writes the following two tif files: 'path/to/head_l1.tif' and 'path/to/head_l2.tif'. It is possible to generate custom filenames using a format string. The default filenames would be generated by the following format string: save("example", pattern="{name}_l{layer}{extension}") If you desire zero-padded numbers that show up neatly sorted in a file manager, you may specify: save("example", pattern="{name}_l{layer:02d}{extension}") In this case, a 0 will be padded for single digit numbers ('1' will become '01'). To get a date with dashes, use the following pattern: "{name}_{time:%Y-%m-%d}_l{layer}{extension}" """ path = pathlib.Path(path) # defaults to geotiff if driver is None: if path.suffix == "": path = path.with_suffix(".tif") driver = "GTiff" else: driver = _get_driver(path) # Use a closure to skip the driver argument # so it takes the same arguments as the idf write def _write(path, a, nodata, dtype): return write(path, a, driver, nodata, dtype) array_io.writing._save(path, a, nodata, pattern, dtype, _write)