"""
Functions for reading and writing iMOD Data Files (IDFs) to ``xarray`` objects.
The primary functions to use are :func:`imod.idf.open` and
:func:`imod.idf.save`, though lower level functions are also available.
"""
import glob
import pathlib
import struct
import warnings
from collections import defaultdict
from collections.abc import Iterable
from pathlib import Path
from re import Pattern
from typing import Any
import numpy as np
import xarray as xr
import imod
from imod.formats import array_io
from imod.typing.structured import merge_partitions
# Make sure we can still use the built-in function...
f_open = open
def _read(path, headersize, nrow, ncol, nodata, dtype):
    """
    Read the data section of a single IDF file into a numpy.ndarray.

    Parameters
    ----------
    path : str or Path
        Path to the IDF file to be read
    headersize : int
        Byte size of the header; reading starts directly after it.
    nrow : int
    ncol : int
    nodata : number
        Value marking missing data in the file.
    dtype : numpy dtype
        Element type of the stored values.

    Returns
    -------
    numpy.ndarray
        A float numpy.ndarray with shape (nrow, ncol) of the values
        in the IDF file. On opening all nodata values are changed
        to NaN in the numpy.ndarray.
    """
    count = nrow * ncol
    with f_open(path, "rb") as f:
        # Skip the header; the remainder is a flat row-major value block.
        f.seek(headersize)
        raw = np.fromfile(f, dtype, count)
    grid = raw.reshape((nrow, ncol))
    return array_io.reading._to_nan(grid, nodata)
def read(path, pattern=None):
    """
    Read a single IDF file to a numpy.ndarray

    .. deprecated::
        Use ``imod.idf.open(path).values`` instead.

    Parameters
    ----------
    path : str or Path
        Path to the IDF file to be read
    pattern : str, regex pattern, optional
        If the filenames do not match the default naming convention of
        {name}_{time}_l{layer}, a custom pattern can be defined here either
        as a string, or as a compiled regular expression pattern. Please refer
        to the examples for ``imod.idf.open``.

    Returns
    -------
    numpy.ndarray
        A float numpy.ndarray with shape (nrow, ncol) of the values
        in the IDF file. On opening all nodata values are changed
        to NaN in the numpy.ndarray.
    dict
        Remaining header metadata.
    """
    warnings.warn(
        "The idf.read() function is deprecated. To get a numpy array of an IDF, "
        "use instead: imod.idf.open(path).values",
        FutureWarning,
    )
    attrs = header(path, pattern)
    # Pop the geometry/dtype keys; whatever remains is returned as metadata.
    keys = ("headersize", "nrow", "ncol", "nodata", "dtype")
    headersize, nrow, ncol, nodata, dtype = (attrs.pop(key) for key in keys)
    values = _read(path, headersize, nrow, ncol, nodata, dtype)
    return values, attrs
# Open IDFs for multiple times and/or layers into one DataArray
# [docs]  -- Sphinx HTML "[docs]" link artifact; not Python code, commented out
def open(path, use_cftime=False, pattern=None):
    r"""
    Open one or more IDF files as an xarray.DataArray.

    In accordance with xarray's design, ``open`` loads the data of IDF files
    lazily. This means the data of the IDFs are not loaded into memory until the
    data is needed. This allows for easier handling of large datasets, and
    more efficient computations.

    Parameters
    ----------
    path : str, Path or list
        This can be a single file, 'head_l1.idf', a glob pattern expansion,
        'head_l*.idf', or a list of files, ['head_l1.idf', 'head_l2.idf'].
        Note that each file needs to be of the same name (part before the
        first underscore) but have a different layer and/or timestamp,
        such that they can be combined in a single xarray.DataArray.
    use_cftime : bool, optional
        Use ``cftime.DatetimeProlepticGregorian`` instead of `np.datetime64[ns]`
        for the time axis.
        Dates are normally encoded as ``np.datetime64[ns]``; however, if dates
        fall before 1678 or after 2261, they are automatically encoded as
        ``cftime.DatetimeProlepticGregorian`` objects rather than
        ``np.datetime64[ns]``.
    pattern : str, regex pattern, optional
        If the filenames do not match the default naming convention of
        {name}_{time}_l{layer}, a custom pattern can be defined here either
        as a string, or as a compiled regular expression pattern. See the
        examples below.

    Returns
    -------
    xarray.DataArray
        A float xarray.DataArray of the values in the IDF file(s).
        All metadata needed for writing the file to IDF or other formats
        using imod.rasterio are included in the xarray.DataArray.attrs.

    Examples
    --------
    Open an IDF file:

    >>> da = imod.idf.open("example.idf")

    Open an IDF file, relying on default naming conventions to identify
    layer:

    >>> da = imod.idf.open("example_l1.idf")

    Open an IDF file, relying on default naming conventions to identify layer
    and time:

    >>> head = imod.idf.open("head_20010101_l1.idf")

    To ignore the naming conventions, specify ``pattern="{name}"``. This will
    disable parsing of the filename into xarray coordinates.

    >>> head = imod.idf.open("head_20010101_l1.idf", pattern="{name}")

    Open multiple IDF files, in this case files for the year 2001 for all
    layers, again relying on default conventions for naming:

    >>> head = imod.idf.open("head_2001*_l*.idf")

    The same, this time explicitly specifying ``name``, ``time``, and ``layer``:

    >>> head = imod.idf.open("head_2001*_l*.idf", pattern="{name}_{time}_l{layer}")

    The format string pattern will only work on tidy paths, where variables are
    separated by underscores. You can also pass a compiled regex pattern.
    Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered.

    >>> import re
    >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE)
    >>> head = imod.idf.open("headL11", pattern=pattern)

    However, this requires constructing regular expressions, which is
    generally a fiddly process. Regex notation is also impossible to
    remember. The website https://regex101.com is a nice help. Alternatively,
    the most pragmatic solution may be to just rename your files.
    """
    # Thin wrapper: all lazy-loading and filename parsing is delegated to the
    # shared array_io reader, parameterized with the IDF header/data readers.
    return array_io.reading._open(path, use_cftime, pattern, header, _read)
def _more_than_one_unique_value(values: Iterable[Any]):
"""Returns if more than one unique value in list"""
return len(set(values)) != 1
# [docs]  -- Sphinx HTML "[docs]" link artifact; not Python code, commented out
def open_subdomains(
    path: str | Path, use_cftime: bool = False, pattern: str | Pattern | None = None
) -> xr.DataArray:
    """
    Combine IDF files of multiple subdomains.

    Parameters
    ----------
    path : str or Path
        Global path.
    use_cftime : bool, optional
    pattern : str, regex pattern, optional
        If no pattern is provided, the function will first try:
        "{name}_c{species}_{time}_l{layer}_p{subdomain}"
        and if that fails:
        "{name}_{time}_l{layer}_p{subdomain}"
        Following the iMOD5/iMOD-WQ filename conventions.

    Returns
    -------
    xarray.DataArray

    Raises
    ------
    FileNotFoundError
        If ``path`` matches no files.
    KeyError
        If the pattern does not capture a ``subdomain`` group.
    ValueError
        If the subdomains do not all have the same number of IDF files.
    """
    paths = sorted(glob.glob(str(path)))
    if not paths:
        # Previously an IndexError on paths[0]; raise a clear error instead.
        raise FileNotFoundError(f"Could not find any files matching {path}")

    if pattern is None:
        # Try the iMOD-WQ convention (with species) first; fall back to the
        # iMOD5 convention when the first file carries no species marker.
        pattern = "{name}_c{species}_{time}_l{layer}_p{subdomain}"
        re_pattern_species = imod.util.path._custom_pattern_to_regex_pattern(pattern)
        has_species = re_pattern_species.search(paths[0])
        if not has_species:
            pattern = "{name}_{time}_l{layer}_p{subdomain}"

    parsed = [imod.util.path.decompose(p, pattern) for p in paths]

    # Group paths per subdomain id.
    grouped = defaultdict(list)
    for match, p in zip(parsed, paths):
        try:
            key = match["subdomain"]
        except KeyError as e:
            # Chain the original exception so the traceback is preserved.
            raise KeyError(f"{e} in path: {p} with pattern: {pattern}") from e
        grouped[key].append(p)

    n_idf_per_subdomain = {
        subdomain_id: len(path_ls) for subdomain_id, path_ls in grouped.items()
    }
    if _more_than_one_unique_value(n_idf_per_subdomain.values()):
        raise ValueError(
            f"Each subdomain must have the same number of IDF files, found: {n_idf_per_subdomain}"
        )

    das = []
    for pathlist in grouped.values():
        da = open(pathlist, use_cftime=use_cftime, pattern=pattern)
        # Each group holds exactly one subdomain; drop the coordinate.
        da = da.isel(subdomain=0, drop=True)
        das.append(da)

    name = das[0].name
    return merge_partitions(das)[name]  # as DataArray for backwards compatibility
# [docs]  -- Sphinx HTML "[docs]" link artifact; not Python code, commented out
def open_dataset(globpath, use_cftime=False, pattern=None):
    """
    Open a set of IDFs to a dict of xarray.DataArrays.

    Compared to imod.idf.open, this function lets you open multiple parameters
    at once (for example kh values and starting heads of a model), which will
    each be a separate entry in a dictionary, with as key the parameter name,
    and as value the xarray.DataArray.

    Parameters
    ----------
    globpath : str or Path
        A glob pattern expansion such as ``'model/**/*.idf'``, which recursively
        finds all IDF files under the model directory. Note that files with
        the same name (part before the first underscore) will be combined into
        a single xarray.DataArray.
    use_cftime : bool, optional
        Use ``cftime.DatetimeProlepticGregorian`` instead of `np.datetime64[ns]`
        for the time axis.
        Dates are normally encoded as ``np.datetime64[ns]``; however, if dates
        fall before 1679 or after 2262, they are automatically encoded as
        ``cftime.DatetimeProlepticGregorian`` objects rather than
        ``np.datetime64[ns]``.
    pattern : str, regex pattern, optional
        If the filenames do not match the default naming convention of
        {name}_{time}_l{layer}, a custom pattern can be defined here either
        as a string, or as a compiled regular expression pattern. Please refer
        to the examples for ``imod.idf.open``.

    Returns
    -------
    dictionary
        Dictionary of str (parameter name) to xarray.DataArray.
        All metadata needed for writing the file to IDF or other formats
        using imod.rasterio are included in the xarray.DataArray.attrs.

    Raises
    ------
    FileNotFoundError
        If ``globpath`` matches no files.
    """
    # convert since for Path.glob non-relative patterns are unsupported
    if isinstance(globpath, pathlib.Path):
        globpath = str(globpath)
    paths = [pathlib.Path(p) for p in glob.glob(globpath, recursive=True)]
    if not paths:
        raise FileNotFoundError(f"Could not find any files matching {globpath}")
    # group the DataArrays together using their name
    # note that directory names are ignored, and in case of duplicates, the last one wins
    names = [imod.util.path.decompose(p, pattern)["name"] for p in paths]
    grouped = defaultdict(list)
    for p, name in zip(paths, names):
        grouped[name].append(p)
    # load each group into a DataArray; iterate names in sorted order to keep
    # the deterministic ordering that np.unique previously provided
    das = [
        array_io.reading._load(grouped[name], use_cftime, pattern, _read, header)
        for name in sorted(grouped)
    ]
    # store each DataArray under its own name in a dictionary
    dd = {da.name: da for da in das}
    # Initially I wanted to return a xarray Dataset here,
    # but then realised that it is not always aligned, and therefore not possible, see
    # https://github.com/pydata/xarray/issues/1471#issuecomment-313719395
    # It is not aligned when some parameters only have a non empty subset of a dimension,
    # such as L2 + L3. This dict provides a similar interface anyway. If a Dataset is constructed
    # from unaligned DataArrays it will make copies of the data, which we don't want.
    return dd
def write(path, a, nodata=1.0e20, dtype=np.float32):
    """
    Write a 2D xarray.DataArray to a IDF file

    Parameters
    ----------
    path : str or Path
        Path to the IDF file to be written
    a : xarray.DataArray
        DataArray to be written. It needs to have exactly a.dims == ('y', 'x').
    nodata : float, optional
        Nodata value in the saved IDF files. Xarray uses nan values to represent
        nodata, but these tend to work unreliably in iMOD(FLOW).
        Defaults to a value of 1.0e20.
    dtype : type, ``{np.float32, np.float64}``, default is ``np.float32``.
        Whether to write single precision (``np.float32``) or double precision
        (``np.float64``) IDF files.

    Raises
    ------
    TypeError
        If ``a`` is not an xarray.DataArray.
    ValueError
        If ``a.dims`` is not exactly ``('y', 'x')``, or ``dtype`` is not
        np.float32 / np.float64.
    """
    if not isinstance(a, xr.DataArray):
        raise TypeError("Data to write must be an xarray.DataArray")
    if not a.dims == ("y", "x"):
        raise ValueError(
            f"Dimensions must be exactly ('y', 'x'). Received {a.dims} instead."
        )
    # IDF stores x ascending and y descending; flip axes where needed.
    flip = slice(None, None, -1)
    if not a.indexes["x"].is_monotonic_increasing:
        a = a.isel(x=flip)
    if not a.indexes["y"].is_monotonic_decreasing:
        a = a.isel(y=flip)
    # TODO: check is_monotonic, but also for single col/row idfs...

    # Header is fully doubled in size in case of double precision ...
    # This means integers are also turned into 8 bytes
    # and requires padding with some additional bytes
    data_dtype = a.dtype
    if dtype == np.float64:
        if data_dtype != np.float64:
            a = a.astype(np.float64)
        # Record-length identifier marking a double precision IDF.
        reclenid = 2295
        floatformat = "d"
        intformat = "q"
        doubleprecision = True
    elif dtype == np.float32:
        # Record-length identifier marking a single precision IDF.
        reclenid = 1271
        floatformat = "f"
        intformat = "i"
        doubleprecision = False
        if data_dtype != np.float32:
            a = a.astype(np.float32)
    else:
        raise ValueError("Invalid dtype, IDF allows only np.float32 and np.float64")

    # Only fillna if data can contain na values
    if (data_dtype == np.float32) or (data_dtype == np.float64):
        a = a.fillna(nodata)

    with f_open(path, "wb") as f:
        f.write(struct.pack("i", reclenid))  # Lahey RecordLength Ident.
        if doubleprecision:
            # Doubled header: identifier is written twice for padding.
            f.write(struct.pack("i", reclenid))
        nrow = a.y.size
        ncol = a.x.size
        f.write(struct.pack(intformat, ncol))
        f.write(struct.pack(intformat, nrow))
        # Cell sizes and bounding box from the coordinate arrays.
        dx, xmin, xmax, dy, ymin, ymax = imod.util.spatial.spatial_reference(a)
        f.write(struct.pack(floatformat, xmin))
        f.write(struct.pack(floatformat, xmax))
        f.write(struct.pack(floatformat, ymin))
        f.write(struct.pack(floatformat, ymax))
        # NOTE(review): min/max are taken after fillna, so when any nodata is
        # present dmin/dmax can equal the nodata value (e.g. 1e20) — confirm
        # this matches the iMOD convention for these header fields.
        f.write(struct.pack(floatformat, float(a.min())))  # dmin
        f.write(struct.pack(floatformat, float(a.max())))  # dmax
        f.write(struct.pack(floatformat, nodata))
        # Scalar dx and dy imply a single cell size per dimension.
        if isinstance(dx, float) and isinstance(dy, float):
            ieq = True  # equidistant
        else:
            ieq = False  # nonequidistant
        f.write(struct.pack("?", not ieq))  # ieq
        itb = False
        if "z" in a.coords and "dz" in a.coords:
            # Scalar z and dz allow writing voxel top/bottom (ITB fields).
            z = a.coords["z"]
            dz = abs(a.coords["dz"])
            try:
                top = float(z + 0.5 * dz)
                bot = float(z - 0.5 * dz)
                itb = True
            except TypeError:  # not a scalar value
                pass
        f.write(struct.pack("?", itb))
        f.write(struct.pack("xx"))  # not used
        if doubleprecision:
            f.write(struct.pack("xxxx"))  # not used
        if ieq:
            # Single cell size per dimension.
            f.write(struct.pack(floatformat, abs(dx)))
            f.write(struct.pack(floatformat, abs(dy)))
        if itb:
            f.write(struct.pack(floatformat, top))
            f.write(struct.pack(floatformat, bot))
        if not ieq:
            # Nonequidistant: write per-column and per-row cell sizes.
            np.abs(a.coords["dx"].values).astype(a.dtype).tofile(f)
            np.abs(a.coords["dy"].values).astype(a.dtype).tofile(f)
        # Finally the data block itself, row-major.
        a.values.tofile(f)
def _as_voxeldata(a):
    """
    If "z" is present as a dimension, generate layer if necessary. Ensure that
    layer is the dimension (via swap_dims). Infer "dz" if necessary, and if
    possible.

    Parameters
    ----------
    a : xr.DataArray

    Returns
    -------
    a : xr.DataArray
        copy of input a, with swapped dims and dz added, if necessary.
    """

    def _with_inferred_dz(da):
        # Raises an error when dz cannot be inferred from the z coordinate.
        dz, _, _ = imod.util.spatial.coord_reference(da["z"])
        if isinstance(dz, float):
            return da.assign_coords(dz=dz)
        return da.assign_coords(dz=("layer", dz))

    a = a.copy()  # avoid side-effects on the caller's DataArray
    if "z" not in a.coords:
        return a

    if "z" in a.dims:
        # z is definitely one dimensional: swap it with layer.
        if "layer" not in a.coords:
            a = a.assign_coords(layer=("z", np.arange(1, a["z"].size + 1)))
        a = a.swap_dims({"z": "layer"})
        if "dz" not in a.coords:
            a = _with_inferred_dz(a)
    elif len(a["z"].shape) == 1:
        # One dimensional z carried along another dimension: only infer dz
        # when z depends exclusively on layer.
        if "layer" in a.coords and tuple(a["z"].indexes.keys()) == ("layer",):
            if "dz" not in a.coords:
                a = _with_inferred_dz(a)
    return a
# [docs]  -- Sphinx HTML "[docs]" link artifact; not Python code, commented out
def save(path, a, nodata=1.0e20, pattern=None, dtype=np.float32):
    """
    Write a xarray.DataArray to one or more IDF files

    A DataArray with only ``y`` and ``x`` dimensions results in a single IDF
    file, just like ``imod.idf.write``. This function additionally supports
    ``time`` and ``layer`` dimensions: it splits the array along those
    dimensions and writes one file per slice, naming each according to the
    conventions in ``imod.util.path.compose``.

    Parameters
    ----------
    path : str or Path
        Path to the IDF file to be written. This function decides on the
        actual filename(s) using conventions.
    a : xarray.DataArray
        DataArray to be written. It needs to have dimensions ('y', 'x'), and
        optionally ``layer`` and ``time``.
    nodata : float, optional
        Nodata value in the saved IDF files. Xarray uses nan values to represent
        nodata, but these tend to work unreliably in iMOD(FLOW).
        Defaults to a value of 1.0e20.
    pattern : str
        Format string which defines how to create the filenames. See examples.
    dtype : type, ``{np.float32, np.float64}``, default is ``np.float32``.
        Whether to write single precision (``np.float32``) or double precision
        (``np.float64``) IDF files.

    Example
    -------
    Consider a DataArray ``da`` that has dimensions ``('layer', 'y', 'x')``, with the
    layer dimension consisting of layer 1 and 2:

    >>> imod.idf.save('path/to/head', da)

    This writes the following two IDF files: ``path/to/head_l1.idf`` and
    ``path/to/head_l2.idf``.

    To disable adding coordinates to the files, specify ``pattern="{name}"``:

    >>> imod.idf.save('path/to/head', da, pattern="{name}")

    The ".idf" extension will always be added automatically.

    It is possible to generate custom filenames using a format string. The
    default filenames would be generated by the following format string:

    >>> imod.idf.save("example", pattern="{name}_l{layer}{extension}")

    If you desire zero-padded numbers that show up neatly sorted in a
    file manager, you may specify:

    >>> imod.idf.save("example", pattern="{name}_l{layer:02d}{extension}")

    In this case, a 0 will be padded for single digit numbers ('1' will become
    '01').

    To get a date with dashes, use the following pattern:

    >>> pattern="{name}_{time:%Y-%m-%d}_l{layer}{extension}"
    """
    # Reject unsupported dtypes up front.
    if dtype not in (np.float32, np.float64):
        raise ValueError("Invalid dtype, IDF allows only np.float32 and np.float64")

    # Swap z for layer where possible, and attach "dz" when it can be inferred.
    voxel_da = _as_voxeldata(a)

    # Normalize the target path; the .idf extension is always enforced.
    target = pathlib.Path(path)
    if target.suffix == "":
        target = target.with_suffix(".idf")

    array_io.writing._save(target, voxel_da, nodata, pattern, dtype, write)