Source code for xugrid.ugrid.polygonize

from typing import Tuple

import numpy as np
from scipy import sparse

from xugrid.constants import FILL_VALUE, IntArray


def _bbox_area(bounds):
    return (bounds[2] - bounds[0]) * (bounds[3] - bounds[1])


def _classify(
    i: IntArray, j: IntArray, face_values: np.ndarray
) -> Tuple[int, IntArray]:
    """
    Find out how many discrete polygons are created. Identify the connectivity,
    such that we can select a single polygon afterwards.

    Parameters
    ----------
    i: np.ndarray of int
        First face of the edge.
    j: np.ndarray of int
        Second face of the edge.
    face_values: np.ndarray

    Returns
    -------
    n_polygon: int
    polygon_id: np.ndarray of int
    """
    # Every face connects up to two faces. vi holds the values of the first face,
    # vj holds the value of the second face.
    vi = face_values[i]
    vj = face_values[j]
    n = face_values.size
    # For labelling, only those parts of the mesh that have the same value
    # should be connected with each other.
    # Since we dropped NaN values before, we needn't worry about those.
    is_connection = (i != FILL_VALUE) & (j != FILL_VALUE) & (vi == vj)
    i = i[is_connection]
    j = j[is_connection]
    ij = np.concatenate([i, j])
    ji = np.concatenate([j, i])
    coo_content = (ji, (ij, ji))
    # Make sure to explicitly set the matrix shape: otherwise, isolated
    # elements witout any connection might disappear, and connected_components
    # will not return a value for every face.
    coo_matrix = sparse.coo_matrix(coo_content, shape=(n, n))
    # We can classify the grid faces using this (reduced) connectivity
    return sparse.csgraph.connected_components(coo_matrix)



[docs]
def polygonize(uda: "UgridDataArray") -> "gpd.GeoDataFrame":  # type: ignore # noqa
    """
    Polygonize a UgridDataArray.

    This function creates vector polygons for all connected regions of cells
    (faces) in the Ugrid2d topology sharing a common value.

    The produced polygon edges will follow exactly the cell boundaries. When
    the data consists of many unique values (e.g. unbinned elevation data), the
    result will essentially be one polygon per face. In such cases, it is much
    more efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which
    directly converts every cell to a polygon. This function is meant for data
    with relatively few unique values such as classification results.

    Parameters
    ----------
    uda: UgridDataArray
        The DataArray should only contain the face dimension. Additional
        dimensions, such as time, are not allowed.

    Returns
    -------
    polygonized: GeoDataFrame
    """

    import geopandas as gpd
    import shapely

    facedim = uda.ugrid.grid.face_dimension
    if uda.dims != (facedim,):
        raise ValueError(
            "Cannot polygonize non-face dimensions. Expected only"
            f"({facedim},), but received {uda.dims}."
        )

    # First remove the NaN values. These will not be polygonized anyway.
    dropped = uda.dropna(dim=uda.ugrid.grid.face_dimension)
    face_values = dropped.to_numpy()
    grid = dropped.ugrid.grid
    i, j = grid.edge_face_connectivity.T
    n_polygon, polygon_id = _classify(i, j, face_values)

    # Now we identify for each label the subset of edges. These are the
    # "exterior" edges: either the exterior edge of the mesh identified by a
    # fill value, or by being connected to a cell with a different value.
    coordinates = grid.node_coordinates
    data_i = face_values[i]
    vi = polygon_id[i]
    vj = polygon_id[j]
    # Ensure that no result thas has been created by indexing with the
    # fill_value remains. Since polygon_id starts counting a 0, we may use -1.
    vi[i == FILL_VALUE] = FILL_VALUE
    vj[j == FILL_VALUE] = FILL_VALUE
    boundary = vi != vj

    polygons = []
    values = []
    for label in range(n_polygon):
        keep = ((vi == label) | (vj == label)) & boundary
        # The result of shapely polygonize is always a GeometryCollection.
        # Holes are included twice: once as holes in the largest body, and once
        # more as polygons on their own. We are interested in the largest
        # polygon, which we identify through its bounding box.
        edges = grid.edge_node_connectivity[keep]
        collection = shapely.polygonize(shapely.linestrings(coordinates[edges]))
        polygon = max(collection.geoms, key=lambda x: _bbox_area(x.bounds))
        # Find the first True value in keep, use that to fetch the polygon
        # value.
        value = data_i[keep.argmax()]
        polygons.append(polygon)
        values.append(value)

    return gpd.GeoDataFrame({"values": values}, geometry=polygons)