Source code for movement.validators.arrays

"""Validators for data arrays."""

from collections.abc import Hashable

import xarray as xr

from movement.utils.logging import log_error


def _as_plain_label(label: object) -> object:
    """Return a coordinate label as a plain Python scalar when possible.

    When ``label`` is a zero-dimensional array-like (as produced by
    iterating over an xarray coordinate), its ``item()`` method gives the
    bare value, which keeps error messages readable. Objects without a
    callable ``item`` attribute are returned unchanged.
    """
    to_scalar = getattr(label, "item", None)
    return to_scalar() if callable(to_scalar) else label


def validate_dims_coords(
    data: xr.DataArray,
    required_dim_coords: dict[str, list[str] | list[Hashable]],
    exact_coords: bool = False,
) -> None:
    """Validate dimensions and coordinates in a data array.

    This function raises a ValueError if the specified dimensions and
    coordinates are not present in the input data array. By default,
    each dimension must contain *at least* the specified coordinates.
    Pass ``exact_coords=True`` to require that each dimension contains
    *exactly* the specified coordinates (and no others).

    All problems found are collected and reported together in a single
    error message, one problem per line.

    Parameters
    ----------
    data : xarray.DataArray
        The input data array to validate.
    required_dim_coords : dict of {str: list of str | list of Hashable}
        A dictionary mapping required dimensions to a list of required
        coordinate values along each dimension.
    exact_coords : bool, optional
        If False (default), checks only that the listed coordinates
        exist in each dimension. If True, checks that each dimension
        has exactly the specified coordinates and no more.
        The exactness check is completely skipped for dimensions with
        no required coordinates.

    Raises
    ------
    ValueError
        If the input data does not contain the required dimension(s)
        and/or the required coordinate(s), or if ``exact_coords`` is True
        and a dimension has coordinates beyond the required ones.

    Examples
    --------
    Validate that a data array contains the dimension 'time'. No specific
    coordinates are required.

    >>> validate_dims_coords(data, {"time": []})

    Validate that a data array contains the dimensions 'time' and 'space',
    and that the 'space' dimension contains the coordinates 'x' and 'y'.

    >>> validate_dims_coords(data, {"time": [], "space": ["x", "y"]})

    Enforce that 'space' has *only* 'x' and 'y', and no other coordinates:

    >>> validate_dims_coords(data, {"space": ["x", "y"]}, exact_coords=True)

    """
    # Each entry is one complete, newline-terminated line of the final
    # error message; they are joined only if something went wrong.
    problems: list[str] = []

    # 1. Check that all required dimensions are present
    missing_dims = [dim for dim in required_dim_coords if dim not in data.dims]
    if missing_dims:
        problems.append(
            f"Input data must contain {missing_dims} as dimensions.\n"
        )

    for dim, coords in required_dim_coords.items():
        # 2. Check the presence of the required coords. A dimension
        # without coordinates is treated as having none.
        dim_coords_in_data = data.coords.get(dim, [])
        missing_coords = [c for c in coords if c not in dim_coords_in_data]
        if missing_coords:
            problems.append(
                f"Input data must contain {missing_coords} "
                f"in the '{dim}' coordinates.\n"
            )

        # 3. If exact_coords is True, verify no extra coords exist.
        # Dimensions with no required coords are deliberately skipped.
        if not (exact_coords and coords):
            continue
        # Convert each offending label to a plain scalar so the message
        # shows e.g. ['z'] rather than the repr of a 0-d array object.
        extra_coords = [
            _as_plain_label(c) for c in dim_coords_in_data if c not in coords
        ]
        if extra_coords:
            problems.append(
                f"Dimension '{dim}' must only contain "
                f"{coords} as coordinates, "
                f"but it also has {extra_coords}.\n"
            )

    if problems:
        raise log_error(ValueError, "".join(problems))