Source code for movement.utils.reports

"""Utility functions for reporting missing data."""

import logging

import xarray as xr

logger = logging.getLogger(__name__)



[docs]
def calculate_nan_stats(
    data: xr.DataArray,
    keypoint: str | None = None,
    individual: str | None = None,
) -> str:
    """Calculate NaN stats for a given keypoint and individual.

    This function calculates the number and percentage of NaN points
    for a given keypoint and individual in the input data. A keypoint
    is considered NaN if any of its ``space`` coordinates are NaN.

    Parameters
    ----------
    data : xarray.DataArray
        The input data containing ``keypoints`` and ``individuals``
        dimensions.
    keypoint : str, optional
        The name of the keypoint for which to generate the report.
        If ``None``, it is assumed that the input data contains only
        one keypoint and this keypoint is used.
        Default is ``None``.
    individual : str, optional
        The name of the individual for which to generate the report.
        If ``None``, it is assumed that the input data contains only
        one individual and this individual is used.
        Default is ``None``.

    Returns
    -------
    str
        A single report line of the form
        ``"\\n\\t\\t<keypoint>: <n_nans>/<n_points> (<percent>%)"``.

    Notes
    -----
    If the ``time`` axis of the selected data is empty, the percentage
    is reported as ``0.0`` instead of raising ``ZeroDivisionError``.

    When ``keypoint`` is ``None``, the line is labelled with the value of
    the ``keypoints`` coordinate if the selected data carries exactly one;
    otherwise the label falls back to ``None``.

    """
    # Only select along the dimensions the caller explicitly named.
    selection_criteria = {}
    if individual is not None:
        selection_criteria["individuals"] = individual
    if keypoint is not None:
        selection_criteria["keypoints"] = keypoint
    selected_data = (
        data.sel(**selection_criteria) if selection_criteria else data
    )

    # A time point counts as missing if any of its space components is NaN.
    n_nans = selected_data.isnull().any(["space"]).sum(["time"]).item()
    n_points = selected_data.time.size
    # Guard against an empty time axis: nothing to report means 0% missing.
    percent_nans = round((n_nans / n_points) * 100, 1) if n_points else 0.0

    # Prefer the real keypoint name over a literal "None" in the report
    # when the data unambiguously identifies a single keypoint.
    keypoint_label = keypoint
    if keypoint_label is None and "keypoints" in selected_data.coords:
        keypoint_coord = selected_data.coords["keypoints"]
        if keypoint_coord.size == 1:
            keypoint_label = keypoint_coord.item()

    return f"\n\t\t{keypoint_label}: {n_nans}/{n_points} ({percent_nans}%)"




[docs]
def report_nan_values(da: xr.DataArray, label: str | None = None) -> str:
    """Report the number and percentage of keypoints that are NaN.

    Numbers are reported for each individual and keypoint in the data.
    The compiled report is also written to the module logger at INFO
    level.

    Parameters
    ----------
    da : xarray.DataArray
        The input data containing ``keypoints`` and ``individuals``
        dimensions.
    label : str, optional
        Label to identify the data in the report. If not provided,
        the name of the DataArray is used as the label.
        Default is ``None``.

    Returns
    -------
    str
        A string containing the report.

    Notes
    -----
    If the data has no ``individuals`` dimension, the individual's name
    is taken from a single-valued ``individuals`` coordinate when one is
    present. If no such coordinate exists, the ``Individual:`` header
    line is omitted rather than raising an error.

    """
    # Compile the report
    label = label or da.name
    report_parts = [f"\nMissing points (marked as NaN) in {label}"]
    # Check if the data has individuals and keypoints dimensions
    has_individuals_dim = "individuals" in da.dims
    has_keypoints_dim = "keypoints" in da.dims
    # Default values for individuals and keypoints; ``None`` tells
    # ``calculate_nan_stats`` not to select along that dimension.
    individuals = da.individuals.values if has_individuals_dim else [None]
    keypoints = da.keypoints.values if has_keypoints_dim else [None]

    # Name to display when there is no ``individuals`` dimension: use the
    # single-valued coordinate if the data carries one. Looking it up via
    # ``coords`` avoids an AttributeError when the coordinate is absent.
    fallback_ind_name = None
    if not has_individuals_dim and "individuals" in da.coords:
        ind_coord = da.coords["individuals"]
        if ind_coord.size == 1:
            fallback_ind_name = ind_coord.item()

    for ind in individuals:
        ind_name = ind if ind is not None else fallback_ind_name
        if ind_name is not None:
            report_parts.append(f"\n\tIndividual: {ind_name}")
        report_parts.extend(
            calculate_nan_stats(da, keypoint=kp, individual=ind)
            for kp in keypoints
        )
    nan_report = "".join(report_parts)
    # Write nan report to logger
    logger.info(nan_report)
    return nan_report