Source code for movement.utils.reports

"""Utility functions for reporting missing data."""

import logging

import xarray as xr

# Module-level logger named after this module; ``report_nan_values`` emits
# its compiled report through it at INFO level.
logger = logging.getLogger(__name__)


def calculate_nan_stats(
    data: xr.DataArray,
    keypoint: str | None = None,
    individual: str | None = None,
) -> str:
    """Calculate NaN stats for a given keypoint and individual.

    This function calculates the number and percentage of NaN points
    for a given keypoint and individual in the input data. A keypoint is
    considered NaN if any of its ``space`` coordinates are NaN.

    Parameters
    ----------
    data : xarray.DataArray
        The input data. It must have ``time`` and ``space`` dimensions, and
        after the selection described below only the ``time`` dimension may
        hold more than one element.
    keypoint : str, optional
        The name of the keypoint for which to generate the report.
        If ``None``, it is assumed that the input data contains only
        one keypoint and this keypoint is used. Default is ``None``.
    individual : str, optional
        The name of the individual for which to generate the report.
        If ``None``, it is assumed that the input data contains only
        one individual and this individual is used. Default is ``None``.

    Returns
    -------
    str
        A single report line of the form
        ``"\\n\\t\\t<keypoint>: <n_nans>/<n_points> (<percent>%)"``.

    Notes
    -----
    When ``keypoint`` is ``None`` the line is labelled with the value of the
    single-valued ``keypoints`` coordinate if the data carries one; otherwise
    the label stays ``None``. An empty ``time`` dimension is reported as
    ``0/0 (0.0%)`` instead of raising ``ZeroDivisionError``.

    """
    # Only select along the dimensions the caller explicitly named.
    selection_criteria = {}
    if individual is not None:
        selection_criteria["individuals"] = individual
    if keypoint is not None:
        selection_criteria["keypoints"] = keypoint
    selected_data = (
        data.sel(**selection_criteria) if selection_criteria else data
    )

    # A time point counts as missing if any of its space coordinates is NaN.
    # ``.item()`` requires the reduced result to hold exactly one value.
    n_nans = selected_data.isnull().any(["space"]).sum(["time"]).item()
    n_points = selected_data.time.size

    # Guard against data with zero time points.
    percent_nans = round((n_nans / n_points) * 100, 1) if n_points else 0.0

    # Resolve the label: prefer the explicit argument, otherwise use the
    # lone keypoint name carried by the data (if any).
    kp_label = keypoint
    if kp_label is None and "keypoints" in selected_data.coords:
        kp_coord = selected_data.coords["keypoints"]
        if kp_coord.size == 1:
            kp_label = kp_coord.item()

    return f"\n\t\t{kp_label}: {n_nans}/{n_points} ({percent_nans}%)"


def report_nan_values(da: xr.DataArray, label: str | None = None) -> str:
    """Report the number and percentage of keypoints that are NaN.

    Numbers are reported for each individual and keypoint in the data.

    Parameters
    ----------
    da : xarray.DataArray
        The input data. If it has ``individuals`` and/or ``keypoints``
        dimensions, one entry is reported per individual and keypoint;
        a missing dimension is treated as a single unnamed entry.
    label : str, optional
        Label to identify the data in the report. If not provided,
        the name of the DataArray is used as the label.
        Default is ``None``.

    Returns
    -------
    str
        A string containing the report. The same string is also written
        to this module's logger at INFO level.

    Notes
    -----
    When the data has no ``individuals`` dimension, the individual's name is
    taken from a single-valued ``individuals`` coordinate if one exists;
    if there is none, the name is reported as ``None`` rather than raising.

    """
    label = label or da.name

    # Without a given dimension, iterate once with ``None`` so that
    # ``calculate_nan_stats`` skips selection along it.
    individuals = (
        da.individuals.values if "individuals" in da.dims else [None]
    )
    keypoints = da.keypoints.values if "keypoints" in da.dims else [None]

    # Collect the report fragments and join once at the end.
    report_parts = [f"\nMissing points (marked as NaN) in {label}"]
    for ind in individuals:
        ind_name = ind
        if ind_name is None and "individuals" in da.coords:
            # E.g. data previously reduced to one individual via ``sel``.
            ind_coord = da.coords["individuals"]
            if ind_coord.size == 1:
                ind_name = ind_coord.item()
        report_parts.append(f"\n\tIndividual: {ind_name}")
        report_parts.extend(
            calculate_nan_stats(da, keypoint=kp, individual=ind)
            for kp in keypoints
        )
    nan_report = "".join(report_parts)

    # Write nan report to logger
    logger.info(nan_report)
    return nan_report