"""``attrs`` classes for validating data structures."""
from collections.abc import Iterable
from typing import Any
import attrs
import numpy as np
from attrs import converters, define, field, validators
from movement.utils.logging import log_error, log_warning
def _convert_to_list_of_str(value: str | Iterable[Any]) -> list[str]:
"""Try to coerce the value into a list of strings."""
if isinstance(value, str):
log_warning(
f"Invalid value ({value}). Expected a list of strings. "
"Converting to a list of length 1."
)
return [value]
elif isinstance(value, Iterable):
return [str(item) for item in value]
else:
raise log_error(
ValueError, f"Invalid value ({value}). Expected a list of strings."
)
def _convert_fps_to_none_if_invalid(fps: float | None) -> float | None:
"""Set fps to None if a non-positive float is passed."""
if fps is not None and fps <= 0:
log_warning(
f"Invalid fps value ({fps}). Expected a positive number. "
"Setting fps to None."
)
return None
return fps
def _validate_type_ndarray(value: Any) -> None:
"""Raise ValueError the value is a not numpy array."""
if not isinstance(value, np.ndarray):
raise log_error(
ValueError, f"Expected a numpy array, but got {type(value)}."
)
def _validate_array_shape(
    attribute: attrs.Attribute, value: np.ndarray, expected_shape: tuple
):
    """Raise a ValueError if ``value.shape`` differs from ``expected_shape``.

    Parameters
    ----------
    attribute : attrs.Attribute
        Attribute being validated; its ``name`` is used in the error
        message.
    value : np.ndarray
        Array whose shape is checked.
    expected_shape : tuple
        Shape that ``value`` must have.

    Raises
    ------
    ValueError
        If the shapes are not equal.

    """
    shape_matches = value.shape == expected_shape
    if shape_matches:
        return

    raise log_error(
        ValueError,
        f"Expected '{attribute.name}' to have shape {expected_shape}, "
        f"but got {value.shape}.",
    )
def _validate_list_length(
    attribute: attrs.Attribute, value: list | None, expected_length: int
):
    """Raise a ValueError if a list does not have the expected length.

    Parameters
    ----------
    attribute : attrs.Attribute
        Attribute being validated; its ``name`` is used in the error
        message.
    value : list or None
        List whose length is checked. None is accepted without any check.
    expected_length : int
        Number of elements that ``value`` must contain.

    Raises
    ------
    ValueError
        If ``value`` is not None and its length differs from
        ``expected_length``.

    """
    # None means "not provided", which is handled by the caller's defaults.
    if value is None:
        return

    if len(value) != expected_length:
        raise log_error(
            ValueError,
            f"Expected '{attribute.name}' to have length {expected_length}, "
            f"but got {len(value)}.",
        )
[docs]
@define(kw_only=True)
class ValidPosesDataset:
    """Class for validating poses data intended for a ``movement`` dataset.

    The validator ensures that within the ``movement poses`` dataset:

    - The required ``position_array`` is a 4-dimensional numpy array
      with the last dimension containing 2 or 3 spatial coordinates.
    - The optional ``confidence_array``, if provided, is a numpy array
      with its shape matching the first three dimensions of the
      ``position_array``; otherwise, it defaults to an array of NaNs.
    - The optional ``individual_names`` and ``keypoint_names``,
      if provided, match the number of individuals and keypoints
      in the dataset, respectively; otherwise, default names are assigned.
    - The optional ``fps`` is a positive float; otherwise, it defaults to None.
    - The optional ``source_software`` is a string; otherwise,
      it defaults to None.

    Attributes
    ----------
    position_array : np.ndarray
        Array of shape (n_frames, n_individuals, n_keypoints, n_space)
        containing the poses.
    confidence_array : np.ndarray, optional
        Array of shape (n_frames, n_individuals, n_keypoints) containing
        the point-wise confidence scores.
        If None (default), the scores will be set to an array of NaNs.
    individual_names : list of str, optional
        List of unique names for the individuals in the video. If None
        (default), the individuals will be named "individual_0",
        "individual_1", etc. Only the length of the list is validated;
        uniqueness is not checked by this class. If ``source_software``
        is "LightningPose", exactly one name is expected.
    keypoint_names : list of str, optional
        List of unique names for the keypoints in the skeleton. If None
        (default), the keypoints will be named "keypoint_0", "keypoint_1",
        etc. Only the length of the list is validated.
    fps : float, optional
        Frames per second of the video. Defaults to None. Non-positive
        values are replaced with None.
    source_software : str, optional
        Name of the software from which the poses were loaded.
        Defaults to None.

    Raises
    ------
    ValueError
        If the dataset does not meet the ``movement poses``
        dataset requirements.

    """

    # Required attributes
    position_array: np.ndarray = field()

    # Optional attributes
    confidence_array: np.ndarray | None = field(default=None)
    individual_names: list[str] | None = field(
        default=None,
        converter=converters.optional(_convert_to_list_of_str),
    )
    keypoint_names: list[str] | None = field(
        default=None,
        converter=converters.optional(_convert_to_list_of_str),
    )
    fps: float | None = field(
        default=None,
        converter=converters.pipe(  # type: ignore
            converters.optional(float), _convert_fps_to_none_if_invalid
        ),
    )
    source_software: str | None = field(
        default=None,
        validator=validators.optional(validators.instance_of(str)),
    )

    # Add validators
    @position_array.validator
    def _validate_position_array(self, attribute, value):
        """Check ``position_array`` is a 4D array with 2 or 3 space coords."""
        _validate_type_ndarray(value)
        if value.ndim != 4:
            raise log_error(
                ValueError,
                f"Expected '{attribute.name}' to have 4 dimensions, "
                f"but got {value.ndim}.",
            )
        if value.shape[-1] not in [2, 3]:
            raise log_error(
                ValueError,
                f"Expected '{attribute.name}' to have 2 or 3 spatial "
                f"dimensions, but got {value.shape[-1]}.",
            )

    @confidence_array.validator
    def _validate_confidence_array(self, attribute, value):
        """Check ``confidence_array`` matches ``position_array`` sans space."""
        if value is not None:
            _validate_type_ndarray(value)
            _validate_array_shape(
                attribute, value, expected_shape=self.position_array.shape[:-1]
            )

    @individual_names.validator
    def _validate_individual_names(self, attribute, value):
        """Check the number of names matches the number of individuals."""
        if self.source_software == "LightningPose":
            # LightningPose only supports a single individual
            _validate_list_length(attribute, value, 1)
        else:
            _validate_list_length(
                attribute, value, self.position_array.shape[1]
            )

    @keypoint_names.validator
    def _validate_keypoint_names(self, attribute, value):
        """Check the number of names matches the number of keypoints."""
        _validate_list_length(attribute, value, self.position_array.shape[2])

    def __attrs_post_init__(self):
        """Assign default values to optional attributes (if None).

        If no confidence_array is provided, set it to an array of NaNs.
        If no individual or keypoint names are provided, assign 0-based
        default names ("individual_0", "keypoint_0", etc.).
        """
        if self.confidence_array is None:
            self.confidence_array = np.full(
                self.position_array.shape[:-1], np.nan, dtype="float32"
            )
            # The trailing space keeps the two sentences separated once the
            # adjacent string literals are concatenated.
            log_warning(
                "Confidence array was not provided. "
                "Setting to an array of NaNs."
            )
        if self.individual_names is None:
            self.individual_names = [
                f"individual_{i}" for i in range(self.position_array.shape[1])
            ]
            log_warning(
                "Individual names were not provided. "
                f"Setting to {self.individual_names}."
            )
        if self.keypoint_names is None:
            self.keypoint_names = [
                f"keypoint_{i}" for i in range(self.position_array.shape[2])
            ]
            log_warning(
                "Keypoint names were not provided. "
                f"Setting to {self.keypoint_names}."
            )
[docs]
@define(kw_only=True)
class ValidBboxesDataset:
    """Class for validating bounding boxes' data for a ``movement`` dataset.

    The validator considers 2D bounding boxes only. It ensures that
    within the ``movement bboxes`` dataset:

    - The required ``position_array`` and ``shape_array`` are
      3-dimensional numpy arrays of identical shape, with the last
      dimension containing 2 spatial coordinates.
    - The optional ``confidence_array``, if provided, is a numpy array
      with its shape matching the first two dimensions of the
      ``position_array``; otherwise, it defaults to an array of NaNs.
    - The optional ``individual_names``, if provided, are unique and match
      the number of individuals in the dataset; otherwise, default names
      are assigned.
    - The optional ``frame_array``, if provided, is a column vector
      with consecutive frame numbers; otherwise, it defaults to an array
      of 0-based integers.
    - The optional ``fps`` is a positive float; otherwise, it defaults to None.
    - The optional ``source_software`` is a string; otherwise, it defaults to
      None.

    Attributes
    ----------
    position_array : np.ndarray
        Array of shape (n_frames, n_individuals, n_space)
        containing the tracks of the bounding boxes' centroids.
    shape_array : np.ndarray
        Array of shape (n_frames, n_individuals, n_space)
        containing the shape of the bounding boxes. The shape of a bounding
        box is its width (extent along the x-axis of the image) and height
        (extent along the y-axis of the image).
    confidence_array : np.ndarray, optional
        Array of shape (n_frames, n_individuals) containing
        the confidence scores of the bounding boxes. If None (default), the
        confidence scores are set to an array of NaNs.
    individual_names : list of str, optional
        List of individual names for the tracked bounding boxes in the video.
        If None (default), bounding boxes are assigned names based on the size
        of the ``position_array``. The names will be in the format of
        ``id_<N>``, where <N> is an integer from 0 to
        ``position_array.shape[1]-1``.
    frame_array : np.ndarray, optional
        Array of shape (n_frames, 1) containing the frame numbers for which
        bounding boxes are defined. If None (default), frame numbers will
        be assigned based on the first dimension of the ``position_array``,
        starting from 0.
    fps : float, optional
        Frames per second defining the sampling rate of the data.
        Defaults to None.
    source_software : str, optional
        Name of the software that generated the data. Defaults to None.

    Raises
    ------
    ValueError
        If the dataset does not meet the ``movement bboxes`` dataset
        requirements.

    """

    # Required attributes
    position_array: np.ndarray = field()
    shape_array: np.ndarray = field()

    # Optional attributes
    confidence_array: np.ndarray | None = field(default=None)
    individual_names: list[str] | None = field(
        default=None,
        converter=converters.optional(
            _convert_to_list_of_str
        ),  # force into list of strings if not
    )
    frame_array: np.ndarray | None = field(default=None)
    fps: float | None = field(
        default=None,
        converter=converters.pipe(  # type: ignore
            converters.optional(float), _convert_fps_to_none_if_invalid
        ),
    )
    source_software: str | None = field(
        default=None,
        validator=validators.optional(validators.instance_of(str)),
    )

    # Validators
    @position_array.validator
    @shape_array.validator
    def _validate_position_and_shape_arrays(self, attribute, value):
        """Check an array is 3D with 2 space coords (shared validator).

        This validator is registered for both ``position_array`` and
        ``shape_array``. For ``shape_array`` it additionally checks that
        the shape equals that of ``position_array``.
        """
        _validate_type_ndarray(value)

        # check the array has the documented
        # (n_frames, n_individuals, n_space) layout; without this check a
        # 0-d array would fail with an IndexError on ``shape[-1]`` below
        n_expected_dimensions = 3
        if value.ndim != n_expected_dimensions:
            raise log_error(
                ValueError,
                f"Expected '{attribute.name}' to have 3 dimensions, "
                f"but got {value.ndim}.",
            )

        # check last dimension (spatial) has 2 coordinates
        n_expected_spatial_coordinates = 2
        if value.shape[-1] != n_expected_spatial_coordinates:
            raise log_error(
                ValueError,
                f"Expected '{attribute.name}' to have 2 spatial coordinates, "
                f"but got {value.shape[-1]}.",
            )

        # the box shapes must describe the same frames and individuals
        # as the centroid positions
        if attribute.name == "shape_array":
            _validate_array_shape(
                attribute, value, expected_shape=self.position_array.shape
            )

    @individual_names.validator
    def _validate_individual_names(self, attribute, value):
        """Check names are unique and match the number of individuals."""
        if value is not None:
            _validate_list_length(
                attribute, value, self.position_array.shape[1]
            )
            # check n_individual_names are unique
            # NOTE: combined with the requirement above, we are enforcing
            # unique IDs per frame
            if len(value) != len(set(value)):
                raise log_error(
                    ValueError,
                    "individual_names passed to the dataset are not unique. "
                    f"There are {len(value)} elements in the list, but "
                    f"only {len(set(value))} are unique.",
                )

    @confidence_array.validator
    def _validate_confidence_array(self, attribute, value):
        """Check ``confidence_array`` matches ``position_array`` sans space."""
        if value is not None:
            _validate_type_ndarray(value)
            _validate_array_shape(
                attribute, value, expected_shape=self.position_array.shape[:-1]
            )

    @frame_array.validator
    def _validate_frame_array(self, attribute, value):
        """Check ``frame_array`` is an (n_frames, 1) consecutive column."""
        if value is not None:
            _validate_type_ndarray(value)
            # should be a column vector (n_frames, 1)
            _validate_array_shape(
                attribute,
                value,
                expected_shape=(self.position_array.shape[0], 1),
            )
            # check frames are continuous: each frame number must be
            # exactly one more than the one in the previous row
            if not np.all(np.diff(value, axis=0) == 1):
                raise log_error(
                    ValueError,
                    f"Frame numbers in {attribute.name} are not continuous.",
                )

    # Define defaults
    def __attrs_post_init__(self):
        """Assign default values to optional attributes (if None).

        If no confidence_array is provided, set it to an array of NaNs.
        If no individual names are provided, assign them unique IDs per frame,
        starting with 0 ("id_0").
        If no frame_array is provided, set it to a column vector of 0-based
        consecutive integers, one per frame.
        """
        # assign default confidence_array
        if self.confidence_array is None:
            self.confidence_array = np.full(
                self.position_array.shape[:-1],
                np.nan,
                dtype="float32",
            )
            log_warning(
                "Confidence array was not provided. "
                "Setting to an array of NaNs."
            )
        # assign default individual_names
        if self.individual_names is None:
            self.individual_names = [
                f"id_{i}" for i in range(self.position_array.shape[1])
            ]
            log_warning(
                "Individual names for the bounding boxes "
                "were not provided. "
                "Setting to 0-based IDs that are unique per frame: \n"
                f"{self.individual_names}.\n"
            )
        # assign default frame_array
        if self.frame_array is None:
            n_frames = self.position_array.shape[0]
            self.frame_array = np.arange(n_frames).reshape(-1, 1)
            log_warning(
                "Frame numbers were not provided. "
                "Setting to an array of 0-based integers."
            )