"""Load bounding boxes tracking data into ``movement``."""
import warnings
from pathlib import Path
from typing import Literal, cast
import numpy as np
import xarray as xr
from movement.io.load import register_loader
from movement.utils.logging import logger
from movement.validators.datasets import ValidBboxesInputs
from movement.validators.files import DEFAULT_FRAME_REGEXP, ValidVIATracksCSV
[docs]
def from_numpy(
    position_array: np.ndarray,
    shape_array: np.ndarray,
    confidence_array: np.ndarray | None = None,
    individual_names: list[str] | None = None,
    frame_array: np.ndarray | None = None,
    fps: float | None = None,
    source_software: str | None = None,
) -> xr.Dataset:
    """Create a ``movement`` bounding boxes dataset from NumPy arrays.

    Parameters
    ----------
    position_array
        Array of shape (n_frames, n_space, n_individuals)
        containing the tracks of the bounding box centroids.
        It will be converted to a :class:`xarray.DataArray` object
        named "position".
    shape_array
        Array of shape (n_frames, n_space, n_individuals)
        containing the shape of the bounding boxes. The shape of a bounding
        box is its width (extent along the x-axis of the image) and height
        (extent along the y-axis of the image). It will be converted to a
        :class:`xarray.DataArray` object named "shape".
    confidence_array
        Array of shape (n_frames, n_individuals) containing
        the confidence scores of the bounding boxes. If None (default), the
        confidence scores are set to an array of NaNs. It will be converted
        to a :class:`xarray.DataArray` object named "confidence".
    individual_names
        List of individual names for the tracked bounding boxes in the video.
        If None (default), bounding boxes are assigned names based on the size
        of the ``position_array``. The names will be in the format of
        ``id_<N>``, where <N> is an integer from 0 to
        ``position_array.shape[-1]-1`` (i.e., "id_0", "id_1"...).
    frame_array
        Array of shape (n_frames, 1) containing the frame numbers for which
        bounding boxes are defined. If None (default), frame numbers will
        be assigned based on the first dimension of the ``position_array``,
        starting from 0. If a specific array of frame numbers is provided,
        these need to be integers sorted in increasing order.
    fps
        The video sampling rate. If None (default), the ``time`` coordinates
        of the resulting ``movement`` dataset will be in frame numbers. If
        ``fps`` is provided, the ``time`` coordinates will be in seconds. If
        the ``time`` coordinates are in seconds, they will indicate the
        elapsed time from the capture of the first frame (assumed to be frame
        0).
    source_software
        Name of the software that generated the data. Defaults to None.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the position, shape, and confidence
        scores of the tracked bounding boxes, and any associated metadata.

    Examples
    --------
    Create random position data for two bounding boxes, ``id_0`` and ``id_1``,
    with the same width (40 pixels) and height (30 pixels). These are tracked
    in 2D space for 100 frames, which are numbered from the start frame 1200
    to the end frame 1299. The confidence score for all bounding boxes is set
    to 0.5.

    The ``[[40], [30]]`` factor has shape (2, 1), so it broadcasts along the
    space axis of the (n_frames, n_space, n_individuals) array: every box
    gets width 40 and height 30.

    >>> import numpy as np
    >>> from movement.io import load_bboxes
    >>> rng = np.random.default_rng(seed=42)
    >>> ds = load_bboxes.from_numpy(
    ...     position_array=rng.random((100, 2, 2)),
    ...     shape_array=np.ones((100, 2, 2)) * [[40], [30]],
    ...     confidence_array=np.ones((100, 2)) * 0.5,
    ...     individual_names=["id_0", "id_1"],
    ...     frame_array=np.arange(1200, 1300).reshape(-1, 1),
    ... )

    Create a dataset with the same data as above, but with the time
    coordinates in seconds. We use a video sampling rate of 60 fps. The time
    coordinates in the resulting dataset will indicate the elapsed time from
    the capture of the 0th frame. So for the frames 1200, 1201, 1202,... 1299
    the corresponding time coordinates in seconds will be 20, 20.0167,
    20.033,... 21.65 s.

    >>> ds = load_bboxes.from_numpy(
    ...     position_array=rng.random((100, 2, 2)),
    ...     shape_array=np.ones((100, 2, 2)) * [[40], [30]],
    ...     confidence_array=np.ones((100, 2)) * 0.5,
    ...     individual_names=["id_0", "id_1"],
    ...     frame_array=np.arange(1200, 1300).reshape(-1, 1),
    ...     fps=60,
    ... )

    Create a dataset with the same data as above, but express the time
    coordinate in frames, and assume the first tracked frame is frame 0.
    To do this, we simply omit the ``frame_array`` input argument.

    >>> ds = load_bboxes.from_numpy(
    ...     position_array=rng.random((100, 2, 2)),
    ...     shape_array=np.ones((100, 2, 2)) * [[40], [30]],
    ...     confidence_array=np.ones((100, 2)) * 0.5,
    ...     individual_names=["id_0", "id_1"],
    ... )

    Create a dataset with the same data as above, but express the time
    coordinate in seconds, and assume the first tracked frame is captured
    at time = 0 seconds. To do this, we omit the ``frame_array`` input argument
    and pass an ``fps`` value.

    >>> ds = load_bboxes.from_numpy(
    ...     position_array=rng.random((100, 2, 2)),
    ...     shape_array=np.ones((100, 2, 2)) * [[40], [30]],
    ...     confidence_array=np.ones((100, 2)) * 0.5,
    ...     individual_names=["id_0", "id_1"],
    ...     fps=60,
    ... )

    """
    # All inputs are handed to ValidBboxesInputs unchanged; that object is
    # then responsible for producing the xarray.Dataset.
    valid_bboxes_inputs = ValidBboxesInputs(
        position_array=position_array,
        shape_array=shape_array,
        confidence_array=confidence_array,
        individual_names=individual_names,
        frame_array=frame_array,
        fps=fps,
        source_software=source_software,
    )
    return valid_bboxes_inputs.to_dataset()
[docs]
def from_file(
    file: Path | str,
    source_software: Literal["VIA-tracks"],
    fps: float | None = None,
    use_frame_numbers_from_file: bool = False,
    frame_regexp: str = DEFAULT_FRAME_REGEXP,
) -> xr.Dataset:
    """Create a ``movement`` bounding boxes dataset from a supported file.

    .. deprecated:: 0.14.0
        This function is deprecated and will be removed in a future release.
        Use :func:`movement.io.load_dataset<movement.io.load.load_dataset>`
        instead.

    At the moment, we only support VIA tracks .csv files.

    Parameters
    ----------
    file
        Path to the file containing the tracked bounding boxes. Currently
        only VIA tracks .csv files are supported.
    source_software
        The source software of the file. Currently only files from the
        VIA 2.0.12 annotator [1]_ ("VIA-tracks") are supported.
        See :func:`movement.io.load_bboxes.from_via_tracks_file`.
    fps
        The video sampling rate. If None (default), the ``time`` coordinates
        of the resulting ``movement`` dataset will be in frame numbers. If
        ``fps`` is provided, the ``time`` coordinates will be in seconds. If
        the ``time`` coordinates are in seconds, they will indicate the
        elapsed time from the capture of the first frame (assumed to be frame
        0).
    use_frame_numbers_from_file
        If True, the frame numbers in the resulting dataset are
        the same as the ones specified for each tracked bounding box in the
        input file. This may be useful if the bounding boxes are tracked for a
        subset of frames in a video, but you want to maintain the start of the
        full video as the time origin. If False (default), the frame numbers
        in the VIA tracks .csv file are instead mapped to a 0-based sequence of
        consecutive integers.
    frame_regexp
        Regular expression pattern to extract the frame number from the frame
        filename. By default, the frame number is expected to be encoded in
        the filename as an integer number led by at least one zero, followed
        by the file extension. Only used if ``use_frame_numbers_from_file`` is
        True.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the position, shape, and confidence
        scores of the tracked bounding boxes, and any associated metadata.

    Raises
    ------
    ValueError
        If ``source_software`` is anything other than "VIA-tracks".

    See Also
    --------
    movement.io.load_bboxes.from_via_tracks_file

    References
    ----------
    .. [1] https://www.robots.ox.ac.uk/~vgg/software/via/

    Examples
    --------
    Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
    the time coordinates in seconds, and assuming t = 0 seconds corresponds to
    the first tracked frame in the file.

    >>> from movement.io import load_bboxes
    >>> ds = load_bboxes.from_file(
    ...     "path/to/file.csv",
    ...     source_software="VIA-tracks",
    ...     fps=30,
    ... )

    """
    # stacklevel=2 attributes the warning to the caller of this function
    # rather than to this module.
    warnings.warn(
        "The function `movement.io.load_bboxes.from_file` is deprecated"
        " and will be removed in a future release. "
        "Please use `movement.io.load_dataset` instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Reject anything but the single supported format up front.
    # NOTE(review): this relies on ``logger.error`` returning the exception
    # it is given so that it can be raised -- confirm against
    # ``movement.utils.logging``.
    if source_software != "VIA-tracks":
        raise logger.error(
            ValueError(f"Unsupported source software: {source_software}")
        )

    return from_via_tracks_file(
        file,
        fps,
        use_frame_numbers_from_file=use_frame_numbers_from_file,
        frame_regexp=frame_regexp,
    )
[docs]
@register_loader("VIA-tracks", file_validators=[ValidVIATracksCSV])
def from_via_tracks_file(
    file: str | Path,
    fps: float | None = None,
    use_frame_numbers_from_file: bool = False,
    frame_regexp: str = DEFAULT_FRAME_REGEXP,
) -> xr.Dataset:
    """Create a ``movement`` dataset from a VIA tracks .csv file.

    Parameters
    ----------
    file
        Path to the VIA tracks .csv file with the tracked bounding boxes.
        For more information on the VIA tracks .csv file format, see the VIA
        tutorial for tracking [1]_.
    fps
        The video sampling rate. If None (default), the ``time`` coordinates
        of the resulting ``movement`` dataset will be in frame numbers. If
        ``fps`` is provided, the ``time`` coordinates will be in seconds. If
        the ``time`` coordinates are in seconds, they will indicate the
        elapsed time from the capture of the first frame (assumed to be frame
        0).
    use_frame_numbers_from_file
        If True, the frame numbers in the resulting dataset are
        the same as the ones in the VIA tracks .csv file. This may be useful if
        the bounding boxes are tracked for a subset of frames in a video,
        but you want to maintain the start of the full video as the time
        origin. If False (default), the frame numbers in the VIA tracks .csv
        file are instead mapped to a 0-based sequence of consecutive integers.
    frame_regexp
        Regular expression pattern to extract the frame number from the frame
        filename. By default, the frame number is expected to be encoded in
        the filename as an integer number led by at least one zero, followed
        by the file extension. Only used if ``use_frame_numbers_from_file`` is
        True.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the position, shape, and confidence
        scores of the tracked bounding boxes, and any associated metadata.

    Notes
    -----
    Note that the x,y coordinates in the input VIA tracks .csv file
    represent the top-left corner of each bounding box. Instead the
    corresponding ``movement`` dataset holds in its ``position`` array the
    centroid of each bounding box.
    Additionally, the bounding boxes IDs specified in the "track" field of
    the VIA tracks .csv file are mapped to the ``individuals`` dimension in the
    ``movement`` dataset. The individual names follow the format ``id_<N>``,
    with N being the bounding box ID.

    References
    ----------
    .. [1] https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html

    Examples
    --------
    Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
    the time coordinates in frames, and setting the first tracked frame in the
    file as frame 0.

    >>> from movement.io import load_bboxes
    >>> ds = load_bboxes.from_via_tracks_file(
    ...     "path/to/file.csv",
    ... )

    Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
    the time coordinates in seconds, and assuming t = 0 seconds corresponds to
    the first tracked frame in the file.

    >>> from movement.io import load_bboxes
    >>> ds = load_bboxes.from_via_tracks_file(
    ...     "path/to/file.csv",
    ...     fps=30,
    ... )

    Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
    the time coordinates in frames, and using the same frame numbers as
    in the VIA tracks .csv file.

    >>> from movement.io import load_bboxes
    >>> ds = load_bboxes.from_via_tracks_file(
    ...     "path/to/file.csv",
    ...     use_frame_numbers_from_file=True,
    ... )

    Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
    the time coordinates in seconds, and assuming t = 0 seconds corresponds to
    the 0th frame in the full video.

    >>> from movement.io import load_bboxes
    >>> ds = load_bboxes.from_via_tracks_file(
    ...     "path/to/file.csv",
    ...     fps=30,
    ...     use_frame_numbers_from_file=True,
    ... )

    """
    # Note: the @register_loader decorator has already validated
    # `file` as a ValidVIATracksCSV, and forwarded `frame_regexp`
    # to it via _get_validator_kwargs. That is why `frame_regexp` is not
    # referenced again below; the cast only informs the type checker.
    valid_file = cast("ValidVIATracksCSV", file)

    # Create an xarray.Dataset from the data
    bboxes_arrays = _numpy_arrays_from_valid_via_object(valid_file)
    # ID_array is a column vector, so flatten it before building the names.
    individual_names = [
        f"id_{track_id}" for track_id in bboxes_arrays["ID_array"].flatten()
    ]
    # Passing None makes from_numpy number the frames from 0 instead of
    # reusing the frame numbers found in the file.
    frame_array = (
        bboxes_arrays["frame_array"] if use_frame_numbers_from_file else None
    )
    ds = from_numpy(
        position_array=bboxes_arrays["position_array"],
        shape_array=bboxes_arrays["shape_array"],
        confidence_array=bboxes_arrays["confidence_array"],
        individual_names=individual_names,
        frame_array=frame_array,
        fps=fps,
        source_software="VIA-tracks",
    )  # it validates the dataset via ValidBboxesInputs

    # Add metadata as attributes
    file_path = valid_file.file
    ds.attrs["source_software"] = "VIA-tracks"
    ds.attrs["source_file"] = file_path.as_posix()

    logger.info(f"Loaded bounding boxes tracks from {file_path}:\n{ds}")
    return ds
def _numpy_arrays_from_valid_via_object(
valid_via_file: ValidVIATracksCSV,
) -> dict:
"""Extract numpy arrays from VIA tracks file object.
The extracted numpy arrays are returned in a dictionary with the following
keys:
- position_array (n_frames, n_space, n_individuals):
contains the trajectories of the bounding box centroids.
- shape_array (n_frames, n_space, n_individuals):
contains the shape of the bounding boxes (width and height).
- confidence_array (n_frames, n_individuals):
contains the confidence score of each bounding box.
If no confidence scores are provided, they are set to an array of NaNs.
- ID_array (n_individuals, 1):
contains the integer IDs of the tracked bounding boxes.
- frame_array (n_frames, 1):
contains the frame numbers.
Parameters
----------
valid_via_file
A validated VIA tracks file object.
Returns
-------
dict
The validated bounding boxes arrays.
"""
# Get 1D data from the validator
x = valid_via_file.x
y = valid_via_file.y
w = valid_via_file.w
h = valid_via_file.h
ids = valid_via_file.ids
frame_numbers = valid_via_file.frame_numbers
confidence = valid_via_file.confidence
# Compute **sorted** unique IDs and frames
unique_ids = np.unique(ids)
unique_frames = np.unique(frame_numbers)
# Map each observation's ID and frame to an index in the output arrays
id_indices_per_obs = np.searchsorted(unique_ids, ids)
frame_indices_per_obs = np.searchsorted(unique_frames, frame_numbers)
# Initialise output dense arrays and fill with NaNs
n_individuals = len(unique_ids)
n_frames = len(unique_frames)
position_array = np.full(
(n_frames, 2, n_individuals), np.nan, dtype=np.float32
)
shape_array = np.full(
(n_frames, 2, n_individuals), np.nan, dtype=np.float32
)
confidence_array = np.full(
(n_frames, n_individuals), np.nan, dtype=np.float32
)
# Place sparse values directly into the output
# Position = centroid = top-left corner + half the bbox size
position_array[frame_indices_per_obs, 0, id_indices_per_obs] = x + w / 2
position_array[frame_indices_per_obs, 1, id_indices_per_obs] = y + h / 2
shape_array[frame_indices_per_obs, 0, id_indices_per_obs] = w
shape_array[frame_indices_per_obs, 1, id_indices_per_obs] = h
confidence_array[frame_indices_per_obs, id_indices_per_obs] = confidence
return {
"position_array": position_array,
"shape_array": shape_array,
"confidence_array": confidence_array,
"ID_array": unique_ids.reshape(-1, 1),
"frame_array": unique_frames.reshape(-1, 1),
}