"""Load bounding boxes' tracking data into ``movement``."""
import ast
import logging
import re
from collections.abc import Callable
from pathlib import Path
from typing import Literal
import numpy as np
import pandas as pd
import xarray as xr
from movement import MovementDataset
from movement.utils.logging import log_error
from movement.validators.datasets import ValidBboxesDataset
from movement.validators.files import ValidFile, ValidVIATracksCSV
logger = logging.getLogger(__name__)
[docs]
def from_numpy(
position_array: np.ndarray,
shape_array: np.ndarray,
confidence_array: np.ndarray | None = None,
individual_names: list[str] | None = None,
frame_array: np.ndarray | None = None,
fps: float | None = None,
source_software: str | None = None,
) -> xr.Dataset:
"""Create a ``movement`` bounding boxes dataset from NumPy arrays.
Parameters
----------
position_array : np.ndarray
Array of shape (n_frames, n_individuals, n_space)
containing the tracks of the bounding boxes' centroids.
It will be converted to a :class:`xarray.DataArray` object
named "position".
shape_array : np.ndarray
Array of shape (n_frames, n_individuals, n_space)
containing the shape of the bounding boxes. The shape of a bounding
box is its width (extent along the x-axis of the image) and height
(extent along the y-axis of the image). It will be converted to a
:class:`xarray.DataArray` object named "shape".
confidence_array : np.ndarray, optional
Array of shape (n_frames, n_individuals) containing
the confidence scores of the bounding boxes. If None (default), the
confidence scores are set to an array of NaNs. It will be converted
to a :class:`xarray.DataArray` object named "confidence".
individual_names : list of str, optional
List of individual names for the tracked bounding boxes in the video.
If None (default), bounding boxes are assigned names based on the size
of the ``position_array``. The names will be in the format of
``id_<N>``, where <N> is an integer from 0 to
``position_array.shape[1]-1`` (i.e., "id_0", "id_1"...).
frame_array : np.ndarray, optional
Array of shape (n_frames, 1) containing the frame numbers for which
bounding boxes are defined. If None (default), frame numbers will
be assigned based on the first dimension of the ``position_array``,
starting from 0. If a specific array of frame numbers is provided,
these need to be consecutive integers.
fps : float, optional
The video sampling rate. If None (default), the ``time`` coordinates
of the resulting ``movement`` dataset will be in frame numbers. If
``fps`` is provided, the ``time`` coordinates will be in seconds. If
the ``time`` coordinates are in seconds, they will indicate the
elapsed time from the capture of the first frame (assumed to be frame
0).
source_software : str, optional
Name of the software that generated the data. Defaults to None.
Returns
-------
xarray.Dataset
``movement`` dataset containing the position, shape, and confidence
scores of the tracked bounding boxes, and any associated metadata.
Examples
--------
Create random position data for two bounding boxes, ``id_0`` and ``id_1``,
with the same width (40 pixels) and height (30 pixels). These are tracked
in 2D space for 100 frames, which are numbered from the start frame 1200
to the end frame 1299. The confidence score for all bounding boxes is set
to 0.5.
>>> import numpy as np
>>> from movement.io import load_bboxes
>>> ds = load_bboxes.from_numpy(
... position_array=np.random.rand(100, 2, 2),
... shape_array=np.ones((100, 2, 2)) * [40, 30],
... confidence_array=np.ones((100, 2)) * 0.5,
... individual_names=["id_0", "id_1"],
... frame_array=np.arange(1200, 1300).reshape(-1, 1),
... )
Create a dataset with the same data as above, but with the time
coordinates in seconds. We use a video sampling rate of 60 fps. The time
coordinates in the resulting dataset will indicate the elapsed time from
the capture of the 0th frame. So for the frames 1200, 1201, 1203,... 1299
the corresponding time coordinates in seconds will be 20, 20.0167,
20.033,... 21.65 s.
>>> ds = load_bboxes.from_numpy(
... position_array=np.random.rand(100, 2, 2),
... shape_array=np.ones((100, 2, 2)) * [40, 30],
... confidence_array=np.ones((100, 2)) * 0.5,
... individual_names=["id_0", "id_1"],
... frame_array=np.arange(1200, 1300).reshape(-1, 1),
... fps=60,
... )
Create a dataset with the same data as above, but express the time
coordinate in frames, and assume the first tracked frame is frame 0.
To do this, we simply omit the ``frame_array`` input argument.
>>> ds = load_bboxes.from_numpy(
... position_array=np.random.rand(100, 2, 2),
... shape_array=np.ones((100, 2, 2)) * [40, 30],
... confidence_array=np.ones((100, 2)) * 0.5,
... individual_names=["id_0", "id_1"],
... )
Create a dataset with the same data as above, but express the time
coordinate in seconds, and assume the first tracked frame is captured
at time = 0 seconds. To do this, we omit the ``frame_array`` input argument
and pass an ``fps`` value.
>>> ds = load_bboxes.from_numpy(
... position_array=np.random.rand(100, 2, 2),
... shape_array=np.ones((100, 2, 2)) * [40, 30],
... confidence_array=np.ones((100, 2)) * 0.5,
... individual_names=["id_0", "id_1"],
... fps=60,
... )
"""
valid_bboxes_data = ValidBboxesDataset(
position_array=position_array,
shape_array=shape_array,
confidence_array=confidence_array,
individual_names=individual_names,
frame_array=frame_array,
fps=fps,
source_software=source_software,
)
return _ds_from_valid_data(valid_bboxes_data)
[docs]
def from_file(
file_path: Path | str,
source_software: Literal["VIA-tracks"],
fps: float | None = None,
use_frame_numbers_from_file: bool = False,
) -> xr.Dataset:
"""Create a ``movement`` bounding boxes dataset from a supported file.
At the moment, we only support VIA-tracks .csv files.
Parameters
----------
file_path : pathlib.Path or str
Path to the file containing the tracked bounding boxes. Currently
only VIA-tracks .csv files are supported.
source_software : "VIA-tracks".
The source software of the file. Currently only files from the
VIA 2.0.12 annotator [1]_ ("VIA-tracks") are supported.
See .
fps : float, optional
The video sampling rate. If None (default), the ``time`` coordinates
of the resulting ``movement`` dataset will be in frame numbers. If
``fps`` is provided, the ``time`` coordinates will be in seconds. If
the ``time`` coordinates are in seconds, they will indicate the
elapsed time from the capture of the first frame (assumed to be frame
0).
use_frame_numbers_from_file : bool, optional
If True, the frame numbers in the resulting dataset are
the same as the ones specified for each tracked bounding box in the
input file. This may be useful if the bounding boxes are tracked for a
subset of frames in a video, but you want to maintain the start of the
full video as the time origin. If False (default), the frame numbers
in the VIA tracks .csv file are instead mapped to a 0-based sequence of
consecutive integers.
Returns
-------
xarray.Dataset
``movement`` dataset containing the position, shape, and confidence
scores of the tracked bounding boxes, and any associated metadata.
See Also
--------
movement.io.load_bboxes.from_via_tracks_file
References
----------
.. [1] https://www.robots.ox.ac.uk/~vgg/software/via/
Examples
--------
Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
the time coordinates in seconds, and assuming t = 0 seconds corresponds to
the first tracked frame in the file.
>>> from movement.io import load_bboxes
>>> ds = load_bboxes.from_file(
>>> "path/to/file.csv",
>>> source_software="VIA-tracks",
>>> fps=30,
>>> )
"""
if source_software == "VIA-tracks":
return from_via_tracks_file(
file_path,
fps,
use_frame_numbers_from_file=use_frame_numbers_from_file,
)
else:
raise log_error(
ValueError, f"Unsupported source software: {source_software}"
)
[docs]
def from_via_tracks_file(
file_path: Path | str,
fps: float | None = None,
use_frame_numbers_from_file: bool = False,
) -> xr.Dataset:
"""Create a ``movement`` dataset from a VIA tracks .csv file.
Parameters
----------
file_path : pathlib.Path or str
Path to the VIA tracks .csv file with the tracked bounding boxes.
For more information on the VIA tracks .csv file format, see the VIA
tutorial for tracking [1]_.
fps : float, optional
The video sampling rate. If None (default), the ``time`` coordinates
of the resulting ``movement`` dataset will be in frame numbers. If
``fps`` is provided, the ``time`` coordinates will be in seconds. If
the ``time`` coordinates are in seconds, they will indicate the
elapsed time from the capture of the first frame (assumed to be frame
0).
use_frame_numbers_from_file : bool, optional
If True, the frame numbers in the resulting dataset are
the same as the ones in the VIA tracks .csv file. This may be useful if
the bounding boxes are tracked for a subset of frames in a video,
but you want to maintain the start of the full video as the time
origin. If False (default), the frame numbers in the VIA tracks .csv
file are instead mapped to a 0-based sequence of consecutive integers.
Returns
-------
xarray.Dataset
``movement`` dataset containing the position, shape, and confidence
scores of the tracked bounding boxes, and any associated metadata.
Notes
-----
The bounding boxes' IDs specified in the "track" field of the VIA
tracks .csv file are mapped to the "individual_name" column of the
``movement`` dataset. The individual names follow the format ``id_<N>``,
with N being the bounding box ID.
References
----------
.. [1] https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html
Examples
--------
Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
the time coordinates in frames, and setting the first tracked frame in the
file as frame 0.
>>> from movement.io import load_bboxes
>>> ds = load_bboxes.from_via_tracks_file(
... "path/to/file.csv",
... )
Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
the time coordinates in seconds, and assuming t = 0 seconds corresponds to
the first tracked frame in the file.
>>> from movement.io import load_bboxes
>>> ds = load_bboxes.from_via_tracks_file(
... "path/to/file.csv",
... fps=30,
... )
Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
the time coordinates in frames, and using the same frame numbers as
in the VIA tracks .csv file.
>>> from movement.io import load_bboxes
>>> ds = load_bboxes.from_via_tracks_file(
... "path/to/file.csv",
... use_frame_numbers_from_file=True.
... )
Create a dataset from the VIA tracks .csv file at "path/to/file.csv", with
the time coordinates in seconds, and assuming t = 0 seconds corresponds to
the 0th frame in the full video.
>>> from movement.io import load_bboxes
>>> ds = load_bboxes.from_via_tracks_file(
... "path/to/file.csv",
... fps=30,
... use_frame_numbers_from_file=True,
... )
"""
# General file validation
file = ValidFile(
file_path, expected_permission="r", expected_suffix=[".csv"]
)
# Specific VIA-tracks .csv file validation
via_file = ValidVIATracksCSV(file.path)
logger.debug(f"Validated VIA tracks .csv file {via_file.path}.")
# Create an xarray.Dataset from the data
bboxes_arrays = _numpy_arrays_from_via_tracks_file(via_file.path)
ds = from_numpy(
position_array=bboxes_arrays["position_array"],
shape_array=bboxes_arrays["shape_array"],
confidence_array=bboxes_arrays["confidence_array"],
individual_names=[
f"id_{id}" for id in bboxes_arrays["ID_array"].squeeze()
],
frame_array=(
bboxes_arrays["frame_array"]
if use_frame_numbers_from_file
else None
),
fps=fps,
source_software="VIA-tracks",
) # it validates the dataset via ValidBboxesDataset
# Add metadata as attributes
ds.attrs["source_software"] = "VIA-tracks"
ds.attrs["source_file"] = file.path.as_posix()
logger.info(f"Loaded tracks of the bounding boxes from {via_file.path}:")
logger.info(ds)
return ds
def _numpy_arrays_from_via_tracks_file(file_path: Path) -> dict:
"""Extract numpy arrays from the input VIA tracks .csv file.
The extracted numpy arrays are returned in a dictionary with the following
keys:
- position_array (n_frames, n_individuals, n_space):
contains the trajectories of the bounding boxes' centroids.
- shape_array (n_frames, n_individuals, n_space):
contains the shape of the bounding boxes (width and height).
- confidence_array (n_frames, n_individuals):
contains the confidence score of each bounding box.
If no confidence scores are provided, they are set to an array of NaNs.
- ID_array (n_individuals, 1):
contains the integer IDs of the tracked bounding boxes.
- frame_array (n_frames, 1):
contains the frame numbers.
Parameters
----------
file_path : pathlib.Path
Path to the VIA tracks .csv file containing the bounding boxes' tracks.
Returns
-------
dict
The validated bounding boxes' arrays.
"""
# Extract 2D dataframe from input data
# (sort data by ID and frame number, and
# fill empty frame-ID pairs with nans)
df = _df_from_via_tracks_file(file_path)
# Compute indices of the rows where the IDs switch
bool_id_diff_from_prev = df["ID"].ne(df["ID"].shift()) # pandas series
indices_id_switch = (
bool_id_diff_from_prev.loc[lambda x: x].index[1:].to_numpy()
)
# Stack position, shape and confidence arrays along ID axis
map_key_to_columns = {
"position_array": ["x", "y"],
"shape_array": ["w", "h"],
"confidence_array": ["confidence"],
}
array_dict = {}
for key in map_key_to_columns:
list_arrays = np.split(
df[map_key_to_columns[key]].to_numpy(),
indices_id_switch, # indices along axis=0
)
array_dict[key] = np.stack(list_arrays, axis=1).squeeze()
# Add remaining arrays to dict
array_dict["ID_array"] = df["ID"].unique().reshape(-1, 1)
array_dict["frame_array"] = df["frame_number"].unique().reshape(-1, 1)
return array_dict
def _df_from_via_tracks_file(file_path: Path) -> pd.DataFrame:
"""Load VIA tracks .csv file as a dataframe.
Read the VIA tracks .csv file as a pandas dataframe with columns:
- ID: the integer ID of the tracked bounding box.
- frame_number: the frame number of the tracked bounding box.
- x: the x-coordinate of the tracked bounding box centroid.
- y: the y-coordinate of the tracked bounding box centroid.
- w: the width of the tracked bounding box.
- h: the height of the tracked bounding box.
- confidence: the confidence score of the tracked bounding box.
The dataframe is sorted by ID and frame number, and for each ID,
empty frames are filled in with NaNs.
"""
# Read VIA tracks .csv file as a pandas dataframe
df_file = pd.read_csv(file_path, sep=",", header=0)
# Format to a 2D dataframe
df = pd.DataFrame(
{
"ID": _via_attribute_column_to_numpy(
df_file, "region_attributes", ["track"], int
),
"frame_number": _extract_frame_number_from_via_tracks_df(df_file),
"x": _via_attribute_column_to_numpy(
df_file, "region_shape_attributes", ["x"], float
),
"y": _via_attribute_column_to_numpy(
df_file, "region_shape_attributes", ["y"], float
),
"w": _via_attribute_column_to_numpy(
df_file, "region_shape_attributes", ["width"], float
),
"h": _via_attribute_column_to_numpy(
df_file, "region_shape_attributes", ["height"], float
),
"confidence": _extract_confidence_from_via_tracks_df(df_file),
}
)
# Sort dataframe by ID and frame number
df = df.sort_values(by=["ID", "frame_number"]).reset_index(drop=True)
# Fill in empty frames with nans
multi_index = pd.MultiIndex.from_product(
[df["ID"].unique(), df["frame_number"].unique()],
names=["ID", "frame_number"],
) # desired index: all combinations of ID and frame number
# Set index to (ID, frame number), fill in values with nans and
# reset to original index
df = (
df.set_index(["ID", "frame_number"]).reindex(multi_index).reset_index()
)
return df
def _extract_confidence_from_via_tracks_df(df) -> np.ndarray:
"""Extract confidence scores from the VIA tracks input dataframe.
Parameters
----------
df : pd.DataFrame
The VIA tracks input dataframe is the one obtained from
``df = pd.read_csv(file_path, sep=",", header=0)``.
Returns
-------
np.ndarray
A numpy array of size (n_bboxes, ) containing the bounding boxes
confidence scores.
"""
region_attributes_dicts = [
ast.literal_eval(d) for d in df.region_attributes
]
# Check if confidence is defined as a region attribute, else set to NaN
if all(["confidence" in d for d in region_attributes_dicts]):
bbox_confidence = _via_attribute_column_to_numpy(
df, "region_attributes", ["confidence"], float
)
else:
bbox_confidence = np.full((df.shape[0], 1), np.nan).squeeze()
return bbox_confidence
def _extract_frame_number_from_via_tracks_df(df) -> np.ndarray:
"""Extract frame numbers from the VIA tracks input dataframe.
Parameters
----------
df : pd.DataFrame
The VIA tracks input dataframe is the one obtained from
``df = pd.read_csv(file_path, sep=",", header=0)``.
Returns
-------
np.ndarray
A numpy array of size (n_frames, ) containing the frame numbers.
In the VIA tracks .csv file, the frame number is expected to be
defined as a 'file_attribute' , or encoded in the filename as an
integer number led by at least one zero, between "_" and ".", followed
by the file extension.
"""
# Extract frame number from file_attributes if exists
file_attributes_dicts = [ast.literal_eval(d) for d in df.file_attributes]
if all(["frame" in d for d in file_attributes_dicts]):
frame_array = _via_attribute_column_to_numpy(
df,
via_column_name="file_attributes",
list_keys=["frame"],
cast_fn=int,
)
# Else extract from filename
else:
pattern = r"_(0\d*)\.\w+$"
list_frame_numbers = [
int(re.search(pattern, f).group(1)) # type: ignore
if re.search(pattern, f)
else np.nan
for f in df["filename"]
]
frame_array = np.array(list_frame_numbers)
return frame_array
def _via_attribute_column_to_numpy(
df: pd.DataFrame,
via_column_name: str,
list_keys: list[str],
cast_fn: Callable = float,
) -> np.ndarray:
"""Convert values from VIA attribute-type column to a numpy array.
In the VIA tracks .csv file, the attribute-type columns are the columns
whose name includes the word ``attributes`` (i.e. ``file_attributes``,
``region_shape_attributes`` or ``region_attributes``). These columns hold
dictionary data.
Parameters
----------
df : pd.DataFrame
The pandas DataFrame containing the data from the VIA tracks .csv file.
This is the dataframe obtained from running
``df = pd.read_csv(file_path, sep=",", header=0)``.
via_column_name : str
The name of a column in the VIA tracks .csv file whose values are
literal dictionaries (i.e. ``file_attributes``,
``region_shape_attributes`` or ``region_attributes``).
list_keys : list[str]
The list of keys whose values we want to extract from the literal
dictionaries in the ``via_column_name`` column.
cast_fn : type, optional
The type function to cast the values to. By default ``float``.
Returns
-------
np.ndarray
A numpy array holding the extracted values. If ``len(list_keys) > 1``
the array is two-dimensional with shape ``(N, len(list_keys))``, where
``N`` is the number of rows in the input dataframe ``df``. If
``len(list_keys) == 1``, the resulting array will be one-dimensional,
with shape (N, ). Note that the computed array is squeezed before
returning.
"""
list_bbox_attr = []
for _, row in df.iterrows():
row_dict_data = ast.literal_eval(row[via_column_name])
list_bbox_attr.append(
tuple(cast_fn(row_dict_data[reg]) for reg in list_keys)
)
bbox_attr_array = np.array(list_bbox_attr)
return bbox_attr_array.squeeze()
def _ds_from_valid_data(data: ValidBboxesDataset) -> xr.Dataset:
"""Convert a validated bounding boxes dataset to an xarray Dataset.
Parameters
----------
data : movement.validators.datasets.ValidBboxesDataset
The validated bounding boxes dataset object.
Returns
-------
bounding boxes dataset containing the boxes tracks,
boxes shapes, confidence scores and associated metadata.
"""
# Create the time coordinate
time_coords = data.frame_array.squeeze() # type: ignore
time_unit = "frames"
# if fps is provided:
# time_coords is expressed in seconds, with the time origin
# set as frame 0 == time 0 seconds
if data.fps:
# Compute elapsed time from frame 0.
# Ignoring type error because `data.frame_array` is not None after
# ValidBboxesDataset.__attrs_post_init__() # type: ignore
time_coords = np.array(
[frame / data.fps for frame in data.frame_array.squeeze()] # type: ignore
)
time_unit = "seconds"
# Convert data to an xarray.Dataset
# with dimensions ('time', 'individuals', 'space')
DIM_NAMES = MovementDataset.dim_names["bboxes"]
n_space = data.position_array.shape[-1]
return xr.Dataset(
data_vars={
"position": xr.DataArray(data.position_array, dims=DIM_NAMES),
"shape": xr.DataArray(data.shape_array, dims=DIM_NAMES),
"confidence": xr.DataArray(
data.confidence_array, dims=DIM_NAMES[:-1]
),
},
coords={
DIM_NAMES[0]: time_coords,
DIM_NAMES[1]: data.individual_names,
DIM_NAMES[2]: ["x", "y", "z"][:n_space],
},
attrs={
"fps": data.fps,
"time_unit": time_unit,
"source_software": data.source_software,
"source_file": None,
"ds_type": "bboxes",
},
)