"""Load pose tracking data from various frameworks into ``movement``."""
from pathlib import Path
from typing import Literal
import h5py
import numpy as np
import pandas as pd
import pynwb
import xarray as xr
from sleap_io.io.slp import read_labels
from sleap_io.model.labels import Labels
from movement.utils.logging import logger
from movement.validators.datasets import ValidPosesDataset
from movement.validators.files import (
ValidAniposeCSV,
ValidDeepLabCutCSV,
ValidFile,
ValidHDF5,
ValidNWBFile,
)
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_numpy(
    position_array: np.ndarray,
    confidence_array: np.ndarray | None = None,
    individual_names: list[str] | None = None,
    keypoint_names: list[str] | None = None,
    fps: float | None = None,
    source_software: str | None = None,
) -> xr.Dataset:
    """Build a ``movement`` poses dataset directly from NumPy arrays.

    Parameters
    ----------
    position_array : np.ndarray
        Poses with shape (n_frames, n_space, n_keypoints, n_individuals).
        Stored in the output as a :class:`xarray.DataArray` named
        "position".
    confidence_array : np.ndarray, optional
        Point-wise confidence scores with shape
        (n_frames, n_keypoints, n_individuals). Stored as a
        :class:`xarray.DataArray` named "confidence". If None (default),
        the scores are set to an array of NaNs.
    individual_names : list of str, optional
        Unique names for the individuals in the video. If None (default),
        the individuals are named "id_0", "id_1", etc.
    keypoint_names : list of str, optional
        Unique names for the keypoints in the skeleton. If None (default),
        the keypoints are named "keypoint_0", "keypoint_1", etc.
    fps : float, optional
        Frames per second of the video. If None (default), the time
        coordinates are expressed in frame numbers.
    source_software : str, optional
        Name of the pose estimation software the data originate from.
        Defaults to None.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    Examples
    --------
    Create random position data for two individuals, ``Alice`` and ``Bob``,
    with three keypoints each, tracked in 2D over 100 frames at 30 fps,
    with all confidence scores set to 1:

    >>> import numpy as np
    >>> from movement.io import load_poses
    >>> rng = np.random.default_rng(seed=42)
    >>> ds = load_poses.from_numpy(
    ...     position_array=rng.random((100, 2, 3, 2)),
    ...     confidence_array=np.ones((100, 3, 2)),
    ...     individual_names=["Alice", "Bob"],
    ...     keypoint_names=["snout", "centre", "tail_base"],
    ...     fps=30,
    ... )

    """
    # Validation raises on inconsistent shapes/names; the validated object
    # is then converted into an xarray.Dataset.
    validated = ValidPosesDataset(
        position_array=position_array,
        confidence_array=confidence_array,
        individual_names=individual_names,
        keypoint_names=keypoint_names,
        fps=fps,
        source_software=source_software,
    )
    return _ds_from_valid_data(validated)
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_file(
file_path: Path | str,
source_software: Literal[
"DeepLabCut",
"SLEAP",
"LightningPose",
"Anipose",
"NWB",
],
fps: float | None = None,
**kwargs,
) -> xr.Dataset:
"""Create a ``movement`` poses dataset from any supported file.
Parameters
----------
file_path : pathlib.Path or str
Path to the file containing predicted poses. The file format must
be among those supported by the ``from_dlc_file()``,
``from_slp_file()`` or ``from_lp_file()`` functions. One of these
these functions will be called internally, based on
the value of ``source_software``.
source_software : {"DeepLabCut", "SLEAP", "LightningPose", "Anipose", \
"NWB"}
The source software of the file.
fps : float, optional
The number of frames per second in the video. If None (default),
the ``time`` coordinates will be in frame numbers.
This argument is ignored when ``source_software`` is "NWB", as the
frame rate will be directly read or estimated from metadata in
the NWB file.
**kwargs : dict, optional
Additional keyword arguments to pass to the software-specific
loading functions that are listed under "See Also".
Returns
-------
xarray.Dataset
``movement`` dataset containing the pose tracks, confidence scores,
and associated metadata.
See Also
--------
movement.io.load_poses.from_dlc_file
movement.io.load_poses.from_sleap_file
movement.io.load_poses.from_lp_file
movement.io.load_poses.from_anipose_file
movement.io.load_poses.from_nwb_file
Examples
--------
>>> from movement.io import load_poses
>>> ds = load_poses.from_file(
... "path/to/file.h5", source_software="DeepLabCut", fps=30
... )
"""
if source_software == "DeepLabCut":
return from_dlc_file(file_path, fps)
elif source_software == "SLEAP":
return from_sleap_file(file_path, fps)
elif source_software == "LightningPose":
return from_lp_file(file_path, fps)
elif source_software == "Anipose":
return from_anipose_file(file_path, fps, **kwargs)
elif source_software == "NWB":
if fps is not None:
logger.warning(
"The fps argument is ignored when loading from an NWB file. "
"The frame rate will be directly read or estimated from "
"metadata in the file."
)
return from_nwb_file(file_path, **kwargs)
else:
raise logger.error(
ValueError(f"Unsupported source software: {source_software}")
)
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_dlc_style_df(
    df: pd.DataFrame,
    fps: float | None = None,
    source_software: Literal["DeepLabCut", "LightningPose"] = "DeepLabCut",
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from a DeepLabCut-style DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the pose tracks and confidence scores,
        formatted as in DeepLabCut output files (see Notes).
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame numbers.
    source_software : str, optional
        Name of the pose estimation software from which the data originate.
        Defaults to "DeepLabCut"; "LightningPose" is also accepted since
        both use the same DataFrame format.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    Notes
    -----
    The DataFrame must carry multi-index columns with levels
    "scorer", ("individuals"), "bodyparts", "coords"; the "individuals"
    level may be absent for single-individual data. The "coords" level
    holds either "x", "y", "likelihood" (2D poses with confidence) or
    "x", "y", "z" (3D poses obtained by `triangulating 2D poses from
    multiple DeepLabCut output files\
    <https://deeplabcut.github.io/DeepLabCut/docs/Overviewof3D.html>`__).
    The row index corresponds to the frame number.

    See Also
    --------
    movement.io.load_poses.from_dlc_file

    """
    # Individual names come from the (optional) "individuals" column level
    individuals = (
        df.columns.get_level_values("individuals").unique().to_list()
        if "individuals" in df.columns.names
        else ["individual_0"]
    )
    bodyparts = df.columns.get_level_values("bodyparts").unique().to_list()
    coords = df.columns.get_level_values("coords").unique().to_list()
    # Reshape the flat table to (frames, coords, keypoints, individuals)
    stacked = (
        df.to_numpy()
        .reshape((-1, len(individuals), len(bodyparts), len(coords)))
        .transpose(0, 3, 2, 1)
    )
    if "likelihood" in coords:  # Coords: ['x', 'y', 'likelihood']
        conf_idx = coords.index("likelihood")
        confidence = stacked[:, conf_idx, :, :]
        position = np.delete(stacked, conf_idx, axis=1)
    else:  # Coords: ['x', 'y', 'z']
        position = stacked
        confidence = None
    return from_numpy(
        position_array=position,
        confidence_array=confidence,
        individual_names=individuals,
        keypoint_names=bodyparts,
        fps=fps,
        source_software=source_software,
    )
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_sleap_file(
    file_path: Path | str, fps: float | None = None
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from a SLEAP file.

    Parameters
    ----------
    file_path : pathlib.Path or str
        Path to a SLEAP analysis file in .h5 format, or (experimentally,
        see Notes) a .slp labels file.
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame numbers.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    Notes
    -----
    SLEAP predictions are normally saved in .slp files, e.g.
    "v1.predictions.slp". An analysis file, suffixed with ".h5", can be
    exported from the .slp file using the command line tool `sleap-convert`
    (with the "--format analysis" option) or the SLEAP GUI ("Export
    Analysis HDF5…" in the "File" menu) [1]_. The .h5 analysis format is
    the preferred way to load SLEAP pose tracks into *movement*.

    Loading directly from .slp is also supported, with caveats: only the
    first video's tracks are loaded; user labels take precedence over
    predicted instances, mirroring SLEAP's .h5 export [2]_; tracks are
    expected to be assigned and proofread beforehand (each track is one
    individual). If no tracks exist, a single individual with a default
    name is assumed, and when several untracked instances share a frame,
    the last one is used [2]_. See the SLEAP tracking and proofreading
    guide [3]_.

    References
    ----------
    .. [1] https://docs.sleap.ai/latest/learnings/export-analysis/
    .. [2] https://github.com/talmolab/sleap/blob/v1.3.3/sleap/info/write_tracking_h5.py#L59
    .. [3] https://docs.sleap.ai/latest/guides/tracking-and-proofreading/

    Examples
    --------
    >>> from movement.io import load_poses
    >>> ds = load_poses.from_sleap_file("path/to/file.analysis.h5", fps=30)

    """
    file = ValidFile(
        file_path,
        expected_permission="r",
        expected_suffix=[".h5", ".slp"],
    )
    # Pick the loader matching the file format, then load and validate
    loader = (
        _ds_from_sleap_analysis_file
        if file.path.suffix == ".h5"
        else _ds_from_sleap_labels_file
    )
    ds = loader(file.path, fps=fps)
    # Record provenance on the dataset
    ds.attrs["source_file"] = file.path.as_posix()
    logger.info(f"Loaded pose tracks from {file.path}:\n{ds}")
    return ds
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_lp_file(
    file_path: Path | str, fps: float | None = None
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from a LightningPose file.

    Parameters
    ----------
    file_path : pathlib.Path or str
        Path to the .csv file containing the predicted poses.
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame numbers.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    Examples
    --------
    >>> from movement.io import load_poses
    >>> ds = load_poses.from_lp_file("path/to/file.csv", fps=30)

    """
    # LightningPose output shares the DeepLabCut DataFrame layout,
    # so the shared DLC/LP loader handles it.
    return _ds_from_lp_or_dlc_file(
        file_path=file_path,
        source_software="LightningPose",
        fps=fps,
    )
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_dlc_file(
    file_path: Path | str, fps: float | None = None
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from a DeepLabCut file.

    Parameters
    ----------
    file_path : pathlib.Path or str
        Path to the file containing the predicted poses, in .h5 or .csv
        format.
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame numbers.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    See Also
    --------
    movement.io.load_poses.from_dlc_style_df

    Examples
    --------
    >>> from movement.io import load_poses
    >>> ds = load_poses.from_dlc_file("path/to/file.h5", fps=30)

    Notes
    -----
    ``movement`` can only load pose data in which every individual shares
    the same set of keypoints. DeepLabCut's per-individual (unshared)
    keypoints for multi-animal projects (see the `DeepLabCut documentation
    for multi-animal projects
    <https://deeplabcut.github.io/DeepLabCut/docs/maDLC_UserGuide.html#b-configure-the-project>`_)
    are not currently supported.

    """
    # Delegate to the shared DLC/LightningPose loader
    return _ds_from_lp_or_dlc_file(
        file_path=file_path,
        source_software="DeepLabCut",
        fps=fps,
    )
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_multiview_files(
    file_path_dict: dict[str, Path | str],
    source_software: Literal["DeepLabCut", "SLEAP", "LightningPose"],
    fps: float | None = None,
) -> xr.Dataset:
    """Load and merge pose tracking data from multiple views (cameras).

    Parameters
    ----------
    file_path_dict : dict[str, Union[Path, str]]
        A dict mapping view names to the file paths to load.
    source_software : {'LightningPose', 'SLEAP', 'DeepLabCut'}
        The source software of the file.
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame numbers.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata, with an additional ``views`` dimension.

    """
    # One per-view dataset per file, concatenated along a new "view"
    # dimension labelled with the dict keys.
    view_coord = xr.DataArray(list(file_path_dict), dims="view")
    per_view_datasets = [
        from_file(path, source_software=source_software, fps=fps)
        for path in file_path_dict.values()
    ]
    return xr.concat(per_view_datasets, dim=view_coord)
def _ds_from_lp_or_dlc_file(
    file_path: Path | str,
    source_software: Literal["LightningPose", "DeepLabCut"],
    fps: float | None = None,
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from a LightningPose or DLC file.

    Parameters
    ----------
    file_path : pathlib.Path or str
        Path to the file containing the predicted poses, either in .h5
        or .csv format.
    source_software : {'LightningPose', 'DeepLabCut'}
        The source software of the file.
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame numbers.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    """
    # LightningPose only writes .csv; DeepLabCut may also write .h5
    suffixes = (
        [".csv", ".h5"] if source_software == "DeepLabCut" else [".csv"]
    )
    file = ValidFile(
        file_path, expected_permission="r", expected_suffix=suffixes
    )
    # Load the poses into a DeepLabCut-style DataFrame
    if file.path.suffix == ".csv":
        df = _df_from_dlc_csv(file.path)
    else:
        df = _df_from_dlc_h5(file.path)
    logger.debug(f"Loaded poses from {file.path} into a DataFrame.")
    # Convert the DataFrame to an xarray dataset and record provenance
    ds = from_dlc_style_df(df=df, fps=fps, source_software=source_software)
    ds.attrs["source_file"] = file.path.as_posix()
    logger.info(f"Loaded pose tracks from {file.path}:\n{ds}")
    return ds
def _ds_from_sleap_analysis_file(
    file_path: Path, fps: float | None
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from a SLEAP analysis (.h5) file.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the SLEAP analysis file containing predicted pose tracks.
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame units.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    """
    file = ValidHDF5(file_path, expected_datasets=["tracks"])
    with h5py.File(file.path, "r") as f:
        # Transpose to shape: (n_frames, n_space, n_keypoints, n_tracks)
        tracks = f["tracks"][:].transpose(3, 1, 2, 0)
        # Use the point-wise scores if present (transposed to
        # (n_frames, n_keypoints, n_tracks)), otherwise fill with NaNs
        if "point_scores" in f:
            scores = f["point_scores"][:].T
        else:
            scores = np.full(tracks.shape[:1] + tracks.shape[2:], np.nan)
        # An empty track-name list means the tracks were never assigned;
        # fall back to a default single individual
        individual_names = [n.decode() for n in f["track_names"][:]] or None
        if individual_names is None:
            logger.warning(
                f"Could not find SLEAP Track in {file.path}. "
                "Assuming single-individual dataset and assigning "
                "default individual name."
            )
        return from_numpy(
            position_array=tracks.astype(np.float32),
            confidence_array=scores.astype(np.float32),
            individual_names=individual_names,
            keypoint_names=[n.decode() for n in f["node_names"][:]],
            fps=fps,
            source_software="SLEAP",
        )
def _ds_from_sleap_labels_file(
    file_path: Path, fps: float | None
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from a SLEAP labels (.slp) file.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the SLEAP labels file containing predicted pose tracks.
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame units.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    """
    file = ValidHDF5(file_path, expected_datasets=["pred_points", "metadata"])
    labels = read_labels(file.path.as_posix())
    # Last axis-1 slice holds the point scores; the rest are coordinates
    tracks_with_scores = _sleap_labels_to_numpy(labels)
    track_names = [track.name for track in labels.tracks]
    if not track_names:
        logger.warning(
            f"Could not find SLEAP Track in {file.path}. "
            "Assuming single-individual dataset and assigning "
            "default individual name."
        )
    return from_numpy(
        position_array=tracks_with_scores[:, :-1, :, :],
        confidence_array=tracks_with_scores[:, -1, :, :],
        individual_names=track_names or None,
        keypoint_names=[kp.name for kp in labels.skeletons[0].nodes],
        fps=fps,
        source_software="SLEAP",
    )
def _sleap_labels_to_numpy(labels: Labels) -> np.ndarray:
    """Convert a SLEAP ``Labels`` object to a NumPy array.

    The output array contains pose tracks and point-wise confidence scores.

    Parameters
    ----------
    labels : Labels
        A SLEAP `Labels` object.

    Returns
    -------
    numpy.ndarray
        A NumPy array containing pose tracks and confidence scores,
        with shape ``(n_frames, 3, n_nodes, n_tracks)``. Along axis 1,
        the first two slices are the x/y coordinates and the last slice
        is the confidence score (NaN for user-labelled instances).

    Notes
    -----
    This function only considers SLEAP instances in the first
    video of the SLEAP `Labels` object. User-labelled instances are
    prioritised over predicted instances, mirroring SLEAP's approach
    when exporting .h5 analysis files [1]_.
    This function is adapted from `Labels.numpy()` from the
    `sleap_io` package [2]_.

    References
    ----------
    .. [1] https://github.com/talmolab/sleap/blob/v1.3.3/sleap/info/write_tracking_h5.py#L59
    .. [2] https://github.com/talmolab/sleap-io
    """
    # Select frames from the first video only
    lfs = [lf for lf in labels.labeled_frames if lf.video == labels.videos[0]]
    # Figure out frame index range. The start is clamped to at most 0 so
    # that lf.frame_idx - first_frame can index rows directly; frames 0
    # to the first labelled frame are left as NaN padding.
    frame_idxs = [lf.frame_idx for lf in lfs]
    first_frame = min(0, min(frame_idxs))
    last_frame = max(0, max(frame_idxs))
    n_tracks = len(labels.tracks) or 1  # If no tracks, assume 1 individual
    # With no tracks, [None] matches the untracked instances' .track value
    individuals = labels.tracks or [None]
    skeleton = labels.skeletons[-1]  # Assume project only uses last skeleton
    n_nodes = len(skeleton.nodes)
    n_frames = int(last_frame - first_frame + 1)
    # Pre-fill with NaN; frames/tracks without an instance stay missing
    tracks = np.full((n_frames, 3, n_nodes, n_tracks), np.nan, dtype="float32")
    for lf in lfs:
        i = int(lf.frame_idx - first_frame)
        user_instances = lf.user_instances
        predicted_instances = lf.predicted_instances
        for j, ind in enumerate(individuals):
            user_track_instances = [
                inst for inst in user_instances if inst.track == ind
            ]
            predicted_track_instances = [
                inst for inst in predicted_instances if inst.track == ind
            ]
            # Use user-labelled instance if available
            if user_track_instances:
                # If several instances match, the last one wins.
                # User labels carry no scores, so pad with a NaN column.
                inst = user_track_instances[-1]
                tracks[i, ..., j] = np.hstack(
                    (inst.numpy(), np.full((n_nodes, 1), np.nan))
                ).T
            elif predicted_track_instances:
                # Predicted instances provide per-point scores directly
                inst = predicted_track_instances[-1]
                tracks[i, ..., j] = inst.numpy(scores=True).T
    return tracks
def _df_from_dlc_csv(file_path: Path) -> pd.DataFrame:
    """Create a DeepLabCut-style DataFrame from a .csv file.

    If poses are loaded from a DeepLabCut-style .csv file, the DataFrame
    lacks the multi-index columns that are present in the .h5 file. This
    function parses the .csv file to DataFrame with multi-index columns,
    i.e. the same format as in the .h5 file.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the DeepLabCut-style .csv file containing pose tracks.

    Returns
    -------
    pandas.DataFrame
        DeepLabCut-style DataFrame with multi-index columns.

    """
    file = ValidDeepLabCutCSV(file_path)
    # Recognised header rows, in the order DeepLabCut writes them;
    # the "individuals" row is present only for multi-animal projects
    possible_level_names = ["scorer", "individuals", "bodyparts", "coords"]
    with open(file.path) as f:
        # if line starts with a possible level name, split it into a list
        # of strings, and add it to the list of header lines
        header_lines = [
            line.strip().split(",")
            for line in f.readlines()
            if line.split(",")[0] in possible_level_names
        ]
    # Form multi-index column names from the header lines:
    # each header row's first cell is the level name, the rest are the
    # per-column values; zipping column-wise yields one tuple per column
    level_names = [line[0] for line in header_lines]
    column_tuples = list(
        zip(*[line[1:] for line in header_lines], strict=False)
    )
    columns = pd.MultiIndex.from_tuples(column_tuples, names=level_names)
    # Import the DeepLabCut poses as a DataFrame, skipping the header rows
    # already consumed above and using the first column as the frame index
    df = pd.read_csv(
        file.path,
        skiprows=len(header_lines),
        index_col=0,
        names=np.array(columns),
    )
    # Restore the level names, which read_csv does not carry over
    df.columns.rename(level_names, inplace=True)
    return df
def _df_from_dlc_h5(file_path: Path) -> pd.DataFrame:
    """Create a DeepLabCut-style DataFrame from a .h5 file.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the DeepLabCut-style HDF5 file containing pose tracks.

    Returns
    -------
    pandas.DataFrame
        DeepLabCut-style DataFrame with multi-index columns.

    """
    file = ValidHDF5(file_path, expected_datasets=["df_with_missing"])
    # pd.read_hdf is typed as returning DataFrame-or-Series; wrapping in
    # pd.DataFrame guarantees the former (the "df_with_missing" dataset
    # is known to hold a DataFrame).
    return pd.DataFrame(pd.read_hdf(file.path, key="df_with_missing"))
def _ds_from_valid_data(data: ValidPosesDataset) -> xr.Dataset:
    """Convert validated pose tracking data to a ``movement`` dataset.

    Parameters
    ----------
    data : movement.io.tracks_validators.ValidPosesDataset
        The validated data object.

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    """
    n_frames, n_space = data.position_array.shape[:2]
    attrs: dict[str, str | float | None] = {
        "source_software": data.source_software,
        "ds_type": "poses",
    }
    # Time coordinates are seconds when fps is known, frames otherwise
    if data.fps is not None:
        time_coords = np.arange(n_frames, dtype=float) / data.fps
        attrs["fps"] = data.fps
        attrs["time_unit"] = "seconds"
    else:
        time_coords = np.arange(n_frames, dtype=int)
        attrs["time_unit"] = "frames"
    time_dim, space_dim, keypoints_dim, individuals_dim = (
        ValidPosesDataset.DIM_NAMES
    )
    # Assemble the xarray.Dataset; confidence lacks the space dimension
    return xr.Dataset(
        data_vars={
            "position": xr.DataArray(
                data.position_array, dims=ValidPosesDataset.DIM_NAMES
            ),
            "confidence": xr.DataArray(
                data.confidence_array,
                dims=(time_dim, keypoints_dim, individuals_dim),
            ),
        },
        coords={
            time_dim: time_coords,
            space_dim: ["x", "y", "z"][:n_space],
            keypoints_dim: data.keypoint_names,
            individuals_dim: data.individual_names,
        },
        attrs=attrs,
    )
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_anipose_style_df(
    df: pd.DataFrame,
    fps: float | None = None,
    individual_name: str = "id_0",
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from an Anipose 3D dataframe.

    Parameters
    ----------
    df : pd.DataFrame
        Anipose triangulation dataframe
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame units.
    individual_name : str, optional
        Name of the individual, by default "id_0"

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    Notes
    -----
    Reshape dataframe with columns keypoint1_x, keypoint1_y, keypoint1_z,
    keypoint1_score,keypoint2_x, keypoint2_y, keypoint2_z,
    keypoint2_score...to array of positions with dimensions
    time, space, keypoints, individuals, and array of confidence (from scores)
    with dimensions time, keypoints, individuals.

    """
    # A keypoint is any column-name prefix that carries _x/_y/_z
    # coordinate columns. str.endswith accepts a tuple of suffixes, and
    # sorted() consumes the set directly (no list() wrappers needed).
    keypoint_names = sorted(
        {
            col.rsplit("_", 1)[0]
            for col in df.columns
            if col.endswith(("_x", "_y", "_z"))
        }
    )
    n_frames = len(df)
    n_keypoints = len(keypoint_names)
    # Initialize arrays and fill column by column;
    # trailing dimension of size 1 holds the single individual
    position_array = np.zeros((n_frames, 3, n_keypoints, 1))
    confidence_array = np.zeros((n_frames, n_keypoints, 1))
    for i, kp in enumerate(keypoint_names):
        for j, coord in enumerate(("x", "y", "z")):
            position_array[:, j, i, 0] = df[f"{kp}_{coord}"]
        confidence_array[:, i, 0] = df[f"{kp}_score"]
    return from_numpy(
        position_array=position_array,
        confidence_array=confidence_array,
        individual_names=[individual_name],
        keypoint_names=keypoint_names,
        source_software="Anipose",
        fps=fps,
    )
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_anipose_file(
    file_path: Path | str,
    fps: float | None = None,
    individual_name: str = "id_0",
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from an Anipose 3D .csv file.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the Anipose triangulation .csv file
    fps : float, optional
        The number of frames per second in the video. If None (default),
        the ``time`` coordinates will be in frame units.
    individual_name : str, optional
        Name of the individual, by default "id_0"

    Returns
    -------
    xarray.Dataset
        ``movement`` dataset containing the pose tracks, confidence scores,
        and associated metadata.

    Notes
    -----
    Only x, y, z coordinates and the per-keypoint score (confidence) are
    loaded for now; other columns (e.g. number of cameras, reprojection
    error) are ignored. Future versions may load them too.

    """
    # Validate the path/permissions first, then the Anipose CSV structure
    file = ValidFile(
        file_path,
        expected_permission="r",
        expected_suffix=[".csv"],
    )
    anipose_file = ValidAniposeCSV(file.path)
    triangulation_df = pd.read_csv(anipose_file.path)
    return from_anipose_style_df(
        triangulation_df, fps=fps, individual_name=individual_name
    )
# [docs]  (Sphinx HTML artifact from the rendered documentation; not source)
def from_nwb_file(
    file: str | Path | pynwb.file.NWBFile,
    processing_module_key: str = "behavior",
    pose_estimation_key: str = "PoseEstimation",
) -> xr.Dataset:
    """Create a ``movement`` poses dataset from an NWB file.

    The input can be a path to an NWB file on disk or a
    :class:`pynwb.file.NWBFile` object. The data are extracted from the
    file's :class:`pynwb.base.ProcessingModule` (named by
    ``processing_module_key``) that holds the ``ndx_pose.PoseEstimation``
    object (named by ``pose_estimation_key``), formatted per the
    ``ndx-pose`` NWB extension [1]_.

    Parameters
    ----------
    file : str | Path | pynwb.file.NWBFile
        Path to the NWB file on disk (ending in ".nwb"),
        or an NWBFile object.
    processing_module_key : str, optional
        Name of the :class:`pynwb.base.ProcessingModule` in the NWB file
        that contains the pose estimation data. Default is "behavior".
    pose_estimation_key: str, optional
        Name of the ``ndx_pose.PoseEstimation`` object in the processing
        module (specified by ``processing_module_key``).
        Default is "PoseEstimation".

    Returns
    -------
    xarray.Dataset
        A single-individual ``movement`` dataset containing the pose
        tracks, confidence scores, and associated metadata.

    References
    ----------
    .. [1] https://github.com/rly/ndx-pose

    Examples
    --------
    Open an NWB file and load pose tracks from the
    :class:`pynwb.file.NWBFile` object:

    >>> import pynwb
    >>> import xarray as xr
    >>> from movement.io import load_poses
    >>> with pynwb.NWBHDF5IO("path/to/file.nwb", mode="r") as io:
    ...     nwb_file = io.read()
    ...     ds = load_poses.from_nwb_file(nwb_file)

    Or, directly load pose tracks from an NWB file on disk:

    >>> ds = load_poses.from_nwb_file("path/to/file.nwb")

    Load two single-individual datasets from two NWB files and merge them
    into a multi-individual dataset:

    >>> ds_singles = [
    ...     load_poses.from_nwb_file(f) for f in ["id1.nwb", "id2.nwb"]
    ... ]
    >>> ds_multi = xr.merge(ds_singles)

    """
    # Validation normalises the input: a Path for on-disk files,
    # or the NWBFile object itself.
    file = ValidNWBFile(file).file
    if not isinstance(file, Path):  # already an NWBFile object
        return _ds_from_nwb_object(
            file, processing_module_key, pose_estimation_key
        )
    # On-disk file: open, read, extract, and record the source path
    with pynwb.NWBHDF5IO(file, mode="r") as io:
        ds = _ds_from_nwb_object(
            io.read(), processing_module_key, pose_estimation_key
        )
        ds.attrs["source_file"] = file
    return ds
def _ds_from_nwb_object(
    nwb_file: pynwb.file.NWBFile,
    processing_module_key: str = "behavior",
    pose_estimation_key: str = "PoseEstimation",
) -> xr.Dataset:
    """Extract a ``movement`` poses dataset from an NWBFile object.

    Each ``PoseEstimationSeries`` (one per keypoint) is converted to a
    single-keypoint, single-individual dataset; the per-keypoint datasets
    are then merged into one dataset along the keypoints dimension.

    Parameters
    ----------
    nwb_file : pynwb.file.NWBFile
        An NWBFile object.
    processing_module_key : str, optional
        Name of the :class:`pynwb.base.ProcessingModule` in the NWB file that
        contains the pose estimation data. Default is "behavior".
    pose_estimation_key: str, optional
        Name of the ``ndx_pose.PoseEstimation`` object in the processing
        module (specified by ``processing_module_key``).
        Default is "PoseEstimation".

    Returns
    -------
    xarray.Dataset
        A single-individual ``movement`` poses dataset

    """
    pose_estimation = nwb_file.processing[processing_module_key][
        pose_estimation_key
    ]
    source_software = pose_estimation.source_software
    # Mapping of keypoint name -> ndx_pose.PoseEstimationSeries
    pose_estimation_series = pose_estimation.pose_estimation_series
    single_keypoint_datasets = []
    for keypoint, pes in pose_estimation_series.items():
        # Extract position and confidence data for each keypoint
        position_data = np.asarray(pes.data)  # shape: (n_frames, n_space)
        # Missing confidence is replaced with NaNs so from_numpy still
        # receives an array of the expected shape
        confidence_data = (  # shape: (n_frames,)
            np.asarray(pes.confidence)
            if getattr(pes, "confidence", None) is not None
            else np.full(position_data.shape[0], np.nan)
        )
        # Compute fps from time differences between timestamps
        # if rate is not available (a falsy rate — None or 0 — triggers
        # the timestamp-based estimate)
        fps = pes.rate or float(np.nanmedian(1 / np.diff(pes.timestamps)))
        single_keypoint_datasets.append(
            # create movement dataset with 1 keypoint and 1 individual;
            # the NWB file identifier doubles as the individual's name
            from_numpy(
                position_data[:, :, np.newaxis, np.newaxis],
                confidence_data[:, np.newaxis, np.newaxis],
                individual_names=[nwb_file.identifier],
                keypoint_names=[keypoint],
                # rounding keeps fps attrs comparable across keypoints
                # despite float noise in the timestamp-based estimate
                fps=round(fps, 6),
                source_software=source_software,
            )
        )
    # Merge the per-keypoint datasets; coordinates align on time/space,
    # and keypoints are concatenated
    return xr.merge(single_keypoint_datasets)