Source code for sportslabkit.camera.camera

"""Create a camera object that can be used to read frames from a video file."""

from __future__ import annotations

from collections.abc import Generator, Mapping, Sequence
from xml.etree import ElementTree

import cv2 as cv
import numpy as np
from numpy.typing import ArrayLike, NDArray

from sportslabkit.camera.calibrate import find_intrinsic_camera_parameters
from sportslabkit.camera.videoreader import VideoReader
from sportslabkit.types.types import PathLike
from sportslabkit.utils import logger


class Camera(VideoReader):
    """Video reader with camera calibration, undistortion and pitch projection helpers."""

    def __init__(
        self,
        video_path: PathLike,
        threaded: bool = False,
        queue_size: int = 10,
        keypoint_xml: str | None = None,
        x_range: Sequence[float] | None = (0, 105),
        y_range: Sequence[float] | None = (0, 68),
        camera_matrix: ArrayLike | None = None,
        camera_matrix_path: str | None = None,
        distortion_coefficients: ArrayLike | None = None,
        distortion_coefficients_path: str | None = None,
        calibration_video_path: str | None = None,
        calibration_method: str = "zhang",
        label: str = "",
        verbose: int = 0,
    ):
        """Class for handling camera calibration and undistortion.

        Args:
            video_path (str): path to video file.
            threaded (bool, optional): whether to use a threaded video reader. Defaults to False.
            queue_size (int, optional): size of queue for threaded video reader. Defaults to 10.
            keypoint_xml (str): path to file containing a mapping from pitch coordinates to video.
            x_range (Sequence[float]): pitch range to consider in x direction.
            y_range (Sequence[float]): pitch range to consider in y direction.
            camera_matrix (Optional[Union[str, np.ndarray]]): numpy array or path to file containing camera matrix.
            camera_matrix_path (Optional[str]): path to a ``.npy`` file with the camera matrix; used when
                ``camera_matrix`` is not given.
            distortion_coefficients (Optional[Union[str, np.ndarray]]): numpy array or path to file containing
                distortion coefficients.
            distortion_coefficients_path (Optional[str]): path to a ``.npy`` file with the distortion
                coefficients; used when ``distortion_coefficients`` is not given.
            calibration_video_path (Optional[str]): path to video file with checkerboard to use for calibration.
            calibration_method (str, optional): ``"zhang"`` or ``"fisheye"``; selects the model used by
                ``undistort_points``. Defaults to "zhang".
            label (str, optional): label for camera. Defaults to "".
            verbose (int, optional): verbosity level. Currently not used by this class. Defaults to 0.
        Attributes:
            camera_matrix (np.ndarray): numpy array containing camera matrix.
            distortion_coefficients (np.ndarray): numpy array containing distortion coefficients.
            keypoint_map (Mapping): mapping from pitch coordinates to video.
            H (np.ndarray): homography matrix from image to pitch.
            w (int): width of video.
            h (int): height of video.

        """
        if threaded:
            logger.warning("Threaded video reader is buggy. Use at your own risk.")
        super().__init__(video_path, threaded, queue_size)
        self.label = label

        self.video_path = str(video_path)
        self.calibration_method = calibration_method

        self.camera_matrix = camera_matrix
        self.distortion_coefficients = distortion_coefficients
        self.camera_matrix_path = camera_matrix_path
        self.distortion_coefficients_path = distortion_coefficients_path
        self.calibration_video_path = calibration_video_path
        self.load_calibration_params()

        self.x_range = x_range
        self.y_range = y_range

        # Remove leading singleton dimension when returning single frames. Defaults to True.
        self.remove_leading_singleton = True

        if keypoint_xml is not None:
            source_keypoints, target_keypoints = read_pitch_keypoints(keypoint_xml, "video")
            self.source_keypoints = source_keypoints
            self.target_keypoints = target_keypoints

            ## TODO: add option to not undistort points maybe?
            # NOTE(review): the undistorted points are only used for the logged error below;
            # `H` is still estimated from the raw `self.source_keypoints` -- confirm this is intended.
            source_keypoints = self.undistort_points(source_keypoints).squeeze()
            proj_error = np.linalg.norm(self.video2pitch(source_keypoints) - target_keypoints, axis=-1).mean()
            logger.debug(f"Camera `{self.label}`: projection error = {proj_error:.2f}m")
        else:
            self.source_keypoints = None
            self.target_keypoints = None

    @staticmethod
    def _resolve_calibration_array(value, path):
        """Return ``value``, loading it from disk when it is a path string or missing but ``path`` is set."""
        if isinstance(value, str):
            # The constructor documents that a path may be passed in place of the array.
            return np.load(value)
        if value is None and path:
            return np.load(path)
        return value

    def _build_undistortion_maps(self, fisheye: bool = False):
        """Build the ``cv.remap`` lookup tables for the current camera parameters."""
        dim = (self.frame_width, self.frame_height)
        mtx = self.camera_matrix
        dist = self.distortion_coefficients
        if fisheye:
            new_mtx = cv.fisheye.estimateNewCameraMatrixForUndistortRectify(mtx, dist, dim, np.eye(3), balance=1.0)
            return cv.fisheye.initUndistortRectifyMap(mtx, dist, np.eye(3), new_mtx, dim, cv.CV_32FC1)
        new_mtx, _ = cv.getOptimalNewCameraMatrix(mtx, dist, dim, 1, dim)
        return cv.initUndistortRectifyMap(mtx, dist, None, new_mtx, dim, cv.CV_32FC1)

    def load_calibration_params(self):
        """Resolve the camera matrix, distortion coefficients and undistortion maps.

        Resolution order:

        1. Values passed directly (or loaded from ``camera_matrix_path`` /
           ``distortion_coefficients_path`` when the direct value is missing).
        2. If either is still missing and ``calibration_video_path`` is set, all
           four of ``camera_matrix``, ``distortion_coefficients``, ``mapx`` and
           ``mapy`` are taken from ``find_intrinsic_camera_parameters``.
        3. Otherwise an identity camera matrix and zero distortion are used.

        In cases 1 and 3 ``mapx`` / ``mapy`` are built here so that
        ``undistort_image`` is always usable afterwards.
        """
        self.camera_matrix = self._resolve_calibration_array(self.camera_matrix, self.camera_matrix_path)
        self.distortion_coefficients = self._resolve_calibration_array(
            self.distortion_coefficients, self.distortion_coefficients_path
        )

        have_params = self.camera_matrix is not None and self.distortion_coefficients is not None
        calibration_video_path = self.calibration_video_path

        if not have_params and calibration_video_path is not None:
            (
                self.camera_matrix,
                self.distortion_coefficients,
                self.mapx,
                self.mapy,
            ) = find_intrinsic_camera_parameters(calibration_video_path)

            # The estimated parameters are not written to disk; these paths only
            # record where they would be stored next to the calibration video.
            self.camera_matrix_path = f"{calibration_video_path}.camera_matrix.npy"
            self.distortion_coefficients_path = f"{calibration_video_path}.distortion_coefficients.npy"
            return

        if have_params:
            self.camera_matrix = np.asarray(self.camera_matrix, dtype=np.float64)
            self.distortion_coefficients = np.asarray(self.distortion_coefficients, dtype=np.float64)
            use_fisheye = self.calibration_method == "fisheye"
        else:
            # No usable calibration: fall back to an identity camera with no
            # distortion, always using the standard (non-fisheye) model.
            self.camera_matrix = np.eye(3)
            self.distortion_coefficients = np.zeros(4)
            use_fisheye = False

        self.mapx, self.mapy = self._build_undistortion_maps(fisheye=use_fisheye)

    def get_frame(self, frame_idx: int) -> np.ndarray:
        """Get frame from video.

        Args:
            frame_idx (int): index of the frame to read.

        Returns:
            np.ndarray: frame
        """
        return self[frame_idx]

    def iter_frames(self, calibrate: bool = False, crop: bool = False) -> Generator[NDArray, None, None]:
        """Iterate over frames of video.

        Args:
            calibrate (bool, optional): accepted for API compatibility; currently unused.
            crop (bool, optional): accepted for API compatibility; currently unused.

        Returns:
            The camera object itself, which is iterated to obtain frames.
        """
        return self

    def batch_frames(
        self, batch_size: int = 32, calibrate: bool = False, crop: bool = False
    ) -> Generator[NDArray, None, None]:
        """Iterate over the video in stacked batches of frames.

        Args:
            batch_size (int, optional): number of frames per batch. Defaults to 32.
            calibrate (bool, optional): accepted for API compatibility; currently unused.
            crop (bool, optional): accepted for API compatibility; currently unused.

        Yields:
            NDArray: frames stacked along a new leading axis. The final batch
            holds fewer than ``batch_size`` frames when the video length is not
            a multiple of ``batch_size``.
        """
        frames = []
        for frame in self:
            frames.append(frame)
            if len(frames) == batch_size:
                yield np.stack(frames)
                frames = []
        if frames:
            yield np.stack(frames)

    def video2pitch(self, pts: ArrayLike) -> NDArray[np.float64]:
        """Convert image coordinates to pitch coordinates.

        Args:
            pts (ArrayLike): points in image coordinate space, either a single
                point or a sequence of points.

        Returns:
            np.ndarray: points in pitch coordinates. The result keeps the
            leading singleton axis added for ``cv.perspectiveTransform``.

        """
        pts = np.asarray(pts)
        if pts.ndim == 1:
            pts = pts.reshape(1, -1)

        pitch_pts = cv.perspectiveTransform(np.asarray([pts], dtype=np.float32), self.H)
        return pitch_pts

    def pitch2video(self, pitch_pts: ArrayLike) -> NDArray[np.float64]:
        """Converts pitch coordinates to image coordinates.

        Args:
            pitch_pts (ArrayLike): coordinates in pitch coordinate space.

        Raises:
            NotImplementedError: this method is not implemented.

        Returns:
            NDArray[np.float64]: ...

        """
        # TODO: implement this
        raise NotImplementedError

    def undistort_points(self, points: ArrayLike) -> NDArray[np.float64]:
        """Undistort points with the camera matrix and distortion coefficients.

        Args:
            points (ArrayLike): image points to undistort.

        Returns:
            NDArray[np.float64]: undistorted points. For ``"zhang"`` the result
            is reshaped to ``(-1, 2)`` and shifted by the ROI offset returned by
            ``cv.getOptimalNewCameraMatrix``.

        Raises:
            ValueError: if ``calibration_method`` is neither ``"zhang"`` nor ``"fisheye"``.

        Note:
            Not to be confused with video2pitch which uses a homography transformation.
        """
        mtx = self.camera_matrix
        dist = self.distortion_coefficients
        # NOTE(review): `w` / `h` are not assigned in this class; presumably provided by VideoReader -- confirm.
        size = (self.w, self.h)

        # OpenCV only accepts floating point coordinates; promote lists / integer arrays.
        points = np.asarray(points)
        if not np.issubdtype(points.dtype, np.floating):
            points = points.astype(np.float64)

        if self.calibration_method == "zhang":
            newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, size, 1, size)
            dst = cv.undistortPoints(points, mtx, dist, None, newcameramtx).reshape(-1, 2)
            x, y = roi[:2]
            return dst - np.asarray([x, y])

        if self.calibration_method == "fisheye":
            mtx_new = cv.fisheye.estimateNewCameraMatrixForUndistortRectify(mtx, dist, size, np.eye(3), balance=1.0)
            points = np.expand_dims(points, axis=1)
            return np.squeeze(cv.fisheye.undistortPoints(points, mtx, dist, P=mtx_new))

        raise ValueError(
            f"Unknown calibration method `{self.calibration_method}`. Must be `zhang` or `fisheye`."
        )

    def pitch_contour(self, frame_num):
        """Placeholder; not implemented and currently returns ``None``."""
        pass

    def undistort_image(self, image: NDArray) -> NDArray:
        """Remap ``image`` through the precomputed ``mapx`` / ``mapy`` undistortion tables.

        Args:
            image (NDArray): frame to undistort.

        Returns:
            NDArray: the remapped image (linear interpolation, constant border).
        """
        undistorted_image = cv.remap(
            image,
            self.mapx,
            self.mapy,
            interpolation=cv.INTER_LINEAR,
            borderMode=cv.BORDER_CONSTANT,
        )
        return undistorted_image

    @property
    def dtype(self):
        """Data type reported for frames (always ``np.uint8``)."""
        return np.uint8

    @property
    def shape(self):
        """Tuple of the number of frames followed by the shape of one frame."""
        return (self.number_of_frames, *self.frame_shape)

    @property
    def ndim(self):
        """Number of dimensions reported for the video."""
        # NOTE(review): this is one more than len(self.shape); kept as-is because callers may rely on it -- confirm.
        return len(self.shape) + 1

    @property
    def size(self):
        """Total number of elements, i.e. the product of ``shape``."""
        # np.product was deprecated in NumPy 1.25 and removed in 2.0; np.prod is the supported spelling.
        return np.prod(self.shape)

    def min(self):
        """Lower bound of pixel values (always 0)."""
        return 0

    def max(self):
        """Upper bound of pixel values (always 255)."""
        return 255

    @property
    def keypoint_map(self) -> dict[tuple[int, int], tuple[int, int]] | None:
        """Get dictionary of pitch keypoints in pitch space to pixel space.

        Returns:
            Dict: dictionary keyed by each target keypoint (as a tuple) with the
            matching source keypoint as value, or ``None`` when no keypoints
            were loaded.

        """
        if self.source_keypoints is None:
            return None
        return {tuple(key): value for key, value in zip(self.target_keypoints, self.source_keypoints)}

    @property
    def A(self) -> NDArray[np.float64]:
        """Calculate the partial affine transformation matrix from video to pitch space.

        Estimated with ``cv.estimateAffinePartial2D`` from ``source_keypoints``
        to ``target_keypoints``; recomputed on every access.

        Returns:
            NDArray[np.float64]: affine transformation matrix.

        """

        A, *_ = cv.estimateAffinePartial2D(self.source_keypoints, self.target_keypoints)
        return A

    @property
    def H(self) -> NDArray[np.float64]:
        """Calculate the homography transformation matrix from video to pitch space.

        Estimated with ``cv.findHomography`` (RANSAC, reprojection threshold
        5.0) from ``source_keypoints`` to ``target_keypoints``; recomputed on
        every access.

        Returns:
            NDArray[np.float64]: homography transformation matrix.

        """

        H, *_ = cv.findHomography(self.source_keypoints, self.target_keypoints, cv.RANSAC, 5.0)
        return H


def read_pitch_keypoints(xmlfile: str, annot_type: str) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Read pitch keypoints from xml file.

    Two layouts are supported:

    * ``'video'``: keypoints are the grandchildren of the root element; each one
      carries both a ``label`` and a ``points`` attribute.
    * ``'frame'``: keypoints are the children of the root element; each one
      carries a ``label`` attribute and its first child carries ``points``.

    Elements without any attributes are skipped.

    Args:
        xmlfile (str): path to xml file.
        annot_type (str): type of annotation. Either 'video' or 'frame'.

    Raises:
        ValueError: if annotation type is not 'video' or 'frame', or if the
            file contains no keypoints.

    Returns:
        Tuple[NDArray[np.float64], NDArray[np.float64]]: the values parsed from
        the ``points`` attributes followed by the values parsed from the
        ``label`` attributes (``Camera`` uses them as source/video and
        target/pitch keypoints respectively).

    """
    root = ElementTree.parse(xmlfile).getroot()

    src = []
    dst = []

    # SECURITY: `eval` executes whatever expression is stored in the attribute.
    # Only load annotation files from trusted sources; consider
    # `ast.literal_eval` if the attributes are always plain number tuples.
    if annot_type == "video":
        for track in root:
            for node in track:
                attrs = node.attrib
                if attrs:
                    dst.append(eval(attrs["label"]))
                    src.append(eval(attrs["points"]))
    elif annot_type == "frame":
        for node in root:
            attrs = node.attrib
            if attrs:
                dst.append(eval(attrs["label"]))
                src.append(eval(node[0].attrib["points"]))
    else:
        raise ValueError("Annotation type must be `video` or `frame`.")

    src = np.asarray(src)
    dst = np.asarray(dst)

    # Raised explicitly (rather than asserted) so the check survives `python -O`.
    if src.size == 0:
        raise ValueError("No keypoints found in XML file.")
    return src, dst


def load_cameras(camera_info: list[Mapping]) -> list[Camera]:
    """Load cameras from a list of entries containing camera information.

    Each entry is read with attribute access (``cam_info.video_path`` etc.), so
    it must expose ``video_path``, ``keypoint_xml``, ``camera_matrix``,
    ``camera_matrix_path``, ``distortion_coefficients``,
    ``distortion_coefficients_path``, ``calibration_video_path``, ``x_range``,
    ``y_range`` and ``label`` as attributes; a plain ``dict`` does not.

    Args:
        camera_info (List[Mapping]): list of entries containing camera information.

    Returns:
        List[Camera]: list of cameras objects, in the same order as ``camera_info``.

    """
    return [
        Camera(
            video_path=cam_info.video_path,
            keypoint_xml=cam_info.keypoint_xml,
            camera_matrix=cam_info.camera_matrix,
            camera_matrix_path=cam_info.camera_matrix_path,
            distortion_coefficients=cam_info.distortion_coefficients,
            distortion_coefficients_path=cam_info.distortion_coefficients_path,
            calibration_video_path=cam_info.calibration_video_path,
            x_range=cam_info.x_range,
            y_range=cam_info.y_range,
            label=cam_info.label,
        )
        for cam_info in camera_info
    ]