Source code for sportslabkit.camera.camera

"""Create a camera object that can be used to read frames from a video file."""

from __future__ import annotations

from collections.abc import Generator, Mapping, Sequence
from xml.etree import ElementTree

import cv2 as cv
import numpy as np
from numpy.typing import ArrayLike, NDArray

from sportslabkit.camera.calibrate import find_intrinsic_camera_parameters
from sportslabkit.camera.videoreader import VideoReader
from sportslabkit.types.types import PathLike
from sportslabkit.utils import logger


class Camera(VideoReader):
    """Video reader bundled with camera intrinsics and pitch keypoint annotations."""

    def __init__(
        self,
        video_path: PathLike,
        threaded: bool = False,
        queue_size: int = 10,
        keypoint_xml: str | None = None,
        x_range: Sequence[float] | None = (0, 105),
        y_range: Sequence[float] | None = (0, 68),
        camera_matrix: ArrayLike | None = None,
        camera_matrix_path: str | None = None,
        distortion_coefficients: ArrayLike | None = None,
        distortion_coefficients_path: str | None = None,
        calibration_video_path: str | None = None,
        calibration_method: str = "zhang",
        label: str = "",
        verbose: int = 0,
    ):
        """Class for handling camera calibration and undistortion.

        Args:
            video_path (PathLike): path to video file.
            threaded (bool, optional): whether to use a threaded video reader. Defaults to False.
            queue_size (int, optional): size of queue for threaded video reader. Defaults to 10.
            keypoint_xml (str, optional): path to an XML file with keypoint annotations. When given,
                it is parsed with ``read_pitch_keypoints(keypoint_xml, "video")``.
            x_range (Sequence[float], optional): pitch range to consider in x direction (stored as-is).
            y_range (Sequence[float], optional): pitch range to consider in y direction (stored as-is).
            camera_matrix (ArrayLike, optional): camera matrix.
            camera_matrix_path (str, optional): path to a file readable by ``np.load`` holding the
                camera matrix. Used only when ``camera_matrix`` is not given.
            distortion_coefficients (ArrayLike, optional): distortion coefficients.
            distortion_coefficients_path (str, optional): path to a file readable by ``np.load``
                holding the distortion coefficients. Used only when ``distortion_coefficients``
                is not given.
            calibration_video_path (str, optional): path to video file with checkerboard to use for
                calibration. Used only when the intrinsics are not otherwise available.
            calibration_method (str, optional): ``"zhang"`` or ``"fisheye"``; selects the model used
                by :meth:`undistort_points`. Defaults to "zhang".
            label (str, optional): label for camera. Defaults to "".
            verbose (int, optional): verbosity level. Currently unused. Defaults to 0.

        Attributes:
            camera_matrix (np.ndarray): numpy array containing camera matrix.
            distortion_coefficients (np.ndarray): numpy array containing distortion coefficients.
            source_keypoints (np.ndarray | None): annotated keypoints in video coordinates.
            target_keypoints (np.ndarray | None): matching keypoints in pitch coordinates.
            keypoint_map (Mapping): mapping from pitch coordinates to video.
            H (np.ndarray): homography matrix from image to pitch.
            w (int): width of video. Not set in this class; presumably provided by
                ``VideoReader`` -- TODO confirm.
            h (int): height of video. Not set in this class; presumably provided by
                ``VideoReader`` -- TODO confirm.
        """
        if threaded:
            logger.warning("Threaded video reader is buggy. Use at your own risk.")

        super().__init__(video_path, threaded, queue_size)
        self.label = label

        self.video_path = str(video_path)
        self.calibration_method = calibration_method

        self.camera_matrix = camera_matrix
        self.distortion_coefficients = distortion_coefficients
        self.camera_matrix_path = camera_matrix_path
        self.distortion_coefficients_path = distortion_coefficients_path
        self.calibration_video_path = calibration_video_path

        # Needs the frame size, so it must run after the base reader is initialised.
        self.load_calibration_params()

        self.x_range = x_range
        self.y_range = y_range

        # Remove leading singleton dimension when returning single frames. Defaults to True.
        self.remove_leading_singleton = True

        if keypoint_xml is not None:
            source_keypoints, target_keypoints = read_pitch_keypoints(keypoint_xml, "video")
            self.source_keypoints = source_keypoints
            self.target_keypoints = target_keypoints

            # TODO: add option to not undistort points maybe?
            # NOTE(review): the undistorted points are only used for the error report below;
            # `H` is still estimated from the raw `self.source_keypoints` -- confirm intent.
            source_keypoints = self.undistort_points(source_keypoints).squeeze()
            proj_error = np.linalg.norm(self.video2pitch(source_keypoints) - target_keypoints, axis=-1).mean()
            logger.debug(f"Camera `{self.label}`: projection error = {proj_error:.2f}m")
        else:
            self.source_keypoints = None
            self.target_keypoints = None

    def load_calibration_params(self):
        """Resolve the camera matrix and distortion coefficients.

        Resolution order:

        1. Values already stored on the instance.
        2. Arrays loaded with ``np.load`` from ``camera_matrix_path`` /
           ``distortion_coefficients_path`` for whichever value is still missing.
        3. ``find_intrinsic_camera_parameters(calibration_video_path)``, which also
           supplies ``mapx`` / ``mapy``.
        4. An identity camera matrix with zero distortion, for which the remap
           tables are built from the frame size.
        """
        # The loaded arrays used to be discarded; keep them so the *_path arguments work.
        if self.camera_matrix is None and self.camera_matrix_path:
            self.camera_matrix = np.load(self.camera_matrix_path)
        if self.distortion_coefficients is None and self.distortion_coefficients_path:
            self.distortion_coefficients = np.load(self.distortion_coefficients_path)

        if self.camera_matrix is not None and self.distortion_coefficients is not None:
            # NOTE(review): `mapx` / `mapy` are not built on this path, so `undistort_image`
            # has no remap tables when intrinsics are supplied directly -- confirm intent.
            return

        calibration_video_path = self.calibration_video_path
        if calibration_video_path is not None:
            (
                self.camera_matrix,
                self.distortion_coefficients,
                self.mapx,
                self.mapy,
            ) = find_intrinsic_camera_parameters(calibration_video_path)

            self.camera_matrix_path = f"{calibration_video_path}.camera_matrix.npy"
            self.distortion_coefficients_path = f"{calibration_video_path}.distortion_coefficients.npy"
            # TODO: persist the calibration results (np.save) somewhere appropriate.
            return

        # No calibration data at all: fall back to an identity model.
        self.camera_matrix = np.eye(3)
        self.distortion_coefficients = np.zeros(4)
        dim = (self.frame_width, self.frame_height)
        newcameramtx, _ = cv.getOptimalNewCameraMatrix(self.camera_matrix, self.distortion_coefficients, dim, 1, dim)
        self.mapx, self.mapy = cv.initUndistortRectifyMap(
            self.camera_matrix,
            self.distortion_coefficients,
            None,
            newcameramtx,
            dim,
            cv.CV_32FC1,  # same value as the literal 5 used previously
        )

    def get_frame(self, frame_idx: int) -> np.ndarray:
        """Get frame from video.

        Args:
            frame_idx (int): index of the frame to read.

        Returns:
            np.ndarray: the result of indexing this reader with ``frame_idx``.
        """
        return self[frame_idx]

    def iter_frames(self, calibrate: bool = False, crop: bool = False) -> Generator[NDArray, None, None]:
        """Iterate over frames of video.

        Args:
            calibrate (bool, optional): currently unused.
            crop (bool, optional): currently unused.

        Returns:
            The camera itself, which is iterated to obtain frames.
        """
        return self

    def batch_frames(
        self, batch_size: int = 32, calibrate: bool = False, crop: bool = False
    ) -> Generator[NDArray, None, None]:
        """Iterate over the video in stacked batches of frames.

        Args:
            batch_size (int, optional): number of frames per batch. Defaults to 32.
            calibrate (bool, optional): currently unused.
            crop (bool, optional): currently unused.

        Yields:
            NDArray: ``np.stack`` of up to ``batch_size`` consecutive frames; the final
            batch may be shorter.
        """
        frames = []
        for frame in self:
            frames.append(frame)
            if len(frames) == batch_size:
                yield np.stack(frames)
                frames = []
        if len(frames) > 0:
            yield np.stack(frames)

    def video2pitch(self, pts: ArrayLike) -> NDArray[np.float32]:
        """Convert image coordinates to pitch coordinates.

        Args:
            pts (ArrayLike): points in image coordinate space; a single point
                may be passed as a 1-D sequence.

        Returns:
            NDArray[np.float32]: points in pitch coordinates, returned as produced by
            ``cv.perspectiveTransform`` with the leading batch axis kept.
        """
        # Accept lists/tuples as well as arrays; `.ndim` only exists on ndarrays.
        pts = np.asarray(pts)
        if pts.ndim == 1:
            pts = pts.reshape(1, -1)

        pitch_pts = cv.perspectiveTransform(np.asarray([pts], dtype=np.float32), self.H)
        return pitch_pts

    def pitch2video(self, pitch_pts: ArrayLike) -> NDArray[np.float64]:
        """Converts pitch coordinates to image coordinates.

        Args:
            pitch_pts (ArrayLike): coordinates in pitch coordinate space.

        Raises:
            NotImplementedError: this method is not implemented.

        Returns:
            NDArray[np.float64]: ...
        """
        # TODO: implement this
        raise NotImplementedError

    def undistort_points(self, points: ArrayLike) -> NDArray[np.float64]:
        """Undistort points with the camera matrix and distortion coefficients.

        Args:
            points (ArrayLike): points in (distorted) image coordinates.

        Returns:
            NDArray[np.float64]: undistorted points.

        Raises:
            ValueError: if ``calibration_method`` is neither ``"zhang"`` nor ``"fisheye"``.

        Note:
            Not to be confused with video2pitch which uses a homography transformation.
        """
        mtx = self.camera_matrix
        dist = self.distortion_coefficients
        w = self.w
        h = self.h

        if self.calibration_method == "zhang":
            newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
            dst = cv.undistortPoints(points, mtx, dist, None, newcameramtx)
            dst = dst.reshape(-1, 2)
            # Shift into the coordinate frame of the valid region of interest.
            roi_x, roi_y, _roi_w, _roi_h = roi
            dst = dst - np.asarray([roi_x, roi_y])
        elif self.calibration_method == "fisheye":
            mtx_new = cv.fisheye.estimateNewCameraMatrixForUndistortRectify(mtx, dist, (w, h), np.eye(3), balance=1.0)
            points = np.expand_dims(points, axis=1)
            dst = np.squeeze(cv.fisheye.undistortPoints(points, mtx, dist, P=mtx_new))
        else:
            # Previously this fell through and failed with an unbound local variable.
            raise ValueError(
                f"Unknown calibration method `{self.calibration_method}`; expected `zhang` or `fisheye`."
            )
        return dst

    def pitch_contour(self, frame_num):
        """Placeholder; currently does nothing and returns None."""
        pass

    def undistort_image(self, image: NDArray) -> NDArray:
        """Remap ``image`` through the ``mapx`` / ``mapy`` undistortion tables.

        Args:
            image (NDArray): image to undistort.

        Returns:
            NDArray: the remapped image (linear interpolation, constant border).
        """
        undistorted_image = cv.remap(
            image,
            self.mapx,
            self.mapy,
            interpolation=cv.INTER_LINEAR,
            borderMode=cv.BORDER_CONSTANT,
        )
        return undistorted_image

    @property
    def dtype(self):
        """Element type reported for frames (always ``np.uint8``)."""
        return np.uint8

    @property
    def shape(self):
        """Tuple of the number of frames followed by the per-frame shape."""
        return (self.number_of_frames, *self.frame_shape)

    @property
    def ndim(self):
        """Number of dimensions reported for the video."""
        # NOTE(review): this is one more than len(self.shape) -- confirm whether the
        # extra axis is intentional before changing it.
        return len(self.shape) + 1

    @property
    def size(self):
        """Product of all entries of ``shape``."""
        # np.product was removed in NumPy 2.0; np.prod is the supported spelling.
        return np.prod(self.shape)

    def min(self):
        """Return the lower bound reported for pixel values (0)."""
        return 0

    def max(self):
        """Return the upper bound reported for pixel values (255)."""
        return 255

    @property
    def keypoint_map(self) -> dict[tuple[int, int], tuple[int, int]] | None:
        """Get dictionary of pitch keypoints in pitch space to pixel space.

        Returns:
            Dict | None: maps each target (pitch) keypoint, as a tuple, to its source
            (video) keypoint; ``None`` when no keypoints were loaded.
        """
        if self.source_keypoints is None:
            return None
        return {tuple(key): value for key, value in zip(self.target_keypoints, self.source_keypoints)}

    @property
    def A(self) -> NDArray[np.float64]:
        """Estimate the partial affine transform from video to pitch space.

        Computed with ``cv.estimateAffinePartial2D`` from the source (video)
        keypoints to the target (pitch) keypoints on every access.

        Returns:
            NDArray[np.float64]: affine transformation matrix.
        """
        A, *_ = cv.estimateAffinePartial2D(self.source_keypoints, self.target_keypoints)
        return A

    @property
    def H(self) -> NDArray[np.float64]:
        """Estimate the homography from video to pitch space.

        Computed with ``cv.findHomography`` (RANSAC, reprojection threshold 5.0)
        from the source (video) keypoints to the target (pitch) keypoints on
        every access.

        Returns:
            NDArray[np.float64]: homography transformation matrix.
        """
        H, *_ = cv.findHomography(self.source_keypoints, self.target_keypoints, cv.RANSAC, 5.0)
        return H
def read_pitch_keypoints(xmlfile: str, annot_type: str) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Read pitch keypoints from xml file.

    Each annotated element carries a ``label`` attribute (the pitch coordinate)
    and a ``points`` attribute (the video coordinate), both written as Python
    literals such as ``"52.5,34"``.

    Args:
        xmlfile (str): path to xml file.
        annot_type (str): type of annotation. Either 'video' or 'frame'.
            For 'video', ``label`` and ``points`` are read from the grandchildren
            of the root. For 'frame', ``label`` is read from each child of the
            root and ``points`` from that child's first sub-element.

    Raises:
        ValueError: if annotation type is not 'video' or 'frame', or if an
            attribute is not a plain Python literal.
        AssertionError: if no keypoints are found in the file.

    Returns:
        Tuple[NDArray[np.float64], NDArray[np.float64]]: video keypoints
        (from ``points``) and pitch keypoints (from ``label``), in that order.
    """
    # Local import keeps this block self-contained.
    from ast import literal_eval

    root = ElementTree.parse(xmlfile).getroot()

    src = []
    dst = []

    # SECURITY: attribute text comes from an external file. It was previously passed
    # to eval(), which would execute arbitrary code; literal_eval only accepts literals.
    if annot_type == "video":
        for child in root:
            for c in child:
                d = c.attrib
                if d != {}:
                    dst.append(literal_eval(d["label"]))
                    src.append(literal_eval(d["points"]))
    elif annot_type == "frame":
        for child in root:
            d = child.attrib
            if d != {}:
                dst.append(literal_eval(d["label"]))
                src.append(literal_eval(child[0].attrib["points"]))
    else:
        raise ValueError("Annotation type must be `video` or `frame`.")

    src = np.asarray(src)
    dst = np.asarray(dst)

    # Raised explicitly (same exception type as before) so the check survives `python -O`.
    if src.size == 0:
        raise AssertionError("No keypoints found in XML file.")

    return src, dst
def load_cameras(camera_info: list[Mapping]) -> list[Camera]:
    """Build one :class:`Camera` per entry of ``camera_info``.

    Args:
        camera_info (List[Mapping]): camera descriptions. Each entry is read through
            attribute access (``entry.video_path``, ``entry.keypoint_xml``, ...).

    Returns:
        List[Camera]: the constructed cameras, in the same order as ``camera_info``.
    """
    return [
        Camera(
            video_path=info.video_path,
            keypoint_xml=info.keypoint_xml,
            camera_matrix=info.camera_matrix,
            camera_matrix_path=info.camera_matrix_path,
            distortion_coefficients=info.distortion_coefficients,
            distortion_coefficients_path=info.distortion_coefficients_path,
            calibration_video_path=info.calibration_video_path,
            x_range=info.x_range,
            y_range=info.y_range,
            label=info.label,
        )
        for info in camera_info
    ]