"""Source code for sportslabkit.metrics.object_detection."""

from __future__ import annotations

import sys
from typing import Any

import numpy as np
import pandas as pd

from sportslabkit.dataframe import BBoxDataFrame


# Positions of the fields inside one bbox record:
# (xmin, ymin, width, height, confidence, class_id, image_name).
X_INDEX = 0  # xmin
Y_INDEX = 1  # ymin
W_INDEX = 2  # width
H_INDEX = 3  # height
CONFIDENCE_INDEX = 4
CLASS_ID_INDEX = 5
IMAGE_NAME_INDEX = 6


def _getArea(box: list[int]) -> int:
    """Compute the area of a box given by its two corners.

    Args:
        box (list[int]): box whose first four entries are (x1, y1, x2, y2)

    Returns:
        area (int): horizontal extent times vertical extent
    """
    x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
    return (x_max - x_min) * (y_max - y_min)


def _boxesIntersect(boxA: list[int], boxB: list[int]) -> bool:
    """Tell whether two corner-format boxes overlap or touch.

    Args:
        boxA (list[int]): first box, read as (x1, y1, x2, y2)
        boxB (list[int]): second box, read as (x1, y1, x2, y2)

    Returns:
        bool: False when the boxes are strictly separated along either axis,
            True otherwise (shared edges or corners count as intersecting)
    """
    separated = (
        boxA[0] > boxB[2]  # boxA starts past boxB's x2
        or boxB[0] > boxA[2]  # boxB starts past boxA's x2
        or boxA[3] < boxB[1]  # boxA ends before boxB's y1
        or boxA[1] > boxB[3]  # boxA starts past boxB's y2
    )
    return not separated


def _getIntersectionArea(boxA: list[int], boxB: list[int]) -> int:
    """Return the area shared by two corner-format boxes.

    Args:
        boxA (list[int]): first box, read as (x1, y1, x2, y2)
        boxB (list[int]): second box, read as (x1, y1, x2, y2)

    Returns:
        intersection_area (int): area of the overlapping rectangle, or 0 when
            the boxes do not overlap
    """
    overlap_w = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0])
    overlap_h = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])
    # Without this guard two negative extents would multiply into a positive
    # "area" for boxes that are disjoint along both axes.
    if overlap_w <= 0 or overlap_h <= 0:
        return 0
    return overlap_w * overlap_h


def _getUnionAreas(boxA: list[int], boxB: list[int], interArea: float | None = None) -> float:
    """Return the area covered by the union of two boxes.

    Args:
        boxA (list[int]): first box, passed on to ``_getArea``
        boxB (list[int]): second box, passed on to ``_getArea``
        interArea (float | None): intersection area if already known; when
            None it is computed with ``_getIntersectionArea``

    Returns:
        float: sum of both box areas minus the intersection area
    """
    combined = _getArea(boxA) + _getArea(boxB)
    overlap = _getIntersectionArea(boxA, boxB) if interArea is None else interArea
    return float(combined - overlap)


# 11-point interpolated average precision
def ElevenPointInterpolatedAP(rec: Any, prec: Any) -> list[Any]:
    """Calculate 11-point interpolated average precision.

    For each recall level 1.0, 0.9, ..., 0.0 the interpolated precision is the
    maximum of ``prec`` from the first position whose recall is at least that
    level up to the end; it is 0 when no recall reaches the level.

    Args:
        rec (np.ndarray[np.float64]): recall array
        prec (np.ndarray[np.float64]): precision array, same length as ``rec``

    Returns:
        Interp_ap_info (list[Any]): ``[ap, interpolated_precisions,
            recall_levels, None]`` where ``ap`` is the mean of the eleven
            interpolated precisions and both lists are ordered from recall
            1.0 down to 0.0
    """
    # Explicit arrays: the comparison below must not depend on the recall
    # level happening to be a numpy scalar to broadcast over a plain list.
    recalls = np.asarray(rec, dtype=float)
    precisions = np.asarray(prec, dtype=float)

    recall_levels = list(np.linspace(0, 1, 11)[::-1])
    interpolated = []
    for level in recall_levels:
        reached = np.flatnonzero(recalls >= level)
        if reached.size:
            # flatnonzero returns ascending indices, so reached[0] is the
            # first position whose recall is >= level.
            interpolated.append(precisions[reached[0] :].max())
        else:
            interpolated.append(0.0)

    # By definition AP = sum(max(precision whose recall is above r)) / 11
    ap = sum(interpolated) / 11

    Interp_ap_info = [ap, interpolated, recall_levels, None]
    return Interp_ap_info


def _convert_xywh_to_x1y1x2y2(bbox: list[int]) -> list[int]:
    """Turn a four-element (x, y, width, height) box into (x1, y1, x2, y2)."""
    left, top, width, height = bbox
    right = left + width
    bottom = top + height
    return [left, top, right, bottom]


def iou_score(bbox_det: list[int], bbox_gt: list[int]) -> float:
    """Compute intersection over union (IoU) of two corner-format boxes.

    Args:
        bbox_det (list[int]): detected box as (x1, y1, x2, y2)
        bbox_gt (list[int]): ground truth box as (x1, y1, x2, y2)

    Returns:
        float: intersection area divided by union area; 0.0 when the union
            area is not positive (e.g. both boxes have zero area)
    """
    x1_det, y1_det, x2_det, y2_det = bbox_det
    x1_gt, y1_gt, x2_gt, y2_gt = bbox_gt

    # Clamp each overlap extent at 0 so disjoint boxes give zero intersection.
    overlap_w = max(min(x2_det, x2_gt) - max(x1_det, x1_gt), 0)
    overlap_h = max(min(y2_det, y2_gt) - max(y1_det, y1_gt), 0)
    intersection = overlap_w * overlap_h

    area_det = (x2_det - x1_det) * (y2_det - y1_det)
    area_gt = (x2_gt - x1_gt) * (y2_gt - y1_gt)
    union = area_det + area_gt - intersection

    # Degenerate boxes would otherwise divide by zero.
    if union <= 0:
        return 0.0
    return intersection / union


def iou_scores(
    bbox_dets: list[int] | list[list[int]],
    bbox_gts: list[int] | list[list[int]],
    xywh: bool = False,
    average: bool = True,
) -> float | list[float]:
    """Compute IoU for paired detected / ground truth boxes.

    Args:
        bbox_dets (list[int] | list[list[int]]): one box or a list of boxes.
        bbox_gts (list[int] | list[list[int]]): ground truth box(es), paired
            by position with ``bbox_dets``.
        xywh (bool): if True the boxes are (x, y, width, height) and are
            converted to (x1, y1, x2, y2) first. Defaults to False.
        average (bool): if True return the mean IoU, otherwise the list of
            per-pair IoUs. Defaults to True.

    Returns:
        float | list[float]: mean IoU when ``average`` is True, else one IoU
            per pair.

    Raises:
        AssertionError: if the two inputs do not contain the same number of
            boxes.
    """
    # A single box is recognised by its first entry being a number; floats and
    # numpy scalars count too, not only Python ints.
    if isinstance(bbox_dets[0], (int, float, np.integer, np.floating)):
        bbox_dets = [bbox_dets]
        bbox_gts = [bbox_gts]

    assert len(bbox_dets) == len(
        bbox_gts
    ), f"The number of detected ({len(bbox_dets)}) and ground truth ({len(bbox_gts)}) bounding boxes must be equal."

    # Convert input bounding boxes to (x1, y1, x2, y2) format if needed
    if xywh:
        bbox_dets = [_convert_xywh_to_x1y1x2y2(bbox_det) for bbox_det in bbox_dets]
        bbox_gts = [_convert_xywh_to_x1y1x2y2(bbox_gt) for bbox_gt in bbox_gts]

    scores = [iou_score(bbox_det, bbox_gt) for bbox_det, bbox_gt in zip(bbox_dets, bbox_gts)]
    if average:
        return sum(scores) / len(scores)
    return scores


# TODO: Deprecate this function
def convert_to_x1y1x2y2(bbox: list[int]) -> list[int]:
    """Convert bbox from (xmin, ymin, width, height) to x1y1x2y2 format.

    Only the first four entries of ``bbox`` are read, so longer records are
    accepted as well.
    """
    xmin, ymin, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
    return [xmin, ymin, xmin + width, ymin + height]


def convert_bboxes(
    bboxes: pd.DataFrame | BBoxDataFrame | list | tuple,
) -> list[tuple[float, float, float, float, float, str, str]]:
    """Convert bboxes to tuples of (xmin, ymin, width, height, confidence, class_id, image_name).

    The first five fields of every row are cast with ``float`` and the last
    two with ``str``. The input container is not modified; a new list is
    returned.

    Args:
        bboxes (pd.DataFrame | BBoxDataFrame | list | tuple): bboxes to convert.
            Data frames are read row by row through ``.values.tolist()``.

    Returns:
        list[tuple[float, float, float, float, float, str, str]]: converted bboxes.

    Raises:
        IndexError: if a bbox has fewer than 7 elements.
        ValueError: if a field cannot be cast to its expected type.
    """
    if isinstance(bboxes, (pd.DataFrame, BBoxDataFrame)):
        rows = bboxes.values.tolist()
    else:
        # Lists and tuples alike; results go into a fresh list, so an
        # immutable tuple input works too.
        rows = [tuple(bbox) for bbox in bboxes]

    labels = (
        "xmin",
        "ymin",
        "width",
        "height",
        "confidence",
        "class_id",
        "image_name",
    )
    expected_types = (
        "float",
        "float",
        "float",
        "float",
        "float",
        "str",
        "str",
    )

    # try to convert to correct type if plausible
    converted = []
    for bbox in rows:
        try:
            converted.append(
                (
                    float(bbox[0]),
                    float(bbox[1]),
                    float(bbox[2]),
                    float(bbox[3]),
                    float(bbox[4]),
                    str(bbox[5]),
                    str(bbox[6]),
                )
            )
        except IndexError as err:
            raise IndexError(
                f"bbox must have 7 elements (xmin, ymin, width, height, confidence, class_id, image_name), but {len(bbox)} elements found."
            ) from err
        except ValueError as err:
            actual_types = tuple(type(x).__name__ for x in bbox)
            comparison = "\n".join(
                [
                    f"{label:<10} {expected:<10} {actual}"
                    for label, expected, actual in zip(labels, expected_types, actual_types)
                ]
            )
            msg = f"Expected types and actual types don't match:\n\nLabel      Expected   Actual\n{comparison}\n\nOriginal error message: {str(err)}"
            raise ValueError(msg) from err

    validate_bboxes(converted)
    return converted


def validate_bboxes(
    bboxes: list[tuple[float, float, float, float, float, str, str]], is_gt: bool = False
) -> None:
    """Assert that every bbox is a well-formed 7-field record.

    Each bbox must be (xmin, ymin, width, height, confidence, class_id,
    image_name): the four geometry fields numeric, the last two fields
    ``str``. Python and numpy integer / floating scalars are accepted as
    numeric.

    Args:
        bboxes (list): bbox records to check.
        is_gt (bool): if True the confidence must equal 1 (ground truth),
            otherwise it only has to be numeric. Defaults to False.

    Raises:
        AssertionError: on the first bbox that violates one of the checks.
    """
    # numpy scalars are listed explicitly because e.g. np.int64 and np.float32
    # are not subclasses of the builtin int / float.
    numeric_types = (int, float, np.integer, np.floating)
    geometry_labels = ("xmin", "ymin", "width", "height")

    for bbox in bboxes:
        assert (
            len(bbox) == 7
        ), f"bbox must have 7 elements (xmin, ymin, width, height, confidence, class_id, image_name), but {len(bbox)} elements found."

        # zip stops after the four geometry labels, i.e. bbox[0] .. bbox[3].
        for label, value in zip(geometry_labels, bbox):
            assert isinstance(value, numeric_types), f"{label} must be int or float, but {type(value)} found."

        confidence = bbox[4]
        if is_gt:
            assert confidence == 1, f"confidence must be 1 for ground truth bbox, but {confidence} found."
        else:
            assert isinstance(
                confidence, numeric_types
            ), f"confidence must be int or float, but {type(confidence)} found."
        assert isinstance(bbox[5], str), f"class_id must be str, but {type(bbox[5])} found."
        assert isinstance(bbox[6], str), f"image_name must be str, but {type(bbox[6])} found."


def ap_score(
    bboxes_det_per_class: list[list[float, float, float, float, float, str, str]],
    bboxes_gt_per_class: list[list[float, float, float, float, float, str, str]],
    iou_threshold: float,
) -> dict[str, Any]:
    """Calculate average precision for a single class.

    Detections are visited in order of decreasing confidence. A detection is
    a true positive when its best-overlapping ground truth box in the same
    image has IoU >= ``iou_threshold`` and that ground truth box has not
    already been matched; otherwise it is a false positive. AP is the
    11-point interpolated average precision of the resulting curve.

    Args:
        bboxes_det_per_class(list): bbox of detected object per class.
        bboxes_gt_per_class(list): bbox of ground truth object per class.
        iou_threshold(float): iou threshold. it is usually set to 50%, 75% or 95%.

    Returns:
        ap(dict): dict containing information about average precision, with
            keys "class", "precision", "recall", "AP",
            "interpolated precision", "interpolated recall",
            "total positives" (number of detections), "total TP", "total FP".

    Raises:
        AssertionError: if there is no ground truth bbox, if a bbox fails
            ``validate_bboxes``, or if the bboxes do not all share the class
            id of the first ground truth bbox.

    Note:
        bboxes_det_per_class: [bbox_det_1, bbox_det_2, ...]
        bboxes_gt_per_class: [bbox_gt_1, bbox_gt_2, ...]

        The elements of each bbox variable are as follows, each element basically corresponding to a property of the BoundingBox class of Object-Detection-Metrics.
        https://github.com/rafaelpadilla/Object-Detection-Metrics/blob/master/lib/BoundingBox.py

        ----
        bbox_det_n(tuple): (xmin, ymin, width, height, confidence, class_id, image_name)
        bbox_gt_n(tuple): (xmin, ymin, width, height, 1.0, class_id, image_name)

        xmin(float): xmin
        ymin(float): ymin
        width(float): width
        height(float): height
        confidence(float): class confidence
        class_id(str): class id
        image_name(str): image name

        Fields are addressed through the module-level constants X_INDEX,
        Y_INDEX, W_INDEX, H_INDEX, CONFIDENCE_INDEX, CLASS_ID_INDEX and
        IMAGE_NAME_INDEX.
    """
    assert len(bboxes_gt_per_class) != 0, "It must contain at least one Grand Truth."

    class_id = bboxes_gt_per_class[0][CLASS_ID_INDEX]
    n_dets = len(bboxes_det_per_class)
    n_gts = len(bboxes_gt_per_class)

    if n_dets == 0:
        return {
            "class": class_id,
            "precision": [],
            "recall": [],
            "AP": 0.0,
            "interpolated precision": [],
            "interpolated recall": [],
            "total positives": 0,
            "total TP": 0,
            "total FP": 0,
        }

    validate_bboxes(bboxes_det_per_class, is_gt=False)
    validate_bboxes(bboxes_gt_per_class, is_gt=True)

    # check that class_id is the same for all bboxes
    for bbox_det in bboxes_det_per_class:
        assert (
            bbox_det[CLASS_ID_INDEX] == class_id
        ), f"class_id must be the same for all bboxes, but {bbox_det[CLASS_ID_INDEX]} found."
    for bbox_gt in bboxes_gt_per_class:
        assert (
            bbox_gt[CLASS_ID_INDEX] == class_id
        ), f"class_id must be the same for all bboxes, but {bbox_gt[CLASS_ID_INDEX]} found."

    # Group ground truth boxes per image, already converted to x1,y1,x2,y2 so
    # the conversion is done once rather than once per detection:
    # {image_name_1: [corners_1, corners_2, ...], image_name_2: [...], ...}
    gt_corners_by_image: dict[str, Any] = {}
    for bbox_gt in bboxes_gt_per_class:
        corners = convert_to_x1y1x2y2(
            [
                bbox_gt[X_INDEX],
                bbox_gt[Y_INDEX],
                bbox_gt[W_INDEX],
                bbox_gt[H_INDEX],
            ]
        )
        gt_corners_by_image.setdefault(bbox_gt[IMAGE_NAME_INDEX], []).append(corners)

    # One "already matched" flag per ground truth box of each image
    matched = {name: np.zeros(len(boxes)) for name, boxes in gt_corners_by_image.items()}

    # Sort detections by decreasing confidence
    detections = sorted(bboxes_det_per_class, key=lambda x: x[CONFIDENCE_INDEX], reverse=True)

    # Loop through detections
    TP = np.zeros(n_dets)
    FP = np.zeros(n_dets)
    for d, bbox_det in enumerate(detections):
        image_name = bbox_det[IMAGE_NAME_INDEX]
        det_corners = convert_to_x1y1x2y2(
            [
                bbox_det[X_INDEX],
                bbox_det[Y_INDEX],
                bbox_det[W_INDEX],
                bbox_det[H_INDEX],
            ]
        )

        # Reset the best match for every detection so an index left over from
        # a previous detection (or no index at all) can never be used.
        iou_max = sys.float_info.min
        best_gt = None
        for j, gt_corners in enumerate(gt_corners_by_image.get(image_name, [])):
            iou = iou_score(det_corners, gt_corners)
            if iou > iou_max:
                iou_max = iou
                best_gt = j

        # Assign detection as true positive/false positive
        if best_gt is not None and iou_max >= iou_threshold and matched[image_name][best_gt] != 1:
            TP[d] = 1  # count as true positive
            matched[image_name][best_gt] = 1  # flag as already 'seen'
        else:
            FP[d] = 1  # count as false positive

    # compute precision, recall and average precision
    acc_FP = np.cumsum(FP)
    acc_TP = np.cumsum(TP)
    rec = acc_TP / n_gts
    # Every detection is either TP or FP, so the denominator is at least 1.
    prec = np.divide(acc_TP, (acc_FP + acc_TP))

    [ap_, mpre_, mrec_, _] = ElevenPointInterpolatedAP(rec, prec)

    return {
        "class": class_id,
        "precision": list(prec),
        "recall": list(rec),
        "AP": ap_,
        "interpolated precision": mpre_,
        "interpolated recall": mrec_,
        "total positives": n_dets,
        "total TP": int(np.sum(TP)),
        "total FP": int(np.sum(FP)),
    }


def ap_score_range(
    bboxes_det_per_class: list[tuple[float, float, float, float, float, str, str]],
    bboxes_gt_per_class: list[tuple[float, float, float, float, float, str, str]],
    start_threshold: float = 0.5,
    end_threshold: float = 0.95,
    step: float = 0.05,
) -> float:
    """Calculate average precision averaged over a range of IoU thresholds.

    ``ap_score`` is evaluated at ``start_threshold``,
    ``start_threshold + step``, ... for every threshold that does not exceed
    ``end_threshold``, and the resulting AP values are averaged.

    Args:
        bboxes_det_per_class(list): bbox of detected object per class.
        bboxes_gt_per_class(list): bbox of ground truth object per class.
        start_threshold(float): start threshold of IOU. default is 0.5.
        end_threshold(float): end threshold of IOU. default is 0.95.
        step(float): step of updating threshold. default is 0.05.

    Returns:
        ap_range(float): average of ap in the specified range.

    """
    # Count the steps explicitly instead of np.arange(start, end + step, step):
    # with a non-integer step, rounding can make arange emit one extra
    # threshold beyond end_threshold. The small epsilon keeps an end point
    # that is a whole number of steps away despite float error.
    n_thresholds = int(np.floor((end_threshold - start_threshold) / step + 1e-9)) + 1
    iou_thresholds = start_threshold + step * np.arange(n_thresholds)

    ap_list = [
        ap_score(bboxes_det_per_class, bboxes_gt_per_class, iou_threshold)["AP"]
        for iou_threshold in iou_thresholds
    ]

    ap_range = np.mean(ap_list)

    return ap_range


def map_score(
    bboxes_det: pd.DataFrame | BBoxDataFrame | list | tuple,
    bboxes_gt: pd.DataFrame | BBoxDataFrame | list | tuple,
    iou_threshold: float,
) -> float:
    """Calculate mean average precision.

    AP is computed with ``ap_score`` for every class id that occurs in the
    ground truth, and the per-class APs are averaged. Detections whose class
    id never occurs in the ground truth are ignored.

    Args:
        bboxes_det(pd.DataFrame | BBoxDataFrame | list | tuple): detected objects.
        bboxes_gt(pd.DataFrame | BBoxDataFrame | list | tuple): ground truth objects.
        iou_threshold(float): iou threshold

    Returns:
        map(float): mean average precision
    """

    # convert to lists of 7-field tuples
    bboxes_det = convert_bboxes(bboxes_det)
    bboxes_gt = convert_bboxes(bboxes_gt)

    # Group both sets by class id in a single pass each (input order is kept
    # within every class).
    dets_by_class: dict[str, list] = {}
    for bbox_det in bboxes_det:
        dets_by_class.setdefault(bbox_det[CLASS_ID_INDEX], []).append(bbox_det)
    gts_by_class: dict[str, list] = {}
    for bbox_gt in bboxes_gt:
        gts_by_class.setdefault(bbox_gt[CLASS_ID_INDEX], []).append(bbox_gt)

    # calculate ap per ground truth class, in sorted class order
    ap_list = []
    for class_id in sorted(gts_by_class):
        ap = ap_score(dets_by_class.get(class_id, []), gts_by_class[class_id], iou_threshold)
        ap_list.append(ap["AP"])

    # calculate map
    mean_ap = np.mean(ap_list)
    return mean_ap


def map_score_range(
    bboxes_det: pd.DataFrame | BBoxDataFrame | list | tuple,
    bboxes_gt: pd.DataFrame | BBoxDataFrame | list | tuple,
    start_threshold: float = 0.5,
    end_threshold: float = 0.95,
    step: float = 0.05,
) -> float:
    """Calculate mean average precision averaged over a range of IoU thresholds.

    ``map_score`` is evaluated at ``start_threshold``,
    ``start_threshold + step``, ... for every threshold that does not exceed
    ``end_threshold``, and the resulting mAP values are averaged.

    Args:
        bboxes_det(pd.DataFrame | BBoxDataFrame | list | tuple): detected objects.
        bboxes_gt(pd.DataFrame | BBoxDataFrame | list | tuple): ground truth objects.
        start_threshold(float): start threshold of IOU. default is 0.5.
        end_threshold(float): end threshold of IOU. default is 0.95.
        step(float): step of updating threshold. default is 0.05.

    Returns:
        map_range(float): average of map in the specified range. (0.5 to 0.95 in increments of 0.05 by default)

    """
    # Count the steps explicitly instead of np.arange(start, end + step, step):
    # with a non-integer step, rounding can make arange emit one extra
    # threshold beyond end_threshold. The small epsilon keeps an end point
    # that is a whole number of steps away despite float error.
    n_thresholds = int(np.floor((end_threshold - start_threshold) / step + 1e-9)) + 1
    iou_thresholds = start_threshold + step * np.arange(n_thresholds)

    map_list = [map_score(bboxes_det, bboxes_gt, iou_threshold) for iou_threshold in iou_thresholds]

    map_range = np.mean(map_list)

    return map_range