2024-04-21

计算机视觉常用概念

IoU 交并比

给定pred_bbox和gt_bbox，计算两者交集面积与并集面积的比值。
bbox的表示方法是[x1,y1,x2,y2]，其中(x1,y1)为左上角坐标，(x2,y2)为右下角坐标。

import numpy as np

def get_IoU(pred_bbox, gt_bbox):
    """
    Compute the intersection-over-union (IoU) of two bounding boxes.

    Both boxes are given as [x1, y1, x2, y2]. Widths and heights use the
    inclusive pixel convention (x2 - x1 + 1), applied consistently to the
    intersection AND to both box areas, so identical boxes score exactly 1.

    :param pred_bbox: predicted bbox coordinate
    :param gt_bbox: ground truth bbox coordinate
    :return: iou score (0 when the boxes do not overlap)
    """
    # Corners of the intersection rectangle.
    ix1 = max(pred_bbox[0], gt_bbox[0])
    iy1 = max(pred_bbox[1], gt_bbox[1])
    ix2 = min(pred_bbox[2], gt_bbox[2])
    iy2 = min(pred_bbox[3], gt_bbox[3])

    # Clamp at 0 so disjoint boxes produce an empty intersection.
    iw = np.maximum(ix2 - ix1 + 1, 0)
    ih = np.maximum(iy2 - iy1 + 1, 0)
    inter = iw * ih

    # Areas must use the same +1 convention as the intersection; otherwise
    # the union is underestimated and the score can exceed 1.
    pred_area = (pred_bbox[2] - pred_bbox[0] + 1) * (pred_bbox[3] - pred_bbox[1] + 1)
    gt_area = (gt_bbox[2] - gt_bbox[0] + 1) * (gt_bbox[3] - gt_bbox[1] + 1)
    union = pred_area + gt_area - inter

    # Malformed boxes (x2 < x1 or y2 < y1) can yield a non-positive union.
    if union <= 0:
        return 0.0

    return inter / union

def get_max_IoU(pred_bboxes, gt_bbox):
    """
    Compute the IoU of one ground-truth box against several predicted boxes
    and locate the best match.

    Boxes are [x1, y1, x2, y2]; widths/heights use the inclusive (+1)
    pixel convention.

    :param pred_bboxes: predicted bboxes coordinates, one box per row
    :param gt_bbox: ground truth bbox coordinate
    :return: tuple (overlaps, ovmax, jmax): the IoU of every predicted box,
             the max iou score, and the row index of that max. For empty
             pred_bboxes the result is (empty array, 0.0, -1).
    """
    pred_bboxes = np.asarray(pred_bboxes)

    # No predictions: return neutral values instead of failing on
    # variables that were never assigned.
    if pred_bboxes.shape[0] == 0:
        return np.zeros(0), 0.0, -1

    # -----0----- corners of the intersections, vectorised over all preds
    ix1 = np.maximum(pred_bboxes[:, 0], gt_bbox[0])
    iy1 = np.maximum(pred_bboxes[:, 1], gt_bbox[1])
    ix2 = np.minimum(pred_bboxes[:, 2], gt_bbox[2])
    iy2 = np.minimum(pred_bboxes[:, 3], gt_bbox[3])
    # The max with 0 filters out non-overlapping boxes; the +1 keeps a
    # non-zero value when the box edges merely touch.
    iw = np.maximum(ix2 - ix1 + 1., 0.)
    ih = np.maximum(iy2 - iy1 + 1., 0.)

    # -----1----- intersection
    inters = iw * ih

    # -----2----- union, uni = S1 + S2 - inters
    gt_area = (gt_bbox[2] - gt_bbox[0] + 1.) * (gt_bbox[3] - gt_bbox[1] + 1.)
    pred_areas = ((pred_bboxes[:, 2] - pred_bboxes[:, 0] + 1.) *
                  (pred_bboxes[:, 3] - pred_bboxes[:, 1] + 1.))
    uni = gt_area + pred_areas - inters

    # -----3----- iou, get max score and max iou index
    overlaps = inters / uni
    ovmax = np.max(overlaps)
    jmax = np.argmax(overlaps)

    return overlaps, ovmax, jmax

if __name__ == "__main__":
    # Demo 1: IoU of a single predicted box against a single ground-truth
    # box. Boxes are [x1, y1, x2, y2]: (x1, y1) is the top-left corner and
    # (x2, y2) the bottom-right corner, both as <x-axis, y-axis>.
    pred_bbox = np.array([50, 50, 90, 100])
    gt_bbox = np.array([110, 110, 150, 150])
    print(get_IoU(pred_bbox, gt_bbox))

    # Demo 2: IoU of several predicted boxes against one ground-truth box;
    # prints every overlap, the best score and the index of the best box.
    pred_bboxes = np.array([
        [15, 18, 47, 60],
        [50, 50, 90, 100],
        [70, 80, 120, 145],
        [130, 160, 250, 280],
        [25.6, 66.1, 113.3, 147.8],
    ])
    gt_bbox = np.array([70, 80, 120, 150])
    print(get_max_IoU(pred_bboxes, gt_bbox))

NMS 非极大值抑制

预测结果可能是这样的
x1, y1, x2, y2, score, class;
x1, y1, x2, y2, score, class;

按score降序排列
提取bboxes，即x1, y1, x2, y2组成的矩阵
计算IoU

新建一个空的结果列表：每次把剩余检测框中score最高的一个取出放入结果；再把剩余框中与它的IoU超过阈值（且类别相同）的框视为重复检测并删除；对剩下的框重复上述步骤，直到没有剩余的框。

import numpy as np
import cv2

from draw_bbox import draw_box


def nms(bboxes, scores, iou_thresh):
    """
    Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than iou_thresh.
    Boxes are [x1, y1, x2, y2]; widths/heights use the inclusive (+1)
    pixel convention for both the areas and the overlaps.

    :param bboxes: detection boxes, one box per row
    :param scores: confidence score of each box
    :param iou_thresh: IoU threshold; boxes with IoU <= threshold survive
    :return: tuple (bboxes, scores) of the kept boxes, ordered by
             descending score
    """
    bboxes = np.asarray(bboxes)
    scores = np.asarray(scores)

    # Nothing to suppress; also avoids column-indexing an empty array.
    if bboxes.size == 0:
        return bboxes, scores

    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    # Areas must use the same +1 convention as the overlaps below;
    # otherwise the IoU is overestimated and boxes are over-suppressed.
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)

    # Indices of the kept boxes.
    result = []
    # Sort by confidence, highest first; all work below is done on indices.
    index = scores.argsort()[::-1]
    while index.size > 0:
        # The best remaining box is always kept.
        i = index[0]
        result.append(i)

        # IoU between the kept box and every other remaining box.
        rest = index[1:]
        x11 = np.maximum(x1[i], x1[rest])
        y11 = np.maximum(y1[i], y1[rest])
        x22 = np.minimum(x2[i], x2[rest])
        y22 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0, x22 - x11 + 1)
        h = np.maximum(0, y22 - y11 + 1)
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[rest] - overlaps)

        # Keep only boxes at or below the threshold. idx is relative to
        # `rest`, hence the +1 to map back into `index`.
        idx = np.where(ious <= iou_thresh)[0]
        index = index[idx + 1]
    bboxes, scores = bboxes[result], scores[result]
    return bboxes, scores


if __name__ == '__main__':
    raw_img = cv2.imread('test.png')

    # For convenience the detection results are hard-coded as variables:
    # one [x1, y1, x2, y2] box per row, with one confidence per box.
    bboxes = [
        [183, 625, 269, 865],
        [197, 603, 296, 853],
        [190, 579, 295, 864],
        [537, 507, 618, 713],
        [535, 523, 606, 687],
    ]
    confidences = [0.7, 0.9, 0.95, 0.9, 0.6]

    # Run NMS with an IoU threshold of 0.5.
    bboxes, scores = nms(np.array(bboxes), np.array(confidences), 0.5)

mAP

mean average precision 用于判断模型在目标识别方面的能力。
precision = TP / (TP + FP)。如模型认为positive的有100个，其中有80个是对的，就是 80 / (80+20)，分母是模型预测为positive的总次数。
recall = TP / (TP + FN)。如模型认为positive的有100个，其中80个是对的，而实际的正样本总共是200个，就是 80 / (80+120)，分母是所有实际正样本的数量。

average precision 使用一个IoU阈值判定每个预测是否为Positive（TP）。将预测按score从高到低排序，每加入一个预测就重新计算一次precision和recall，由此绘出precision-recall曲线；曲线下的面积就是AP，范围0到1。
AP(COCO) 考虑不同的 IoU 阈值。一般会写成 AP[.50:.05:.95]，代表分别计算 IoU 阈值为 0.5、0.55、…、0.9 与 0.95 时的 interpolated AP，最后再将这些 AP 取平均得到最终的 AP。
mAP 计算每一个类别的AP之后再平均就是了。

黄河水澄的技术专栏

分享有用的知识

计算机视觉常用概念

IoU 交并比

NMS 非极大值抑制

mAP