Author Avatar
patrickcty 2月 15, 2021

目标检测、实例分割以及关键点检测都可以使用 COCO API 来进行评测,主要方法有两种:

  1. 按照 COCO 的格式保存预测的结果,然后评测
  2. 不保存预测结果,直接评测



  1. Box 按照 [x, y, w, h] 的格式保存,其中 (x, y) 是左上角坐标,w、h 分别是框的宽和高
  2. Mask 用 RLE 格式保存,可以使用如下代码来转换
import numpy as np
import pycocotools.mask as mask_util

# Binarise first; this assumes each mask is a single-channel (H, W) map.
masks = masks > 0.5
# Encode the *binarised* mask: casting raw probabilities to uint8 would
# truncate every value below 1 to 0. A trailing axis is added because
# mask_util.encode takes a Fortran-ordered uint8 array of shape (H, W, N)
# and returns one RLE dict per channel.
rle = mask_util.encode(np.array(masks[:, :, np.newaxis], dtype=np.uint8, order="F"))[0]
# `counts` is returned as bytes; decode it so the record can be dumped to JSON.
rle['counts'] = rle["counts"].decode("utf-8")
dataset_results.append({'image_id': all_imgs[i]['id'], 'category_id': 1,
                        'segmentation': rle, "score": float(cls_scores[k])})


from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

cocoGt = COCO(PATH_TO_GT_JSON)
cocoDt = cocoGt.loadRes(PATH_TO_RESULT_JSON)
# The last argument is the IoU type; choose it to match the task
# ("bbox", "segm" or "keypoints").
cocoEval = COCOeval(cocoGt, cocoDt, "segm")
cocoEval.evaluate()    # match detections to ground truth per image
cocoEval.accumulate()  # aggregate the per-image results
cocoEval.summarize()   # print the AP / AR summary


主要思想是先创建一个空的 COCOeval 对象,每次预测后把结果更新到其中,最后再统一评测。这种方式实现起来比上面的方法麻烦一些,这里直接贴上 DETR 里面的部分代码:

# Fold the predictions of one image into the running evaluation state.
def update(self, predictions):
    """Evaluate the predictions of a single image and store the outcome.

    Args:
        predictions: dict holding at least ``'image_id'`` plus whatever
            ``self.prepare`` needs for each configured IoU type.
    """
    img_ids = [predictions['image_id']]
    # Remember which images were evaluated; without this the accumulated
    # state has no image ids to report later.
    self.img_ids.extend(img_ids)

    for iou_type in self.iou_types:
        results = self.prepare(predictions, iou_type)

        # suppress pycocotools prints
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull):
                # Wrap the predictions in a COCO results object.
                coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
        coco_eval = self.coco_eval[iou_type]

        # Point the COCOeval object at this image's detections.
        coco_eval.cocoDt = coco_dt
        coco_eval.params.imgIds = list(img_ids)
        # Per-image evaluation; the counterpart of calling COCOeval.evaluate().
        img_ids, eval_imgs = evaluate(coco_eval)

        # Keep the per-image results so they can be merged once every image
        # has been processed; otherwise they are silently discarded.
        self.eval_imgs[iou_type].append(eval_imgs)




def evaluate(sess, net, inputs, test_collect, data_loader, base_ds):
    """Run the detector over ``data_loader`` and print COCO segmentation metrics.

    Each image is evaluated as soon as it is predicted, so no result file
    is written.

    NOTE(review): this function shares its name with the pycocotools-derived
    helper ``evaluate(self)`` that ``CocoEvaluator.update`` calls; keep the
    two in separate modules (or rename one) so neither shadows the other.

    Args:
        sess, net, inputs, test_collect: forwarded unchanged to ``im_detect``.
        data_loader: iterable yielding ``(image, image_id)`` pairs.
        base_ds: ground-truth COCO object handed to ``CocoEvaluator``.
    """
    iou_types = ('segm', )
    coco_evaluator = CocoEvaluator(base_ds, iou_types)

    for im, image_id in data_loader:
        cat_prob, boxes, seg_pred, masks = im_detect(sess, net, inputs, im, test_collect)

        cls_scores = cat_prob[:, 1]
        # Only the spatial size of the image is needed, so read it directly
        # instead of deep-copying the whole image for every instance.
        height, width = im.shape[0], im.shape[1]
        segmaps = np.zeros([len(seg_pred), height, width])
        for k in range(len(seg_pred)):
            segmap = seg_pred[k, :, :, 1]
            # cv2.resize takes the target size as (width, height).
            # NOTE(review): the original call was cut off after this argument
            # and may have passed an explicit interpolation flag; confirm.
            segmap = cv2.resize(segmap, (width, height))
            segmap_masked = segmap * masks[k]
            segmaps[k] = segmap_masked

        res = {'scores': cls_scores, 'segmaps': segmaps, 'image_id': image_id}

        # Update the evaluator after every prediction.
        coco_evaluator.update(res)

    # Merge the per-image results, then accumulate predictions from all
    # images and print the summary.
    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

DETR COCOEvaluator,有删改

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
COCO evaluator that works in distributed mode.

Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
The difference is that there is less copy-pasting from pycocotools
in the end of the file, as python3 can suppress prints with contextlib
"""
import os
import contextlib
import copy
import numpy as np

from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util

class CocoEvaluator(object):
    """Incrementally evaluate predictions against a COCO ground truth.

    One ``COCOeval`` object is kept per IoU type. ``update`` evaluates one
    image at a time, ``synchronize_between_processes`` merges the stored
    per-image results, and ``accumulate`` / ``summarize`` produce the
    final metrics.
    """

    def __init__(self, coco_gt, iou_types):
        """
        Args:
            coco_gt: ground-truth COCO object; it is deep-copied.
            iou_types: list or tuple of IoU type names, e.g. ``('segm',)``.
        """
        assert isinstance(iou_types, (list, tuple))
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt

        self.iou_types = iou_types
        self.coco_eval = {}
        for iou_type in iou_types:
            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)

        self.img_ids = []
        self.eval_imgs = {k: [] for k in iou_types}

    def update(self, predictions):
        """Evaluate the predictions of a single image and store the outcome.

        Args:
            predictions: dict holding at least ``'image_id'`` plus whatever
                ``prepare`` needs for each configured IoU type.
        """
        img_ids = [predictions['image_id']]
        # Track evaluated images; synchronize_between_processes hands this
        # list to create_common_coco_eval.
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)

            # suppress pycocotools prints
            with open(os.devnull, 'w') as devnull:
                with contextlib.redirect_stdout(devnull):
                    coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
            coco_eval = self.coco_eval[iou_type]

            coco_eval.cocoDt = coco_dt
            coco_eval.params.imgIds = list(img_ids)
            img_ids, eval_imgs = evaluate(coco_eval)

            # Store the per-image results; synchronize_between_processes
            # concatenates this list and fails if it is empty.
            self.eval_imgs[iou_type].append(eval_imgs)

    def synchronize_between_processes(self):
        """Merge the stored per-image results into each COCOeval object."""
        for iou_type in self.iou_types:
            # Each stored array has the image dimension last (axis 2).
            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])

    def accumulate(self):
        """Call ``accumulate()`` on every COCOeval object."""
        for coco_eval in self.coco_eval.values():
            coco_eval.accumulate()

    def summarize(self):
        """Print the IoU type followed by its COCOeval summary."""
        for iou_type, coco_eval in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            coco_eval.summarize()

    def prepare(self, predictions, iou_type):
        """Convert ``predictions`` into COCO result dicts for ``iou_type``.

        Raises:
            ValueError: if ``iou_type`` is neither ``"bbox"`` nor ``"segm"``.
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        elif iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        else:
            raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Build COCO bbox result dicts.

        NOTE(review): this method iterates ``predictions.items()`` as a
        mapping ``image_id -> {"boxes", "scores", "labels"}``, which is a
        different layout from the single-image dict that ``update`` reads
        (``predictions['image_id']``). Confirm the expected format before
        using the ``"bbox"`` IoU type.
        """
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                # Nothing was predicted for this image.
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                    for k, box in enumerate(boxes)
                ]
            )
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Build COCO segmentation result dicts for one image.

        Reads ``predictions['scores']``, ``predictions['segmaps']`` and
        ``predictions['image_id']``. Every instance is reported with
        ``category_id`` 1.
        """
        coco_results = []
        for idx, score in enumerate(predictions['scores']):
            # Threshold the soft mask before run-length encoding it.
            binary_mask = predictions['segmaps'][idx] > 0.5
            # mask_util.encode takes a Fortran-ordered uint8 (H, W, N) array
            # and returns one RLE dict per channel.
            rle = mask_util.encode(np.array(binary_mask[:, :, np.newaxis], dtype=np.uint8, order="F"))[0]
            # `counts` is returned as bytes; decode it so it serialises.
            rle['counts'] = rle["counts"].decode("utf-8")

            coco_results.append(
                {
                    "image_id": predictions['image_id'],
                    "category_id": 1,
                    "segmentation": rle,
                    # Plain float so the record is JSON-serialisable.
                    "score": float(score),
                }
            )

        return coco_results

def convert_to_xywh(boxes):
    """Convert ``[xmin, ymin, xmax, ymax]`` boxes to ``[x, y, w, h]``.

    Args:
        boxes: array-like of shape (N, 4). Anything ``np.asarray`` accepts
            works (NumPy arrays, nested lists, CPU tensors), so the input
            no longer needs a torch-style ``unbind`` method.

    Returns:
        ``np.ndarray`` of shape (N, 4) holding ``[xmin, ymin, width, height]``.
    """
    boxes = np.asarray(boxes)
    if boxes.size == 0:
        # No boxes: return an empty (0, 4) array rather than failing to unpack.
        return boxes.reshape(0, 4)
    # Transposing an (N, 4) array yields one row per coordinate.
    xmin, ymin, xmax, ymax = boxes.T
    return np.stack((xmin, ymin, xmax - xmin, ymax - ymin), axis=1)

def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Install merged per-image results on ``coco_eval``.

    Sets ``coco_eval.evalImgs`` to the flattened ``eval_imgs``, sets
    ``coco_eval.params.imgIds`` to a list copy of ``img_ids``, and stores a
    deep copy of the updated params as ``coco_eval._paramsEval``.
    """
    image_id_list = [*img_ids]
    flat_results = [*eval_imgs.flatten()]

    coco_eval.evalImgs = flat_results
    params = coco_eval.params
    params.imgIds = image_id_list
    # Snapshot taken after imgIds is set so the copy includes it.
    coco_eval._paramsEval = copy.deepcopy(params)

# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined

def evaluate(self):
    """
    Run per image evaluation on the images in ``self.params.imgIds``.

    Adapted from ``COCOeval.evaluate`` in pycocotools and called as a free
    function with the evaluator object passed as ``self``. Unlike the
    pycocotools method it does not store the results in ``self.evalImgs``;
    it returns them.

    Side effects on ``self``: ``params`` is normalised (unique image and
    category ids, sorted ``maxDets``), ``ious`` is filled for every
    (image, category) pair, and ``_paramsEval`` receives a deep copy of
    the params.

    :return: ``(imgIds, evalImgs)`` where ``evalImgs`` is an array of shape
        ``(len(catIds), len(p.areaRng), len(p.imgIds))``
    :raises ValueError: if ``p.iouType`` is not 'segm', 'bbox' or 'keypoints'
    """
    p = self.params
    # add backward compatibility if useSegm is specified in params
    if p.useSegm is not None:
        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)
    self.params = p

    # COCOeval.evaluate calls _prepare() at this point to set up the
    # ground-truth / detection state read by computeIoU and evaluateImg
    # (see the pycocotools source).
    self._prepare()

    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]

    if p.iouType == 'segm' or p.iouType == 'bbox':
        computeIoU = self.computeIoU
    elif p.iouType == 'keypoints':
        computeIoU = self.computeOks
    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError("Unknown iou type {}".format(p.iouType))
    self.ious = {
        (imgId, catId): computeIoU(imgId, catId)
        for imgId in p.imgIds
        for catId in catIds}

    evaluateImg = self.evaluateImg
    maxDet = p.maxDets[-1]
    evalImgs = [
        evaluateImg(imgId, catId, areaRng, maxDet)
        for catId in catIds
        for areaRng in p.areaRng
        for imgId in p.imgIds
    ]
    # this is NOT in the pycocotools code, but could be done outside
    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return p.imgIds, evalImgs

# end of straight copy from pycocotools, just removing the prints