Initial commit
fbshipit-source-id: da6be2f26e3a1202f4bffde8cb980e2dcb851294
This commit is contained in:
658
sam3/eval/demo_eval.py
Normal file
658
sam3/eval/demo_eval.py
Normal file
@@ -0,0 +1,658 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
|
||||
|
||||
"""
|
||||
This evaluator is based upon COCO evaluation, but evaluates the model in a "demo" setting.
|
||||
This means that the model's predictions are thresholded and evaluated as "hard" predictions.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pycocotools.mask as maskUtils
|
||||
from pycocotools.cocoeval import COCOeval
|
||||
|
||||
from sam3.eval.coco_eval import CocoEvaluator
|
||||
from sam3.train.masks_ops import compute_F_measure
|
||||
from sam3.train.utils.distributed import is_main_process
|
||||
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
|
||||
|
||||
class DemoEval(COCOeval):
|
||||
"""
|
||||
This evaluator is based upon COCO evaluation, but evaluates the model in a "demo" setting.
|
||||
This means that the model's predictions are thresholded and evaluated as "hard" predictions.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
coco_gt=None,
|
||||
coco_dt=None,
|
||||
iouType="bbox",
|
||||
threshold=0.5,
|
||||
compute_JnF=False,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
coco_gt (COCO): ground truth COCO API
|
||||
coco_dt (COCO): detections COCO API
|
||||
iou_type (str): type of IoU to evaluate
|
||||
threshold (float): threshold for predictions
|
||||
"""
|
||||
super().__init__(coco_gt, coco_dt, iouType)
|
||||
self.threshold = threshold
|
||||
|
||||
self.params.useCats = False
|
||||
self.params.areaRng = [[0**2, 1e5**2]]
|
||||
self.params.areaRngLbl = ["all"]
|
||||
self.params.maxDets = [100000]
|
||||
self.compute_JnF = compute_JnF
|
||||
|
||||
def computeIoU(self, imgId, catId):
|
||||
# Same as the original COCOeval.computeIoU, but without sorting
|
||||
p = self.params
|
||||
if p.useCats:
|
||||
gt = self._gts[imgId, catId]
|
||||
dt = self._dts[imgId, catId]
|
||||
else:
|
||||
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
|
||||
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
|
||||
if len(gt) == 0 and len(dt) == 0:
|
||||
return []
|
||||
|
||||
if p.iouType == "segm":
|
||||
g = [g["segmentation"] for g in gt]
|
||||
d = [d["segmentation"] for d in dt]
|
||||
elif p.iouType == "bbox":
|
||||
g = [g["bbox"] for g in gt]
|
||||
d = [d["bbox"] for d in dt]
|
||||
else:
|
||||
raise Exception("unknown iouType for iou computation")
|
||||
|
||||
# compute iou between each dt and gt region
|
||||
iscrowd = [int(o["iscrowd"]) for o in gt]
|
||||
ious = maskUtils.iou(d, g, iscrowd)
|
||||
return ious
|
||||
|
||||
def evaluateImg(self, imgId, catId, aRng, maxDet):
|
||||
"""
|
||||
perform evaluation for single category and image
|
||||
:return: dict (single image results)
|
||||
"""
|
||||
p = self.params
|
||||
assert not p.useCats, "This evaluator does not support per-category evaluation."
|
||||
assert catId == -1
|
||||
all_gts = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
|
||||
keep_gt = np.array([not g["ignore"] for g in all_gts], dtype=bool)
|
||||
gt = [g for g in all_gts if not g["ignore"]]
|
||||
all_dts = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
|
||||
keep_dt = np.array([d["score"] >= self.threshold for d in all_dts], dtype=bool)
|
||||
dt = [d for d in all_dts if d["score"] >= self.threshold]
|
||||
if len(gt) == 0 and len(dt) == 0:
|
||||
# This is a "true negative" case, where there are no GTs and no predictions
|
||||
# The box-level metrics are ill-defined, so we don't add them to this dict
|
||||
return {
|
||||
"image_id": imgId,
|
||||
"IL_TP": 0,
|
||||
"IL_TN": 1,
|
||||
"IL_FP": 0,
|
||||
"IL_FN": 0,
|
||||
"IL_perfect_neg": np.ones((len(p.iouThrs),), dtype=np.int64),
|
||||
"num_dt": len(dt),
|
||||
}
|
||||
|
||||
if len(gt) > 0 and len(dt) == 0:
|
||||
# This is a "false negative" case, where there are GTs but no predictions
|
||||
return {
|
||||
"image_id": imgId,
|
||||
"IL_TP": 0,
|
||||
"IL_TN": 0,
|
||||
"IL_FP": 0,
|
||||
"IL_FN": 1,
|
||||
"TPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
|
||||
"FPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
|
||||
"FNs": np.ones((len(p.iouThrs),), dtype=np.int64) * len(gt),
|
||||
"local_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
|
||||
"local_positive_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
|
||||
"IL_perfect_pos": np.zeros((len(p.iouThrs),), dtype=np.int64),
|
||||
"num_dt": len(dt),
|
||||
}
|
||||
|
||||
# Load pre-computed ious
|
||||
ious = self.ious[(imgId, catId)]
|
||||
|
||||
# compute matching
|
||||
if len(ious) == 0:
|
||||
ious = np.zeros((len(dt), len(gt)))
|
||||
else:
|
||||
ious = ious[keep_dt, :][:, keep_gt]
|
||||
assert ious.shape == (len(dt), len(gt))
|
||||
|
||||
matched_dt, matched_gt = linear_sum_assignment(-ious)
|
||||
|
||||
match_scores = ious[matched_dt, matched_gt]
|
||||
|
||||
if self.compute_JnF and len(match_scores) > 0:
|
||||
j_score = match_scores.mean()
|
||||
f_measure = 0
|
||||
for dt_id, gt_id in zip(matched_dt, matched_gt):
|
||||
f_measure += compute_F_measure(
|
||||
gt_boundary_rle=gt[gt_id]["boundary"],
|
||||
gt_dilated_boundary_rle=gt[gt_id]["dilated_boundary"],
|
||||
dt_boundary_rle=dt[dt_id]["boundary"],
|
||||
dt_dilated_boundary_rle=dt[dt_id]["dilated_boundary"],
|
||||
)
|
||||
f_measure /= len(match_scores) + 1e-9
|
||||
JnF = (j_score + f_measure) * 0.5
|
||||
else:
|
||||
j_score = f_measure = JnF = -1
|
||||
|
||||
TPs, FPs, FNs = [], [], []
|
||||
IL_perfect = []
|
||||
for thresh in p.iouThrs:
|
||||
TP = (match_scores >= thresh).sum()
|
||||
FP = len(dt) - TP
|
||||
FN = len(gt) - TP
|
||||
assert (
|
||||
FP >= 0 and FN >= 0
|
||||
), f"FP: {FP}, FN: {FN}, TP: {TP}, match_scores: {match_scores}, len(dt): {len(dt)}, len(gt): {len(gt)}, ious: {ious}"
|
||||
TPs.append(TP)
|
||||
FPs.append(FP)
|
||||
FNs.append(FN)
|
||||
|
||||
if FP == FN and FP == 0:
|
||||
IL_perfect.append(1)
|
||||
else:
|
||||
IL_perfect.append(0)
|
||||
|
||||
TPs = np.array(TPs, dtype=np.int64)
|
||||
FPs = np.array(FPs, dtype=np.int64)
|
||||
FNs = np.array(FNs, dtype=np.int64)
|
||||
IL_perfect = np.array(IL_perfect, dtype=np.int64)
|
||||
|
||||
# compute precision recall and F1
|
||||
precision = TPs / (TPs + FPs + 1e-4)
|
||||
assert np.all(precision <= 1)
|
||||
recall = TPs / (TPs + FNs + 1e-4)
|
||||
assert np.all(recall <= 1)
|
||||
F1 = 2 * precision * recall / (precision + recall + 1e-4)
|
||||
|
||||
result = {
|
||||
"image_id": imgId,
|
||||
"TPs": TPs,
|
||||
"FPs": FPs,
|
||||
"FNs": FNs,
|
||||
"local_F1s": F1,
|
||||
"IL_TP": (len(gt) > 0) and (len(dt) > 0),
|
||||
"IL_FP": (len(gt) == 0) and (len(dt) > 0),
|
||||
"IL_TN": (len(gt) == 0) and (len(dt) == 0),
|
||||
"IL_FN": (len(gt) > 0) and (len(dt) == 0),
|
||||
("IL_perfect_pos" if len(gt) > 0 else "IL_perfect_neg"): IL_perfect,
|
||||
"F": f_measure,
|
||||
"J": j_score,
|
||||
"J&F": JnF,
|
||||
"num_dt": len(dt),
|
||||
}
|
||||
if len(gt) > 0 and len(dt) > 0:
|
||||
result["local_positive_F1s"] = F1
|
||||
return result
|
||||
|
||||
def accumulate(self, p=None):
|
||||
"""
|
||||
Accumulate per image evaluation results and store the result in self.eval
|
||||
:param p: input params for evaluation
|
||||
:return: None
|
||||
"""
|
||||
if not self.evalImgs:
|
||||
print("Please run evaluate() first")
|
||||
# allows input customized parameters
|
||||
if p is None:
|
||||
p = self.params
|
||||
|
||||
setImgIds = set(p.imgIds)
|
||||
|
||||
# TPs, FPs, FNs
|
||||
TPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
|
||||
FPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
|
||||
pmFPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
|
||||
FNs = np.zeros((len(p.iouThrs),), dtype=np.int64)
|
||||
local_F1s = np.zeros((len(p.iouThrs),), dtype=np.float64)
|
||||
|
||||
# Image level metrics
|
||||
IL_TPs = 0
|
||||
IL_FPs = 0
|
||||
IL_TNs = 0
|
||||
IL_FNs = 0
|
||||
IL_perfects_neg = np.zeros((len(p.iouThrs),), dtype=np.int64)
|
||||
IL_perfects_pos = np.zeros((len(p.iouThrs),), dtype=np.int64)
|
||||
|
||||
# JnF metric
|
||||
total_J = 0
|
||||
total_F = 0
|
||||
total_JnF = 0
|
||||
|
||||
valid_img_count = 0
|
||||
total_pos_count = 0
|
||||
total_neg_count = 0
|
||||
valid_J_count = 0
|
||||
valid_F1_count = 0
|
||||
valid_F1_count_w0dt = 0
|
||||
for res in self.evalImgs:
|
||||
if res["image_id"] not in setImgIds:
|
||||
continue
|
||||
IL_TPs += res["IL_TP"]
|
||||
IL_FPs += res["IL_FP"]
|
||||
IL_TNs += res["IL_TN"]
|
||||
IL_FNs += res["IL_FN"]
|
||||
if "IL_perfect_neg" in res:
|
||||
IL_perfects_neg += res["IL_perfect_neg"]
|
||||
total_neg_count += 1
|
||||
else:
|
||||
assert "IL_perfect_pos" in res
|
||||
IL_perfects_pos += res["IL_perfect_pos"]
|
||||
total_pos_count += 1
|
||||
|
||||
if "TPs" not in res:
|
||||
continue
|
||||
|
||||
TPs += res["TPs"]
|
||||
FPs += res["FPs"]
|
||||
FNs += res["FNs"]
|
||||
valid_img_count += 1
|
||||
|
||||
if "local_positive_F1s" in res:
|
||||
local_F1s += res["local_positive_F1s"]
|
||||
pmFPs += res["FPs"]
|
||||
valid_F1_count_w0dt += 1
|
||||
if res["num_dt"] > 0:
|
||||
valid_F1_count += 1
|
||||
|
||||
if "J" in res and res["J"] > -1e-9:
|
||||
total_J += res["J"]
|
||||
total_F += res["F"]
|
||||
total_JnF += res["J&F"]
|
||||
valid_J_count += 1
|
||||
|
||||
# compute precision recall and F1
|
||||
precision = TPs / (TPs + FPs + 1e-4)
|
||||
positive_micro_precision = TPs / (TPs + pmFPs + 1e-4)
|
||||
assert np.all(precision <= 1)
|
||||
recall = TPs / (TPs + FNs + 1e-4)
|
||||
assert np.all(recall <= 1)
|
||||
F1 = 2 * precision * recall / (precision + recall + 1e-4)
|
||||
positive_micro_F1 = (
|
||||
2
|
||||
* positive_micro_precision
|
||||
* recall
|
||||
/ (positive_micro_precision + recall + 1e-4)
|
||||
)
|
||||
|
||||
IL_rec = IL_TPs / (IL_TPs + IL_FNs + 1e-6)
|
||||
IL_prec = IL_TPs / (IL_TPs + IL_FPs + 1e-6)
|
||||
IL_F1 = 2 * IL_prec * IL_rec / (IL_prec + IL_rec + 1e-6)
|
||||
IL_FPR = IL_FPs / (IL_FPs + IL_TNs + 1e-6)
|
||||
IL_MCC = float(IL_TPs * IL_TNs - IL_FPs * IL_FNs) / (
|
||||
(
|
||||
float(IL_TPs + IL_FPs)
|
||||
* float(IL_TPs + IL_FNs)
|
||||
* float(IL_TNs + IL_FPs)
|
||||
* float(IL_TNs + IL_FNs)
|
||||
)
|
||||
** 0.5
|
||||
+ 1e-6
|
||||
)
|
||||
IL_perfect_pos = IL_perfects_pos / (total_pos_count + 1e-9)
|
||||
IL_perfect_neg = IL_perfects_neg / (total_neg_count + 1e-9)
|
||||
|
||||
total_J = total_J / (valid_J_count + 1e-9)
|
||||
total_F = total_F / (valid_J_count + 1e-9)
|
||||
total_JnF = total_JnF / (valid_J_count + 1e-9)
|
||||
|
||||
self.eval = {
|
||||
"params": p,
|
||||
"TPs": TPs,
|
||||
"FPs": FPs,
|
||||
"positive_micro_FPs": pmFPs,
|
||||
"FNs": FNs,
|
||||
"precision": precision,
|
||||
"positive_micro_precision": positive_micro_precision,
|
||||
"recall": recall,
|
||||
"F1": F1,
|
||||
"positive_micro_F1": positive_micro_F1,
|
||||
"positive_macro_F1": local_F1s / valid_F1_count,
|
||||
"positive_w0dt_macro_F1": local_F1s / valid_F1_count_w0dt,
|
||||
"IL_recall": IL_rec,
|
||||
"IL_precision": IL_prec,
|
||||
"IL_F1": IL_F1,
|
||||
"IL_FPR": IL_FPR,
|
||||
"IL_MCC": IL_MCC,
|
||||
"IL_perfect_pos": IL_perfect_pos,
|
||||
"IL_perfect_neg": IL_perfect_neg,
|
||||
"J": total_J,
|
||||
"F": total_F,
|
||||
"J&F": total_JnF,
|
||||
}
|
||||
self.eval["CGF1"] = self.eval["positive_macro_F1"] * self.eval["IL_MCC"]
|
||||
self.eval["CGF1_w0dt"] = (
|
||||
self.eval["positive_w0dt_macro_F1"] * self.eval["IL_MCC"]
|
||||
)
|
||||
self.eval["CGF1_micro"] = self.eval["positive_micro_F1"] * self.eval["IL_MCC"]
|
||||
|
||||
def summarize(self):
|
||||
"""
|
||||
Compute and display summary metrics for evaluation results.
|
||||
Note this functin can *only* be applied on the default parameter setting
|
||||
"""
|
||||
if not self.eval:
|
||||
raise Exception("Please run accumulate() first")
|
||||
|
||||
def _summarize(iouThr=None, metric=""):
|
||||
p = self.params
|
||||
iStr = " {:<18} @[ IoU={:<9}] = {:0.3f}"
|
||||
titleStr = "Average " + metric
|
||||
iouStr = (
|
||||
"{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
|
||||
if iouThr is None
|
||||
else "{:0.2f}".format(iouThr)
|
||||
)
|
||||
|
||||
s = self.eval[metric]
|
||||
# IoU
|
||||
if iouThr is not None:
|
||||
t = np.where(iouThr == p.iouThrs)[0]
|
||||
s = s[t]
|
||||
|
||||
if len(s[s > -1]) == 0:
|
||||
mean_s = -1
|
||||
else:
|
||||
mean_s = np.mean(s[s > -1])
|
||||
print(iStr.format(titleStr, iouStr, mean_s))
|
||||
return mean_s
|
||||
|
||||
def _summarize_single(metric=""):
|
||||
titleStr = "Average " + metric
|
||||
iStr = " {:<35} = {:0.3f}"
|
||||
s = self.eval[metric]
|
||||
print(iStr.format(titleStr, s))
|
||||
return s
|
||||
|
||||
def _summarizeDets():
|
||||
# note: the index of these metrics are also used in video Demo F1 evaluation
|
||||
# when adding new metrics, please update the index in video Demo F1 evaluation
|
||||
# in "evaluate" method of the "VideoDemoF1Evaluator" class
|
||||
stats = np.zeros((len(DEMO_METRICS),))
|
||||
stats[0] = _summarize(metric="CGF1")
|
||||
stats[1] = _summarize(metric="precision")
|
||||
stats[2] = _summarize(metric="recall")
|
||||
stats[3] = _summarize(metric="F1")
|
||||
stats[4] = _summarize(metric="positive_macro_F1")
|
||||
stats[5] = _summarize_single(metric="IL_precision")
|
||||
stats[6] = _summarize_single(metric="IL_recall")
|
||||
stats[7] = _summarize_single(metric="IL_F1")
|
||||
stats[8] = _summarize_single(metric="IL_FPR")
|
||||
stats[9] = _summarize_single(metric="IL_MCC")
|
||||
stats[10] = _summarize(metric="IL_perfect_pos")
|
||||
stats[11] = _summarize(metric="IL_perfect_neg")
|
||||
stats[12] = _summarize(iouThr=0.5, metric="CGF1")
|
||||
stats[13] = _summarize(iouThr=0.5, metric="precision")
|
||||
stats[14] = _summarize(iouThr=0.5, metric="recall")
|
||||
stats[15] = _summarize(iouThr=0.5, metric="F1")
|
||||
stats[16] = _summarize(iouThr=0.5, metric="positive_macro_F1")
|
||||
stats[17] = _summarize(iouThr=0.5, metric="IL_perfect_pos")
|
||||
stats[18] = _summarize(iouThr=0.5, metric="IL_perfect_neg")
|
||||
stats[19] = _summarize(iouThr=0.75, metric="CGF1")
|
||||
stats[20] = _summarize(iouThr=0.75, metric="precision")
|
||||
stats[21] = _summarize(iouThr=0.75, metric="recall")
|
||||
stats[22] = _summarize(iouThr=0.75, metric="F1")
|
||||
stats[23] = _summarize(iouThr=0.75, metric="positive_macro_F1")
|
||||
stats[24] = _summarize(iouThr=0.75, metric="IL_perfect_pos")
|
||||
stats[25] = _summarize(iouThr=0.75, metric="IL_perfect_neg")
|
||||
stats[26] = _summarize_single(metric="J")
|
||||
stats[27] = _summarize_single(metric="F")
|
||||
stats[28] = _summarize_single(metric="J&F")
|
||||
stats[29] = _summarize(metric="CGF1_micro")
|
||||
stats[30] = _summarize(metric="positive_micro_precision")
|
||||
stats[31] = _summarize(metric="positive_micro_F1")
|
||||
stats[32] = _summarize(iouThr=0.5, metric="CGF1_micro")
|
||||
stats[33] = _summarize(iouThr=0.5, metric="positive_micro_precision")
|
||||
stats[34] = _summarize(iouThr=0.5, metric="positive_micro_F1")
|
||||
stats[35] = _summarize(iouThr=0.75, metric="CGF1_micro")
|
||||
stats[36] = _summarize(iouThr=0.75, metric="positive_micro_precision")
|
||||
stats[37] = _summarize(iouThr=0.75, metric="positive_micro_F1")
|
||||
stats[38] = _summarize(metric="CGF1_w0dt")
|
||||
stats[39] = _summarize(metric="positive_w0dt_macro_F1")
|
||||
stats[40] = _summarize(iouThr=0.5, metric="CGF1_w0dt")
|
||||
stats[41] = _summarize(iouThr=0.5, metric="positive_w0dt_macro_F1")
|
||||
stats[42] = _summarize(iouThr=0.75, metric="CGF1_w0dt")
|
||||
stats[43] = _summarize(iouThr=0.75, metric="positive_w0dt_macro_F1")
|
||||
return stats
|
||||
|
||||
summarize = _summarizeDets
|
||||
self.stats = summarize()
|
||||
|
||||
|
||||
DEMO_METRICS = [
|
||||
"CGF1",
|
||||
"Precision",
|
||||
"Recall",
|
||||
"F1",
|
||||
"Macro_F1",
|
||||
"IL_Precision",
|
||||
"IL_Recall",
|
||||
"IL_F1",
|
||||
"IL_FPR",
|
||||
"IL_MCC",
|
||||
"IL_perfect_pos",
|
||||
"IL_perfect_neg",
|
||||
"CGF1@0.5",
|
||||
"Precision@0.5",
|
||||
"Recall@0.5",
|
||||
"F1@0.5",
|
||||
"Macro_F1@0.5",
|
||||
"IL_perfect_pos@0.5",
|
||||
"IL_perfect_neg@0.5",
|
||||
"CGF1@0.75",
|
||||
"Precision@0.75",
|
||||
"Recall@0.75",
|
||||
"F1@0.75",
|
||||
"Macro_F1@0.75",
|
||||
"IL_perfect_pos@0.75",
|
||||
"IL_perfect_neg@0.75",
|
||||
"J",
|
||||
"F",
|
||||
"J&F",
|
||||
"CGF1_micro",
|
||||
"positive_micro_Precision",
|
||||
"positive_micro_F1",
|
||||
"CGF1_micro@0.5",
|
||||
"positive_micro_Precision@0.5",
|
||||
"positive_micro_F1@0.5",
|
||||
"CGF1_micro@0.75",
|
||||
"positive_micro_Precision@0.75",
|
||||
"positive_micro_F1@0.75",
|
||||
"CGF1_w0dt",
|
||||
"positive_w0dt_macro_F1",
|
||||
"CGF1_w0dt@0.5",
|
||||
"positive_w0dt_macro_F1@0.5",
|
||||
"CGF1_w0dt@0.75",
|
||||
"positive_w0dt_macro_F1@0.75",
|
||||
]
|
||||
|
||||
|
||||
class DemoEvaluator(CocoEvaluator):
|
||||
def __init__(
|
||||
self,
|
||||
coco_gt,
|
||||
iou_types,
|
||||
dump_dir: Optional[str],
|
||||
postprocessor,
|
||||
threshold=0.5,
|
||||
average_by_rarity=False,
|
||||
gather_pred_via_filesys=False,
|
||||
exhaustive_only=False,
|
||||
all_exhaustive_only=True,
|
||||
compute_JnF=False,
|
||||
metrics_dump_dir: Optional[str] = None,
|
||||
):
|
||||
self.iou_types = iou_types
|
||||
self.threshold = threshold
|
||||
super().__init__(
|
||||
coco_gt=coco_gt,
|
||||
iou_types=iou_types,
|
||||
useCats=False,
|
||||
dump_dir=dump_dir,
|
||||
postprocessor=postprocessor,
|
||||
# average_by_rarity=average_by_rarity,
|
||||
gather_pred_via_filesys=gather_pred_via_filesys,
|
||||
exhaustive_only=exhaustive_only,
|
||||
all_exhaustive_only=all_exhaustive_only,
|
||||
metrics_dump_dir=metrics_dump_dir,
|
||||
)
|
||||
|
||||
self.use_self_evaluate = True
|
||||
self.compute_JnF = compute_JnF
|
||||
|
||||
def _lazy_init(self):
|
||||
if self.initialized:
|
||||
return
|
||||
super()._lazy_init()
|
||||
self.use_self_evaluate = True
|
||||
self.reset()
|
||||
|
||||
def select_best_scoring(self, scorings):
|
||||
# This function is used for "oracle" type evaluation.
|
||||
# It accepts the evaluation results with respect to several ground truths, and picks the best
|
||||
if len(scorings) == 1:
|
||||
return scorings[0]
|
||||
|
||||
assert (
|
||||
scorings[0].ndim == 3
|
||||
), f"Expecting results in [numCats, numAreas, numImgs] format, got {scorings[0].shape}"
|
||||
assert (
|
||||
scorings[0].shape[0] == 1
|
||||
), f"Expecting a single category, got {scorings[0].shape[0]}"
|
||||
|
||||
for scoring in scorings:
|
||||
assert (
|
||||
scoring.shape == scorings[0].shape
|
||||
), f"Shape mismatch: {scoring.shape}, {scorings[0].shape}"
|
||||
|
||||
selected_imgs = []
|
||||
for img_id in range(scorings[0].shape[-1]):
|
||||
best = scorings[0][:, :, img_id]
|
||||
|
||||
for scoring in scorings[1:]:
|
||||
current = scoring[:, :, img_id]
|
||||
if "local_F1s" in best[0, 0] and "local_F1s" in current[0, 0]:
|
||||
# we were able to compute a F1 score for this particular image in both evaluations
|
||||
# best["local_F1s"] contains the results at various IoU thresholds. We simply take the average for comparision
|
||||
best_score = best[0, 0]["local_F1s"].mean()
|
||||
current_score = current[0, 0]["local_F1s"].mean()
|
||||
if current_score > best_score:
|
||||
best = current
|
||||
|
||||
else:
|
||||
# If we're here, it means that in that in some evaluation we were not able to get a valid local F1
|
||||
# This happens when both the predictions and targets are empty. In that case, we can assume it's a perfect prediction
|
||||
if "local_F1s" not in current[0, 0]:
|
||||
best = current
|
||||
selected_imgs.append(best)
|
||||
result = np.stack(selected_imgs, axis=-1)
|
||||
assert result.shape == scorings[0].shape
|
||||
return result
|
||||
|
||||
def summarize(self):
|
||||
self._lazy_init()
|
||||
logging.info("Demo evaluator: Summarizing")
|
||||
if not is_main_process():
|
||||
return {}
|
||||
outs = {}
|
||||
prefix = "oracle_" if len(self.coco_evals) > 1 else ""
|
||||
# if self.rarity_buckets is None:
|
||||
self.accumulate(self.eval_img_ids)
|
||||
for iou_type, coco_eval in self.coco_evals[0].items():
|
||||
print("Demo metric, IoU type={}".format(iou_type))
|
||||
coco_eval.summarize()
|
||||
|
||||
if "bbox" in self.coco_evals[0]:
|
||||
for i, value in enumerate(self.coco_evals[0]["bbox"].stats):
|
||||
outs[f"coco_eval_bbox_{prefix}{DEMO_METRICS[i]}"] = value
|
||||
if "segm" in self.coco_evals[0]:
|
||||
for i, value in enumerate(self.coco_evals[0]["segm"].stats):
|
||||
outs[f"coco_eval_masks_{prefix}{DEMO_METRICS[i]}"] = value
|
||||
# else:
|
||||
# total_stats = {}
|
||||
# for bucket, img_list in self.rarity_buckets.items():
|
||||
# self.accumulate(imgIds=img_list)
|
||||
# bucket_name = RARITY_BUCKETS[bucket]
|
||||
# for iou_type, coco_eval in self.coco_evals[0].items():
|
||||
# print(
|
||||
# "Demo metric, IoU type={}, Rarity bucket={}".format(
|
||||
# iou_type, bucket_name
|
||||
# )
|
||||
# )
|
||||
# coco_eval.summarize()
|
||||
|
||||
# if "bbox" in self.coco_evals[0]:
|
||||
# if "bbox" not in total_stats:
|
||||
# total_stats["bbox"] = np.zeros_like(
|
||||
# self.coco_evals[0]["bbox"].stats
|
||||
# )
|
||||
# total_stats["bbox"] += self.coco_evals[0]["bbox"].stats
|
||||
# for i, value in enumerate(self.coco_evals[0]["bbox"].stats):
|
||||
# outs[
|
||||
# f"coco_eval_bbox_{bucket_name}_{prefix}{DEMO_METRICS[i]}"
|
||||
# ] = value
|
||||
# if "segm" in self.coco_evals[0]:
|
||||
# if "segm" not in total_stats:
|
||||
# total_stats["segm"] = np.zeros_like(
|
||||
# self.coco_evals[0]["segm"].stats
|
||||
# )
|
||||
# total_stats["segm"] += self.coco_evals[0]["segm"].stats
|
||||
# for i, value in enumerate(self.coco_evals[0]["segm"].stats):
|
||||
# outs[
|
||||
# f"coco_eval_masks_{bucket_name}_{prefix}{DEMO_METRICS[i]}"
|
||||
# ] = value
|
||||
|
||||
# if "bbox" in total_stats:
|
||||
# total_stats["bbox"] /= len(self.rarity_buckets)
|
||||
# for i, value in enumerate(total_stats["bbox"]):
|
||||
# outs[f"coco_eval_bbox_{prefix}{DEMO_METRICS[i]}"] = value
|
||||
# if "segm" in total_stats:
|
||||
# total_stats["segm"] /= len(self.rarity_buckets)
|
||||
# for i, value in enumerate(total_stats["segm"]):
|
||||
# outs[f"coco_eval_masks_{prefix}{DEMO_METRICS[i]}"] = value
|
||||
|
||||
return outs
|
||||
|
||||
def accumulate(self, imgIds=None):
|
||||
self._lazy_init()
|
||||
logging.info(
|
||||
f"demo evaluator: Accumulating on {len(imgIds) if imgIds is not None else 'all'} images"
|
||||
)
|
||||
if not is_main_process():
|
||||
return
|
||||
|
||||
if imgIds is not None:
|
||||
for coco_eval in self.coco_evals[0].values():
|
||||
coco_eval.params.imgIds = list(imgIds)
|
||||
|
||||
for coco_eval in self.coco_evals[0].values():
|
||||
coco_eval.accumulate()
|
||||
|
||||
def reset(self):
|
||||
self.coco_evals = [{} for _ in range(len(self.coco_gts))]
|
||||
for i, coco_gt in enumerate(self.coco_gts):
|
||||
for iou_type in self.iou_types:
|
||||
self.coco_evals[i][iou_type] = DemoEval(
|
||||
coco_gt=coco_gt,
|
||||
iouType=iou_type,
|
||||
threshold=self.threshold,
|
||||
compute_JnF=self.compute_JnF,
|
||||
)
|
||||
self.coco_evals[i][iou_type].useCats = False
|
||||
self.img_ids = []
|
||||
self.eval_imgs = {k: [] for k in self.iou_types}
|
||||
if self.dump is not None:
|
||||
self.dump = []
|
||||
Reference in New Issue
Block a user