Initial commit

fbshipit-source-id: da6be2f26e3a1202f4bffde8cb980e2dcb851294
2025-11-18 23:07:42 -08:00
commit a13e358df4
504 changed files with 122758 additions and 0 deletions
--- a/sam3/eval/demo_eval.py
+++ b/sam3/eval/demo_eval.py
@@ -0,0 +1,658 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+This evaluator is based upon COCO evaluation, but evaluates the model in a "demo" setting.
+This means that the model's predictions are thresholded and evaluated as "hard" predictions.
+"""
+
+import logging
+from typing import Optional
+
+import numpy as np
+import pycocotools.mask as maskUtils
+from pycocotools.cocoeval import COCOeval
+
+from sam3.eval.coco_eval import CocoEvaluator
+from sam3.train.masks_ops import compute_F_measure
+from sam3.train.utils.distributed import is_main_process
+
+from scipy.optimize import linear_sum_assignment
+
+
+class DemoEval(COCOeval):
+    """
+    This evaluator is based upon COCO evaluation, but evaluates the model in a "demo" setting.
+    This means that the model's predictions are thresholded and evaluated as "hard" predictions.
+    """
+
+    def __init__(
+        self,
+        coco_gt=None,
+        coco_dt=None,
+        iouType="bbox",
+        threshold=0.5,
+        compute_JnF=False,
+    ):
+        """
+        Args:
+            coco_gt (COCO): ground truth COCO API
+            coco_dt (COCO): detections COCO API
+            iou_type (str): type of IoU to evaluate
+            threshold (float): threshold for predictions
+        """
+        super().__init__(coco_gt, coco_dt, iouType)
+        self.threshold = threshold
+
+        self.params.useCats = False
+        self.params.areaRng = [[0**2, 1e5**2]]
+        self.params.areaRngLbl = ["all"]
+        self.params.maxDets = [100000]
+        self.compute_JnF = compute_JnF
+
+    def computeIoU(self, imgId, catId):
+        # Same as the original COCOeval.computeIoU, but without sorting
+        p = self.params
+        if p.useCats:
+            gt = self._gts[imgId, catId]
+            dt = self._dts[imgId, catId]
+        else:
+            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+        if len(gt) == 0 and len(dt) == 0:
+            return []
+
+        if p.iouType == "segm":
+            g = [g["segmentation"] for g in gt]
+            d = [d["segmentation"] for d in dt]
+        elif p.iouType == "bbox":
+            g = [g["bbox"] for g in gt]
+            d = [d["bbox"] for d in dt]
+        else:
+            raise Exception("unknown iouType for iou computation")
+
+        # compute iou between each dt and gt region
+        iscrowd = [int(o["iscrowd"]) for o in gt]
+        ious = maskUtils.iou(d, g, iscrowd)
+        return ious
+
+    def evaluateImg(self, imgId, catId, aRng, maxDet):
+        """
+        perform evaluation for single category and image
+        :return: dict (single image results)
+        """
+        p = self.params
+        assert not p.useCats, "This evaluator does not support per-category evaluation."
+        assert catId == -1
+        all_gts = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+        keep_gt = np.array([not g["ignore"] for g in all_gts], dtype=bool)
+        gt = [g for g in all_gts if not g["ignore"]]
+        all_dts = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+        keep_dt = np.array([d["score"] >= self.threshold for d in all_dts], dtype=bool)
+        dt = [d for d in all_dts if d["score"] >= self.threshold]
+        if len(gt) == 0 and len(dt) == 0:
+            # This is a "true negative" case, where there are no GTs and no predictions
+            # The box-level metrics are ill-defined, so we don't add them to this dict
+            return {
+                "image_id": imgId,
+                "IL_TP": 0,
+                "IL_TN": 1,
+                "IL_FP": 0,
+                "IL_FN": 0,
+                "IL_perfect_neg": np.ones((len(p.iouThrs),), dtype=np.int64),
+                "num_dt": len(dt),
+            }
+
+        if len(gt) > 0 and len(dt) == 0:
+            # This is a "false negative" case, where there are GTs but no predictions
+            return {
+                "image_id": imgId,
+                "IL_TP": 0,
+                "IL_TN": 0,
+                "IL_FP": 0,
+                "IL_FN": 1,
+                "TPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
+                "FPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
+                "FNs": np.ones((len(p.iouThrs),), dtype=np.int64) * len(gt),
+                "local_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
+                "local_positive_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
+                "IL_perfect_pos": np.zeros((len(p.iouThrs),), dtype=np.int64),
+                "num_dt": len(dt),
+            }
+
+        # Load pre-computed ious
+        ious = self.ious[(imgId, catId)]
+
+        # compute matching
+        if len(ious) == 0:
+            ious = np.zeros((len(dt), len(gt)))
+        else:
+            ious = ious[keep_dt, :][:, keep_gt]
+        assert ious.shape == (len(dt), len(gt))
+
+        matched_dt, matched_gt = linear_sum_assignment(-ious)
+
+        match_scores = ious[matched_dt, matched_gt]
+
+        if self.compute_JnF and len(match_scores) > 0:
+            j_score = match_scores.mean()
+            f_measure = 0
+            for dt_id, gt_id in zip(matched_dt, matched_gt):
+                f_measure += compute_F_measure(
+                    gt_boundary_rle=gt[gt_id]["boundary"],
+                    gt_dilated_boundary_rle=gt[gt_id]["dilated_boundary"],
+                    dt_boundary_rle=dt[dt_id]["boundary"],
+                    dt_dilated_boundary_rle=dt[dt_id]["dilated_boundary"],
+                )
+            f_measure /= len(match_scores) + 1e-9
+            JnF = (j_score + f_measure) * 0.5
+        else:
+            j_score = f_measure = JnF = -1
+
+        TPs, FPs, FNs = [], [], []
+        IL_perfect = []
+        for thresh in p.iouThrs:
+            TP = (match_scores >= thresh).sum()
+            FP = len(dt) - TP
+            FN = len(gt) - TP
+            assert (
+                FP >= 0 and FN >= 0
+            ), f"FP: {FP}, FN: {FN}, TP: {TP}, match_scores: {match_scores}, len(dt): {len(dt)}, len(gt): {len(gt)}, ious: {ious}"
+            TPs.append(TP)
+            FPs.append(FP)
+            FNs.append(FN)
+
+            if FP == FN and FP == 0:
+                IL_perfect.append(1)
+            else:
+                IL_perfect.append(0)
+
+        TPs = np.array(TPs, dtype=np.int64)
+        FPs = np.array(FPs, dtype=np.int64)
+        FNs = np.array(FNs, dtype=np.int64)
+        IL_perfect = np.array(IL_perfect, dtype=np.int64)
+
+        # compute precision recall and F1
+        precision = TPs / (TPs + FPs + 1e-4)
+        assert np.all(precision <= 1)
+        recall = TPs / (TPs + FNs + 1e-4)
+        assert np.all(recall <= 1)
+        F1 = 2 * precision * recall / (precision + recall + 1e-4)
+
+        result = {
+            "image_id": imgId,
+            "TPs": TPs,
+            "FPs": FPs,
+            "FNs": FNs,
+            "local_F1s": F1,
+            "IL_TP": (len(gt) > 0) and (len(dt) > 0),
+            "IL_FP": (len(gt) == 0) and (len(dt) > 0),
+            "IL_TN": (len(gt) == 0) and (len(dt) == 0),
+            "IL_FN": (len(gt) > 0) and (len(dt) == 0),
+            ("IL_perfect_pos" if len(gt) > 0 else "IL_perfect_neg"): IL_perfect,
+            "F": f_measure,
+            "J": j_score,
+            "J&F": JnF,
+            "num_dt": len(dt),
+        }
+        if len(gt) > 0 and len(dt) > 0:
+            result["local_positive_F1s"] = F1
+        return result
+
+    def accumulate(self, p=None):
+        """
+        Accumulate per image evaluation results and store the result in self.eval
+        :param p: input params for evaluation
+        :return: None
+        """
+        if not self.evalImgs:
+            print("Please run evaluate() first")
+        # allows input customized parameters
+        if p is None:
+            p = self.params
+
+        setImgIds = set(p.imgIds)
+
+        # TPs, FPs, FNs
+        TPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+        FPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+        pmFPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+        FNs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+        local_F1s = np.zeros((len(p.iouThrs),), dtype=np.float64)
+
+        # Image level metrics
+        IL_TPs = 0
+        IL_FPs = 0
+        IL_TNs = 0
+        IL_FNs = 0
+        IL_perfects_neg = np.zeros((len(p.iouThrs),), dtype=np.int64)
+        IL_perfects_pos = np.zeros((len(p.iouThrs),), dtype=np.int64)
+
+        # JnF metric
+        total_J = 0
+        total_F = 0
+        total_JnF = 0
+
+        valid_img_count = 0
+        total_pos_count = 0
+        total_neg_count = 0
+        valid_J_count = 0
+        valid_F1_count = 0
+        valid_F1_count_w0dt = 0
+        for res in self.evalImgs:
+            if res["image_id"] not in setImgIds:
+                continue
+            IL_TPs += res["IL_TP"]
+            IL_FPs += res["IL_FP"]
+            IL_TNs += res["IL_TN"]
+            IL_FNs += res["IL_FN"]
+            if "IL_perfect_neg" in res:
+                IL_perfects_neg += res["IL_perfect_neg"]
+                total_neg_count += 1
+            else:
+                assert "IL_perfect_pos" in res
+                IL_perfects_pos += res["IL_perfect_pos"]
+                total_pos_count += 1
+
+            if "TPs" not in res:
+                continue
+
+            TPs += res["TPs"]
+            FPs += res["FPs"]
+            FNs += res["FNs"]
+            valid_img_count += 1
+
+            if "local_positive_F1s" in res:
+                local_F1s += res["local_positive_F1s"]
+                pmFPs += res["FPs"]
+                valid_F1_count_w0dt += 1
+                if res["num_dt"] > 0:
+                    valid_F1_count += 1
+
+            if "J" in res and res["J"] > -1e-9:
+                total_J += res["J"]
+                total_F += res["F"]
+                total_JnF += res["J&F"]
+                valid_J_count += 1
+
+        # compute precision recall and F1
+        precision = TPs / (TPs + FPs + 1e-4)
+        positive_micro_precision = TPs / (TPs + pmFPs + 1e-4)
+        assert np.all(precision <= 1)
+        recall = TPs / (TPs + FNs + 1e-4)
+        assert np.all(recall <= 1)
+        F1 = 2 * precision * recall / (precision + recall + 1e-4)
+        positive_micro_F1 = (
+            2
+            * positive_micro_precision
+            * recall
+            / (positive_micro_precision + recall + 1e-4)
+        )
+
+        IL_rec = IL_TPs / (IL_TPs + IL_FNs + 1e-6)
+        IL_prec = IL_TPs / (IL_TPs + IL_FPs + 1e-6)
+        IL_F1 = 2 * IL_prec * IL_rec / (IL_prec + IL_rec + 1e-6)
+        IL_FPR = IL_FPs / (IL_FPs + IL_TNs + 1e-6)
+        IL_MCC = float(IL_TPs * IL_TNs - IL_FPs * IL_FNs) / (
+            (
+                float(IL_TPs + IL_FPs)
+                * float(IL_TPs + IL_FNs)
+                * float(IL_TNs + IL_FPs)
+                * float(IL_TNs + IL_FNs)
+            )
+            ** 0.5
+            + 1e-6
+        )
+        IL_perfect_pos = IL_perfects_pos / (total_pos_count + 1e-9)
+        IL_perfect_neg = IL_perfects_neg / (total_neg_count + 1e-9)
+
+        total_J = total_J / (valid_J_count + 1e-9)
+        total_F = total_F / (valid_J_count + 1e-9)
+        total_JnF = total_JnF / (valid_J_count + 1e-9)
+
+        self.eval = {
+            "params": p,
+            "TPs": TPs,
+            "FPs": FPs,
+            "positive_micro_FPs": pmFPs,
+            "FNs": FNs,
+            "precision": precision,
+            "positive_micro_precision": positive_micro_precision,
+            "recall": recall,
+            "F1": F1,
+            "positive_micro_F1": positive_micro_F1,
+            "positive_macro_F1": local_F1s / valid_F1_count,
+            "positive_w0dt_macro_F1": local_F1s / valid_F1_count_w0dt,
+            "IL_recall": IL_rec,
+            "IL_precision": IL_prec,
+            "IL_F1": IL_F1,
+            "IL_FPR": IL_FPR,
+            "IL_MCC": IL_MCC,
+            "IL_perfect_pos": IL_perfect_pos,
+            "IL_perfect_neg": IL_perfect_neg,
+            "J": total_J,
+            "F": total_F,
+            "J&F": total_JnF,
+        }
+        self.eval["CGF1"] = self.eval["positive_macro_F1"] * self.eval["IL_MCC"]
+        self.eval["CGF1_w0dt"] = (
+            self.eval["positive_w0dt_macro_F1"] * self.eval["IL_MCC"]
+        )
+        self.eval["CGF1_micro"] = self.eval["positive_micro_F1"] * self.eval["IL_MCC"]
+
+    def summarize(self):
+        """
+        Compute and display summary metrics for evaluation results.
+        Note this functin can *only* be applied on the default parameter setting
+        """
+        if not self.eval:
+            raise Exception("Please run accumulate() first")
+
+        def _summarize(iouThr=None, metric=""):
+            p = self.params
+            iStr = " {:<18} @[ IoU={:<9}] = {:0.3f}"
+            titleStr = "Average " + metric
+            iouStr = (
+                "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+                if iouThr is None
+                else "{:0.2f}".format(iouThr)
+            )
+
+            s = self.eval[metric]
+            # IoU
+            if iouThr is not None:
+                t = np.where(iouThr == p.iouThrs)[0]
+                s = s[t]
+
+            if len(s[s > -1]) == 0:
+                mean_s = -1
+            else:
+                mean_s = np.mean(s[s > -1])
+            print(iStr.format(titleStr, iouStr, mean_s))
+            return mean_s
+
+        def _summarize_single(metric=""):
+            titleStr = "Average " + metric
+            iStr = " {:<35} = {:0.3f}"
+            s = self.eval[metric]
+            print(iStr.format(titleStr, s))
+            return s
+
+        def _summarizeDets():
+            # note: the index of these metrics are also used in video Demo F1 evaluation
+            # when adding new metrics, please update the index in video Demo F1 evaluation
+            # in "evaluate" method of the "VideoDemoF1Evaluator" class
+            stats = np.zeros((len(DEMO_METRICS),))
+            stats[0] = _summarize(metric="CGF1")
+            stats[1] = _summarize(metric="precision")
+            stats[2] = _summarize(metric="recall")
+            stats[3] = _summarize(metric="F1")
+            stats[4] = _summarize(metric="positive_macro_F1")
+            stats[5] = _summarize_single(metric="IL_precision")
+            stats[6] = _summarize_single(metric="IL_recall")
+            stats[7] = _summarize_single(metric="IL_F1")
+            stats[8] = _summarize_single(metric="IL_FPR")
+            stats[9] = _summarize_single(metric="IL_MCC")
+            stats[10] = _summarize(metric="IL_perfect_pos")
+            stats[11] = _summarize(metric="IL_perfect_neg")
+            stats[12] = _summarize(iouThr=0.5, metric="CGF1")
+            stats[13] = _summarize(iouThr=0.5, metric="precision")
+            stats[14] = _summarize(iouThr=0.5, metric="recall")
+            stats[15] = _summarize(iouThr=0.5, metric="F1")
+            stats[16] = _summarize(iouThr=0.5, metric="positive_macro_F1")
+            stats[17] = _summarize(iouThr=0.5, metric="IL_perfect_pos")
+            stats[18] = _summarize(iouThr=0.5, metric="IL_perfect_neg")
+            stats[19] = _summarize(iouThr=0.75, metric="CGF1")
+            stats[20] = _summarize(iouThr=0.75, metric="precision")
+            stats[21] = _summarize(iouThr=0.75, metric="recall")
+            stats[22] = _summarize(iouThr=0.75, metric="F1")
+            stats[23] = _summarize(iouThr=0.75, metric="positive_macro_F1")
+            stats[24] = _summarize(iouThr=0.75, metric="IL_perfect_pos")
+            stats[25] = _summarize(iouThr=0.75, metric="IL_perfect_neg")
+            stats[26] = _summarize_single(metric="J")
+            stats[27] = _summarize_single(metric="F")
+            stats[28] = _summarize_single(metric="J&F")
+            stats[29] = _summarize(metric="CGF1_micro")
+            stats[30] = _summarize(metric="positive_micro_precision")
+            stats[31] = _summarize(metric="positive_micro_F1")
+            stats[32] = _summarize(iouThr=0.5, metric="CGF1_micro")
+            stats[33] = _summarize(iouThr=0.5, metric="positive_micro_precision")
+            stats[34] = _summarize(iouThr=0.5, metric="positive_micro_F1")
+            stats[35] = _summarize(iouThr=0.75, metric="CGF1_micro")
+            stats[36] = _summarize(iouThr=0.75, metric="positive_micro_precision")
+            stats[37] = _summarize(iouThr=0.75, metric="positive_micro_F1")
+            stats[38] = _summarize(metric="CGF1_w0dt")
+            stats[39] = _summarize(metric="positive_w0dt_macro_F1")
+            stats[40] = _summarize(iouThr=0.5, metric="CGF1_w0dt")
+            stats[41] = _summarize(iouThr=0.5, metric="positive_w0dt_macro_F1")
+            stats[42] = _summarize(iouThr=0.75, metric="CGF1_w0dt")
+            stats[43] = _summarize(iouThr=0.75, metric="positive_w0dt_macro_F1")
+            return stats
+
+        summarize = _summarizeDets
+        self.stats = summarize()
+
+
+DEMO_METRICS = [
+    "CGF1",
+    "Precision",
+    "Recall",
+    "F1",
+    "Macro_F1",
+    "IL_Precision",
+    "IL_Recall",
+    "IL_F1",
+    "IL_FPR",
+    "IL_MCC",
+    "IL_perfect_pos",
+    "IL_perfect_neg",
+    "CGF1@0.5",
+    "Precision@0.5",
+    "Recall@0.5",
+    "F1@0.5",
+    "Macro_F1@0.5",
+    "IL_perfect_pos@0.5",
+    "IL_perfect_neg@0.5",
+    "CGF1@0.75",
+    "Precision@0.75",
+    "Recall@0.75",
+    "F1@0.75",
+    "Macro_F1@0.75",
+    "IL_perfect_pos@0.75",
+    "IL_perfect_neg@0.75",
+    "J",
+    "F",
+    "J&F",
+    "CGF1_micro",
+    "positive_micro_Precision",
+    "positive_micro_F1",
+    "CGF1_micro@0.5",
+    "positive_micro_Precision@0.5",
+    "positive_micro_F1@0.5",
+    "CGF1_micro@0.75",
+    "positive_micro_Precision@0.75",
+    "positive_micro_F1@0.75",
+    "CGF1_w0dt",
+    "positive_w0dt_macro_F1",
+    "CGF1_w0dt@0.5",
+    "positive_w0dt_macro_F1@0.5",
+    "CGF1_w0dt@0.75",
+    "positive_w0dt_macro_F1@0.75",
+]
+
+
+class DemoEvaluator(CocoEvaluator):
+    def __init__(
+        self,
+        coco_gt,
+        iou_types,
+        dump_dir: Optional[str],
+        postprocessor,
+        threshold=0.5,
+        average_by_rarity=False,
+        gather_pred_via_filesys=False,
+        exhaustive_only=False,
+        all_exhaustive_only=True,
+        compute_JnF=False,
+        metrics_dump_dir: Optional[str] = None,
+    ):
+        self.iou_types = iou_types
+        self.threshold = threshold
+        super().__init__(
+            coco_gt=coco_gt,
+            iou_types=iou_types,
+            useCats=False,
+            dump_dir=dump_dir,
+            postprocessor=postprocessor,
+            # average_by_rarity=average_by_rarity,
+            gather_pred_via_filesys=gather_pred_via_filesys,
+            exhaustive_only=exhaustive_only,
+            all_exhaustive_only=all_exhaustive_only,
+            metrics_dump_dir=metrics_dump_dir,
+        )
+
+        self.use_self_evaluate = True
+        self.compute_JnF = compute_JnF
+
+    def _lazy_init(self):
+        if self.initialized:
+            return
+        super()._lazy_init()
+        self.use_self_evaluate = True
+        self.reset()
+
+    def select_best_scoring(self, scorings):
+        # This function is used for "oracle" type evaluation.
+        # It accepts the evaluation results with respect to several ground truths, and picks the best
+        if len(scorings) == 1:
+            return scorings[0]
+
+        assert (
+            scorings[0].ndim == 3
+        ), f"Expecting results in [numCats, numAreas, numImgs] format, got {scorings[0].shape}"
+        assert (
+            scorings[0].shape[0] == 1
+        ), f"Expecting a single category, got {scorings[0].shape[0]}"
+
+        for scoring in scorings:
+            assert (
+                scoring.shape == scorings[0].shape
+            ), f"Shape mismatch: {scoring.shape}, {scorings[0].shape}"
+
+        selected_imgs = []
+        for img_id in range(scorings[0].shape[-1]):
+            best = scorings[0][:, :, img_id]
+
+            for scoring in scorings[1:]:
+                current = scoring[:, :, img_id]
+                if "local_F1s" in best[0, 0] and "local_F1s" in current[0, 0]:
+                    # we were able to compute a F1 score for this particular image in both evaluations
+                    # best["local_F1s"] contains the results at various IoU thresholds. We simply take the average for comparision
+                    best_score = best[0, 0]["local_F1s"].mean()
+                    current_score = current[0, 0]["local_F1s"].mean()
+                    if current_score > best_score:
+                        best = current
+
+                else:
+                    # If we're here, it means that in that in some evaluation we were not able to get a valid local F1
+                    # This happens when both the predictions and targets are empty. In that case, we can assume it's a perfect prediction
+                    if "local_F1s" not in current[0, 0]:
+                        best = current
+            selected_imgs.append(best)
+        result = np.stack(selected_imgs, axis=-1)
+        assert result.shape == scorings[0].shape
+        return result
+
+    def summarize(self):
+        self._lazy_init()
+        logging.info("Demo evaluator: Summarizing")
+        if not is_main_process():
+            return {}
+        outs = {}
+        prefix = "oracle_" if len(self.coco_evals) > 1 else ""
+        # if self.rarity_buckets is None:
+        self.accumulate(self.eval_img_ids)
+        for iou_type, coco_eval in self.coco_evals[0].items():
+            print("Demo metric, IoU type={}".format(iou_type))
+            coco_eval.summarize()
+
+        if "bbox" in self.coco_evals[0]:
+            for i, value in enumerate(self.coco_evals[0]["bbox"].stats):
+                outs[f"coco_eval_bbox_{prefix}{DEMO_METRICS[i]}"] = value
+        if "segm" in self.coco_evals[0]:
+            for i, value in enumerate(self.coco_evals[0]["segm"].stats):
+                outs[f"coco_eval_masks_{prefix}{DEMO_METRICS[i]}"] = value
+        # else:
+        #     total_stats = {}
+        #     for bucket, img_list in self.rarity_buckets.items():
+        #         self.accumulate(imgIds=img_list)
+        #         bucket_name = RARITY_BUCKETS[bucket]
+        #         for iou_type, coco_eval in self.coco_evals[0].items():
+        #             print(
+        #                 "Demo metric, IoU type={}, Rarity bucket={}".format(
+        #                     iou_type, bucket_name
+        #                 )
+        #             )
+        #             coco_eval.summarize()
+
+        #         if "bbox" in self.coco_evals[0]:
+        #             if "bbox" not in total_stats:
+        #                 total_stats["bbox"] = np.zeros_like(
+        #                     self.coco_evals[0]["bbox"].stats
+        #                 )
+        #             total_stats["bbox"] += self.coco_evals[0]["bbox"].stats
+        #             for i, value in enumerate(self.coco_evals[0]["bbox"].stats):
+        #                 outs[
+        #                     f"coco_eval_bbox_{bucket_name}_{prefix}{DEMO_METRICS[i]}"
+        #                 ] = value
+        #         if "segm" in self.coco_evals[0]:
+        #             if "segm" not in total_stats:
+        #                 total_stats["segm"] = np.zeros_like(
+        #                     self.coco_evals[0]["segm"].stats
+        #                 )
+        #             total_stats["segm"] += self.coco_evals[0]["segm"].stats
+        #             for i, value in enumerate(self.coco_evals[0]["segm"].stats):
+        #                 outs[
+        #                     f"coco_eval_masks_{bucket_name}_{prefix}{DEMO_METRICS[i]}"
+        #                 ] = value
+
+        #     if "bbox" in total_stats:
+        #         total_stats["bbox"] /= len(self.rarity_buckets)
+        #         for i, value in enumerate(total_stats["bbox"]):
+        #             outs[f"coco_eval_bbox_{prefix}{DEMO_METRICS[i]}"] = value
+        #     if "segm" in total_stats:
+        #         total_stats["segm"] /= len(self.rarity_buckets)
+        #         for i, value in enumerate(total_stats["segm"]):
+        #             outs[f"coco_eval_masks_{prefix}{DEMO_METRICS[i]}"] = value
+
+        return outs
+
+    def accumulate(self, imgIds=None):
+        self._lazy_init()
+        logging.info(
+            f"demo evaluator: Accumulating on {len(imgIds) if imgIds is not None else 'all'} images"
+        )
+        if not is_main_process():
+            return
+
+        if imgIds is not None:
+            for coco_eval in self.coco_evals[0].values():
+                coco_eval.params.imgIds = list(imgIds)
+
+        for coco_eval in self.coco_evals[0].values():
+            coco_eval.accumulate()
+
+    def reset(self):
+        self.coco_evals = [{} for _ in range(len(self.coco_gts))]
+        for i, coco_gt in enumerate(self.coco_gts):
+            for iou_type in self.iou_types:
+                self.coco_evals[i][iou_type] = DemoEval(
+                    coco_gt=coco_gt,
+                    iouType=iou_type,
+                    threshold=self.threshold,
+                    compute_JnF=self.compute_JnF,
+                )
+                self.coco_evals[i][iou_type].useCats = False
+        self.img_ids = []
+        self.eval_imgs = {k: [] for k in self.iou_types}
+        if self.dump is not None:
+            self.dump = []