Initial commit
fbshipit-source-id: da6be2f26e3a1202f4bffde8cb980e2dcb851294
sam3/eval/ytvis_coco_wrapper.py (new file, 146 lines)
@@ -0,0 +1,146 @@
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved

# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

import copy
import json
import logging
from typing import Optional

import numpy as np
import pycocotools.mask as mask_util
from pycocotools.coco import COCO
from typing_extensions import override


class YTVIS(COCO):
    """
    Helper class for reading YT-VIS annotations.
    """

    @override
    def __init__(self, annotation_file: Optional[str] = None, ignore_gt_cats: bool = True):
        """
        Args:
            annotation_file: Path to the annotation file.
            ignore_gt_cats: If True, we ignore the ground-truth categories and replace
                them with a dummy "object" category. This is useful for Phrase AP
                evaluation.
        """
        self.ignore_gt_cats = ignore_gt_cats
        super().__init__(annotation_file=annotation_file)

    @override
    def createIndex(self):
        # We rename some keys to match the COCO format before creating the index.
        if "annotations" in self.dataset:
            for ann in self.dataset["annotations"]:
                if "video_id" in ann:
                    ann["image_id"] = int(ann.pop("video_id"))
                if self.ignore_gt_cats:
                    ann["category_id"] = -1
                else:
                    ann["category_id"] = int(ann["category_id"])
                if "bboxes" in ann:
                    # In some datasets loaded through this YTVIS class, a "bboxes"
                    # entry can be None for frames where the GT object is invisible,
                    # so we replace it with [0, 0, 0, 0].
                    ann["bboxes"] = [
                        bbox if bbox is not None else [0, 0, 0, 0]
                        for bbox in ann["bboxes"]
                    ]
                if "areas" in ann:
                    # Similar to "bboxes", some areas can be None for frames where
                    # the GT object is invisible, so we replace them with 0.
                    areas = [a if a is not None else 0 for a in ann["areas"]]
                    # Compute the average area of the tracklet.
                    ann["area"] = np.mean(areas)
        if "videos" in self.dataset:
            for vid in self.dataset["videos"]:
                vid["id"] = int(vid["id"])
            self.dataset["images"] = self.dataset.pop("videos")

        if self.ignore_gt_cats:
            self.dataset["categories"] = [
                {"supercategory": "object", "id": -1, "name": "object"}
            ]
        else:
            for cat in self.dataset["categories"]:
                cat["id"] = int(cat["id"])
        super().createIndex()
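
    # After createIndex, the parent COCO index treats each video as an "image" and
    # each tracklet as a single annotation whose "area" is the tracklet's average
    # per-frame area; the area-based warning in getAnnIds below refers to this.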

    @override
    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        if len(areaRng) > 0:
            logging.warning(
                "Note that we filter out objects based on their *average* area "
                "across the video, not their per-frame area"
            )

        return super().getAnnIds(imgIds=imgIds, catIds=catIds, iscrowd=iscrowd)

    @override
    def showAnns(self, anns, draw_bbox=False):
        raise NotImplementedError("Showing annotations is not supported")

    @override
    def loadRes(self, resFile):
        # Adapted from COCO.loadRes to support tracklets/masklets.
        res = YTVIS(ignore_gt_cats=self.ignore_gt_cats)
        res.dataset["images"] = [img for img in self.dataset["images"]]

        if type(resFile) == str:
            with open(resFile) as f:
                anns = json.load(f)
        elif type(resFile) == np.ndarray:
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert type(anns) == list, "results is not an array of objects"
        annsImgIds = [ann["image_id"] for ann in anns]
        assert set(annsImgIds) == (
            set(annsImgIds) & set(self.getImgIds())
        ), "Results do not correspond to current coco set"
        if "bboxes" in anns[0] and not anns[0]["bboxes"] == []:
            res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
            for id, ann in enumerate(anns):
                bbs = [(bb if bb is not None else [0, 0, 0, 0]) for bb in ann["bboxes"]]
                xxyy = [[bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] for bb in bbs]
                if "segmentations" not in ann:
                    ann["segmentations"] = [
                        [[x1, y1, x1, y2, x2, y2, x2, y1]] for (x1, x2, y1, y2) in xxyy
                    ]
                ann["areas"] = [bb[2] * bb[3] for bb in bbs]
                # NOTE: We also compute the average area of a tracklet across the
                # video, allowing us to compute area-based mAP.
                ann["area"] = np.mean(ann["areas"])
                ann["id"] = id + 1
                ann["iscrowd"] = 0
        elif "segmentations" in anns[0]:
            res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
            for id, ann in enumerate(anns):
                ann["bboxes"] = [
                    mask_util.toBbox(segm) for segm in ann["segmentations"]
                ]
                if "areas" not in ann:
                    ann["areas"] = [
                        mask_util.area(segm) for segm in ann["segmentations"]
                    ]
                # NOTE: We also compute the average area of a tracklet across the
                # video, allowing us to compute area-based mAP.
                ann["area"] = np.mean(ann["areas"])
                ann["id"] = id + 1
                ann["iscrowd"] = 0

        res.dataset["annotations"] = anns
        res.createIndex()
        return res
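
    # A sketch of the result entries that loadRes above consumes (illustration only;
    # field names follow the code above, while "score" and "category_id" are assumed
    # to be needed by the downstream COCO-style evaluator rather than by loadRes):
    #   {
    #       "image_id": 3,                        # video id
    #       "category_id": 1,                     # overwritten with -1 when ignore_gt_cats is True
    #       "score": 0.87,
    #       "bboxes": [[x, y, w, h], None, ...],  # one entry per frame, None if unseen
    #       "segmentations": [rle, rle, ...],     # per-frame RLE masks (optional)
    #   }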

    @override
    def download(self, tarDir=None, imgIds=[]):
        raise NotImplementedError

    @override
    def loadNumpyAnnotations(self, data):
        raise NotImplementedError("We don't support numpy annotations for now")

    @override
    def annToRLE(self, ann):
        raise NotImplementedError("We expect masks to be already in RLE format")

    @override
    def annToMask(self, ann):
        raise NotImplementedError("We expect masks to be already in RLE format")
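

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the evaluation code):
# the annotation and prediction file paths below are hypothetical placeholders.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    ytvis_gt = YTVIS(annotation_file="ytvis_gt.json", ignore_gt_cats=True)
    ytvis_dt = ytvis_gt.loadRes("ytvis_predictions.json")

    # Inherited COCO accessors operate on the video-level index built in
    # createIndex (videos are exposed as "images", tracklets as annotations).
    video_ids = ytvis_gt.getImgIds()
    ann_ids = ytvis_gt.getAnnIds(imgIds=video_ids[:1])
    print(f"{len(video_ids)} videos; {len(ann_ids)} GT tracklets in the first video")
    print(f"{len(ytvis_dt.getAnnIds())} predicted tracklets loaded")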