Files
sam3_local/scripts/eval/veval/saco_yt1b_annot_update.py
generatedunixname89002005307016 7b89b8fc3f Add missing Pyre mode headers] [batch:11/N] [shard:17/N]
Differential Revision: D90237984

fbshipit-source-id: 526fd760f303bf31be4f743bdcd77760496de0de
2026-01-07 05:16:41 -08:00

139 lines
4.3 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
# pyre-unsafe
import argparse
import json
import logging
import os
import pandas as pd
logger = logging.getLogger(__name__)
def get_available_saco_yt1b_ids(yt1b_meida_dir, data):
vdf = pd.DataFrame(data["videos"])
expected_saco_yt1b_ids = vdf.video_name.tolist()
yt1b_media_folders = os.listdir(yt1b_meida_dir)
available_saco_yt1b_ids = []
for yt1b_media_folder in yt1b_media_folders:
if yt1b_media_folder not in expected_saco_yt1b_ids:
continue
jpeg_folder_dir = os.path.join(yt1b_meida_dir, yt1b_media_folder)
jpeg_count = len(os.listdir(jpeg_folder_dir))
if jpeg_count > 0:
available_saco_yt1b_ids.append(yt1b_media_folder)
else:
logger.info(
f"No JPEG images found for {yt1b_media_folder}. The annotation related to this video will be removed."
)
logger.info(
f"Expected {len(expected_saco_yt1b_ids)} videos for {data['info']}. Found {len(available_saco_yt1b_ids)} videos available in {yt1b_meida_dir}."
)
return available_saco_yt1b_ids
def update_yt1b_annot_per_field(data, field, id_col, available_ids):
field_data = data[field]
new_field_data = []
for data_entry in field_data:
if data_entry[id_col] not in available_ids:
logger.info(
f"{field}: Removing {data_entry} due to the video being unavailable."
)
continue
new_field_data.append(data_entry)
data[field] = new_field_data
logger.info(
f"Updated {field} by {id_col} - Before: {len(field_data)}, After: {len(new_field_data)}, Removed: {len(field_data) - len(new_field_data)}"
)
return data
def update_yt1b_annot(yt1b_input_annot_path, yt1b_media_dir, yt1b_output_annot_path):
with open(yt1b_input_annot_path, "r") as f:
data = json.load(f)
available_saco_yt1b_ids = get_available_saco_yt1b_ids(yt1b_media_dir, data)
data = update_yt1b_annot_per_field(
data=data,
field="videos",
id_col="video_name",
available_ids=available_saco_yt1b_ids,
)
videos_data = data["videos"]
available_video_incremental_ids = [data_entry["id"] for data_entry in videos_data]
data = update_yt1b_annot_per_field(
data=data,
field="annotations",
id_col="video_id",
available_ids=available_video_incremental_ids,
)
data = update_yt1b_annot_per_field(
data=data,
field="video_np_pairs",
id_col="video_id",
available_ids=available_video_incremental_ids,
)
with open(yt1b_output_annot_path, "w") as f:
json.dump(data, f)
return data
def main():
parser = argparse.ArgumentParser(description="Run video grounding evaluators")
parser.add_argument(
"--yt1b_media_dir",
type=str,
help="Path to the directory where the yt1b media is stored e.g media/saco_yt1b/JPEGImages_6fps",
)
parser.add_argument(
"--yt1b_input_annot_path",
type=str,
help="Path to the saco_veval_yt1b input annotation file e.g annotation/saco_veval_yt1b_test.json or annotation/saco_veval_yt1b_val.json",
)
parser.add_argument(
"--yt1b_output_annot_path",
type=str,
help="Path to the output annotation file e.g annotation/saco_veval_yt1b_test_updated.json or annotation/saco_veval_yt1b_val_updated.json",
)
parser.add_argument(
"--yt1b_annot_update_log_path",
type=str,
help="Path to the yt1b annot update log file e.g annotation/yt1b_annot_update_log.log",
)
args = parser.parse_args()
os.makedirs(os.path.dirname(args.yt1b_annot_update_log_path), exist_ok=True)
os.makedirs(os.path.dirname(args.yt1b_output_annot_path), exist_ok=True)
logging.basicConfig(
filename=args.yt1b_annot_update_log_path,
format="%(asctime)s [%(threadName)s] %(levelname)s: %(message)s",
level=logging.INFO,
filemode="w",
)
_ = update_yt1b_annot(
yt1b_input_annot_path=args.yt1b_input_annot_path,
yt1b_media_dir=args.yt1b_media_dir,
yt1b_output_annot_path=args.yt1b_output_annot_path,
)
print("Done!! Check the log at", args.yt1b_annot_update_log_path)
if __name__ == "__main__":
main()