Files
sam3/scripts/eval/silver/preprocess_silver_geode_bdd100k_food_rec.py
2026-02-12 13:17:11 +08:00

73 lines
2.7 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
# pyre-unsafe
import argparse
from multiprocessing import Pool
from pathlib import Path
import pandas as pd
import utils
from tqdm import tqdm
def main(args, n_workers=20):
    """Collect (source, destination) image paths for a dataset and copy them.

    The destination of every image is a file directly inside
    ``args.processed_images_folder``; how the source path is derived depends
    on ``args.dataset_name``. Each pair is handed to ``utils.copy_file`` from
    a pool of worker processes.

    Args:
        args: Namespace-like object exposing ``annotation_file``,
            ``raw_images_folder``, ``processed_images_folder`` and
            ``dataset_name``. ``dataset_name`` must be ``"geode"``,
            ``"bdd100k"`` or ``"food_rec"``.
        n_workers: Number of worker processes in the copy pool.

    Raises:
        ValueError: If ``args.dataset_name`` is not a supported dataset.
        KeyError: For ``"geode"``, if an annotated image id has no matching
            entry in ``index.csv``.
    """
    supported_datasets = ("geode", "bdd100k", "food_rec")
    # Validate before touching the filesystem so a typo in the dataset name
    # fails fast with a clear message instead of an UnboundLocalError later.
    if args.dataset_name not in supported_datasets:
        raise ValueError(
            f"Unsupported dataset_name {args.dataset_name!r}; "
            f"expected one of {', '.join(supported_datasets)}"
        )

    raw_folder = Path(args.raw_images_folder)
    processed_folder = Path(args.processed_images_folder)
    # NOTE(review): presumably prepares the output folder -- see utils.setup.
    utils.setup(processed_folder)

    if args.dataset_name == "geode":
        # Only geode is keyed by image id; the other datasets use filenames.
        img_ids = utils.get_image_ids(args.annotation_file)
        metadata = pd.read_csv(raw_folder / "index.csv")
        # Flatten the nested relative path into a single file name.
        metadata["flat_filepath"] = metadata.file_path.apply(
            lambda x: x.replace("/", "_")
        )
        metadata["original_absolute_path"] = metadata.file_path.apply(
            lambda x: str((raw_folder / "images") / x)
        )
        metadata["new_absolute_path"] = metadata.flat_filepath.apply(
            lambda x: str(processed_folder / x)
        )
        # The flattened file stem is the key used to look up annotated ids.
        metadata["filestem"] = metadata.new_absolute_path.apply(
            lambda x: Path(x).stem
        )
        # Only the two path columns are needed for the lookup.
        img_id_mapping = metadata.set_index("filestem")[
            ["original_absolute_path", "new_absolute_path"]
        ].to_dict()
        paths = [
            (
                img_id_mapping["original_absolute_path"][each],
                img_id_mapping["new_absolute_path"][each],
            )
            for each in img_ids
        ]
    elif args.dataset_name == "bdd100k":
        bdd_subfolder = "100k/train"
        img_filenames = utils.get_filenames(args.annotation_file)
        raw_folder_bdd_images = raw_folder / bdd_subfolder
        paths = [
            (raw_folder_bdd_images / each, processed_folder / each)
            for each in img_filenames
        ]
    else:  # "food_rec" -- guaranteed by the validation above
        food_subfolder = "public_validation_set_2.0/images"
        img_filenames = utils.get_filenames(args.annotation_file)
        raw_folder_food_images = raw_folder / food_subfolder
        paths = [
            (
                # The raw file is named after the last "_"-separated token of
                # the annotated file stem, with the same suffix.
                raw_folder_food_images
                / f"{Path(each).stem.split('_')[-1]}{Path(each).suffix}",
                processed_folder / each,
            )
            for each in img_filenames
        ]

    print("Preparing to copy and flatten filename for", len(paths), "images")
    # Honour n_workers instead of a hard-coded pool size.
    with Pool(n_workers) as p:
        # Drain the iterator; it is consumed only to drive the progress bar.
        for _ in tqdm(p.imap(utils.copy_file, paths), total=len(paths)):
            pass
if __name__ == "__main__":
    # Command-line entry point. All four options are required: main() builds
    # paths from them and would otherwise fail on a None value.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--annotation_file", required=True, help="Path to annotation file"
    )
    parser.add_argument(
        "--raw_images_folder", required=True, help="Path to downloaded images"
    )
    parser.add_argument(
        "--processed_images_folder",
        required=True,
        help="Path to processed images",
    )
    parser.add_argument(
        "--dataset_name",
        required=True,
        # These are the dataset names main() branches on.
        choices=["geode", "bdd100k", "food_rec"],
        help="Name of the dataset to preprocess",
    )
    args = parser.parse_args()
    main(args)