Files
sam3_local/scripts/eval/silver/download_fathomnet.py
generatedunixname89002005307016 7b89b8fc3f Add missing Pyre mode headers] [batch:11/N] [shard:17/N]
Differential Revision: D90237984

fbshipit-source-id: 526fd760f303bf31be4f743bdcd77760496de0de
2026-01-07 05:16:41 -08:00

65 lines
2.0 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
# pyre-unsafe
import argparse
import json
import os
from multiprocessing import Pool
from pathlib import Path
import requests
from fathomnet.api import images
from tqdm import tqdm
def download_imgs(args, image_uuids):
    """Download the images for ``image_uuids`` into the output folder.

    Each uuid is resolved with ``images.find_by_uuid`` and saved as
    ``<uuid>.<ext>`` under ``args.processed_images_folder``, where ``<ext>``
    is whatever follows the last ``.`` in the image URL. Images whose target
    file already exists are skipped, so the script can be re-run to resume.

    Args:
        args: Parsed CLI namespace; only ``processed_images_folder`` is read.
        image_uuids: Iterable of image uuids to fetch.

    Download failures (``requests.exceptions.RequestException``) are printed
    and skipped; they do not abort the remaining downloads.
    """
    # (connect, read) timeouts in seconds, so a stalled server cannot hang
    # the worker forever.
    timeout = (10, 60)
    out_dir = Path(args.processed_images_folder)
    flag = 0
    for uuid in tqdm(image_uuids, desc="Downloading images"):
        image = images.find_by_uuid(uuid)
        file_name = out_dir / f"{image.uuid}.{image.url.split('.')[-1]}"
        if file_name.exists():
            continue
        # Stream into a per-process temp file and rename only on success.
        # Otherwise an interrupted transfer would leave a truncated file that
        # the exists() check above treats as already downloaded.
        tmp_name = file_name.with_name(f"{file_name.name}.{os.getpid()}.part")
        try:
            with requests.get(image.url, stream=True, timeout=timeout) as resp:
                resp.raise_for_status()
                with open(tmp_name, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=1024):
                        f.write(chunk)
            os.replace(tmp_name, file_name)
            flag += 1
        except requests.exceptions.RequestException as e:
            print(f"Error downloading {image.url}: {e}")
        finally:
            # After a successful os.replace the temp file is gone; this only
            # cleans up partial downloads.
            if tmp_name.exists():
                tmp_name.unlink()
    print(f"Downloaded {flag} new images to {args.processed_images_folder}")
def main():
    """Parse CLI arguments and download the listed images in parallel.

    Reads a JSON list of image uuids, creates the output folder if needed,
    splits the uuids into contiguous chunks and hands each chunk to
    ``download_imgs`` in a multiprocessing pool.
    """
    parser = argparse.ArgumentParser(description="Download images from FathomNet")
    # Required: without it Path(None) below would fail with an opaque TypeError.
    parser.add_argument(
        "--processed_images_folder",
        required=True,
        help="Path to downloaded images",
    )
    parser.add_argument(
        "--image-uuids",
        default="fathomnet_image_uuids.json",
        help="Path to JSON file containing image uuids to download",
    )
    parser.add_argument(
        "--num-procs", type=int, default=16, help="Number of parallel processes"
    )
    args = parser.parse_args()
    if args.num_procs < 1:
        parser.error("--num-procs must be at least 1")

    with open(args.image_uuids, "r", encoding="utf-8") as f:
        all_uuids = json.load(f)
    Path(args.processed_images_folder).mkdir(parents=True, exist_ok=True)

    if not all_uuids:
        print(f"No image uuids found in {args.image_uuids}")
        return

    # Ceil division, floored at 1: plain floor division gives 0 when there are
    # fewer uuids than processes, and range() rejects a zero step.
    chunk_size = max(1, -(-len(all_uuids) // args.num_procs))
    chunks = [
        all_uuids[i : i + chunk_size] for i in range(0, len(all_uuids), chunk_size)
    ]
    # No point in spawning more workers than there are chunks.
    with Pool(processes=min(args.num_procs, len(chunks))) as pool:
        pool.starmap(download_imgs, [(args, chunk) for chunk in chunks])
# Run the downloader only when executed as a script. main() creates a
# multiprocessing Pool, so importing this module must not trigger it.
if __name__ == "__main__":
    main()