first commit
This commit is contained in:
64
scripts/eval/silver/download_fathomnet.py
Normal file
64
scripts/eval/silver/download_fathomnet.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
|
||||
|
||||
# pyre-unsafe
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from multiprocessing import Pool
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from fathomnet.api import images
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def download_imgs(args, image_uuids):
|
||||
flag = 0
|
||||
for uuid in tqdm(image_uuids, desc="Downloading images"):
|
||||
image = images.find_by_uuid(uuid)
|
||||
file_name = (
|
||||
Path(args.processed_images_folder)
|
||||
/ f"{image.uuid}.{image.url.split('.')[-1]}"
|
||||
)
|
||||
if not file_name.exists():
|
||||
try:
|
||||
resp = requests.get(image.url, stream=True)
|
||||
resp.raise_for_status()
|
||||
with open(file_name, "wb") as f:
|
||||
for chunk in resp.iter_content(chunk_size=1024):
|
||||
f.write(chunk)
|
||||
flag += 1
|
||||
except requests.exceptions.RequestException as e:
|
||||
print(f"Error downloading {image.url}: {e}")
|
||||
print(f"Downloaded {flag} new images to {args.processed_images_folder}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Download images from FathomNet")
|
||||
parser.add_argument("--processed_images_folder", help="Path to downloaded images")
|
||||
parser.add_argument(
|
||||
"--image-uuids",
|
||||
default="fathomnet_image_uuids.json",
|
||||
help="Path to JSON file containing image uuids to download",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-procs", type=int, default=16, help="Number of parallel processes"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with open(args.image_uuids, "r") as f:
|
||||
all_uuids = json.load(f)
|
||||
|
||||
Path(args.processed_images_folder).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
chunk_size = len(all_uuids) // args.num_procs
|
||||
chunks = [
|
||||
all_uuids[i : i + chunk_size] for i in range(0, len(all_uuids), chunk_size)
|
||||
]
|
||||
|
||||
with Pool(processes=args.num_procs) as pool:
|
||||
pool.starmap(download_imgs, [(args, chunk) for chunk in chunks])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user