65 lines
2.0 KiB
Python
65 lines
2.0 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
|
|
|
|
# pyre-unsafe
|
|
import argparse
|
|
import json
|
|
import os
|
|
from multiprocessing import Pool
|
|
from pathlib import Path
|
|
|
|
import requests
|
|
from fathomnet.api import images
|
|
from tqdm import tqdm
|
|
|
|
|
|
def download_imgs(args, image_uuids):
    """Download the FathomNet images identified by ``image_uuids``.

    Each uuid is looked up with ``images.find_by_uuid`` and the content at the
    returned record's ``url`` is streamed to
    ``<args.processed_images_folder>/<image.uuid>.<ext>``, where ``<ext>`` is
    the text after the last ``.`` in the URL. Files that already exist are
    skipped, so the function can be re-run to resume an interrupted download.

    Args:
        args: Parsed command-line namespace; only ``processed_images_folder``
            is read here.
        image_uuids: Iterable of image uuids to fetch.

    Request failures (``requests.exceptions.RequestException``) are printed
    and the loop moves on to the next image. Any other exception propagates.
    """
    out_dir = Path(args.processed_images_folder)
    # (connect, read) seconds. Without a timeout a stalled server would block
    # this worker forever.
    timeout = (10, 60)
    flag = 0
    for uuid in tqdm(image_uuids, desc="Downloading images"):
        image = images.find_by_uuid(uuid)
        file_name = out_dir / f"{image.uuid}.{image.url.split('.')[-1]}"
        if file_name.exists():
            continue

        # Stream into a temporary sibling file and rename it only after the
        # whole body has arrived. Writing straight to ``file_name`` would leave
        # a truncated image behind on a mid-transfer error, and the
        # ``exists()`` check above would then skip it on every later run.
        # The pid keeps temp names distinct between worker processes.
        tmp_name = file_name.with_name(f"{file_name.name}.{os.getpid()}.part")
        try:
            # The context manager releases the streamed connection even when
            # an error interrupts the transfer.
            with requests.get(image.url, stream=True, timeout=timeout) as resp:
                resp.raise_for_status()
                with open(tmp_name, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
            tmp_name.replace(file_name)
            flag += 1
        except requests.exceptions.RequestException as e:
            print(f"Error downloading {image.url}: {e}")
        finally:
            # After a successful rename the temp file is already gone; this
            # only removes leftovers from a failed attempt.
            if tmp_name.exists():
                tmp_name.unlink()
    print(f"Downloaded {flag} new images to {args.processed_images_folder}")
|
|
|
|
|
|
def main():
    """Parse CLI arguments and download FathomNet images in parallel.

    Reads a JSON file of image uuids (``--image-uuids``), creates the output
    directory (``--processed_images_folder``), splits the uuids into
    contiguous chunks, and hands each chunk to ``download_imgs`` in a
    ``multiprocessing.Pool`` of up to ``--num-procs`` workers.
    """
    parser = argparse.ArgumentParser(description="Download images from FathomNet")
    # Required: without it ``Path(None)`` below would fail with a TypeError
    # instead of a readable usage message.
    parser.add_argument(
        "--processed_images_folder",
        required=True,
        help="Path to downloaded images",
    )
    parser.add_argument(
        "--image-uuids",
        default="fathomnet_image_uuids.json",
        help="Path to JSON file containing image uuids to download",
    )
    parser.add_argument(
        "--num-procs", type=int, default=16, help="Number of parallel processes"
    )
    args = parser.parse_args()
    if args.num_procs < 1:
        parser.error("--num-procs must be at least 1")

    with open(args.image_uuids, "r") as f:
        all_uuids = json.load(f)

    Path(args.processed_images_folder).mkdir(parents=True, exist_ok=True)

    if not all_uuids:
        # Nothing to split or dispatch; avoid starting a pool for no work.
        print("No image uuids to download")
        return

    # Floor division yields 0 when there are fewer uuids than processes, and
    # ``range()`` rejects a step of 0, so clamp the chunk size to at least 1.
    chunk_size = max(1, len(all_uuids) // args.num_procs)
    chunks = [
        all_uuids[i : i + chunk_size] for i in range(0, len(all_uuids), chunk_size)
    ]

    # Never start more workers than there are chunks to process.
    with Pool(processes=min(args.num_procs, len(chunks))) as pool:
        pool.starmap(download_imgs, [(args, chunk) for chunk in chunks])
|
|
|
|
|
|
# Script entry point: start the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|