Differential Revision: D90237984 fbshipit-source-id: 526fd760f303bf31be4f743bdcd77760496de0de
233 lines
7.9 KiB
Python
233 lines
7.9 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
|
|
|
|
# pyre-unsafe
|
|
|
|
"""
|
|
Self-contained COCO JSON re-indexing function that creates temporary files.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
|
|
def reindex_coco_to_temp(input_json_path: str) -> Optional[str]:
    """
    Convert a 0-indexed COCO JSON file to 1-indexed and save it to a temporary location.

    Each of the three ID spaces (images, categories, annotations) is shifted
    by +1 only if that section contains an entry whose ``id`` is 0.
    ``image_id`` / ``category_id`` references inside annotations are updated
    to follow the images / categories they point to.

    Args:
        input_json_path: Path to the input COCO JSON file

    Returns:
        Path to a JSON file named ``<stem>_1_indexed<suffix>`` inside a freshly
        created temporary directory. When no section is 0-indexed the data is
        written there unchanged, so a path is returned in that case as well.
        The directory comes from ``tempfile.mkdtemp`` and is not removed
        automatically; the caller is responsible for deleting it.

    Raises:
        FileNotFoundError: If input file doesn't exist
        json.JSONDecodeError: If input file is not valid JSON
        ValueError: If input file is not a valid COCO format
    """

    def is_coco_json(data: Dict[str, Any]) -> bool:
        """Check if data appears to be a COCO format file."""
        if not isinstance(data, dict):
            return False
        # A COCO file should have at least one of these keys
        coco_keys = {"images", "annotations", "categories"}
        return any(key in data for key in coco_keys)

    def check_zero_indexed(data: Dict[str, Any]) -> Tuple[bool, bool, bool]:
        """
        Check if annotations, images, or categories contain an entry with id 0.

        Returns:
            Tuple of (annotations_zero_indexed, images_zero_indexed, categories_zero_indexed)
        """

        def has_zero_id(section: str) -> bool:
            # A missing or empty section can never be 0-indexed.
            entries = data.get(section) or []
            return any(entry.get("id", -1) == 0 for entry in entries)

        return (
            has_zero_id("annotations"),
            has_zero_id("images"),
            has_zero_id("categories"),
        )

    def shift_ids(entries: List[Dict[str, Any]]) -> Dict[Any, Any]:
        """Add 1 to the ``id`` of every entry in place; return the old -> new mapping."""
        mapping = {}
        for entry in entries:
            old_id = entry["id"]
            mapping[old_id] = old_id + 1
            entry["id"] = old_id + 1
        return mapping

    def reindex_coco_data(
        data: Dict[str, Any], zero_flags: Tuple[bool, bool, bool]
    ) -> Dict[str, Any]:
        """
        Convert 0-indexed COCO data to 1-indexed.

        The entries of ``data`` are modified in place (``data`` is the freshly
        loaded file content, so nothing else holds a reference to it) and the
        same dict is returned.
        """
        annotations_zero, images_zero, categories_zero = zero_flags

        # Images and categories go first: annotations reference their IDs, so
        # the old -> new mappings must exist before annotations are rewritten.
        image_id_mapping = shift_ids(data["images"]) if images_zero else {}
        category_id_mapping = shift_ids(data["categories"]) if categories_zero else {}

        for ann in data.get("annotations") or []:
            # Update annotation ID if needed
            if annotations_zero:
                ann["id"] = ann["id"] + 1

            # Follow the referenced image/category only when it was actually
            # renumbered; dangling references are left untouched.
            image_ref = ann.get("image_id")
            if image_ref is not None and image_ref in image_id_mapping:
                ann["image_id"] = image_id_mapping[image_ref]

            category_ref = ann.get("category_id")
            if category_ref is not None and category_ref in category_id_mapping:
                ann["category_id"] = category_id_mapping[category_ref]

        return data

    def write_to_temp(payload: Dict[str, Any]) -> str:
        """Dump ``payload`` as JSON into a new temporary directory and return the file path."""
        input_path = Path(input_json_path)
        temp_dir = tempfile.mkdtemp()
        temp_filename = f"{input_path.stem}_1_indexed{input_path.suffix}"
        temp_path = os.path.join(temp_dir, temp_filename)
        with open(temp_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
        return temp_path

    # Validate input path
    if not os.path.exists(input_json_path):
        raise FileNotFoundError(f"Input file not found: {input_json_path}")

    # Load and validate JSON data
    try:
        with open(input_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # JSONDecodeError requires (msg, doc, pos); constructing it with only a
        # message raises TypeError and hides the real parse error.
        raise json.JSONDecodeError(
            f"Invalid JSON in {input_json_path}: {e.msg}", e.doc, e.pos
        ) from e

    # Validate COCO format
    if not is_coco_json(data):
        raise ValueError(
            f"File does not appear to be in COCO format: {input_json_path}"
        )

    # Check once which sections need reindexing
    zero_flags = check_zero_indexed(data)

    if not any(zero_flags):
        # No conversion needed - just copy to temp location
        return write_to_temp(data)

    # Perform reindexing and write the modified data to a temporary file
    return write_to_temp(reindex_coco_data(data, zero_flags))
|
|
|
|
|
|
# Example usage and test function
|
|
def test_reindex_function():
    """Test the reindex function with a sample COCO file.

    Writes a small 0-indexed COCO dataset to a temporary file, runs
    ``reindex_coco_to_temp`` on it, prints the first ID of each section of
    the converted file, and removes every temporary file it created, even
    when one of the steps raises.
    """

    # Create a test COCO file
    test_data = {
        "info": {"description": "Test COCO dataset", "version": "1.0", "year": 2023},
        "images": [
            {"id": 0, "width": 640, "height": 480, "file_name": "test1.jpg"},
            {"id": 1, "width": 640, "height": 480, "file_name": "test2.jpg"},
        ],
        "categories": [
            {"id": 0, "name": "person", "supercategory": "person"},
            {"id": 1, "name": "car", "supercategory": "vehicle"},
        ],
        "annotations": [
            {
                "id": 0,
                "image_id": 0,
                "category_id": 0,
                "bbox": [100, 100, 50, 75],
                "area": 3750,
                "iscrowd": 0,
            },
            {
                "id": 1,
                "image_id": 1,
                "category_id": 1,
                "bbox": [200, 150, 120, 80],
                "area": 9600,
                "iscrowd": 0,
            },
        ],
    }

    # Create temporary test file
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        json.dump(test_data, f, indent=2)
        test_file_path = f.name

    # Stays None until the conversion succeeds, so cleanup knows what exists.
    result_path = None
    try:
        # Test the function
        result_path = reindex_coco_to_temp(test_file_path)
        print(f"Original file: {test_file_path}")
        print(f"Converted file: {result_path}")

        # Load and display the result (the converter writes UTF-8)
        with open(result_path, "r", encoding="utf-8") as f:
            result_data = json.load(f)

        print("\nConverted data sample:")
        print(f"First image ID: {result_data['images'][0]['id']}")
        print(f"First category ID: {result_data['categories'][0]['id']}")
        print(f"First annotation ID: {result_data['annotations'][0]['id']}")
        print(f"First annotation image_id: {result_data['annotations'][0]['image_id']}")
        print(
            f"First annotation category_id: {result_data['annotations'][0]['category_id']}"
        )
    finally:
        try:
            # Clean up the converted file and its temporary directory, also
            # when reading or printing the result failed.
            if result_path is not None and os.path.exists(result_path):
                os.unlink(result_path)
                os.rmdir(os.path.dirname(result_path))
        finally:
            # Clean up test file
            os.unlink(test_file_path)
|
|
|
|
|
|
if __name__ == "__main__":
    # Running the module as a script executes the demo/test; importing it does not.
    test_reindex_function()
|