Initial commit

fbshipit-source-id: da6be2f26e3a1202f4bffde8cb980e2dcb851294
2025-11-18 23:07:42 -08:00
commit a13e358df4
504 changed files with 122758 additions and 0 deletions
--- a/sam3/train/configs/odinw13/odinw_text_only.yaml
+++ b/sam3/train/configs/odinw13/odinw_text_only.yaml
@@ -0,0 +1,253 @@
+# @package _global_
+# The defaults list contains only `_self_`; nothing else is listed for
+# composition in this file.
+defaults:
+  - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+#  python sam3/train/train.py -c configs/odinw13/odinw_text_only.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}
+
+# User-supplied locations. Replace every <...> placeholder before launching;
+# the rest of this file reads these values through ${paths.*} interpolations.
+paths:
+  # Prefix joined with each `val.img_folder` / `val.json` entry of
+  # all_odinw_supercategories.
+  odinw_data_root: <YOUR_DATA_DIR>
+  # Output root; re-exported as launcher.experiment_log_dir.
+  experiment_log_dir: <YOUR_EXPERIMENT_LOG_DIR>
+  # Passed to the model builder as `bpe_path`.
+  bpe_path: <BPE_PATH> # This should be under assets/bpe_simple_vocab_16e6.txt.gz
+
+
+# Entry of `all_odinw_supercategories` evaluated by this job, selected by
+# `submitit.job_array.task_index` (0 in this file). `string` is a custom
+# resolver that is not defined here — presumably it converts the index to a
+# string key; confirm where the resolver is registered.
+supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}
+# Validation preprocessing: resize, tensor conversion, then normalization.
+# Referenced by trainer.data.val.dataset.transforms.
+val_transforms:
+  - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+    transforms:
+      # Resize driven by scratch.resolution; max_size comes from a helper call.
+      - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+        sizes: ${scratch.resolution}
+        max_size:
+          _target_: sam3.train.transforms.basic.get_random_resize_max_size
+          size: ${scratch.resolution}
+        square: true
+        consistent_transform: false
+      - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+      # Normalization constants are defined under `scratch`.
+      - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+        mean: ${scratch.val_norm_mean}
+        std: ${scratch.val_norm_std}
+
+# ============================================================================
+# Shared values, referenced elsewhere in this file via ${scratch.*}
+# ============================================================================
+scratch:
+  enable_segmentation: true
+  # Post-processor handed to the PredictionDumper meter
+  use_presence_eval: true
+  original_box_postprocessor:
+    _target_: sam3.eval.postprocessors.PostProcessImage
+    max_dets_per_img: -1  # infinite detections
+    use_original_ids: true
+    use_original_sizes_box: true
+    use_presence: ${scratch.use_presence_eval}
+
+  # Resize target used by val_transforms
+  resolution: 1008
+  # Normalization constants used by val_transforms
+  val_norm_mean: [0.5, 0.5, 0.5]
+  val_norm_std: [0.5, 0.5, 0.5]
+
+  # Validation data-loader and prediction-gathering settings
+  val_batch_size: 2
+  num_val_workers: 0
+  gather_pred_via_filesys: false
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+# Configured for a single validation pass: `mode: val`, one epoch, and
+# checkpoint saving skipped.
+trainer:
+  _target_: sam3.train.trainer.Trainer
+  skip_saving_ckpts: true
+  empty_gpu_mem_cache_after_eval: true
+  max_epochs: 1
+  accelerator: cuda
+  seed_value: 123
+  mode: val
+
+  # Distributed setup options (NCCL backend).
+  distributed:
+    backend: nccl
+    find_unused_parameters: true
+    gradient_as_bucket_view: true
+
+  # `DummyLoss` — presumably a stand-in, since trainer.mode is `val` in this
+  # file; confirm against sam3.train.loss.sam3_loss.
+  loss:
+    default:
+      _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+  # Validation data: one ODinW dataset, chosen through `supercategory_tuple`.
+  data:
+    val:
+      _target_: sam3.train.data.torch_dataset.TorchDataset
+      dataset:
+        _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+        # Partial loader; prompts are looked up by the dataset's name.
+        coco_json_loader:
+          _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+          prompts: ${odinw35_prompts.${supercategory_tuple.name}}
+          include_negatives: true
+          category_chunk_size: 20 # Note: Since we are doing AP +ve we need to include all categories!
+          _partial_: true
+        img_folder: ${paths.odinw_data_root}/${supercategory_tuple.val.img_folder}
+        # Annotation file is produced by a helper call on the dataset's JSON.
+        ann_file:
+          _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+          input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+        transforms: ${val_transforms}
+        max_ann_per_img: 100000
+        multiplier: 1
+        training: false
+
+      # Loader options
+      shuffle: false
+      batch_size: ${scratch.val_batch_size}
+      num_workers: ${scratch.num_val_workers}
+      pin_memory: false
+      drop_last: false
+      collate_fn:
+        _target_: sam3.train.data.collator.collate_fn_api
+        _partial_: true
+        repeats: 1
+        # Same key as the entry under trainer.meters.val.
+        dict_key: odinw35
+
+  # Model is built by calling build_sam3_image_model with the keys below.
+  model:
+    _target_: sam3.model_builder.build_sam3_image_model
+    bpe_path: ${paths.bpe_path}
+    # NOTE(review): `cpus` is not a standard torch device string (`cpu` is);
+    # confirm how build_sam3_image_model interprets this value before changing it.
+    device: cpus
+    eval_mode: true # Set to false if training
+    enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+  # Validation meters, keyed by the collator's dict_key (`odinw35`).
+  meters:
+    val:
+      odinw35:
+        detection:
+          _target_: sam3.eval.coco_writer.PredictionDumper
+          iou_type: 'bbox'
+          # One dump directory per dataset name.
+          dump_dir: ${launcher.experiment_log_dir}/dumps/odinw/${supercategory_tuple.name}
+          merge_predictions: true
+          postprocessor: ${scratch.original_box_postprocessor}
+          gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+          maxdets: 100
+          # Evaluators run on the dumped prediction file.
+          pred_file_evaluators:
+            - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+              # Ground truth goes through the same helper as the dataset's ann_file.
+              gt_path:
+                _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+                input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+              tide: false
+              iou_type: 'bbox'
+              positive_split: false
+
+  # NOTE(review): trainer.skip_saving_ckpts is true in this file, so presumably
+  # nothing is written to save_dir — confirm in the Trainer implementation.
+  checkpoint:
+    save_dir: ${launcher.experiment_log_dir}/checkpoints
+    save_freq: 0  # 0 = only the last checkpoint is saved.
+
+
+  # Logging: TensorBoard enabled, W&B disabled. The text-log directory is
+  # per dataset name; the TensorBoard directory is not.
+  logging:
+    tensorboard_writer:
+      _target_: sam3.train.utils.logger.make_tensorboard_logger
+      log_dir: ${launcher.experiment_log_dir}/tensorboard
+      flush_secs: 120
+      should_log: true
+    wandb_writer: null
+    log_dir: ${launcher.experiment_log_dir}/logs/${supercategory_tuple.name}
+    log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+  num_nodes: 1
+  gpus_per_node: 2
+  # Output root taken from `paths`; the dump, checkpoint, tensorboard and log
+  # directories under `trainer` all interpolate this key.
+  experiment_log_dir: ${paths.experiment_log_dir}
+  multiprocessing_context: forkserver
+
+# Cluster submission settings. account / partition / qos are null here; the
+# example command in the file header passes them on the command line.
+submitit:
+  account: null
+  partition: null
+  qos: null
+  timeout_hour: 72
+  use_cluster: true
+  cpus_per_task: 10
+  port_range: [10000, 65000]
+  constraint: null
+
+  # num_tasks equals the number of entries in all_odinw_supercategories;
+  # task_index feeds the `supercategory_tuple` lookup.
+  job_array:
+    num_tasks: 13
+    task_index: 0
+
+# ============================================================================
+# ODinW13 Supercategories
+# ============================================================================
+
+# One entry per dataset (13 in total). `name` is the lookup key into
+# `odinw35_prompts` and is also used in the dump and log directory names;
+# `val.img_folder` and `val.json` are joined to `paths.odinw_data_root`.
+all_odinw_supercategories:
+  - name: AerialMaritimeDrone_large
+    val:
+      img_folder: AerialMaritimeDrone/large/test/
+      json: AerialMaritimeDrone/large/test/annotations_without_background.json
+  - name: Aquarium
+    val:
+      img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
+      json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
+  - name: CottontailRabbits
+    val:
+      img_folder: CottontailRabbits/test/
+      json: CottontailRabbits/test/annotations_without_background.json
+  - name: EgoHands_generic
+    val:
+      img_folder: EgoHands/generic/test/
+      json: EgoHands/generic/test/annotations_without_background.json
+  - name: NorthAmericaMushrooms
+    val:
+      img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
+      json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
+  - name: Packages
+    val:
+      img_folder: Packages/Raw/test/
+      json: Packages/Raw/test/annotations_without_background.json
+  # Only entry whose paths use a `valid/` split directory.
+  - name: PascalVOC
+    val:
+      img_folder: PascalVOC/valid/
+      json: PascalVOC/valid/annotations_without_background.json
+  - name: Raccoon
+    val:
+      img_folder: Raccoon/Raccoon.v2-raw.coco/test/
+      json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
+  - name: ShellfishOpenImages
+    val:
+      img_folder: ShellfishOpenImages/raw/test/
+      json: ShellfishOpenImages/raw/test/annotations_without_background.json
+  - name: VehiclesOpenImages
+    val:
+      img_folder: VehiclesOpenImages/416x416/test/
+      json: VehiclesOpenImages/416x416/test/annotations_without_background.json
+  # Image folder is `export/`; the annotation file name carries a `test_` prefix.
+  - name: pistols
+    val:
+      img_folder: pistols/export/
+      json: pistols/export/test_annotations_without_background.json
+  - name: pothole
+    val:
+      img_folder: pothole/test/
+      json: pothole/test/annotations_without_background.json
+  - name: thermalDogsAndPeople
+    val:
+      img_folder: thermalDogsAndPeople/test/
+      json: thermalDogsAndPeople/test/annotations_without_background.json
+
+
+# Per-dataset prompt overrides, keyed by the `name` of each entry in
+# all_odinw_supercategories and passed to COCO_FROM_JSON as `prompts`.
+# Non-null values are strings holding a JSON array of
+# {"id", "name", "supercategory"} objects (double-quoted keys and values, so
+# the text is valid both as JSON and as a Python literal).
+# `null` means no override is supplied — presumably the loader then uses the
+# category names from the annotation file; confirm in COCO_FROM_JSON.
+odinw35_prompts:
+  AerialMaritimeDrone_large: '[{"id": 1, "name": "boat", "supercategory": "movable-objects"},
+    {"id": 2, "name": "car", "supercategory": "movable-objects"}, {"id": 3, "name": "dock",
+    "supercategory": "movable-objects"}, {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
+    {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]'
+  Aquarium: null
+  CottontailRabbits: null
+  EgoHands_generic: null
+  NorthAmericaMushrooms: '[{"id": 1, "name": "chicken of the woods", "supercategory": "mushroom"},
+    {"id": 2, "name": "chanterelle", "supercategory": "mushroom"}]'
+  Packages: null
+  PascalVOC: null
+  Raccoon: null
+  ShellfishOpenImages: null
+  VehiclesOpenImages: null
+  pistols: null
+  pothole: null
+  thermalDogsAndPeople: null