sam3_local/sam3/train/configs/odinw13/odinw_text_only_positive.yaml

# @package _global_
# Hydra defaults list: the only entry is _self_, i.e. this file's own contents.
defaults:
  - _self_

# ============================================================================
# Paths Configuration (Change this to your own paths)
# ============================================================================
# Example launch command:
#   python sam3/train/train.py -c configs/odinw13/odinw_text_only_positive.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}

# User-specific locations; replace every <...> placeholder before launching.
paths:
  # Root that the img_folder / json entries of all_odinw_supercategories are appended to.
  odinw_data_root: <YOUR_DATA_DIR>
  # Base directory for dumps, checkpoints, tensorboard files and logs
  # (consumed through launcher.experiment_log_dir).
  experiment_log_dir: <YOUR EXPERIMENET LOG_DIR>
  bpe_path: <BPE_PATH> # Expected to point at sam3/assets/bpe_simple_vocab_16e6.txt.gz


# Dataset entry evaluated by this job: the element of all_odinw_supercategories
# selected by submitit.job_array.task_index. The index goes through the `string`
# resolver, which is defined outside this file (presumably it casts the integer
# index to a string key — confirm).
supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}
# Transform pipeline applied to validation samples
val_transforms:
  - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
    transforms:
      # Resize step, driven by scratch.resolution with a square output requested
      - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
        sizes: ${scratch.resolution}
        max_size:
          _target_: sam3.train.transforms.basic.get_random_resize_max_size
          size: ${scratch.resolution}
        square: true
        consistent_transform: false
      # Tensor conversion, then normalization with the scratch mean/std values
      - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
      - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
        mean: ${scratch.val_norm_mean}
        std: ${scratch.val_norm_std}

# ============================================================================
# Different helper parameters and functions
# ============================================================================
# Shared values referenced through ${scratch.*} interpolations elsewhere in this file
scratch:
  enable_segmentation: true
  # Box post-processing (referenced by the prediction dumper under trainer.meters)
  use_presence_eval: true
  original_box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1  # infinite detections
    use_original_ids: true
    use_original_sizes_box: true
    use_presence: ${scratch.use_presence_eval}

  # Resolution handed to the resize transform in val_transforms
  resolution: 1008
  # Normalization statistics handed to NormalizeAPI in val_transforms
  val_norm_mean: [0.5, 0.5, 0.5]
  val_norm_std: [0.5, 0.5, 0.5]

  # Validation data-loader parameters
  val_batch_size: 2
  num_val_workers: 0
  gather_pred_via_filesys: false

# ============================================================================
# Trainer Configuration
# ============================================================================

trainer:
  _target_: sam3.train.trainer.Trainer
  # Validation-mode run over a single epoch, with checkpoint saving skipped
  mode: val
  max_epochs: 1
  skip_saving_ckpts: true
  empty_gpu_mem_cache_after_eval: true
  accelerator: cuda
  seed_value: 123

  # Distributed-run options (NCCL backend)
  distributed:
    backend: nccl
    find_unused_parameters: true
    gradient_as_bucket_view: true

  # Loss entry points at DummyLoss; this config sets trainer.mode to val.
  loss:
    default:
      _target_: sam3.train.loss.sam3_loss.DummyLoss

  data:
    # Validation loader for the dataset selected by supercategory_tuple
    val:
      _target_: sam3.train.data.torch_dataset.TorchDataset
      dataset:
        _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
        coco_json_loader:
          _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
          # Per-dataset prompt entry looked up in odinw35_prompts (null for most datasets)
          prompts: ${odinw35_prompts.${supercategory_tuple.name}}
          include_negatives: true
          category_chunk_size: 20 # Note: Since we are doing AP +ve we need to include all categories!
          _partial_: true
        # Image folder and annotation JSON are both resolved under paths.odinw_data_root
        img_folder: ${paths.odinw_data_root}/${supercategory_tuple.val.img_folder}
        ann_file:
          _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
          input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
        transforms: ${val_transforms}
        max_ann_per_img: 100000
        multiplier: 1
        training: false

      # Loader settings: fixed order, no dropped final batch
      shuffle: false
      batch_size: ${scratch.val_batch_size}
      num_workers: ${scratch.num_val_workers}
      pin_memory: false
      drop_last: false
      collate_fn:
        _target_: sam3.train.data.collator.collate_fn_api
        _partial_: true
        repeats: 1
        # Same key name as the entry under trainer.meters.val
        dict_key: odinw35

  # Model builder arguments; bpe_path and enable_segmentation are taken from
  # the paths and scratch sections of this file.
  model:
    _target_: sam3.model_builder.build_sam3_image_model
    bpe_path: ${paths.bpe_path}
    # "cpu" is the valid torch device name; the previous value "cpus" is not a
    # recognized device string.
    device: cpu
    eval_mode: true # Set to false if training
    enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.

  meters:
    val:
      odinw35:
        detection:
          # Box prediction dumper; pred_file_evaluators lists the evaluators configured on it
          _target_: sam3.eval.coco_writer.PredictionDumper
          iou_type: bbox
          # NOTE(review): the path segment says "roboflow" although this is an ODinW config — confirm it is intended.
          dump_dir: ${launcher.experiment_log_dir}/dumps/roboflow/${supercategory_tuple.name}
          merge_predictions: true
          postprocessor: ${scratch.original_box_postprocessor}
          gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
          maxdets: 100
          pred_file_evaluators:
            - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
              # Ground truth is the same annotation file the dataset loads, passed through the same reindex helper
              gt_path:
                _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
                input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
              tide: false
              iou_type: bbox
              positive_split: true

  # Checkpoint directory and save frequency.
  # NOTE(review): trainer.skip_saving_ckpts is true in this config, so presumably
  # nothing is written here — confirm against the trainer.
  checkpoint:
    save_dir: ${launcher.experiment_log_dir}/checkpoints
    save_freq: 0  # 0: only the last checkpoint is saved.


  logging:
    # TensorBoard writer is configured; the wandb writer is null
    tensorboard_writer:
      _target_: sam3.train.utils.logger.make_tensorboard_logger
      log_dir: ${launcher.experiment_log_dir}/tensorboard
      flush_secs: 120
      should_log: true
    wandb_writer: null
    # Log directory is namespaced by the dataset name
    log_dir: ${launcher.experiment_log_dir}/logs/${supercategory_tuple.name}
    log_freq: 10

# ============================================================================
# Launcher and Submitit Configuration
# ============================================================================

# Job shape (nodes / GPUs per node) and the experiment directory that the
# trainer's dump, checkpoint and logging paths interpolate from.
launcher:
  num_nodes: 1
  gpus_per_node: 2
  experiment_log_dir: ${paths.experiment_log_dir}
  multiprocessing_context: forkserver

submitit:
  # Cluster account / partition / QoS are null here; the example launch command
  # at the top of this file passes them as CLI flags.
  account: null
  partition: null
  qos: null
  timeout_hour: 72
  use_cluster: true
  cpus_per_task: 10
  port_range: [10000, 65000]
  constraint: null

  # num_tasks equals the number of entries in all_odinw_supercategories;
  # task_index selects the entry used for supercategory_tuple.
  job_array:
    num_tasks: 13
    task_index: 0

# ============================================================================
# ODinW13 Supercategories
# ============================================================================

# One entry per ODinW13 dataset. `name` is used as the lookup key into
# odinw35_prompts and in the dump/log directory names; `val.img_folder` and
# `val.json` are appended to paths.odinw_data_root.
all_odinw_supercategories:
  - name: AerialMaritimeDrone_large
    val:
      img_folder: AerialMaritimeDrone/large/test/
      json: AerialMaritimeDrone/large/test/annotations_without_background.json
  - name: Aquarium
    val:
      img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
      json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
  - name: CottontailRabbits
    val:
      img_folder: CottontailRabbits/test/
      json: CottontailRabbits/test/annotations_without_background.json
  - name: EgoHands_generic
    val:
      img_folder: EgoHands/generic/test/
      json: EgoHands/generic/test/annotations_without_background.json
  - name: NorthAmericaMushrooms
    val:
      img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
      json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
  - name: Packages
    val:
      img_folder: Packages/Raw/test/
      json: Packages/Raw/test/annotations_without_background.json
  - name: PascalVOC
    val:
      img_folder: PascalVOC/valid/
      json: PascalVOC/valid/annotations_without_background.json
  - name: Raccoon
    val:
      img_folder: Raccoon/Raccoon.v2-raw.coco/test/
      json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
  - name: ShellfishOpenImages
    val:
      img_folder: ShellfishOpenImages/raw/test/
      json: ShellfishOpenImages/raw/test/annotations_without_background.json
  - name: VehiclesOpenImages
    val:
      img_folder: VehiclesOpenImages/416x416/test/
      json: VehiclesOpenImages/416x416/test/annotations_without_background.json
  - name: pistols
    val:
      img_folder: pistols/export/
      json: pistols/export/test_annotations_without_background.json
  - name: pothole
    val:
      img_folder: pothole/test/
      json: pothole/test/annotations_without_background.json
  - name: thermalDogsAndPeople
    val:
      img_folder: thermalDogsAndPeople/test/
      json: thermalDogsAndPeople/test/annotations_without_background.json


# Per-dataset prompt entries, keyed by the `name` of each supercategory entry and
# read by the COCO JSON loader's `prompts` argument. null means no entry is given.
# Non-null values are strings holding a list of {id, name, supercategory} records;
# both are written with double-quoted keys/values so the text is valid JSON as well
# as a valid Python literal. Folded scalars (>-) join the lines with single spaces.
odinw35_prompts:
  AerialMaritimeDrone_large: >-
    [{"id": 1, "name": "boat", "supercategory": "movable-objects"},
    {"id": 2, "name": "car", "supercategory": "movable-objects"},
    {"id": 3, "name": "dock", "supercategory": "movable-objects"},
    {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
    {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]
  Aquarium: null
  CottontailRabbits: null
  EgoHands_generic: null
  NorthAmericaMushrooms: >-
    [{"id": 1, "name": "chicken of the woods", "supercategory": "mushroom"},
    {"id": 2, "name": "chanterelle", "supercategory": "mushroom"}]
  Packages: null
  PascalVOC: null
  Raccoon: null
  ShellfishOpenImages: null
  VehiclesOpenImages: null
  pistols: null
  pothole: null
  thermalDogsAndPeople: null