# Provenance (from upstream PR, kept for reference):
# Several imports within the `sam3.model` package referenced other packages
# within `sam3` besides `sam3` and `sam3.model`. That PR fixed the package
# structure so that you can `pip install` the package and `import sam3`.
# Pull Request resolved: https://github.com/facebookresearch/sam3/pull/327
# Reviewed By: haithamkhedr
# Differential Revision: D88950127
# Pulled By: lematt1991
# fbshipit-source-id: 3554512d304ccdf679a9af8606bbfe1f7f2a1cfb
# @package _global_
defaults:
  - _self_
# This config is the base configuration for all evaluations. Amongst other things, it defines:
# - the model
# - the image transforms
# - the post processors
# - cluster configuration (only relevant for slurm-based evals, ignored otherwise)
#
# Most of the parameters should be kept as-is. The main modifications you may want to make are:
# - the cluster configuration, to adjust partitions/qos to your system
# - the flag gather_pred_via_filesys if your RAM is tight
# - num_val_workers if your number of cores is small (should be roughly number of cores / number of gpus)
# - the paths below

# ============================================================================
# Paths Configuration (Change this to your own paths)
# ============================================================================
paths:
  # If you leave the checkpoint path as null, the model will be downloaded
  # from hugging-face. Otherwise provide a path.
  checkpoint_path: null

  # The experiments will be subfolders of this.
  base_experiment_log_dir: <YOUR EXPERIMENT LOG_DIR>

  # Base path to the annotation folder for gold (refer to the readmes on how to download).
  base_annotation_path: <YOUR_GOLD_GT_DIR>

  # Base path to the annotation folder for silver (refer to the readmes on how to download).
  base_annotation_path_silver: <YOUR_SILVER_GT_DIR>

  # Path to the metaclip images, used for SA-Co gold (refer to the readme for
  # instructions). Can be null if you don't intend on evaluating on this dataset.
  metaclip_img_path: <YOUR_METACLIP_IMG_DIR>

  # Path to the sa1b images, used for SA-Co gold (refer to the readme for
  # instructions). Can be null if you don't intend on evaluating on this dataset.
  sa1b_img_path: <YOUR_SA1B_IMG_DIR>

  # Path to the SA-Co/silver images.
  silver_img_path: <YOUR_SILVER_IMG_DIR>

  bpe_path: <BPE_PATH> # This should be under sam3/assets/bpe_simple_vocab_16e6.txt.gz

# ============================================================================
# Different helper parameters and functions
# ============================================================================
scratch:
  use_presence_eval: true

  # Validation-time image transforms, instantiated by Hydra via `_target_`.
  base_val_transform:
    - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
      transforms:
        ######## transforms for validation (begin) ########
        - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
          sizes: ${scratch.resolution} # originally `resolution: 1024`
          max_size:
            _target_: sam3.train.transforms.basic.get_random_resize_max_size
            size: ${scratch.resolution} # originally `resolution: 1024`
            square: true
          consistent_transform: false
        ######## transforms for validation (end) ########
        - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
        - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
          mean: ${scratch.val_norm_mean}
          std: ${scratch.val_norm_std}

  loss: null

  # Model parameters
  d_model: 256
  input_box_embedding_dim: ${add:${scratch.d_model},2}

  # Box processing
  original_box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: true
    use_original_sizes_box: true
    use_presence: ${scratch.use_presence_eval}

  box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: false
    use_original_sizes_box: false
    use_presence: ${scratch.use_presence_eval}

  box_postprocessor_thresholded:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: false
    use_original_sizes_box: false
    detection_threshold: 0.3
    use_presence: ${scratch.use_presence_eval}

  mask_postprocessor_thresholded:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    iou_type: "segm"
    use_original_ids: false
    use_original_sizes_box: false
    use_original_sizes_mask: true
    convert_mask_to_rle: true
    detection_threshold: 0.3
    use_presence: ${scratch.use_presence_eval}

  # Image processing parameters
  resolution: 1008
  max_ann_per_img: 200

  # Normalization parameters
  train_norm_mean: [0.5, 0.5, 0.5]
  train_norm_std: [0.5, 0.5, 0.5]
  val_norm_mean: [0.5, 0.5, 0.5]
  val_norm_std: [0.5, 0.5, 0.5]

  # Training parameters
  train_batch_size: 1
  val_batch_size: 1
  num_train_workers: 0
  num_val_workers: 10 # change this depending on the number of cpu cores available
  max_data_epochs: 20
  target_epoch_size: 1500
  hybrid_repeats: 1
  context_length: 2

  # All reduce - this controls how the predictions are sent back to node 0.
  # If you have a lot of RAM, CPU gather is faster. Otherwise, we provide a
  # fallback through the filesystem (e.g. NFS).
  # Switch to true if you get cpu ooms during gather.
  gather_pred_via_filesys: false

  # Learning rate and scheduler parameters (unused for eval)
  lr_scale: 0.1
  lr_transformer: ${times:8e-4,${scratch.lr_scale}}
  lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
  lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
  lrd_vision_backbone: 0.9 # (lower for in-domain and higher for ood)
  wd: 0.1
  scheduler_timescale: 20
  scheduler_warmup: 20
  scheduler_cooldown: 20

# ============================================================================
# Trainer Configuration
# ============================================================================

trainer:
  _target_: sam3.train.trainer.Trainer
  skip_saving_ckpts: true
  empty_gpu_mem_cache_after_eval: true
  skip_first_val: true
  max_epochs: ${scratch.max_data_epochs}
  accelerator: cuda
  seed_value: 123
  val_epoch_freq: 10
  mode: val

  distributed:
    backend: nccl
    find_unused_parameters: true
    gradient_as_bucket_view: true

  loss:
    all:
      _target_: sam3.train.loss.sam3_loss.DummyLoss
    default:
      _target_: sam3.train.loss.sam3_loss.DummyLoss

  data:
    train: null
    val: null

  model:
    _target_: sam3.model_builder.build_sam3_image_model
    bpe_path: ${paths.bpe_path}
    # NOTE(review): `cpus` looks like a typo for `cpu` (not a valid torch
    # device string) — confirm against build_sam3_image_model's accepted values.
    device: cpus
    eval_mode: true
    enable_segmentation: true # Warning: Enable this if using segmentation.
    checkpoint_path: ${paths.checkpoint_path}

  meters:
    val: null

  optim:
    amp:
      enabled: true
      amp_dtype: bfloat16

    optimizer:
      _target_: torch.optim.AdamW

    gradient_clip:
      _target_: sam3.train.optim.optimizer.GradientClipper
      max_norm: 0.1
      norm_type: 2

    param_group_modifiers:
      - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
        _partial_: true
        layer_decay_value: ${scratch.lrd_vision_backbone}
        apply_to: 'backbone.vision_backbone.trunk'
        overrides:
          - pattern: '*pos_embed*'
            value: 1.0

    options:
      lr:
        - scheduler: # transformer and class_embed
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_transformer}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_vision_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.vision_backbone.*'
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_language_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.language_backbone.*'

      weight_decay:
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: ${scratch.wd}
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: 0.0
          param_names:
            - '*bias*'
          module_cls_names: ['torch.nn.LayerNorm']

  checkpoint:
    save_dir: ${launcher.experiment_log_dir}/checkpoints
    save_freq: 0 # 0: only the last checkpoint is saved.

  logging:
    tensorboard_writer:
      _target_: sam3.train.utils.logger.make_tensorboard_logger
      log_dir: ${launcher.experiment_log_dir}/tensorboard
      flush_secs: 120
      should_log: true
    wandb_writer: null
    log_dir: ${launcher.experiment_log_dir}/logs/
    log_freq: 10

# ============================================================================
# Launcher and Submitit Configuration
# ============================================================================

launcher:
  num_nodes: 4
  gpus_per_node: 8
  # NOTE(review): `paths` in this file only defines `base_experiment_log_dir`;
  # `paths.experiment_log_dir` is presumably supplied by the per-experiment
  # config that composes with this one — confirm it is set before resolving.
  experiment_log_dir: ${paths.experiment_log_dir}
  multiprocessing_context: forkserver

submitit:
  account: null # Add your SLURM account if use_cluster == 1
  partition: null
  qos: null # Add your QoS if use_cluster == 1
  timeout_hour: 72
  use_cluster: true
  cpus_per_task: 10
  port_range: [10000, 65000]
  constraint: null