Initial commit

fbshipit-source-id: da6be2f26e3a1202f4bffde8cb980e2dcb851294
2025-11-18 23:07:42 -08:00
commit a13e358df4
504 changed files with 122758 additions and 0 deletions
--- a/sam3/train/configs/eval_base.yaml
+++ b/sam3/train/configs/eval_base.yaml
@@ -0,0 +1,279 @@
+# @package _global_
+defaults:
+  - _self_
+
+# This config is the base configuration for all evaluations. Amongst other things, it defines:
+# - the model
+# - the image transforms
+# - the post processors
+# - cluster configuration (only relevant for slurm-based evals, ignored otherwise)
+#
+# Most of the parameters should be kept as-is. The main modifications you may want to make are:
+# - the cluster configuration, to adjust partitions/qos to your system
+# - the flag gather_pred_via_filesys if your RAM is tight
+# - num_val_workers if your number of cores is small (should be roughly number of cores / number of gpus)
+# - the paths below
+
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+  # Path to a local model checkpoint. If left as null, the model will be downloaded from Hugging Face.
+  checkpoint_path: null
+  # Root directory for experiment logs; the experiments will be subfolders of this.
+  # NOTE(review): `launcher.experiment_log_dir` interpolates `paths.experiment_log_dir`, which is not
+  # defined in this file — presumably it is set by the configs that build on this base; confirm.
+  base_experiment_log_dir: <YOUR_EXPERIMENT_LOG_DIR>
+
+  # Base path to the annotation folder for gold (refer to the readmes on how to download).
+  base_annotation_path: <YOUR_GOLD_GT_DIR>
+
+  # Base path to the annotation folder for silver (refer to the readmes on how to download).
+  base_annotation_path_silver: <YOUR_SILVER_GT_DIR>
+
+  # Path to the metaclip images, used for SA-Co gold (refer to the readme for instructions).
+  # Can be null if you don't intend to evaluate on this dataset.
+  metaclip_img_path: <YOUR_METACLIP_IMG_DIR>
+
+  # Path to the sa1b images, used for SA-Co gold (refer to the readme for instructions).
+  # Can be null if you don't intend to evaluate on this dataset.
+  sa1b_img_path: <YOUR_SA1B_IMG_DIR>
+
+  # Path to the SA-Co/silver images.
+  silver_img_path: <YOUR_SILVER_IMG_DIR>
+
+  # BPE vocabulary file; this should be under assets/bpe_simple_vocab_16e6.txt.gz
+  bpe_path: <BPE_PATH>
+
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+  # Values referenced through ${scratch.*} interpolations (in this file and, presumably,
+  # in the configs that build on it).
+
+  # Forwarded as `use_presence` to every post-processor defined below.
+  use_presence_eval: true
+
+  # Validation-time image pipeline: resize, convert to tensor, then normalize.
+  base_val_transform:
+    - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+      transforms:
+        ######## transforms for validation (begin) ########
+        - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+          sizes: ${scratch.resolution}  # originally `resolution: 1024`
+          max_size:
+            _target_: sam3.train.transforms.basic.get_random_resize_max_size
+            size: ${scratch.resolution}  # originally `resolution: 1024`
+          square: true
+          consistent_transform: false
+        ######## transforms for validation (end) ########
+        - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+        - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+          mean: ${scratch.val_norm_mean}
+          std: ${scratch.val_norm_std}
+
+  # No loss is configured in this evaluation base config.
+  loss: null
+
+  # Model parameters
+  d_model: 256
+  # Presumably d_model + 2, assuming `add` is a project-registered resolver that sums its arguments — confirm.
+  input_box_embedding_dim: ${add:${scratch.d_model},2}
+
+  # Box processing
+  # Variant that keeps the original ids and the original box sizes.
+  original_box_postprocessor:
+    _target_: sam3.eval.postprocessors.PostProcessImage
+    max_dets_per_img: -1  # infinite detections
+    use_original_ids: true
+    use_original_sizes_box: true
+    use_presence: ${scratch.use_presence_eval}
+
+  # Variant that uses neither the original ids nor the original box sizes.
+  box_postprocessor:
+    _target_: sam3.eval.postprocessors.PostProcessImage
+    max_dets_per_img: -1  # infinite detections
+    use_original_ids: false
+    use_original_sizes_box: false
+    use_presence: ${scratch.use_presence_eval}
+
+  # Same as `box_postprocessor`, with a detection threshold of 0.3.
+  box_postprocessor_thresholded:
+    _target_: sam3.eval.postprocessors.PostProcessImage
+    max_dets_per_img: -1  # infinite detections
+    use_original_ids: false
+    use_original_sizes_box: false
+    detection_threshold: 0.3
+    use_presence: ${scratch.use_presence_eval}
+
+  # Thresholded variant for segmentation (`iou_type: "segm"`): masks use the original sizes
+  # and are converted to RLE.
+  mask_postprocessor_thresholded:
+    _target_: sam3.eval.postprocessors.PostProcessImage
+    max_dets_per_img: -1  # infinite detections
+    iou_type: "segm"
+    use_original_ids: false
+    use_original_sizes_box: false
+    use_original_sizes_mask: true
+    convert_mask_to_rle: true
+    detection_threshold: 0.3
+    use_presence: ${scratch.use_presence_eval}
+
+  # Image processing parameters
+  resolution: 1008
+  max_ann_per_img: 200
+
+  # Normalization parameters
+  train_norm_mean: [0.5, 0.5, 0.5]
+  train_norm_std: [0.5, 0.5, 0.5]
+  val_norm_mean: [0.5, 0.5, 0.5]
+  val_norm_std: [0.5, 0.5, 0.5]
+
+  # Training parameters
+  train_batch_size: 1
+  val_batch_size: 1
+  num_train_workers: 0
+  num_val_workers: 10  # change this depending on the number of CPU cores available
+  max_data_epochs: 20
+  target_epoch_size: 1500
+  hybrid_repeats: 1
+  context_length: 2
+
+  # All reduce - this controls how the predictions are sent back to node 0.
+  # If you have a lot of RAM, CPU gather is faster. Otherwise, we provide a fallback through the filesystem (e.g. NFS).
+  # Switch to true if you get CPU OOMs during gather.
+  gather_pred_via_filesys: false
+
+  # Learning rate and scheduler parameters (unused for eval)
+  lr_scale: 0.1
+  # Presumably base value × lr_scale, assuming `times` is a project-registered multiplying resolver — confirm.
+  lr_transformer: ${times:8e-4,${scratch.lr_scale}}
+  lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
+  lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
+  lrd_vision_backbone: 0.9  # (lower for in-domain and higher for ood)
+  wd: 0.1
+  scheduler_timescale: 20
+  scheduler_warmup: 20
+  scheduler_cooldown: 20
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+  # Trainer instantiated in validation mode (`mode: val`); checkpoint saving is skipped.
+  _target_: sam3.train.trainer.Trainer
+  skip_saving_ckpts: true
+  empty_gpu_mem_cache_after_eval: true
+  skip_first_val: true
+  max_epochs: ${scratch.max_data_epochs}
+  accelerator: cuda
+  seed_value: 123
+  val_epoch_freq: 10
+  mode: val
+
+  distributed:
+    backend: nccl
+    find_unused_parameters: true
+    gradient_as_bucket_view: true
+
+  # Both loss keys point at DummyLoss; presumably a placeholder since this config only evaluates — confirm.
+  loss:
+    all:
+      _target_: sam3.train.loss.sam3_loss.DummyLoss
+    default:
+      _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+  # Datasets are left null here; presumably filled in by the dataset-specific eval configs.
+  data:
+    train: null
+    val: null
+
+  model:
+    _target_: sam3.model_builder.build_sam3_image_model
+    bpe_path: ${paths.bpe_path}
+    # NOTE(review): `cpus` is not the usual torch device string (`cpu`); presumably the builder
+    # only special-cases "cuda" — confirm against build_sam3_image_model before changing it.
+    device: cpus
+    eval_mode: true
+    enable_segmentation: true  # Warning: Enable this if using segmentation.
+    checkpoint_path: ${paths.checkpoint_path}
+
+  # Meters are left null here; presumably filled in by the dataset-specific eval configs.
+  meters:
+    val: null
+
+  # Optimizer settings. The learning-rate values they reference are marked as unused for eval in `scratch`.
+  optim:
+    amp:
+      enabled: true
+      amp_dtype: bfloat16
+
+    optimizer:
+      _target_: torch.optim.AdamW
+
+    gradient_clip:
+      _target_: sam3.train.optim.optimizer.GradientClipper
+      max_norm: 0.1
+      norm_type: 2
+
+    param_group_modifiers:
+      # Layer decay on the vision backbone trunk; parameters matching '*pos_embed*' are overridden to 1.0.
+      - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
+        _partial_: true
+        layer_decay_value: ${scratch.lrd_vision_backbone}
+        apply_to: 'backbone.vision_backbone.trunk'
+        overrides:
+          - pattern: '*pos_embed*'
+            value: 1.0
+
+    options:
+      lr:
+        # Entry without `param_names`.
+        - scheduler:  # transformer and class_embed
+            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+            base_lr: ${scratch.lr_transformer}
+            timescale: ${scratch.scheduler_timescale}
+            warmup_steps: ${scratch.scheduler_warmup}
+            cooldown_steps: ${scratch.scheduler_cooldown}
+        # Vision backbone parameters.
+        - scheduler:
+            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+            base_lr: ${scratch.lr_vision_backbone}
+            timescale: ${scratch.scheduler_timescale}
+            warmup_steps: ${scratch.scheduler_warmup}
+            cooldown_steps: ${scratch.scheduler_cooldown}
+          param_names:
+            - 'backbone.vision_backbone.*'
+        # Language backbone parameters.
+        - scheduler:
+            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+            base_lr: ${scratch.lr_language_backbone}
+            timescale: ${scratch.scheduler_timescale}
+            warmup_steps: ${scratch.scheduler_warmup}
+            cooldown_steps: ${scratch.scheduler_cooldown}
+          param_names:
+            - 'backbone.language_backbone.*'
+
+      weight_decay:
+        # Entry without `param_names`: constant weight decay of ${scratch.wd}.
+        - scheduler:
+            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+            value: ${scratch.wd}
+        # Zero weight decay for parameters matching '*bias*' and for torch.nn.LayerNorm modules.
+        - scheduler:
+            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+            value: 0.0
+          param_names:
+            - '*bias*'
+          module_cls_names: ['torch.nn.LayerNorm']
+
+  checkpoint:
+    save_dir: ${launcher.experiment_log_dir}/checkpoints
+    save_freq: 0  # 0: only the last checkpoint is saved.
+
+
+  # All logging output goes under ${launcher.experiment_log_dir}; the W&B writer is disabled.
+  logging:
+    tensorboard_writer:
+      _target_: sam3.train.utils.logger.make_tensorboard_logger
+      log_dir: ${launcher.experiment_log_dir}/tensorboard
+      flush_secs: 120
+      should_log: true
+    wandb_writer: null
+    log_dir: ${launcher.experiment_log_dir}/logs/
+    log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+  # Job size: 4 nodes with 8 GPUs per node.
+  num_nodes: 4
+  gpus_per_node: 8
+  # Referenced by trainer.checkpoint.save_dir and the trainer.logging directories.
+  # NOTE(review): `paths.experiment_log_dir` is not defined in this file (only
+  # `paths.base_experiment_log_dir` is) — presumably provided by the configs that build on this base; confirm.
+  experiment_log_dir: ${paths.experiment_log_dir}
+  multiprocessing_context: forkserver
+
+
+submitit:
+  # Cluster settings; only relevant for slurm-based evals. Adjust account/partition/qos to your system.
+  account: null  # Add your SLURM account if use_cluster is enabled
+  partition: null
+  qos: null  # Add your QoS if use_cluster is enabled
+  timeout_hour: 72
+  use_cluster: true
+  cpus_per_task: 10
+  port_range: [10000, 65000]
+  constraint: null