# Provenance (from upstream PR, kept for reference):
# Several imports within the `sam3.model` package referenced other packages
# within `sam3` besides `sam3` and `sam3.model`. That PR fixed the package
# structure so that you can `pip install` the package and `import sam3`.
# Pull Request resolved: https://github.com/facebookresearch/sam3/pull/327
# Reviewed By: haithamkhedr
# Differential Revision: D88950127
# Pulled By: lematt1991
# fbshipit-source-id: 3554512d304ccdf679a9af8606bbfe1f7f2a1cfb
# @package _global_
defaults:
  - _self_
# This config is the base configuration for all evaluations. Amongst other things, it defines:
# - the model
# - the image transforms
# - the post processors
# - cluster configuration (only relevant for slurm-based evals, ignored otherwise)
#
# Most of the parameters should be kept as-is. The main modifications you may want to make are:
# - the cluster configuration, to adjust partitions/qos to your system
# - the flag gather_pred_via_filesys if your RAM is tight
# - num_val_workers if your number of cores is small (should be roughly number of cores / number of gpus)
# - the paths below

# ============================================================================
# Paths Configuration (Change this to your own paths)
# ============================================================================
paths:
  # If you leave the checkpoint path as null, the model will be downloaded
  # from hugging-face. Otherwise provide a path.
  checkpoint_path: null

  # The experiments will be subfolders of this.
  base_experiment_log_dir: <YOUR EXPERIMENT LOG_DIR>

  # Base path to the annotation folder for gold (refer to the readmes on how to download).
  base_annotation_path: <YOUR_GOLD_GT_DIR>

  # Base path to the annotation folder for silver (refer to the readmes on how to download).
  base_annotation_path_silver: <YOUR_SILVER_GT_DIR>

  # Path to the metaclip images, used for SA-Co gold (refer to the readme for
  # instructions). Can be null if you don't intend on evaluating on this dataset.
  metaclip_img_path: <YOUR_METACLIP_IMG_DIR>

  # Path to the sa1b images, used for SA-Co gold (refer to the readme for
  # instructions). Can be null if you don't intend on evaluating on this dataset.
  sa1b_img_path: <YOUR_SA1B_IMG_DIR>

  # Path to the SA-Co/silver images.
  silver_img_path: <YOUR_SILVER_IMG_DIR>

  bpe_path: <BPE_PATH> # This should be under sam3/assets/bpe_simple_vocab_16e6.txt.gz

# ============================================================================
# Different helper parameters and functions
# ============================================================================
scratch:
  use_presence_eval: true

  # Validation-time image transforms, instantiated by Hydra via `_target_`.
  base_val_transform:
    - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
      transforms:
        ######## transforms for validation (begin) ########
        - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
          sizes: ${scratch.resolution} # originally `resolution: 1024`
          max_size:
            _target_: sam3.train.transforms.basic.get_random_resize_max_size
            size: ${scratch.resolution} # originally `resolution: 1024`
            square: true
          consistent_transform: false
        ######## transforms for validation (end) ########
        - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
        - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
          mean: ${scratch.val_norm_mean}
          std: ${scratch.val_norm_std}

  loss: null

  # Model parameters
  d_model: 256
  input_box_embedding_dim: ${add:${scratch.d_model},2}

  # Box processing
  original_box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: true
    use_original_sizes_box: true
    use_presence: ${scratch.use_presence_eval}

  box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: false
    use_original_sizes_box: false
    use_presence: ${scratch.use_presence_eval}

  box_postprocessor_thresholded:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: false
    use_original_sizes_box: false
    detection_threshold: 0.3
    use_presence: ${scratch.use_presence_eval}

  mask_postprocessor_thresholded:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    iou_type: "segm"
    use_original_ids: false
    use_original_sizes_box: false
    use_original_sizes_mask: true
    convert_mask_to_rle: true
    detection_threshold: 0.3
    use_presence: ${scratch.use_presence_eval}

  # Image processing parameters
  resolution: 1008
  max_ann_per_img: 200

  # Normalization parameters
  train_norm_mean: [0.5, 0.5, 0.5]
  train_norm_std: [0.5, 0.5, 0.5]
  val_norm_mean: [0.5, 0.5, 0.5]
  val_norm_std: [0.5, 0.5, 0.5]

  # Training parameters
  train_batch_size: 1
  val_batch_size: 1
  num_train_workers: 0
  num_val_workers: 10 # change this depending on the number of cpu cores available
  max_data_epochs: 20
  target_epoch_size: 1500
  hybrid_repeats: 1
  context_length: 2

  # All reduce - this controls how the predictions are sent back to node 0.
  # If you have a lot of RAM, CPU gather is faster. Otherwise, we provide a
  # fallback through the filesystem (e.g. NFS).
  # Switch to true if you get cpu ooms during gather.
  gather_pred_via_filesys: false

  # Learning rate and scheduler parameters (unused for eval)
  lr_scale: 0.1
  lr_transformer: ${times:8e-4,${scratch.lr_scale}}
  lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
  lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
  lrd_vision_backbone: 0.9 # (lower for in-domain and higher for ood)
  wd: 0.1
  scheduler_timescale: 20
  scheduler_warmup: 20
  scheduler_cooldown: 20

# ============================================================================
# Trainer Configuration
# ============================================================================

trainer:
  _target_: sam3.train.trainer.Trainer
  skip_saving_ckpts: true
  empty_gpu_mem_cache_after_eval: true
  skip_first_val: true
  max_epochs: ${scratch.max_data_epochs}
  accelerator: cuda
  seed_value: 123
  val_epoch_freq: 10
  mode: val

  distributed:
    backend: nccl
    find_unused_parameters: true
    gradient_as_bucket_view: true

  loss:
    all:
      _target_: sam3.train.loss.sam3_loss.DummyLoss
    default:
      _target_: sam3.train.loss.sam3_loss.DummyLoss

  data:
    train: null
    val: null

  model:
    _target_: sam3.model_builder.build_sam3_image_model
    bpe_path: ${paths.bpe_path}
    # NOTE(review): `cpus` looks like a typo for `cpu` (not a valid torch
    # device string) — confirm against build_sam3_image_model's accepted values.
    device: cpus
    eval_mode: true
    enable_segmentation: true # Warning: Enable this if using segmentation.
    checkpoint_path: ${paths.checkpoint_path}

  meters:
    val: null

  optim:
    amp:
      enabled: true
      amp_dtype: bfloat16

    optimizer:
      _target_: torch.optim.AdamW

    gradient_clip:
      _target_: sam3.train.optim.optimizer.GradientClipper
      max_norm: 0.1
      norm_type: 2

    param_group_modifiers:
      - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
        _partial_: true
        layer_decay_value: ${scratch.lrd_vision_backbone}
        apply_to: 'backbone.vision_backbone.trunk'
        overrides:
          - pattern: '*pos_embed*'
            value: 1.0

    options:
      lr:
        - scheduler: # transformer and class_embed
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_transformer}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_vision_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.vision_backbone.*'
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_language_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.language_backbone.*'

      weight_decay:
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: ${scratch.wd}
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: 0.0
          param_names:
            - '*bias*'
          module_cls_names: ['torch.nn.LayerNorm']

  checkpoint:
    save_dir: ${launcher.experiment_log_dir}/checkpoints
    save_freq: 0 # 0: only the last checkpoint is saved.

  logging:
    tensorboard_writer:
      _target_: sam3.train.utils.logger.make_tensorboard_logger
      log_dir: ${launcher.experiment_log_dir}/tensorboard
      flush_secs: 120
      should_log: true
    wandb_writer: null
    log_dir: ${launcher.experiment_log_dir}/logs/
    log_freq: 10

# ============================================================================
# Launcher and Submitit Configuration
# ============================================================================

launcher:
  num_nodes: 4
  gpus_per_node: 8
  # NOTE(review): `paths` in this file only defines `base_experiment_log_dir`;
  # `paths.experiment_log_dir` is presumably supplied by the per-experiment
  # config that composes with this one — confirm it is set before resolving.
  experiment_log_dir: ${paths.experiment_log_dir}
  multiprocessing_context: forkserver

submitit:
  account: null # Add your SLURM account if use_cluster == 1
  partition: null
  qos: null # Add your QoS if use_cluster == 1
  timeout_hour: 72
  use_cluster: true
  cpus_per_task: 10
  port_range: [10000, 65000]
  constraint: null