This repository was archived by the owner on Mar 19, 2024. It is now read-only.

Add BYOL implementation #290

Closed
4 changes: 2 additions & 2 deletions configs/config/dataset_catalog.json
@@ -1,7 +1,7 @@
 {
   "imagenet1k_folder": {
-    "train": ["<img_path>", "<lbl_path>"],
-    "val": ["<img_path>", "<lbl_path>"]
+    "train": ["/datasets01/imagenet_full_size/061417/train", "/datasets01/imagenet_full_size/061417/train"],
+    "val": ["/datasets01/imagenet_full_size/061417/val", "/datasets01/imagenet_full_size/061417/val"]
   },
   "imagenet_a_filelist": {
     "train": ["<not_used>", "<not_used>"],
113 changes: 113 additions & 0 deletions configs/config/pretrain/byol/byol_1node_resnet.yaml
@@ -0,0 +1,113 @@
# @package _global_
config:
VERBOSE: False
LOG_FREQUENCY: 10
TEST_ONLY: False
TEST_MODEL: False
SEED_VALUE: 0
MULTI_PROCESSING_METHOD: forkserver
HOOKS:
PERF_STATS:
MONITOR_PERF_STATS: True
ROLLING_BTIME_FREQ: 313
TENSORBOARD_SETUP:
USE_TENSORBOARD: True
EXPERIMENT_LOG_DIR: "byol_reference"
LOG_PARAMS: False
FLUSH_EVERY_N_MIN: 20
DATA:
NUM_DATALOADER_WORKERS: 5
TRAIN:
DATA_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
BATCHSIZE_PER_REPLICA: 32
LABEL_TYPE: sample_index # just an implementation detail. Label isn't used
TRANSFORMS:
- name: ImgReplicatePil
num_times: 2
- name: RandomResizedCrop
size: 128
- name: RandomHorizontalFlip
p: 0.5
- name: ImgPilColorDistortion
strength: 0.5
- name: ImgPilMultiCropRandomApply
transforms:
- name: ImgPilGaussianBlur
p: 1.0
radius_min: 0.1
radius_max: 2.0
prob: [ 1.0, 0.1 ]
- name: ImgPilMultiCropRandomApply
transforms:
- name: ImgPilRandomSolarize
p: 1.0
prob: [ 0.0, 0.2 ]
- name: ToTensor
- name: Normalize
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
COLLATE_FUNCTION: simclr_collator
MMAP_MODE: True
COPY_TO_LOCAL_DISK: False
COPY_DESTINATION_DIR: /tmp/imagenet1k/
DROP_LAST: True
TRAINER:
TRAIN_STEP_NAME: standard_train_step
METERS:
name: ""
MODEL:
TRUNK:
NAME: resnet
TRUNK_PARAMS:
RESNETS:
DEPTH: 50
ZERO_INIT_RESIDUAL: True
HEAD:
PARAMS: [
["mlp", {"dims": [2048, 4096], "use_relu": True, "use_bn": True}],
["mlp", {"dims": [4096, 256]}],
["mlp", {"dims": [256, 4096], "use_relu": True, "use_bn": True}],
["mlp", {"dims": [4096, 256]}],
]
SYNC_BN_CONFIG:
CONVERT_BN_TO_SYNC_BN: True
SYNC_BN_TYPE: pytorch
AMP_PARAMS:
USE_AMP: False
LOSS:
name: byol_loss
byol_loss:
embedding_dim: 256
momentum: 0.999
OPTIMIZER:
name: sgd
use_larc: True
larc_config:
clip: False
trust_coefficient: 0.001
eps: 0.00000001
weight_decay: 0.0001
momentum: 0.9
nesterov: False
num_epochs: 200
regularize_bn: False
regularize_bias: False
param_schedulers:
lr:
name: multistep
values: [0.03, 0.003, 0.0003]
milestones: [120, 160]
update_interval: epoch
DISTRIBUTED:
BACKEND: nccl
NUM_NODES: 1
NUM_PROC_PER_NODE: 8
INIT_METHOD: tcp
RUN_ID: auto
MACHINE:
DEVICE: gpu
CHECKPOINT:
AUTO_RESUME: True
CHECKPOINT_FREQUENCY: 5
CHECKPOINT_ITER_FREQUENCY: -1 # set this variable to checkpoint every few iterations
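For orientation only (the loss itself lives in the Python side of this PR, not in the YAML): the two knobs exposed under `byol_loss`, `embedding_dim: 256` and `momentum: 0.999`, correspond to the size of the projector/predictor output and to the EMA coefficient of the target network. A minimal sketch of the standard BYOL objective and target update, assuming plain PyTorch and illustrative names rather than this PR's actual classes:

```python
import torch
import torch.nn.functional as F

def byol_regression_loss(online_pred: torch.Tensor, target_proj: torch.Tensor) -> torch.Tensor:
    """Normalized MSE between online prediction and target projection.

    Equivalent to 2 - 2 * cosine_similarity; both inputs are
    (batch, embedding_dim) tensors, e.g. embedding_dim = 256.
    """
    online_pred = F.normalize(online_pred, dim=-1)
    target_proj = F.normalize(target_proj, dim=-1)
    return (2.0 - 2.0 * (online_pred * target_proj).sum(dim=-1)).mean()

@torch.no_grad()
def ema_update(target_net: torch.nn.Module, online_net: torch.nn.Module, momentum: float = 0.999) -> None:
    # Target weights track the online weights; only the online network receives gradients.
    for t, o in zip(target_net.parameters(), online_net.parameters()):
        t.mul_(momentum).add_(o, alpha=1.0 - momentum)
```

In the BYOL paper the regression loss is symmetrized over the two views produced by `ImgReplicatePil: num_times: 2`, and the four `mlp` entries under `HEAD.PARAMS` presumably map to the online projector (2048→4096→256) and predictor (256→4096→256).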
135 changes: 135 additions & 0 deletions configs/config/quick_1gpu_resnet50_byol.yaml
@@ -0,0 +1,135 @@
# @package _global_
config:
VERBOSE: False
LOG_FREQUENCY: 1
TEST_ONLY: False
TEST_MODEL: False
SEED_VALUE: 0
MULTI_PROCESSING_METHOD: forkserver
MONITOR_PERF_STATS: True
PERF_STAT_FREQUENCY: 10
ROLLING_BTIME_FREQ: 5
HOOKS:
TENSORBOARD_SETUP:
USE_TENSORBOARD: True
EXPERIMENT_LOG_DIR: "byol_quick"
LOG_PARAMS: False
FLUSH_EVERY_N_MIN: 20
DATA:
NUM_DATALOADER_WORKERS: 5
TRAIN:
DATA_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
BATCHSIZE_PER_REPLICA: 128
LABEL_TYPE: sample_index # just an implementation detail. Label isn't used
TRANSFORMS:
- name: ImgReplicatePil
num_times: 2
- name: RandomResizedCrop
size: 128
- name: RandomHorizontalFlip
p: 0.5
- name: ImgPilColorDistortion
strength: 0.5
- name: ImgPilMultiCropRandomApply
transforms:
- name: ImgPilGaussianBlur
p: 1.0
radius_min: 0.1
radius_max: 2.0
prob: [ 1.0, 0.1 ]
- name: ImgPilMultiCropRandomApply
transforms:
- name: ImgPilRandomSolarize
p: 1.0
prob: [ 0.0, 0.2 ]
- name: ToTensor
- name: Normalize
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
COLLATE_FUNCTION: simclr_collator
MMAP_MODE: True
COPY_TO_LOCAL_DISK: False
DROP_LAST: True
COPY_DESTINATION_DIR: "/tmp/imagenet1k"
TRAINER:
TRAIN_STEP_NAME: standard_train_step
METERS:
name: ""
MODEL:
TRUNK:
NAME: resnet
RESNETS:
DEPTH: 50
ZERO_INIT_RESIDUAL: True
HEAD:
PARAMS: [
["mlp", {"dims": [2048, 4096], "use_relu": True, "use_bn": True}],
["mlp", {"dims": [4096, 256]}],
["mlp", {"dims": [256, 4096], "use_relu": True, "use_bn": True}],
["mlp", {"dims": [4096, 256]}],
]
SYNC_BN_CONFIG:
CONVERT_BN_TO_SYNC_BN: True
SYNC_BN_TYPE: pytorch
AMP_PARAMS:
USE_AMP: False
LOSS:
name: byol_loss
byol_loss:
embedding_dim: 256
momentum: 0.999
OPTIMIZER:
name: sgd
use_larc: True
larc_config:
clip: False
trust_coefficient: 0.001
eps: 0.00000001
weight_decay: 0.000001
momentum: 0.9
nesterov: False
num_epochs: 500
regularize_bn: False
regularize_bias: True
head_optimizer_params:
use_different_lr: False
use_different_wd: False
param_schedulers:
lr:
auto_lr_scaling:
auto_scale: false
base_value: 0.3
base_lr_batch_size: 256
name: composite
schedulers:
- name: linear
start_value: 0.6
end_value: 4.8
- name: cosine_warm_restart
start_value: 4.8
end_value: 0.0048
# wave_type: half
# restart_interval_length: 0.5
wave_type: full
is_adaptive: True
restart_interval_length: 0.334
interval_scaling: [rescaled, rescaled]
update_interval: step
lengths: [0.1, 0.9] # 100ep
DISTRIBUTED:
BACKEND: nccl
NUM_NODES: 1
NUM_PROC_PER_NODE: 1
INIT_METHOD: tcp
RUN_ID: auto
MACHINE:
DEVICE: gpu
CHECKPOINT:
DIR: "."
AUTO_RESUME: False
CHECKPOINT_FREQUENCY: 1
OVERWRITE_EXISTING: true

TENSORBOARD_SETUP:
USE_TENSORBOARD: true
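As a rough aid to reading the `param_schedulers` block above (the function and names below are illustrative, not VISSL's API): with `lengths: [0.1, 0.9]` the first scheduler runs a linear warmup from 0.6 to 4.8 over the first 10% of steps, and the second decays from 4.8 toward 0.0048 over the remaining 90%. A simplified sketch, ignoring the adaptive warm restarts the config actually enables:

```python
import math

def composite_lr(progress, warmup_frac=0.1, warm_start=0.6, peak=4.8, end=0.0048):
    # progress in [0, 1): fraction of total training steps completed.
    # Phase 1 (first 10% of steps): linear warmup from 0.6 to 4.8.
    if progress < warmup_frac:
        return warm_start + (peak - warm_start) * (progress / warmup_frac)
    # Phase 2 (remaining 90%): plain cosine decay from 4.8 to 0.0048.
    # (The config uses adaptive cosine warm restarts; this sketch drops the
    # restarts to keep the overall shape readable.)
    p = (progress - warmup_frac) / (1.0 - warmup_frac)
    return end + 0.5 * (peak - end) * (1.0 + math.cos(math.pi * p))

# composite_lr(0.0) == 0.6, composite_lr(0.1) == 4.8, composite_lr(0.999999) ~= 0.0048
```

Note that `auto_lr_scaling.auto_scale` is false here; when enabled, the base LR is typically scaled linearly by the global batch size divided by `base_lr_batch_size`.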
121 changes: 121 additions & 0 deletions configs/config/quick_1gpu_resnet50_simclr.yaml
@@ -0,0 +1,121 @@
# @package _global_
config:
VERBOSE: False
LOG_FREQUENCY: 1
TEST_ONLY: False
TEST_MODEL: False
SEED_VALUE: 0
MULTI_PROCESSING_METHOD: forkserver
MONITOR_PERF_STATS: True
PERF_STAT_FREQUENCY: 10
ROLLING_BTIME_FREQ: 5
DATA:
NUM_DATALOADER_WORKERS: 5
TRAIN:
DATA_SOURCES: [disk_filelist]
DATASET_NAMES: [imagenet1k_filelist]
BATCHSIZE_PER_REPLICA: 32
LABEL_TYPE: sample_index # just an implementation detail. Label isn't used
TRANSFORMS:
- name: ImgReplicatePil
num_times: 2
- name: RandomResizedCrop
size: 224
- name: RandomHorizontalFlip
p: 0.5
- name: ImgPilColorDistortion
strength: 1.0
- name: ImgPilGaussianBlur
p: 0.5
radius_min: 0.1
radius_max: 2.0
- name: ToTensor
- name: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
COLLATE_FUNCTION: simclr_collator
MMAP_MODE: True
COPY_TO_LOCAL_DISK: False
DATA_LIMIT: 500
DROP_LAST: True
COPY_DESTINATION_DIR: "/tmp/imagenet1k"
TRAINER:
TRAIN_STEP_NAME: standard_train_step
METERS:
name: ""
MODEL:
TRUNK:
NAME: resnet
RESNETS:
DEPTH: 50
HEAD:
PARAMS: [
["mlp", {"dims": [2048, 2048], "use_relu": True}],
["mlp", {"dims": [2048, 128]}],
]
SYNC_BN_CONFIG:
CONVERT_BN_TO_SYNC_BN: True
SYNC_BN_TYPE: pytorch
AMP_PARAMS:
USE_AMP: False
AMP_ARGS: {"opt_level": "O3", "keep_batchnorm_fp32": True, "master_weights": True, "loss_scale": "dynamic"}
LOSS:
name: simclr_info_nce_loss
simclr_info_nce_loss:
temperature: 0.1
buffer_params:
embedding_dim: 128
OPTIMIZER:
name: sgd
use_larc: True
larc_config:
clip: False
trust_coefficient: 0.001
eps: 0.00000001
weight_decay: 0.000001
momentum: 0.9
nesterov: False
num_epochs: 2
regularize_bn: False
regularize_bias: True
head_optimizer_params:
use_different_lr: False
use_different_wd: False
param_schedulers:
lr:
auto_lr_scaling:
auto_scale: false
base_value: 0.3
base_lr_batch_size: 256
name: composite
schedulers:
- name: linear
start_value: 0.6
end_value: 4.8
- name: cosine_warm_restart
start_value: 4.8
end_value: 0.0048
# wave_type: half
# restart_interval_length: 0.5
wave_type: full
is_adaptive: True
restart_interval_length: 0.334
interval_scaling: [rescaled, rescaled]
update_interval: step
lengths: [0.1, 0.9] # 100ep
DISTRIBUTED:
BACKEND: nccl
NUM_NODES: 1
NUM_PROC_PER_NODE: 1
INIT_METHOD: tcp
RUN_ID: auto
MACHINE:
DEVICE: gpu
CHECKPOINT:
DIR: "."
AUTO_RESUME: True
CHECKPOINT_FREQUENCY: 1
OVERWRITE_EXISTING: true

TENSORBOARD_SETUP:
USE_TENSORBOARD: true
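For comparison with the BYOL loss, `simclr_info_nce_loss` with `temperature: 0.1` is the standard NT-Xent contrastive objective over the two replicated views. A minimal single-GPU sketch with illustrative names (the VISSL implementation also gathers embeddings across replicas):

```python
import torch
import torch.nn.functional as F

def info_nce(z1: torch.Tensor, z2: torch.Tensor, temperature: float = 0.1) -> torch.Tensor:
    # z1, z2: (batch, 128) embeddings of the two views, per the head config above.
    z = F.normalize(torch.cat([z1, z2], dim=0), dim=-1)   # (2B, D)
    sim = z @ z.t() / temperature                          # (2B, 2B) similarity logits
    sim.fill_diagonal_(float("-inf"))                      # mask self-similarity
    batch = z1.shape[0]
    idx = torch.arange(batch, device=z1.device)
    # The positive for view i is the other view of the same image.
    targets = torch.cat([idx + batch, idx])
    return F.cross_entropy(sim, targets)
```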
7 changes: 7 additions & 0 deletions launch_byol_1node.sh
@@ -0,0 +1,7 @@
#!/bin/bash

./dev/launch_slurm.sh \
config=pretrain/byol/byol_1node_resnet \
config.SLURM.NAME=byol_test \
config.SLURM.COMMENT="BYOL FOR VISSL" \
config.SLURM.PARTITION=learnfair