Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- configs/cls_schedule/cls_vit_b16_s1.28B_bs16k.yaml +36 -0
- configs/cls_schedule/cls_vit_b16_s1.28B_bs16k_val.yaml +34 -0
- configs/cls_schedule/cls_vit_b16_s512m_bs16k.yaml +36 -0
- configs/cls_schedule/cls_vit_l14_224_s12.8B_bs90k.yaml +36 -0
- configs/cls_schedule/cls_vit_l14_s1.28B_bs16k.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_1_s512m_bs16k.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_224_s12.8B_bs90k.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_s1.28B_bs16k.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_s1.28B_bs16k_noidf.yaml +37 -0
- configs/cls_schedule/cls_vit_l16_s4B_bs32k.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_s4B_bs32k_2b.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_s4B_bs32k_3b.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_s4B_bs32k_cls.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_s512m_bs16k.yaml +36 -0
- configs/cls_schedule/cls_vit_l16_s512m_bs32k.yaml +36 -0
- configs/cls_schedule/lit_clip_vit_b16_s512m_bs16k.yaml +38 -0
- configs/cls_schedule/lit_vit_b16_s1.28B_bs16k.yaml +38 -0
- configs/cls_schedule/lit_vit_b16_s4B_bs32k.yaml +38 -0
- configs/cls_schedule/lit_vit_b16_s512m_bs16k.yaml +38 -0
- configs/cls_schedule/lit_vit_l14_224_s12.8B_bs90k.yaml +39 -0
- configs/cls_schedule/lit_vit_l14_s1.28B_bs16k.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_1_s512m_bs16k.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_224_s12.8B_bs90k.yaml +39 -0
- configs/cls_schedule/lit_vit_l16_s1.28B_bs16k.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_s1.28B_bs16k_noidf.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_s4B_bs32k.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_s4B_bs32k_2b.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_s4B_bs32k_3b.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_s4B_bs32k_cls.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_s512m_bs16k.yaml +38 -0
- configs/cls_schedule/lit_vit_l16_s512m_bs32k.yaml +38 -0
- configs/cls_schedule/test.yaml +36 -0
- configs/exp_schedule/cls_vit_l16_s12.8B_bs90k.yaml +37 -0
- configs/exp_schedule/cls_vit_l16_s12.8B_bs90k_w1.0.yaml +37 -0
- configs/exp_schedule/test.yaml +37 -0
- configs/long_schedule/clip_test.yaml +35 -0
- configs/long_schedule/cls_test.yaml +36 -0
- configs/long_schedule/vit_l16_224_s12.8B_bs90k.yaml +35 -0
- configs/long_schedule/vit_l16_224_s4B_bs32k.yaml +35 -0
- configs/long_schedule/vit_l16_224_s4B_bs32k_3b.yaml +35 -0
- configs/long_schedule/vit_l16_224_s4B_bs64k.yaml +35 -0
- configs/long_schedule/vit_l_224_s12.8B_bs90k.yaml +35 -0
- configs/long_schedule/vitamin_l2_224_s12.8B_bs90k.yaml +35 -0
- configs/long_schedule/vitamin_l2_224_s12.8B_bs90k_ft256.yaml +39 -0
- configs/long_schedule/vitamin_l2_224_s12.8B_bs90k_ft336.yaml +39 -0
- configs/long_schedule/vitamin_l2_224_s12.8B_bs90k_ft384.yaml +39 -0
- configs/long_schedule/vitamin_l_224_s12.8B_bs90k.yaml +35 -0
- configs/long_schedule/vitamin_l_224_s12.8B_bs90k_ft256.yaml +39 -0
- configs/long_schedule/vitamin_l_224_s12.8B_bs90k_ft336.yaml +39 -0
- configs/long_schedule/vitamin_l_224_s12.8B_bs90k_ft384.yaml +39 -0
configs/cls_schedule/cls_vit_b16_s1.28B_bs16k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_b16_s1.28B_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-B-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 128
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 6104
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_b16_s1.28B_bs16k_val.yaml
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_b16_s1.28B_bs16k"
|
| 3 |
+
train_data: '' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-B-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 128
|
| 29 |
+
zeroshot_frequency: 1
|
| 30 |
+
val_frequency: 1
|
| 31 |
+
save_every_n_steps: 6104
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
|
| 34 |
+
resume: latest
|
configs/cls_schedule/cls_vit_b16_s512m_bs16k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_b16_s512m_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-B-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 128
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 6104
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l14_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l14_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-14"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l14_s1.28B_bs16k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l14_s1.28B_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 10
|
| 16 |
+
model: "CLS-ViT-L-14"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 128
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 6104
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_1_s512m_bs16k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_1_s512m_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16-1"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 128
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 6104
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s1.28B_bs16k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s1.28B_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 10
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 128
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 6104
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s1.28B_bs16k_noidf.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s1.28B_bs16k_noidf"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 10
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
use_idf: false
|
| 24 |
+
|
| 25 |
+
logs: './logs'
|
| 26 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 27 |
+
|
| 28 |
+
report_to: "tensorboard"
|
| 29 |
+
log_every_n_steps: 128
|
| 30 |
+
zeroshot_steps: 0
|
| 31 |
+
val_steps: 0
|
| 32 |
+
zeroshot_frequency: 0
|
| 33 |
+
val_frequency: 0
|
| 34 |
+
save_every_n_steps: 6104
|
| 35 |
+
delete_prev_step_ckpt: true
|
| 36 |
+
|
| 37 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s4B_bs32k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s4B_bs32k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 64
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s4B_bs32k_2b.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s4B_bs32k_2b"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 64
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s4B_bs32k_3b.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s4B_bs32k_3b"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 64
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s4B_bs32k_cls.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s4B_bs32k_cls"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16-cls"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 64
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s512m_bs16k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s512m_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 128
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 6104
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/cls_vit_l16_s512m_bs32k.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s512m_bs32k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 64
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/cls_schedule/lit_clip_vit_b16_s512m_bs16k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_clip_vit_b16_s512m_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-B-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: '/mnt/bn/seed-aws-va/zilonghuang/code/ViTamin/ViTamin/logs/vit_b16_s512m_bs16k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_b16_s1.28B_bs16k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_b16_s1.28B_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-B-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_b16_s1.28B_bs16k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_b16_s4B_bs32k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s4B_bs32k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_b16_s4B_bs32k/checkpoints/epoch_3.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 64
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
save_every_n_steps: 3052
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_b16_s512m_bs16k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_b16_s512m_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-B-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_b16_s512m_bs16k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l14_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l14_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-14-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l14_224_s12.8B_bs90k/checkpoints/epoch_10.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 32
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
zeroshot_frequency: 1
|
| 36 |
+
save_every_n_steps: 3052
|
| 37 |
+
delete_prev_step_ckpt: true
|
| 38 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 39 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l14_s1.28B_bs16k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l14_s1.28B_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-14"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l14_s1.28B_bs16k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_1_s512m_bs16k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_1_s512m_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_1_s512m_bs16k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_224_s12.8B_bs90k/checkpoints/epoch_10.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 32
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
zeroshot_frequency: 1
|
| 36 |
+
save_every_n_steps: 3052
|
| 37 |
+
delete_prev_step_ckpt: true
|
| 38 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 39 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s1.28B_bs16k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s1.28B_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s1.28B_bs16k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s1.28B_bs16k_noidf.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s1.28B_bs16k_noidf"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s1.28B_bs16k_noidf/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s4B_bs32k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s4B_bs32k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s4B_bs32k/checkpoints/epoch_3.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 64
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
save_every_n_steps: 3052
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s4B_bs32k_2b.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s4B_bs32k_2b"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s4B_bs32k_2b/checkpoints/epoch_3.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 64
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
save_every_n_steps: 3052
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s4B_bs32k_3b.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s4B_bs32k_3b"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s4B_bs32k_3b/checkpoints/epoch_3.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 64
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
save_every_n_steps: 3052
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s4B_bs32k_cls.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s4B_bs32k_cls"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s4B_bs32k_cls/checkpoints/epoch_3.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 64
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
save_every_n_steps: 3052
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s512m_bs16k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_clip_vit_l16_s512m_bs16k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 16384
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 5e-4
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s512m_bs16k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 128
|
| 33 |
+
zeroshot_steps: 6104
|
| 34 |
+
val_steps: 6104
|
| 35 |
+
save_every_n_steps: 6104
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/lit_vit_l16_s512m_bs32k.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "lit_cls_vit_l16_s512m_bs32k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..51200}.tar'
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-avg"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
lock_image: true
|
| 25 |
+
lock_image_unlocked_groups: 1
|
| 26 |
+
pretrained_image: './logs/cls_vit_l16_s512m_bs32k/checkpoints/epoch_1.pt'
|
| 27 |
+
|
| 28 |
+
logs: './logs'
|
| 29 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 30 |
+
|
| 31 |
+
report_to: "tensorboard"
|
| 32 |
+
log_every_n_steps: 64
|
| 33 |
+
zeroshot_steps: 3052
|
| 34 |
+
val_steps: 3052
|
| 35 |
+
save_every_n_steps: 3052
|
| 36 |
+
delete_prev_step_ckpt: true
|
| 37 |
+
|
| 38 |
+
resume: latest
|
configs/cls_schedule/test.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_vit_l16_s512m_bs32k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 500
|
| 8 |
+
global_batch_size: 3200
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 64
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/exp_schedule/cls_vit_l16_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "clsclip_l16_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
coca_caption_loss_weight: 0.5
|
| 25 |
+
|
| 26 |
+
logs: './logs'
|
| 27 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 28 |
+
|
| 29 |
+
report_to: "tensorboard"
|
| 30 |
+
log_every_n_steps: 32
|
| 31 |
+
zeroshot_steps: 3052
|
| 32 |
+
val_steps: 3052
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 36 |
+
zeroshot_frequency: 1
|
| 37 |
+
resume: latest
|
configs/exp_schedule/cls_vit_l16_s12.8B_bs90k_w1.0.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "clsclip_l16_224_s12.8B_bs90k_w1.0"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
coca_caption_loss_weight: 1.0
|
| 25 |
+
|
| 26 |
+
logs: './logs'
|
| 27 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 28 |
+
|
| 29 |
+
report_to: "tensorboard"
|
| 30 |
+
log_every_n_steps: 32
|
| 31 |
+
zeroshot_steps: 3052
|
| 32 |
+
val_steps: 3052
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 36 |
+
zeroshot_frequency: 1
|
| 37 |
+
resume: latest
|
configs/exp_schedule/test.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "clsclip_test"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 8000
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
coca_caption_loss_weight: 0.5
|
| 25 |
+
|
| 26 |
+
logs: './logs'
|
| 27 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 28 |
+
|
| 29 |
+
report_to: "tensorboard"
|
| 30 |
+
log_every_n_steps: 32
|
| 31 |
+
zeroshot_steps: 3052
|
| 32 |
+
val_steps: 3052
|
| 33 |
+
save_every_n_steps: 3052
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 36 |
+
zeroshot_frequency: 1
|
| 37 |
+
resume: latest
|
configs/long_schedule/clip_test.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "clip_test"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 3052
|
| 30 |
+
val_steps: 3052
|
| 31 |
+
save_every_n_steps: 3052
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/cls_test.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "cls_test"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..128000}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.98
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "CLS-ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 0
|
| 30 |
+
val_steps: 0
|
| 31 |
+
zeroshot_frequency: 0
|
| 32 |
+
val_frequency: 0
|
| 33 |
+
save_every_n_steps: 1500
|
| 34 |
+
delete_prev_step_ckpt: true
|
| 35 |
+
|
| 36 |
+
resume: latest
|
configs/long_schedule/vit_l16_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vit_l_224_s12.8B_bs90k_recap_cl128"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16-cl128"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 3052
|
| 30 |
+
val_steps: 3052
|
| 31 |
+
save_every_n_steps: 3052
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/vit_l16_224_s4B_bs32k.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vit_l16_224_s4B_bs32k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 3052
|
| 30 |
+
val_steps: 3052
|
| 31 |
+
save_every_n_steps: 3052
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/vit_l16_224_s4B_bs32k_3b.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vit_l16_224_s4B_bs32k_3B"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 32768
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 3052
|
| 30 |
+
val_steps: 3052
|
| 31 |
+
save_every_n_steps: 3052
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/vit_l16_224_s4B_bs64k.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vit_l16_224_s4B_bs64k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 1000
|
| 8 |
+
global_batch_size: 64000
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 3
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-16"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 3052
|
| 30 |
+
val_steps: 3052
|
| 31 |
+
save_every_n_steps: 3052
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
# aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/vit_l_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vit_l_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 10000
|
| 8 |
+
global_batch_size: 90112
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 1.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViT-L-14"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 3052
|
| 30 |
+
val_steps: 3052
|
| 31 |
+
save_every_n_steps: 3052
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/vitamin_l2_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l2_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 4436 # 400M
|
| 8 |
+
global_batch_size: 90160
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 2.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViTamin-L2"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 500
|
| 30 |
+
val_steps: 500
|
| 31 |
+
save_every_n_steps: 500
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/vitamin_l2_224_s12.8B_bs90k_ft256.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l2_224_s12.8B_bs90k_ft256"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 0
|
| 8 |
+
global_batch_size: 90160
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 1.0e-5
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViTamin-L2-256"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 256
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 200
|
| 30 |
+
val_steps: 200
|
| 31 |
+
save_every_n_steps: 200
|
| 32 |
+
delete_prev_step_ckpt: false
|
| 33 |
+
zeroshot_frequency: 1
|
| 34 |
+
resume: latest
|
| 35 |
+
|
| 36 |
+
pretrained: './logs/vitamin_l2_224_s12.8B_bs90k/checkpoints/epoch_10.pt' # please modify to your own path
|
| 37 |
+
pretrained_optim_scaler: false
|
| 38 |
+
lr_scheduler: const
|
| 39 |
+
wd: 0.0
|
configs/long_schedule/vitamin_l2_224_s12.8B_bs90k_ft336.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l2_224_s12.8B_bs90k_ft336"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 0
|
| 8 |
+
global_batch_size: 90160
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 1.0e-5
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViTamin-L2-336"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 336
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 200
|
| 30 |
+
val_steps: 200
|
| 31 |
+
save_every_n_steps: 200
|
| 32 |
+
delete_prev_step_ckpt: false
|
| 33 |
+
zeroshot_frequency: 1
|
| 34 |
+
resume: latest
|
| 35 |
+
|
| 36 |
+
pretrained: './logs/vitamin_l2_224_s12.8B_bs90k/checkpoints/epoch_10.pt' # please modify to your own path
|
| 37 |
+
pretrained_optim_scaler: false
|
| 38 |
+
lr_scheduler: const
|
| 39 |
+
wd: 0.0
|
configs/long_schedule/vitamin_l2_224_s12.8B_bs90k_ft384.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l2_224_s12.8B_bs90k_ft384"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 0
|
| 8 |
+
global_batch_size: 90160 # 322*280gpu = 90160
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 1.0e-5
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "ViTamin-L2-384"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 384
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 200
|
| 30 |
+
val_steps: 200
|
| 31 |
+
save_every_n_steps: 200
|
| 32 |
+
delete_prev_step_ckpt: false
|
| 33 |
+
zeroshot_frequency: 1
|
| 34 |
+
resume: latest
|
| 35 |
+
|
| 36 |
+
pretrained: './logs/vitamin_l2_224_s12.8B_bs90k/checkpoints/epoch_10.pt' # please modify to your own path
|
| 37 |
+
pretrained_optim_scaler: false
|
| 38 |
+
lr_scheduler: const
|
| 39 |
+
wd: 0.0
|
configs/long_schedule/vitamin_l_224_s12.8B_bs90k.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l_224_s12.8B_bs90k"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 782
|
| 8 |
+
global_batch_size: 90160
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10
|
| 11 |
+
lr: 2.0e-3
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "vit_l16_mbconv_glu_d31_224"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 224
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 3052
|
| 30 |
+
val_steps: 3052
|
| 31 |
+
save_every_n_steps: 3052
|
| 32 |
+
delete_prev_step_ckpt: true
|
| 33 |
+
aug_cfg: {'scale': [0.4, 1.0], 'color_jitter': [0.32, 0.32, 0.32, 0.08], 'color_jitter_prob': 0.8, 'gray_scale_prob': 0.2}
|
| 34 |
+
zeroshot_frequency: 1
|
| 35 |
+
resume: latest
|
configs/long_schedule/vitamin_l_224_s12.8B_bs90k_ft256.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l_224_s12.8B_bs90k_ft256"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 0
|
| 8 |
+
global_batch_size: 90240
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 1
|
| 11 |
+
lr: 1.0e-5
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "vit_l16_mbconv_glu_d31_ft256"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 256
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 200
|
| 30 |
+
val_steps: 200
|
| 31 |
+
save_every_n_steps: 200
|
| 32 |
+
delete_prev_step_ckpt: false
|
| 33 |
+
zeroshot_frequency: 1
|
| 34 |
+
resume: latest
|
| 35 |
+
|
| 36 |
+
pretrained: './logs/vitamin_l_224_s12.8B_bs90k/checkpoints/epoch_10.pt' # please modify to your own path
|
| 37 |
+
pretrained_optim_scaler: false
|
| 38 |
+
lr_scheduler: const
|
| 39 |
+
wd: 0.0
|
configs/long_schedule/vitamin_l_224_s12.8B_bs90k_ft336.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l_224_s12.8B_bs90k_ft336"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 512_000_000
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 0
|
| 8 |
+
global_batch_size: 90200
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10 # NOTE(review): every sibling fine-tune config (ft256, and the vitamin_l2 ft256/336/384) uses epochs: 1 with 512M samples — confirm 10 is intended here
|
| 11 |
+
lr: 1.0e-5
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "vit_l16_mbconv_glu_d31_ft336"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 336
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 500
|
| 30 |
+
val_steps: 500
|
| 31 |
+
save_every_n_steps: 500
|
| 32 |
+
delete_prev_step_ckpt: false
|
| 33 |
+
zeroshot_frequency: 1
|
| 34 |
+
resume: latest
|
| 35 |
+
|
| 36 |
+
pretrained: './logs/vitamin_l_224_s12.8B_bs90k/checkpoints/epoch_10.pt' # please modify to your own path
|
| 37 |
+
pretrained_optim_scaler: false
|
| 38 |
+
lr_scheduler: const
|
| 39 |
+
wd: 0.0
|
configs/long_schedule/vitamin_l_224_s12.8B_bs90k_ft384.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
save_frequency: 1
|
| 2 |
+
name: "vitamin_l_224_s12.8B_bs90k_ft384"
|
| 3 |
+
train_data: '/datasets/datacomp_1b/data/{00000..140089}.tar' # please modify to your own path
|
| 4 |
+
train_num_samples: 1_280_000_000 # NOTE(review): sibling fine-tune configs use 512_000_000 — confirm this larger per-epoch sample count is intended
|
| 5 |
+
dataset_type: webdataset
|
| 6 |
+
precision: 'amp_bfloat16'
|
| 7 |
+
warmup: 0
|
| 8 |
+
global_batch_size: 90200
|
| 9 |
+
batch_size: 0
|
| 10 |
+
epochs: 10 # NOTE(review): sibling fine-tune configs use epochs: 1 — with 1.28B samples above this schedules 12.8B fine-tune samples; confirm intended
|
| 11 |
+
lr: 1.0e-5
|
| 12 |
+
beta1: 0.9
|
| 13 |
+
beta2: 0.95
|
| 14 |
+
eps: 1.0e-6
|
| 15 |
+
workers: 6
|
| 16 |
+
model: "vit_l16_mbconv_glu_d31_ft384"
|
| 17 |
+
seed: 0
|
| 18 |
+
ddp_static_graph: true
|
| 19 |
+
local_loss: true
|
| 20 |
+
gather_with_grad: true
|
| 21 |
+
force_image_size: 384
|
| 22 |
+
grad_checkpointing: true
|
| 23 |
+
|
| 24 |
+
logs: './logs'
|
| 25 |
+
imagenet_val: './imagenet1k/val' # please modify to your own path
|
| 26 |
+
|
| 27 |
+
report_to: "tensorboard"
|
| 28 |
+
log_every_n_steps: 32
|
| 29 |
+
zeroshot_steps: 500
|
| 30 |
+
val_steps: 500
|
| 31 |
+
save_every_n_steps: 500
|
| 32 |
+
delete_prev_step_ckpt: false
|
| 33 |
+
zeroshot_frequency: 1
|
| 34 |
+
resume: latest
|
| 35 |
+
|
| 36 |
+
pretrained: './logs/vitamin_l_224_s12.8B_bs90k/checkpoints/epoch_10.pt' # please modify to your own path
|
| 37 |
+
pretrained_optim_scaler: false
|
| 38 |
+
lr_scheduler: const
|
| 39 |
+
wd: 0.0
|