Upload checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins
Browse files
checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/wandb/offline-run-20260127_015413-checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins-run0/files/output.log
CHANGED
|
@@ -1008,6 +1008,53 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1008 |
[[34m2026-01-27 03:48:48[39m] (step=0000997) Train Loss mse: 0.0083, Train Loss ce: 0.1699, Train Steps/Sec: 0.15,
|
| 1009 |
[[34m2026-01-27 03:48:54[39m] (step=0000998) Train Loss mse: 0.0078, Train Loss ce: 0.1487, Train Steps/Sec: 0.15,
|
| 1010 |
[[34m2026-01-27 03:49:01[39m] (step=0000999) Train Loss mse: 0.0096, Train Loss ce: 0.1769, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1011 |
FullyShardedDataParallel(
|
| 1012 |
(_fsdp_wrapped_module): Bagel(
|
| 1013 |
(language_model): Qwen2ForCausalLM(
|
|
@@ -1194,13 +1241,6 @@ Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_mov
|
|
| 1194 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1195 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1196 |
ce_avg: 0.20799879729747772, mse_avg: 0.007969205267727375
|
| 1197 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step1000
|
| 1198 |
-
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 1199 |
-
[eval debug] first 3 batch fingerprints:
|
| 1200 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1201 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1202 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1203 |
-
ce_avg: 0.17712201178073883, mse_avg: 0.00836949609220028
|
| 1204 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step1500
|
| 1205 |
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 1206 |
[eval debug] first 3 batch fingerprints:
|
|
@@ -1215,53 +1255,6 @@ Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_mov
|
|
| 1215 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1216 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1217 |
ce_avg: 0.1749173104763031, mse_avg: 0.009318988770246506
|
| 1218 |
-
[[34m2026-01-27 03:49:39[39m] (step=0001000) Train Loss mse: 0.0100, Train Loss ce: 0.1701, Train Steps/Sec: 0.03,
|
| 1219 |
-
[[34m2026-01-27 03:49:45[39m] (step=0001001) Train Loss mse: 0.0078, Train Loss ce: 0.1978, Train Steps/Sec: 0.16,
|
| 1220 |
-
[[34m2026-01-27 03:49:51[39m] (step=0001002) Train Loss mse: 0.0093, Train Loss ce: 0.1890, Train Steps/Sec: 0.16,
|
| 1221 |
-
[[34m2026-01-27 03:49:58[39m] (step=0001003) Train Loss mse: 0.0089, Train Loss ce: 0.1751, Train Steps/Sec: 0.16,
|
| 1222 |
-
[[34m2026-01-27 03:50:04[39m] (step=0001004) Train Loss mse: 0.0100, Train Loss ce: 0.1660, Train Steps/Sec: 0.16,
|
| 1223 |
-
[[34m2026-01-27 03:50:10[39m] (step=0001005) Train Loss mse: 0.0092, Train Loss ce: 0.1881, Train Steps/Sec: 0.16,
|
| 1224 |
-
[[34m2026-01-27 03:50:17[39m] (step=0001006) Train Loss mse: 0.0088, Train Loss ce: 0.1926, Train Steps/Sec: 0.15,
|
| 1225 |
-
[[34m2026-01-27 03:50:24[39m] (step=0001007) Train Loss mse: 0.0083, Train Loss ce: 0.1839, Train Steps/Sec: 0.15,
|
| 1226 |
-
[[34m2026-01-27 03:50:30[39m] (step=0001008) Train Loss mse: 0.0097, Train Loss ce: 0.2083, Train Steps/Sec: 0.16,
|
| 1227 |
-
[[34m2026-01-27 03:50:37[39m] (step=0001009) Train Loss mse: 0.0083, Train Loss ce: 0.1949, Train Steps/Sec: 0.16,
|
| 1228 |
-
[[34m2026-01-27 03:50:43[39m] (step=0001010) Train Loss mse: 0.0074, Train Loss ce: 0.1642, Train Steps/Sec: 0.16,
|
| 1229 |
-
[[34m2026-01-27 03:50:49[39m] (step=0001011) Train Loss mse: 0.0095, Train Loss ce: 0.2152, Train Steps/Sec: 0.16,
|
| 1230 |
-
[[34m2026-01-27 03:50:56[39m] (step=0001012) Train Loss mse: 0.0085, Train Loss ce: 0.1922, Train Steps/Sec: 0.16,
|
| 1231 |
-
[[34m2026-01-27 03:51:02[39m] (step=0001013) Train Loss mse: 0.0084, Train Loss ce: 0.1974, Train Steps/Sec: 0.16,
|
| 1232 |
-
[[34m2026-01-27 03:51:09[39m] (step=0001014) Train Loss mse: 0.0080, Train Loss ce: 0.1732, Train Steps/Sec: 0.15,
|
| 1233 |
-
[[34m2026-01-27 03:51:15[39m] (step=0001015) Train Loss mse: 0.0084, Train Loss ce: 0.2082, Train Steps/Sec: 0.16,
|
| 1234 |
-
[[34m2026-01-27 03:51:22[39m] (step=0001016) Train Loss mse: 0.0088, Train Loss ce: 0.1730, Train Steps/Sec: 0.15,
|
| 1235 |
-
[[34m2026-01-27 03:51:28[39m] (step=0001017) Train Loss mse: 0.0093, Train Loss ce: 0.2090, Train Steps/Sec: 0.16,
|
| 1236 |
-
[[34m2026-01-27 03:51:34[39m] (step=0001018) Train Loss mse: 0.0091, Train Loss ce: 0.2026, Train Steps/Sec: 0.16,
|
| 1237 |
-
[[34m2026-01-27 03:51:41[39m] (step=0001019) Train Loss mse: 0.0095, Train Loss ce: 0.1736, Train Steps/Sec: 0.16,
|
| 1238 |
-
[[34m2026-01-27 03:51:47[39m] (step=0001020) Train Loss mse: 0.0092, Train Loss ce: 0.2094, Train Steps/Sec: 0.16,
|
| 1239 |
-
[[34m2026-01-27 03:51:53[39m] (step=0001021) Train Loss mse: 0.0091, Train Loss ce: 0.1846, Train Steps/Sec: 0.16,
|
| 1240 |
-
[[34m2026-01-27 03:52:00[39m] (step=0001022) Train Loss mse: 0.0093, Train Loss ce: 0.1848, Train Steps/Sec: 0.16,
|
| 1241 |
-
[[34m2026-01-27 03:52:06[39m] (step=0001023) Train Loss mse: 0.0078, Train Loss ce: 0.1752, Train Steps/Sec: 0.15,
|
| 1242 |
-
[[34m2026-01-27 03:52:13[39m] (step=0001024) Train Loss mse: 0.0087, Train Loss ce: 0.1691, Train Steps/Sec: 0.14,
|
| 1243 |
-
[[34m2026-01-27 03:52:20[39m] (step=0001025) Train Loss mse: 0.0084, Train Loss ce: 0.1881, Train Steps/Sec: 0.16,
|
| 1244 |
-
[[34m2026-01-27 03:52:26[39m] (step=0001026) Train Loss mse: 0.0088, Train Loss ce: 0.2076, Train Steps/Sec: 0.16,
|
| 1245 |
-
[[34m2026-01-27 03:52:32[39m] (step=0001027) Train Loss mse: 0.0108, Train Loss ce: 0.1895, Train Steps/Sec: 0.16,
|
| 1246 |
-
[[34m2026-01-27 03:52:39[39m] (step=0001028) Train Loss mse: 0.0085, Train Loss ce: 0.1782, Train Steps/Sec: 0.16,
|
| 1247 |
-
[[34m2026-01-27 03:52:45[39m] (step=0001029) Train Loss mse: 0.0085, Train Loss ce: 0.1808, Train Steps/Sec: 0.16,
|
| 1248 |
-
[[34m2026-01-27 03:52:51[39m] (step=0001030) Train Loss mse: 0.0106, Train Loss ce: 0.1796, Train Steps/Sec: 0.18,
|
| 1249 |
-
[[34m2026-01-27 03:52:57[39m] (step=0001031) Train Loss mse: 0.0087, Train Loss ce: 0.1868, Train Steps/Sec: 0.16,
|
| 1250 |
-
[[34m2026-01-27 03:53:04[39m] (step=0001032) Train Loss mse: 0.0089, Train Loss ce: 0.2020, Train Steps/Sec: 0.15,
|
| 1251 |
-
[[34m2026-01-27 03:53:11[39m] (step=0001033) Train Loss mse: 0.0086, Train Loss ce: 0.1978, Train Steps/Sec: 0.14,
|
| 1252 |
-
[[34m2026-01-27 03:53:17[39m] (step=0001034) Train Loss mse: 0.0092, Train Loss ce: 0.1917, Train Steps/Sec: 0.15,
|
| 1253 |
-
[[34m2026-01-27 03:53:24[39m] (step=0001035) Train Loss mse: 0.0099, Train Loss ce: 0.1958, Train Steps/Sec: 0.16,
|
| 1254 |
-
[[34m2026-01-27 03:53:30[39m] (step=0001036) Train Loss mse: 0.0081, Train Loss ce: 0.1909, Train Steps/Sec: 0.16,
|
| 1255 |
-
[[34m2026-01-27 03:53:36[39m] (step=0001037) Train Loss mse: 0.0079, Train Loss ce: 0.1868, Train Steps/Sec: 0.16,
|
| 1256 |
-
[[34m2026-01-27 03:53:43[39m] (step=0001038) Train Loss mse: 0.0103, Train Loss ce: 0.1744, Train Steps/Sec: 0.16,
|
| 1257 |
-
[[34m2026-01-27 03:53:49[39m] (step=0001039) Train Loss mse: 0.0090, Train Loss ce: 0.2039, Train Steps/Sec: 0.16,
|
| 1258 |
-
[[34m2026-01-27 03:53:55[39m] (step=0001040) Train Loss mse: 0.0081, Train Loss ce: 0.1573, Train Steps/Sec: 0.16,
|
| 1259 |
-
[[34m2026-01-27 03:54:02[39m] (step=0001041) Train Loss mse: 0.0091, Train Loss ce: 0.1810, Train Steps/Sec: 0.15,
|
| 1260 |
-
[[34m2026-01-27 03:54:09[39m] (step=0001042) Train Loss mse: 0.0087, Train Loss ce: 0.2339, Train Steps/Sec: 0.15,
|
| 1261 |
-
[[34m2026-01-27 03:54:15[39m] (step=0001043) Train Loss mse: 0.0082, Train Loss ce: 0.1785, Train Steps/Sec: 0.15,
|
| 1262 |
-
[[34m2026-01-27 03:54:22[39m] (step=0001044) Train Loss mse: 0.0112, Train Loss ce: 0.2073, Train Steps/Sec: 0.16,
|
| 1263 |
-
[[34m2026-01-27 03:54:28[39m] (step=0001045) Train Loss mse: 0.0096, Train Loss ce: 0.1835, Train Steps/Sec: 0.16,
|
| 1264 |
-
[[34m2026-01-27 03:54:34[39m] (step=0001046) Train Loss mse: 0.0086, Train Loss ce: 0.1964, Train Steps/Sec: 0.16,
|
| 1265 |
[[34m2026-01-27 03:54:41[39m] (step=0001047) Train Loss mse: 0.0085, Train Loss ce: 0.1884, Train Steps/Sec: 0.16,
|
| 1266 |
[[34m2026-01-27 03:54:47[39m] (step=0001048) Train Loss mse: 0.0083, Train Loss ce: 0.2145, Train Steps/Sec: 0.16,
|
| 1267 |
[[34m2026-01-27 03:54:53[39m] (step=0001049) Train Loss mse: 0.0093, Train Loss ce: 0.1888, Train Steps/Sec: 0.16,
|
|
@@ -2610,20 +2603,6 @@ ce_avg: 0.1749173104763031, mse_avg: 0.009318988770246506
|
|
| 2610 |
[[34m2026-01-27 06:21:07[39m] (step=0002392) Train Loss mse: 0.0086, Train Loss ce: 0.1506, Train Steps/Sec: 0.16,
|
| 2611 |
[[34m2026-01-27 06:21:14[39m] (step=0002393) Train Loss mse: 0.0084, Train Loss ce: 0.1657, Train Steps/Sec: 0.15,
|
| 2612 |
[[34m2026-01-27 06:21:20[39m] (step=0002394) Train Loss mse: 0.0074, Train Loss ce: 0.1535, Train Steps/Sec: 0.15,
|
| 2613 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step2500
|
| 2614 |
-
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 2615 |
-
[eval debug] first 3 batch fingerprints:
|
| 2616 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2617 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2618 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2619 |
-
ce_avg: 0.16938790678977966, mse_avg: 0.009705973789095879
|
| 2620 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step3000
|
| 2621 |
-
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 2622 |
-
[eval debug] first 3 batch fingerprints:
|
| 2623 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2624 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2625 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2626 |
-
ce_avg: 0.1402648687362671, mse_avg: 0.007266872562468052
|
| 2627 |
[[34m2026-01-27 06:21:27[39m] (step=0002395) Train Loss mse: 0.0083, Train Loss ce: 0.1900, Train Steps/Sec: 0.16,
|
| 2628 |
[[34m2026-01-27 06:21:34[39m] (step=0002396) Train Loss mse: 0.0074, Train Loss ce: 0.1865, Train Steps/Sec: 0.15,
|
| 2629 |
[[34m2026-01-27 06:21:40[39m] (step=0002397) Train Loss mse: 0.0096, Train Loss ce: 0.1719, Train Steps/Sec: 0.15,
|
|
@@ -2723,6 +2702,27 @@ ce_avg: 0.1402648687362671, mse_avg: 0.007266872562468052
|
|
| 2723 |
[[34m2026-01-27 06:31:50[39m] (step=0002491) Train Loss mse: 0.0079, Train Loss ce: 0.1600, Train Steps/Sec: 0.16,
|
| 2724 |
[[34m2026-01-27 06:31:57[39m] (step=0002492) Train Loss mse: 0.0075, Train Loss ce: 0.1430, Train Steps/Sec: 0.15,
|
| 2725 |
[[34m2026-01-27 06:32:03[39m] (step=0002493) Train Loss mse: 0.0081, Train Loss ce: 0.1674, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2726 |
[[34m2026-01-27 06:32:10[39m] (step=0002494) Train Loss mse: 0.0080, Train Loss ce: 0.1591, Train Steps/Sec: 0.15,
|
| 2727 |
[[34m2026-01-27 06:32:17[39m] (step=0002495) Train Loss mse: 0.0089, Train Loss ce: 0.1406, Train Steps/Sec: 0.15,
|
| 2728 |
[[34m2026-01-27 06:32:23[39m] (step=0002496) Train Loss mse: 0.0077, Train Loss ce: 0.1890, Train Steps/Sec: 0.15,
|
|
@@ -3751,6 +3751,20 @@ ce_avg: 0.1402648687362671, mse_avg: 0.007266872562468052
|
|
| 3751 |
[[34m2026-01-27 08:26:36[39m] (step=0003516) Train Loss mse: 0.0078, Train Loss ce: 0.1360, Train Steps/Sec: 0.16,
|
| 3752 |
[[34m2026-01-27 08:26:43[39m] (step=0003517) Train Loss mse: 0.0089, Train Loss ce: 0.1218, Train Steps/Sec: 0.16,
|
| 3753 |
[[34m2026-01-27 08:26:49[39m] (step=0003518) Train Loss mse: 0.0083, Train Loss ce: 0.1212, Train Steps/Sec: 0.16,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3754 |
[[34m2026-01-27 08:26:56[39m] (step=0003519) Train Loss mse: 0.0099, Train Loss ce: 0.1251, Train Steps/Sec: 0.16,
|
| 3755 |
[[34m2026-01-27 08:27:02[39m] (step=0003520) Train Loss mse: 0.0078, Train Loss ce: 0.1416, Train Steps/Sec: 0.16,
|
| 3756 |
[[34m2026-01-27 08:27:08[39m] (step=0003521) Train Loss mse: 0.0084, Train Loss ce: 0.1459, Train Steps/Sec: 0.16,
|
|
@@ -3807,27 +3821,6 @@ ce_avg: 0.1402648687362671, mse_avg: 0.007266872562468052
|
|
| 3807 |
[[34m2026-01-27 08:32:34[39m] (step=0003572) Train Loss mse: 0.0101, Train Loss ce: 0.1471, Train Steps/Sec: 0.18,
|
| 3808 |
[[34m2026-01-27 08:32:40[39m] (step=0003573) Train Loss mse: 0.0079, Train Loss ce: 0.1128, Train Steps/Sec: 0.16,
|
| 3809 |
[[34m2026-01-27 08:32:46[39m] (step=0003574) Train Loss mse: 0.0077, Train Loss ce: 0.1507, Train Steps/Sec: 0.16,
|
| 3810 |
-
[
|
| 3811 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step3500
|
| 3812 |
-
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 3813 |
-
[eval debug] first 3 batch fingerprints:
|
| 3814 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3815 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3816 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3817 |
-
ce_avg: 0.13325323164463043, mse_avg: 0.007202878128737211
|
| 3818 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step4000
|
| 3819 |
-
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 3820 |
-
[eval debug] first 3 batch fingerprints:
|
| 3821 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3822 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3823 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3824 |
-
ce_avg: 0.12750361859798431, mse_avg: 0.007511829491704702
|
| 3825 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step4500
|
| 3826 |
-
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 3827 |
-
[eval debug] first 3 batch fingerprints:
|
| 3828 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3829 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3830 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3831 |
[[34m2026-01-27 08:32:53[39m] (step=0003575) Train Loss mse: 0.0074, Train Loss ce: 0.1406, Train Steps/Sec: 0.15,
|
| 3832 |
[[34m2026-01-27 08:33:00[39m] (step=0003576) Train Loss mse: 0.0093, Train Loss ce: 0.1464, Train Steps/Sec: 0.15,
|
| 3833 |
[[34m2026-01-27 08:33:06[39m] (step=0003577) Train Loss mse: 0.0077, Train Loss ce: 0.1396, Train Steps/Sec: 0.15,
|
|
@@ -4953,13 +4946,6 @@ Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_mov
|
|
| 4953 |
[[34m2026-01-27 10:34:38[39m] (step=0004697) Train Loss mse: 0.0079, Train Loss ce: 0.1542, Train Steps/Sec: 0.15,
|
| 4954 |
[[34m2026-01-27 10:34:44[39m] (step=0004698) Train Loss mse: 0.0085, Train Loss ce: 0.1140, Train Steps/Sec: 0.16,
|
| 4955 |
[[34m2026-01-27 10:34:51[39m] (step=0004699) Train Loss mse: 0.0074, Train Loss ce: 0.1132, Train Steps/Sec: 0.15,
|
| 4956 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step5000
|
| 4957 |
-
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 4958 |
-
[eval debug] first 3 batch fingerprints:
|
| 4959 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 4960 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 4961 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 4962 |
-
ce_avg: 0.1215158998966217, mse_avg: 0.0074405064806342125
|
| 4963 |
[[34m2026-01-27 10:34:58[39m] (step=0004700) Train Loss mse: 0.0072, Train Loss ce: 0.1064, Train Steps/Sec: 0.14,
|
| 4964 |
[[34m2026-01-27 10:35:04[39m] (step=0004701) Train Loss mse: 0.0087, Train Loss ce: 0.1294, Train Steps/Sec: 0.16,
|
| 4965 |
[[34m2026-01-27 10:35:11[39m] (step=0004702) Train Loss mse: 0.0079, Train Loss ce: 0.1206, Train Steps/Sec: 0.16,
|
|
@@ -5262,4 +5248,11 @@ ce_avg: 0.1215158998966217, mse_avg: 0.0074405064806342125
|
|
| 5262 |
[[34m2026-01-27 11:07:18[39m] (step=0004999) Train Loss mse: 0.0092, Train Loss ce: 0.1221, Train Steps/Sec: 0.16,
|
| 5263 |
[[34m2026-01-27 11:07:56[39m] (step=0005000) Train Loss mse: 0.0077, Train Loss ce: 0.1160, Train Steps/Sec: 0.03,
|
| 5264 |
[[34m2026-01-27 11:07:56[39m] Saving checkpoint to /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/0005000.
|
| 5265 |
-
[[34m2026-01-27 11:10:32[39m] Done!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1008 |
[[34m2026-01-27 03:48:48[39m] (step=0000997) Train Loss mse: 0.0083, Train Loss ce: 0.1699, Train Steps/Sec: 0.15,
|
| 1009 |
[[34m2026-01-27 03:48:54[39m] (step=0000998) Train Loss mse: 0.0078, Train Loss ce: 0.1487, Train Steps/Sec: 0.15,
|
| 1010 |
[[34m2026-01-27 03:49:01[39m] (step=0000999) Train Loss mse: 0.0096, Train Loss ce: 0.1769, Train Steps/Sec: 0.15,
|
| 1011 |
+
[[34m2026-01-27 03:49:39[39m] (step=0001000) Train Loss mse: 0.0100, Train Loss ce: 0.1701, Train Steps/Sec: 0.03,
|
| 1012 |
+
[[34m2026-01-27 03:49:45[39m] (step=0001001) Train Loss mse: 0.0078, Train Loss ce: 0.1978, Train Steps/Sec: 0.16,
|
| 1013 |
+
[[34m2026-01-27 03:49:51[39m] (step=0001002) Train Loss mse: 0.0093, Train Loss ce: 0.1890, Train Steps/Sec: 0.16,
|
| 1014 |
+
[[34m2026-01-27 03:49:58[39m] (step=0001003) Train Loss mse: 0.0089, Train Loss ce: 0.1751, Train Steps/Sec: 0.16,
|
| 1015 |
+
[[34m2026-01-27 03:50:04[39m] (step=0001004) Train Loss mse: 0.0100, Train Loss ce: 0.1660, Train Steps/Sec: 0.16,
|
| 1016 |
+
[[34m2026-01-27 03:50:10[39m] (step=0001005) Train Loss mse: 0.0092, Train Loss ce: 0.1881, Train Steps/Sec: 0.16,
|
| 1017 |
+
[[34m2026-01-27 03:50:17[39m] (step=0001006) Train Loss mse: 0.0088, Train Loss ce: 0.1926, Train Steps/Sec: 0.15,
|
| 1018 |
+
[[34m2026-01-27 03:50:24[39m] (step=0001007) Train Loss mse: 0.0083, Train Loss ce: 0.1839, Train Steps/Sec: 0.15,
|
| 1019 |
+
[[34m2026-01-27 03:50:30[39m] (step=0001008) Train Loss mse: 0.0097, Train Loss ce: 0.2083, Train Steps/Sec: 0.16,
|
| 1020 |
+
[[34m2026-01-27 03:50:37[39m] (step=0001009) Train Loss mse: 0.0083, Train Loss ce: 0.1949, Train Steps/Sec: 0.16,
|
| 1021 |
+
[[34m2026-01-27 03:50:43[39m] (step=0001010) Train Loss mse: 0.0074, Train Loss ce: 0.1642, Train Steps/Sec: 0.16,
|
| 1022 |
+
[[34m2026-01-27 03:50:49[39m] (step=0001011) Train Loss mse: 0.0095, Train Loss ce: 0.2152, Train Steps/Sec: 0.16,
|
| 1023 |
+
[[34m2026-01-27 03:50:56[39m] (step=0001012) Train Loss mse: 0.0085, Train Loss ce: 0.1922, Train Steps/Sec: 0.16,
|
| 1024 |
+
[[34m2026-01-27 03:51:02[39m] (step=0001013) Train Loss mse: 0.0084, Train Loss ce: 0.1974, Train Steps/Sec: 0.16,
|
| 1025 |
+
[[34m2026-01-27 03:51:09[39m] (step=0001014) Train Loss mse: 0.0080, Train Loss ce: 0.1732, Train Steps/Sec: 0.15,
|
| 1026 |
+
[[34m2026-01-27 03:51:15[39m] (step=0001015) Train Loss mse: 0.0084, Train Loss ce: 0.2082, Train Steps/Sec: 0.16,
|
| 1027 |
+
[[34m2026-01-27 03:51:22[39m] (step=0001016) Train Loss mse: 0.0088, Train Loss ce: 0.1730, Train Steps/Sec: 0.15,
|
| 1028 |
+
[[34m2026-01-27 03:51:28[39m] (step=0001017) Train Loss mse: 0.0093, Train Loss ce: 0.2090, Train Steps/Sec: 0.16,
|
| 1029 |
+
[[34m2026-01-27 03:51:34[39m] (step=0001018) Train Loss mse: 0.0091, Train Loss ce: 0.2026, Train Steps/Sec: 0.16,
|
| 1030 |
+
[[34m2026-01-27 03:51:41[39m] (step=0001019) Train Loss mse: 0.0095, Train Loss ce: 0.1736, Train Steps/Sec: 0.16,
|
| 1031 |
+
[[34m2026-01-27 03:51:47[39m] (step=0001020) Train Loss mse: 0.0092, Train Loss ce: 0.2094, Train Steps/Sec: 0.16,
|
| 1032 |
+
[[34m2026-01-27 03:51:53[39m] (step=0001021) Train Loss mse: 0.0091, Train Loss ce: 0.1846, Train Steps/Sec: 0.16,
|
| 1033 |
+
[[34m2026-01-27 03:52:00[39m] (step=0001022) Train Loss mse: 0.0093, Train Loss ce: 0.1848, Train Steps/Sec: 0.16,
|
| 1034 |
+
[[34m2026-01-27 03:52:06[39m] (step=0001023) Train Loss mse: 0.0078, Train Loss ce: 0.1752, Train Steps/Sec: 0.15,
|
| 1035 |
+
[[34m2026-01-27 03:52:13[39m] (step=0001024) Train Loss mse: 0.0087, Train Loss ce: 0.1691, Train Steps/Sec: 0.14,
|
| 1036 |
+
[[34m2026-01-27 03:52:20[39m] (step=0001025) Train Loss mse: 0.0084, Train Loss ce: 0.1881, Train Steps/Sec: 0.16,
|
| 1037 |
+
[[34m2026-01-27 03:52:26[39m] (step=0001026) Train Loss mse: 0.0088, Train Loss ce: 0.2076, Train Steps/Sec: 0.16,
|
| 1038 |
+
[[34m2026-01-27 03:52:32[39m] (step=0001027) Train Loss mse: 0.0108, Train Loss ce: 0.1895, Train Steps/Sec: 0.16,
|
| 1039 |
+
[[34m2026-01-27 03:52:39[39m] (step=0001028) Train Loss mse: 0.0085, Train Loss ce: 0.1782, Train Steps/Sec: 0.16,
|
| 1040 |
+
[[34m2026-01-27 03:52:45[39m] (step=0001029) Train Loss mse: 0.0085, Train Loss ce: 0.1808, Train Steps/Sec: 0.16,
|
| 1041 |
+
[[34m2026-01-27 03:52:51[39m] (step=0001030) Train Loss mse: 0.0106, Train Loss ce: 0.1796, Train Steps/Sec: 0.18,
|
| 1042 |
+
[[34m2026-01-27 03:52:57[39m] (step=0001031) Train Loss mse: 0.0087, Train Loss ce: 0.1868, Train Steps/Sec: 0.16,
|
| 1043 |
+
[[34m2026-01-27 03:53:04[39m] (step=0001032) Train Loss mse: 0.0089, Train Loss ce: 0.2020, Train Steps/Sec: 0.15,
|
| 1044 |
+
[[34m2026-01-27 03:53:11[39m] (step=0001033) Train Loss mse: 0.0086, Train Loss ce: 0.1978, Train Steps/Sec: 0.14,
|
| 1045 |
+
[[34m2026-01-27 03:53:17[39m] (step=0001034) Train Loss mse: 0.0092, Train Loss ce: 0.1917, Train Steps/Sec: 0.15,
|
| 1046 |
+
[[34m2026-01-27 03:53:24[39m] (step=0001035) Train Loss mse: 0.0099, Train Loss ce: 0.1958, Train Steps/Sec: 0.16,
|
| 1047 |
+
[[34m2026-01-27 03:53:30[39m] (step=0001036) Train Loss mse: 0.0081, Train Loss ce: 0.1909, Train Steps/Sec: 0.16,
|
| 1048 |
+
[[34m2026-01-27 03:53:36[39m] (step=0001037) Train Loss mse: 0.0079, Train Loss ce: 0.1868, Train Steps/Sec: 0.16,
|
| 1049 |
+
[[34m2026-01-27 03:53:43[39m] (step=0001038) Train Loss mse: 0.0103, Train Loss ce: 0.1744, Train Steps/Sec: 0.16,
|
| 1050 |
+
[[34m2026-01-27 03:53:49[39m] (step=0001039) Train Loss mse: 0.0090, Train Loss ce: 0.2039, Train Steps/Sec: 0.16,
|
| 1051 |
+
[[34m2026-01-27 03:53:55[39m] (step=0001040) Train Loss mse: 0.0081, Train Loss ce: 0.1573, Train Steps/Sec: 0.16,
|
| 1052 |
+
[[34m2026-01-27 03:54:02[39m] (step=0001041) Train Loss mse: 0.0091, Train Loss ce: 0.1810, Train Steps/Sec: 0.15,
|
| 1053 |
+
[[34m2026-01-27 03:54:09[39m] (step=0001042) Train Loss mse: 0.0087, Train Loss ce: 0.2339, Train Steps/Sec: 0.15,
|
| 1054 |
+
[[34m2026-01-27 03:54:15[39m] (step=0001043) Train Loss mse: 0.0082, Train Loss ce: 0.1785, Train Steps/Sec: 0.15,
|
| 1055 |
+
[[34m2026-01-27 03:54:22[39m] (step=0001044) Train Loss mse: 0.0112, Train Loss ce: 0.2073, Train Steps/Sec: 0.16,
|
| 1056 |
+
[[34m2026-01-27 03:54:28[39m] (step=0001045) Train Loss mse: 0.0096, Train Loss ce: 0.1835, Train Steps/Sec: 0.16,
|
| 1057 |
+
[[34m2026-01-27 03:54:34[39m] (step=0001046) Train Loss mse: 0.0086, Train Loss ce: 0.1964, Train Steps/Sec: 0.16,
|
| 1058 |
FullyShardedDataParallel(
|
| 1059 |
(_fsdp_wrapped_module): Bagel(
|
| 1060 |
(language_model): Qwen2ForCausalLM(
|
|
|
|
| 1241 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1242 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1243 |
ce_avg: 0.20799879729747772, mse_avg: 0.007969205267727375
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1244 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step1500
|
| 1245 |
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 1246 |
[eval debug] first 3 batch fingerprints:
|
|
|
|
| 1255 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1256 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 1257 |
ce_avg: 0.1749173104763031, mse_avg: 0.009318988770246506
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1258 |
[[34m2026-01-27 03:54:41[39m] (step=0001047) Train Loss mse: 0.0085, Train Loss ce: 0.1884, Train Steps/Sec: 0.16,
|
| 1259 |
[[34m2026-01-27 03:54:47[39m] (step=0001048) Train Loss mse: 0.0083, Train Loss ce: 0.2145, Train Steps/Sec: 0.16,
|
| 1260 |
[[34m2026-01-27 03:54:53[39m] (step=0001049) Train Loss mse: 0.0093, Train Loss ce: 0.1888, Train Steps/Sec: 0.16,
|
|
|
|
| 2603 |
[[34m2026-01-27 06:21:07[39m] (step=0002392) Train Loss mse: 0.0086, Train Loss ce: 0.1506, Train Steps/Sec: 0.16,
|
| 2604 |
[[34m2026-01-27 06:21:14[39m] (step=0002393) Train Loss mse: 0.0084, Train Loss ce: 0.1657, Train Steps/Sec: 0.15,
|
| 2605 |
[[34m2026-01-27 06:21:20[39m] (step=0002394) Train Loss mse: 0.0074, Train Loss ce: 0.1535, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2606 |
[[34m2026-01-27 06:21:27[39m] (step=0002395) Train Loss mse: 0.0083, Train Loss ce: 0.1900, Train Steps/Sec: 0.16,
|
| 2607 |
[[34m2026-01-27 06:21:34[39m] (step=0002396) Train Loss mse: 0.0074, Train Loss ce: 0.1865, Train Steps/Sec: 0.15,
|
| 2608 |
[[34m2026-01-27 06:21:40[39m] (step=0002397) Train Loss mse: 0.0096, Train Loss ce: 0.1719, Train Steps/Sec: 0.15,
|
|
|
|
| 2702 |
[[34m2026-01-27 06:31:50[39m] (step=0002491) Train Loss mse: 0.0079, Train Loss ce: 0.1600, Train Steps/Sec: 0.16,
|
| 2703 |
[[34m2026-01-27 06:31:57[39m] (step=0002492) Train Loss mse: 0.0075, Train Loss ce: 0.1430, Train Steps/Sec: 0.15,
|
| 2704 |
[[34m2026-01-27 06:32:03[39m] (step=0002493) Train Loss mse: 0.0081, Train Loss ce: 0.1674, Train Steps/Sec: 0.15,
|
| 2705 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step2500
|
| 2706 |
+
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 2707 |
+
[eval debug] first 3 batch fingerprints:
|
| 2708 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2709 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2710 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2711 |
+
ce_avg: 0.16938790678977966, mse_avg: 0.009705973789095879
|
| 2712 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step3000
|
| 2713 |
+
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 2714 |
+
[eval debug] first 3 batch fingerprints:
|
| 2715 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2716 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2717 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2718 |
+
ce_avg: 0.1402648687362671, mse_avg: 0.007266872562468052
|
| 2719 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step3500
|
| 2720 |
+
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 2721 |
+
[eval debug] first 3 batch fingerprints:
|
| 2722 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2723 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2724 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 2725 |
+
ce_avg: 0.13325323164463043, mse_avg: 0.007202878128737211
|
| 2726 |
[[34m2026-01-27 06:32:10[39m] (step=0002494) Train Loss mse: 0.0080, Train Loss ce: 0.1591, Train Steps/Sec: 0.15,
|
| 2727 |
[[34m2026-01-27 06:32:17[39m] (step=0002495) Train Loss mse: 0.0089, Train Loss ce: 0.1406, Train Steps/Sec: 0.15,
|
| 2728 |
[[34m2026-01-27 06:32:23[39m] (step=0002496) Train Loss mse: 0.0077, Train Loss ce: 0.1890, Train Steps/Sec: 0.15,
|
|
|
|
| 3751 |
[[34m2026-01-27 08:26:36[39m] (step=0003516) Train Loss mse: 0.0078, Train Loss ce: 0.1360, Train Steps/Sec: 0.16,
|
| 3752 |
[[34m2026-01-27 08:26:43[39m] (step=0003517) Train Loss mse: 0.0089, Train Loss ce: 0.1218, Train Steps/Sec: 0.16,
|
| 3753 |
[[34m2026-01-27 08:26:49[39m] (step=0003518) Train Loss mse: 0.0083, Train Loss ce: 0.1212, Train Steps/Sec: 0.16,
|
| 3754 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step4000
|
| 3755 |
+
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 3756 |
+
[eval debug] first 3 batch fingerprints:
|
| 3757 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3758 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3759 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3760 |
+
ce_avg: 0.12750361859798431, mse_avg: 0.007511829491704702
|
| 3761 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step4500
|
| 3762 |
+
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 3763 |
+
[eval debug] first 3 batch fingerprints:
|
| 3764 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3765 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3766 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 3767 |
+
ce_avg: 0.12324703484773636, mse_avg: 0.007881568744778633
|
| 3768 |
[[34m2026-01-27 08:26:56[39m] (step=0003519) Train Loss mse: 0.0099, Train Loss ce: 0.1251, Train Steps/Sec: 0.16,
|
| 3769 |
[[34m2026-01-27 08:27:02[39m] (step=0003520) Train Loss mse: 0.0078, Train Loss ce: 0.1416, Train Steps/Sec: 0.16,
|
| 3770 |
[[34m2026-01-27 08:27:08[39m] (step=0003521) Train Loss mse: 0.0084, Train Loss ce: 0.1459, Train Steps/Sec: 0.16,
|
|
|
|
| 3821 |
[[34m2026-01-27 08:32:34[39m] (step=0003572) Train Loss mse: 0.0101, Train Loss ce: 0.1471, Train Steps/Sec: 0.18,
|
| 3822 |
[[34m2026-01-27 08:32:40[39m] (step=0003573) Train Loss mse: 0.0079, Train Loss ce: 0.1128, Train Steps/Sec: 0.16,
|
| 3823 |
[[34m2026-01-27 08:32:46[39m] (step=0003574) Train Loss mse: 0.0077, Train Loss ce: 0.1507, Train Steps/Sec: 0.16,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3824 |
[[34m2026-01-27 08:32:53[39m] (step=0003575) Train Loss mse: 0.0074, Train Loss ce: 0.1406, Train Steps/Sec: 0.15,
|
| 3825 |
[[34m2026-01-27 08:33:00[39m] (step=0003576) Train Loss mse: 0.0093, Train Loss ce: 0.1464, Train Steps/Sec: 0.15,
|
| 3826 |
[[34m2026-01-27 08:33:06[39m] (step=0003577) Train Loss mse: 0.0077, Train Loss ce: 0.1396, Train Steps/Sec: 0.15,
|
|
|
|
| 4946 |
[[34m2026-01-27 10:34:38[39m] (step=0004697) Train Loss mse: 0.0079, Train Loss ce: 0.1542, Train Steps/Sec: 0.15,
|
| 4947 |
[[34m2026-01-27 10:34:44[39m] (step=0004698) Train Loss mse: 0.0085, Train Loss ce: 0.1140, Train Steps/Sec: 0.16,
|
| 4948 |
[[34m2026-01-27 10:34:51[39m] (step=0004699) Train Loss mse: 0.0074, Train Loss ce: 0.1132, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4949 |
[[34m2026-01-27 10:34:58[39m] (step=0004700) Train Loss mse: 0.0072, Train Loss ce: 0.1064, Train Steps/Sec: 0.14,
|
| 4950 |
[[34m2026-01-27 10:35:04[39m] (step=0004701) Train Loss mse: 0.0087, Train Loss ce: 0.1294, Train Steps/Sec: 0.16,
|
| 4951 |
[[34m2026-01-27 10:35:11[39m] (step=0004702) Train Loss mse: 0.0079, Train Loss ce: 0.1206, Train Steps/Sec: 0.16,
|
|
|
|
| 5248 |
[[34m2026-01-27 11:07:18[39m] (step=0004999) Train Loss mse: 0.0092, Train Loss ce: 0.1221, Train Steps/Sec: 0.16,
|
| 5249 |
[[34m2026-01-27 11:07:56[39m] (step=0005000) Train Loss mse: 0.0077, Train Loss ce: 0.1160, Train Steps/Sec: 0.03,
|
| 5250 |
[[34m2026-01-27 11:07:56[39m] Saving checkpoint to /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/0005000.
|
| 5251 |
+
[[34m2026-01-27 11:10:32[39m] Done!
|
| 5252 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_move_fix3_unit_one_image_lr2e_5_ce_ins_step5000
|
| 5253 |
+
Preparing Dataset vlm_gym_match_move_fix3_unit_celoss_evalonce/vlm_gym_match_move_fix3_unit_val
|
| 5254 |
+
[eval debug] first 3 batch fingerprints:
|
| 5255 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 5256 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 5257 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_move_fix3_unit_celoss_evalonce'}]
|
| 5258 |
+
ce_avg: 0.1215158998966217, mse_avg: 0.0074405064806342125
|