| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 3858, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.38880248833592534, |
| "grad_norm": 0.6042742133140564, |
| "learning_rate": 1.740798341109383e-05, |
| "loss": 0.2151, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.38880248833592534, |
| "eval_accuracy": 0.9762738234150136, |
| "eval_loss": 0.10295161604881287, |
| "eval_runtime": 26.8587, |
| "eval_samples_per_second": 95.723, |
| "eval_steps_per_second": 11.989, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7776049766718507, |
| "grad_norm": 0.04766521230340004, |
| "learning_rate": 1.4815966822187664e-05, |
| "loss": 0.1167, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7776049766718507, |
| "eval_accuracy": 0.9875534811357448, |
| "eval_loss": 0.05390123650431633, |
| "eval_runtime": 26.735, |
| "eval_samples_per_second": 96.166, |
| "eval_steps_per_second": 12.044, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.166407465007776, |
| "grad_norm": 0.008871573023498058, |
| "learning_rate": 1.2223950233281495e-05, |
| "loss": 0.0687, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.166407465007776, |
| "eval_accuracy": 0.9793854531310774, |
| "eval_loss": 0.12513048946857452, |
| "eval_runtime": 27.1074, |
| "eval_samples_per_second": 94.845, |
| "eval_steps_per_second": 11.879, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5552099533437014, |
| "grad_norm": 0.0014064661227166653, |
| "learning_rate": 9.631933644375326e-06, |
| "loss": 0.0279, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.5552099533437014, |
| "eval_accuracy": 0.9778296382730455, |
| "eval_loss": 0.148821160197258, |
| "eval_runtime": 26.879, |
| "eval_samples_per_second": 95.651, |
| "eval_steps_per_second": 11.98, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.9440124416796267, |
| "grad_norm": 0.002627410925924778, |
| "learning_rate": 7.039917055469155e-06, |
| "loss": 0.0293, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.9440124416796267, |
| "eval_accuracy": 0.9914430182808246, |
| "eval_loss": 0.04895803704857826, |
| "eval_runtime": 26.8775, |
| "eval_samples_per_second": 95.656, |
| "eval_steps_per_second": 11.98, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.332814930015552, |
| "grad_norm": 0.0008352847071364522, |
| "learning_rate": 4.447900466562986e-06, |
| "loss": 0.0119, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.332814930015552, |
| "eval_accuracy": 0.9766627771295215, |
| "eval_loss": 0.15932457149028778, |
| "eval_runtime": 27.4463, |
| "eval_samples_per_second": 93.674, |
| "eval_steps_per_second": 11.732, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.721617418351477, |
| "grad_norm": 0.006800688803195953, |
| "learning_rate": 1.8558838776568172e-06, |
| "loss": 0.0045, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.721617418351477, |
| "eval_accuracy": 0.9797744068455854, |
| "eval_loss": 0.14682677388191223, |
| "eval_runtime": 27.6918, |
| "eval_samples_per_second": 92.843, |
| "eval_steps_per_second": 11.628, |
| "step": 3500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 3858, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "total_flos": 5863351757603040.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|