{ "best_global_step": 936, "best_metric": 3.4453418254852295, "best_model_checkpoint": "igbo_en_translation_t4_optimized/checkpoint-936", "epoch": 2.992, "eval_steps": 500, "global_step": 936, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "grad_norm": 105667.1875, "learning_rate": 4.738247863247863e-05, "loss": 3.908, "step": 50 }, { "epoch": 0.32, "grad_norm": 183695.078125, "learning_rate": 4.4711538461538466e-05, "loss": 3.6837, "step": 100 }, { "epoch": 0.48, "grad_norm": 105609.046875, "learning_rate": 4.2040598290598295e-05, "loss": 3.6434, "step": 150 }, { "epoch": 0.64, "grad_norm": 90329.390625, "learning_rate": 3.9369658119658124e-05, "loss": 3.5711, "step": 200 }, { "epoch": 0.8, "grad_norm": 75909.1015625, "learning_rate": 3.6698717948717946e-05, "loss": 3.5542, "step": 250 }, { "epoch": 0.96, "grad_norm": 101548.625, "learning_rate": 3.402777777777778e-05, "loss": 3.5248, "step": 300 }, { "epoch": 1.0, "eval_loss": 3.4996626377105713, "eval_rouge1": 18.13845106805843, "eval_rouge2": 4.382326032460742, "eval_rougeL": 16.25107422982945, "eval_rougeLsum": 16.185034213563082, "eval_runtime": 7.0086, "eval_samples_per_second": 27.538, "eval_steps_per_second": 1.855, "step": 313 }, { "epoch": 1.1184, "grad_norm": 85483.953125, "learning_rate": 3.135683760683761e-05, "loss": 3.4393, "step": 350 }, { "epoch": 1.2784, "grad_norm": 84569.3203125, "learning_rate": 2.8685897435897437e-05, "loss": 3.4524, "step": 400 }, { "epoch": 1.4384000000000001, "grad_norm": 77848.4375, "learning_rate": 2.601495726495727e-05, "loss": 3.4723, "step": 450 }, { "epoch": 1.5984, "grad_norm": 73118.3046875, "learning_rate": 2.3344017094017095e-05, "loss": 3.4171, "step": 500 }, { "epoch": 1.7584, "grad_norm": 384926.9375, "learning_rate": 2.0673076923076924e-05, "loss": 3.4344, "step": 550 }, { "epoch": 1.9184, "grad_norm": 92710.7734375, "learning_rate": 1.8002136752136753e-05, "loss": 3.4022, "step": 600 }, { "epoch": 2.0, "eval_loss": 3.4569602012634277, "eval_rouge1": 17.603753476397717, "eval_rouge2": 4.407651351529355, "eval_rougeL": 15.912143520543449, "eval_rougeLsum": 15.874740254358638, "eval_runtime": 6.7217, "eval_samples_per_second": 28.713, "eval_steps_per_second": 1.934, "step": 626 }, { "epoch": 2.0768, "grad_norm": 78027.09375, "learning_rate": 1.5331196581196582e-05, "loss": 3.3792, "step": 650 }, { "epoch": 2.2368, "grad_norm": 88675.40625, "learning_rate": 1.2660256410256411e-05, "loss": 3.3763, "step": 700 }, { "epoch": 2.3968, "grad_norm": 77131.90625, "learning_rate": 9.98931623931624e-06, "loss": 3.4146, "step": 750 }, { "epoch": 2.5568, "grad_norm": 108723.25, "learning_rate": 7.318376068376069e-06, "loss": 3.3924, "step": 800 }, { "epoch": 2.7168, "grad_norm": 76727.1484375, "learning_rate": 4.647435897435897e-06, "loss": 3.3895, "step": 850 }, { "epoch": 2.8768000000000002, "grad_norm": 82499.703125, "learning_rate": 1.9764957264957265e-06, "loss": 3.3928, "step": 900 }, { "epoch": 2.992, "eval_loss": 3.4453418254852295, "eval_rouge1": 17.902942345295795, "eval_rouge2": 4.319690451903335, "eval_rougeL": 15.969199830396672, "eval_rougeLsum": 15.935294540519285, "eval_runtime": 6.777, "eval_samples_per_second": 28.479, "eval_steps_per_second": 1.918, "step": 936 } ], "logging_steps": 50, "max_steps": 936, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4049426700042240.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }