| { | |
| "best_metric": 0.822429906542056, | |
| "best_model_checkpoint": "SF-RHS-DA\\checkpoint-1384", | |
| "epoch": 39.436619718309856, | |
| "eval_steps": 500, | |
| "global_step": 1400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 0.6931, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.6931, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 6.428571428571429e-06, | |
| "loss": 0.693, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.6822429906542056, | |
| "eval_loss": 0.6927286386489868, | |
| "eval_runtime": 2.3546, | |
| "eval_samples_per_second": 45.444, | |
| "eval_steps_per_second": 2.973, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.6929, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 0.6927, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.2857142857142857e-05, | |
| "loss": 0.6925, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.6919, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6915887850467289, | |
| "eval_loss": 0.6909971237182617, | |
| "eval_runtime": 2.6496, | |
| "eval_samples_per_second": 40.383, | |
| "eval_steps_per_second": 2.642, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.6912, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.928571428571429e-05, | |
| "loss": 0.6896, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 0.6872, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.6822429906542056, | |
| "eval_loss": 0.6820780634880066, | |
| "eval_runtime": 2.8722, | |
| "eval_samples_per_second": 37.254, | |
| "eval_steps_per_second": 2.437, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.357142857142857e-05, | |
| "loss": 0.6841, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 2.5714285714285714e-05, | |
| "loss": 0.6761, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 2.7857142857142858e-05, | |
| "loss": 0.6671, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 3e-05, | |
| "loss": 0.6613, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6542056074766355, | |
| "eval_loss": 0.6552135348320007, | |
| "eval_runtime": 2.5956, | |
| "eval_samples_per_second": 41.223, | |
| "eval_steps_per_second": 2.697, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 2.9761904761904762e-05, | |
| "loss": 0.6416, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 2.9523809523809523e-05, | |
| "loss": 0.6301, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 2.9285714285714284e-05, | |
| "loss": 0.6196, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_accuracy": 0.616822429906542, | |
| "eval_loss": 0.6403455138206482, | |
| "eval_runtime": 2.3884, | |
| "eval_samples_per_second": 44.801, | |
| "eval_steps_per_second": 2.931, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.904761904761905e-05, | |
| "loss": 0.6023, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 2.880952380952381e-05, | |
| "loss": 0.5858, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 0.5762, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 2.8333333333333332e-05, | |
| "loss": 0.5695, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.6448598130841121, | |
| "eval_loss": 0.612833559513092, | |
| "eval_runtime": 2.4301, | |
| "eval_samples_per_second": 44.032, | |
| "eval_steps_per_second": 2.881, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 2.8095238095238096e-05, | |
| "loss": 0.5664, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 2.7857142857142858e-05, | |
| "loss": 0.5482, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 2.761904761904762e-05, | |
| "loss": 0.5436, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_accuracy": 0.5700934579439252, | |
| "eval_loss": 0.6765128374099731, | |
| "eval_runtime": 2.4356, | |
| "eval_samples_per_second": 43.932, | |
| "eval_steps_per_second": 2.874, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 2.738095238095238e-05, | |
| "loss": 0.5434, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 2.7142857142857144e-05, | |
| "loss": 0.5175, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 2.6904761904761905e-05, | |
| "loss": 0.5149, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.4836, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6542056074766355, | |
| "eval_loss": 0.6075218319892883, | |
| "eval_runtime": 2.3996, | |
| "eval_samples_per_second": 44.591, | |
| "eval_steps_per_second": 2.917, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 2.6428571428571428e-05, | |
| "loss": 0.4856, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 2.6190476190476192e-05, | |
| "loss": 0.5027, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 2.5952380952380953e-05, | |
| "loss": 0.4902, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_accuracy": 0.6355140186915887, | |
| "eval_loss": 0.578782856464386, | |
| "eval_runtime": 2.6977, | |
| "eval_samples_per_second": 39.663, | |
| "eval_steps_per_second": 2.595, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 2.5714285714285714e-05, | |
| "loss": 0.5014, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 2.5476190476190476e-05, | |
| "loss": 0.4682, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 2.523809523809524e-05, | |
| "loss": 0.4744, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.4759, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.719626168224299, | |
| "eval_loss": 0.5284451842308044, | |
| "eval_runtime": 2.3275, | |
| "eval_samples_per_second": 45.971, | |
| "eval_steps_per_second": 3.007, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "learning_rate": 2.4761904761904762e-05, | |
| "loss": 0.5062, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 2.4523809523809523e-05, | |
| "loss": 0.4543, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 2.4285714285714288e-05, | |
| "loss": 0.463, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 2.404761904761905e-05, | |
| "loss": 0.4746, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_accuracy": 0.6822429906542056, | |
| "eval_loss": 0.5532112121582031, | |
| "eval_runtime": 2.302, | |
| "eval_samples_per_second": 46.48, | |
| "eval_steps_per_second": 3.041, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 0.4488, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "learning_rate": 2.357142857142857e-05, | |
| "loss": 0.4454, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 11.83, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.4067, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7383177570093458, | |
| "eval_loss": 0.5355571508407593, | |
| "eval_runtime": 2.2815, | |
| "eval_samples_per_second": 46.898, | |
| "eval_steps_per_second": 3.068, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 12.11, | |
| "learning_rate": 2.3095238095238097e-05, | |
| "loss": 0.4181, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "learning_rate": 2.2857142857142858e-05, | |
| "loss": 0.4448, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "learning_rate": 2.261904761904762e-05, | |
| "loss": 0.4621, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "learning_rate": 2.238095238095238e-05, | |
| "loss": 0.4138, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_accuracy": 0.7476635514018691, | |
| "eval_loss": 0.5042079091072083, | |
| "eval_runtime": 2.239, | |
| "eval_samples_per_second": 47.789, | |
| "eval_steps_per_second": 3.126, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 13.24, | |
| "learning_rate": 2.2142857142857145e-05, | |
| "loss": 0.4153, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "learning_rate": 2.1904761904761903e-05, | |
| "loss": 0.3879, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "loss": 0.3752, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7383177570093458, | |
| "eval_loss": 0.5062668919563293, | |
| "eval_runtime": 2.265, | |
| "eval_samples_per_second": 47.24, | |
| "eval_steps_per_second": 3.09, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 0.4201, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "learning_rate": 2.1190476190476193e-05, | |
| "loss": 0.3522, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "learning_rate": 2.095238095238095e-05, | |
| "loss": 0.3762, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 14.93, | |
| "learning_rate": 2.0714285714285715e-05, | |
| "loss": 0.4158, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_accuracy": 0.7570093457943925, | |
| "eval_loss": 0.49523478746414185, | |
| "eval_runtime": 2.3011, | |
| "eval_samples_per_second": 46.5, | |
| "eval_steps_per_second": 3.042, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 15.21, | |
| "learning_rate": 2.0476190476190476e-05, | |
| "loss": 0.3994, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 15.49, | |
| "learning_rate": 2.023809523809524e-05, | |
| "loss": 0.3707, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 15.77, | |
| "learning_rate": 1.9999999999999998e-05, | |
| "loss": 0.3646, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7383177570093458, | |
| "eval_loss": 0.5439864993095398, | |
| "eval_runtime": 2.286, | |
| "eval_samples_per_second": 46.806, | |
| "eval_steps_per_second": 3.062, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "learning_rate": 1.9761904761904763e-05, | |
| "loss": 0.3574, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 16.34, | |
| "learning_rate": 1.9523809523809524e-05, | |
| "loss": 0.3817, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 16.62, | |
| "learning_rate": 1.928571428571429e-05, | |
| "loss": 0.3769, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 16.9, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 0.3644, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "eval_accuracy": 0.7757009345794392, | |
| "eval_loss": 0.5146493911743164, | |
| "eval_runtime": 2.4036, | |
| "eval_samples_per_second": 44.517, | |
| "eval_steps_per_second": 2.912, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 17.18, | |
| "learning_rate": 1.880952380952381e-05, | |
| "loss": 0.3914, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 17.46, | |
| "learning_rate": 1.8571428571428572e-05, | |
| "loss": 0.3649, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 17.75, | |
| "learning_rate": 1.8333333333333336e-05, | |
| "loss": 0.3411, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7757009345794392, | |
| "eval_loss": 0.520774245262146, | |
| "eval_runtime": 2.3841, | |
| "eval_samples_per_second": 44.881, | |
| "eval_steps_per_second": 2.936, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 18.03, | |
| "learning_rate": 1.8095238095238094e-05, | |
| "loss": 0.364, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 18.31, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 0.3429, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 18.59, | |
| "learning_rate": 1.761904761904762e-05, | |
| "loss": 0.3262, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "learning_rate": 1.7380952380952384e-05, | |
| "loss": 0.3052, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_accuracy": 0.7383177570093458, | |
| "eval_loss": 0.5784975290298462, | |
| "eval_runtime": 2.3582, | |
| "eval_samples_per_second": 45.374, | |
| "eval_steps_per_second": 2.968, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 19.15, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.33, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "learning_rate": 1.6904761904761906e-05, | |
| "loss": 0.3185, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.3355, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 1.6428571428571432e-05, | |
| "loss": 0.3398, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7383177570093458, | |
| "eval_loss": 0.5366288423538208, | |
| "eval_runtime": 2.243, | |
| "eval_samples_per_second": 47.704, | |
| "eval_steps_per_second": 3.121, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "learning_rate": 1.619047619047619e-05, | |
| "loss": 0.3025, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 20.56, | |
| "learning_rate": 1.5952380952380954e-05, | |
| "loss": 0.3167, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 20.85, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 0.3103, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "eval_accuracy": 0.7289719626168224, | |
| "eval_loss": 0.5751203298568726, | |
| "eval_runtime": 2.5146, | |
| "eval_samples_per_second": 42.551, | |
| "eval_steps_per_second": 2.784, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 21.13, | |
| "learning_rate": 1.547619047619048e-05, | |
| "loss": 0.2846, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 21.41, | |
| "learning_rate": 1.5238095238095238e-05, | |
| "loss": 0.3186, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 21.69, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.2833, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 21.97, | |
| "learning_rate": 1.4761904761904761e-05, | |
| "loss": 0.3168, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7663551401869159, | |
| "eval_loss": 0.5194119811058044, | |
| "eval_runtime": 2.4472, | |
| "eval_samples_per_second": 43.723, | |
| "eval_steps_per_second": 2.86, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 22.25, | |
| "learning_rate": 1.4523809523809524e-05, | |
| "loss": 0.3255, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 22.54, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 0.3148, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 22.82, | |
| "learning_rate": 1.4047619047619048e-05, | |
| "loss": 0.2927, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "eval_accuracy": 0.794392523364486, | |
| "eval_loss": 0.5008327960968018, | |
| "eval_runtime": 2.8195, | |
| "eval_samples_per_second": 37.949, | |
| "eval_steps_per_second": 2.483, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 23.1, | |
| "learning_rate": 1.380952380952381e-05, | |
| "loss": 0.3163, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 23.38, | |
| "learning_rate": 1.3571428571428572e-05, | |
| "loss": 0.2708, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 23.66, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.2927, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 23.94, | |
| "learning_rate": 1.3095238095238096e-05, | |
| "loss": 0.2874, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.794392523364486, | |
| "eval_loss": 0.5215777158737183, | |
| "eval_runtime": 2.3666, | |
| "eval_samples_per_second": 45.213, | |
| "eval_steps_per_second": 2.958, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 24.23, | |
| "learning_rate": 1.2857142857142857e-05, | |
| "loss": 0.3009, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 24.51, | |
| "learning_rate": 1.261904761904762e-05, | |
| "loss": 0.3082, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 24.79, | |
| "learning_rate": 1.2380952380952381e-05, | |
| "loss": 0.3021, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "eval_accuracy": 0.7570093457943925, | |
| "eval_loss": 0.5695382952690125, | |
| "eval_runtime": 2.2945, | |
| "eval_samples_per_second": 46.632, | |
| "eval_steps_per_second": 3.051, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 25.07, | |
| "learning_rate": 1.2142857142857144e-05, | |
| "loss": 0.2771, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 25.35, | |
| "learning_rate": 1.1904761904761905e-05, | |
| "loss": 0.2882, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 25.63, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.3067, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 25.92, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.2978, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7570093457943925, | |
| "eval_loss": 0.5642871856689453, | |
| "eval_runtime": 2.3556, | |
| "eval_samples_per_second": 45.425, | |
| "eval_steps_per_second": 2.972, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 26.2, | |
| "learning_rate": 1.119047619047619e-05, | |
| "loss": 0.2787, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 26.48, | |
| "learning_rate": 1.0952380952380951e-05, | |
| "loss": 0.2857, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 26.76, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 0.2743, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "eval_accuracy": 0.7570093457943925, | |
| "eval_loss": 0.5767267346382141, | |
| "eval_runtime": 2.7372, | |
| "eval_samples_per_second": 39.092, | |
| "eval_steps_per_second": 2.557, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 27.04, | |
| "learning_rate": 1.0476190476190475e-05, | |
| "loss": 0.2486, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 27.32, | |
| "learning_rate": 1.0238095238095238e-05, | |
| "loss": 0.2884, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 27.61, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.308, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 27.89, | |
| "learning_rate": 9.761904761904762e-06, | |
| "loss": 0.2753, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7663551401869159, | |
| "eval_loss": 0.5124983787536621, | |
| "eval_runtime": 3.3973, | |
| "eval_samples_per_second": 31.496, | |
| "eval_steps_per_second": 2.06, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 28.17, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 0.2838, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 28.45, | |
| "learning_rate": 9.285714285714286e-06, | |
| "loss": 0.2619, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "learning_rate": 9.047619047619047e-06, | |
| "loss": 0.2773, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "eval_accuracy": 0.7663551401869159, | |
| "eval_loss": 0.5245658159255981, | |
| "eval_runtime": 2.8221, | |
| "eval_samples_per_second": 37.914, | |
| "eval_steps_per_second": 2.48, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 29.01, | |
| "learning_rate": 8.80952380952381e-06, | |
| "loss": 0.2854, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 29.3, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.256, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 29.58, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.2634, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 29.86, | |
| "learning_rate": 8.095238095238095e-06, | |
| "loss": 0.2775, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7850467289719626, | |
| "eval_loss": 0.5473499298095703, | |
| "eval_runtime": 2.4506, | |
| "eval_samples_per_second": 43.664, | |
| "eval_steps_per_second": 2.856, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 30.14, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 0.294, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 30.42, | |
| "learning_rate": 7.619047619047619e-06, | |
| "loss": 0.2702, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 30.7, | |
| "learning_rate": 7.380952380952381e-06, | |
| "loss": 0.2726, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 0.268, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "eval_accuracy": 0.7663551401869159, | |
| "eval_loss": 0.5285843014717102, | |
| "eval_runtime": 2.4786, | |
| "eval_samples_per_second": 43.17, | |
| "eval_steps_per_second": 2.824, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 31.27, | |
| "learning_rate": 6.904761904761905e-06, | |
| "loss": 0.2843, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 31.55, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.2584, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 31.83, | |
| "learning_rate": 6.428571428571429e-06, | |
| "loss": 0.2586, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7850467289719626, | |
| "eval_loss": 0.5232648849487305, | |
| "eval_runtime": 2.341, | |
| "eval_samples_per_second": 45.706, | |
| "eval_steps_per_second": 2.99, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 32.11, | |
| "learning_rate": 6.190476190476191e-06, | |
| "loss": 0.2735, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 32.39, | |
| "learning_rate": 5.9523809523809525e-06, | |
| "loss": 0.2499, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 32.68, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.2677, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 32.96, | |
| "learning_rate": 5.476190476190476e-06, | |
| "loss": 0.2458, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "eval_accuracy": 0.7757009345794392, | |
| "eval_loss": 0.5451497435569763, | |
| "eval_runtime": 2.6154, | |
| "eval_samples_per_second": 40.912, | |
| "eval_steps_per_second": 2.676, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 33.24, | |
| "learning_rate": 5.238095238095238e-06, | |
| "loss": 0.2611, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 33.52, | |
| "learning_rate": 4.9999999999999996e-06, | |
| "loss": 0.2872, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 33.8, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 0.2524, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7850467289719626, | |
| "eval_loss": 0.5268486738204956, | |
| "eval_runtime": 2.4496, | |
| "eval_samples_per_second": 43.681, | |
| "eval_steps_per_second": 2.858, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 34.08, | |
| "learning_rate": 4.5238095238095235e-06, | |
| "loss": 0.2762, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 34.37, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.2541, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 34.65, | |
| "learning_rate": 4.0476190476190474e-06, | |
| "loss": 0.2055, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 34.93, | |
| "learning_rate": 3.8095238095238094e-06, | |
| "loss": 0.2438, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 34.99, | |
| "eval_accuracy": 0.7757009345794392, | |
| "eval_loss": 0.5228263735771179, | |
| "eval_runtime": 2.4381, | |
| "eval_samples_per_second": 43.887, | |
| "eval_steps_per_second": 2.871, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 35.21, | |
| "learning_rate": 3.5714285714285714e-06, | |
| "loss": 0.2296, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 35.49, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.2462, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 35.77, | |
| "learning_rate": 3.0952380952380953e-06, | |
| "loss": 0.2429, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7663551401869159, | |
| "eval_loss": 0.5391428470611572, | |
| "eval_runtime": 2.4246, | |
| "eval_samples_per_second": 44.132, | |
| "eval_steps_per_second": 2.887, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 36.06, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.267, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 36.34, | |
| "learning_rate": 2.619047619047619e-06, | |
| "loss": 0.2715, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 36.62, | |
| "learning_rate": 2.3809523809523808e-06, | |
| "loss": 0.2528, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 36.9, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 0.2689, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "eval_accuracy": 0.7850467289719626, | |
| "eval_loss": 0.5236551761627197, | |
| "eval_runtime": 2.5225, | |
| "eval_samples_per_second": 42.419, | |
| "eval_steps_per_second": 2.775, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 37.18, | |
| "learning_rate": 1.9047619047619047e-06, | |
| "loss": 0.2637, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 37.46, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.2367, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 37.75, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.2362, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.7663551401869159, | |
| "eval_loss": 0.5561335682868958, | |
| "eval_runtime": 2.5586, | |
| "eval_samples_per_second": 41.82, | |
| "eval_steps_per_second": 2.736, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 38.03, | |
| "learning_rate": 1.1904761904761904e-06, | |
| "loss": 0.2685, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 38.31, | |
| "learning_rate": 9.523809523809523e-07, | |
| "loss": 0.2447, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 38.59, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 0.2492, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 38.87, | |
| "learning_rate": 4.761904761904762e-07, | |
| "loss": 0.2656, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 38.99, | |
| "eval_accuracy": 0.822429906542056, | |
| "eval_loss": 0.5233157277107239, | |
| "eval_runtime": 2.4536, | |
| "eval_samples_per_second": 43.61, | |
| "eval_steps_per_second": 2.853, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 39.15, | |
| "learning_rate": 2.380952380952381e-07, | |
| "loss": 0.2456, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "learning_rate": 0.0, | |
| "loss": 0.264, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "eval_accuracy": 0.8037383177570093, | |
| "eval_loss": 0.5111537575721741, | |
| "eval_runtime": 2.5592, | |
| "eval_samples_per_second": 41.809, | |
| "eval_steps_per_second": 2.735, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "step": 1400, | |
| "total_flos": 2.4404643889687757e+17, | |
| "train_loss": 0.38540612884930203, | |
| "train_runtime": 1009.3205, | |
| "train_samples_per_second": 89.486, | |
| "train_steps_per_second": 1.387 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "total_flos": 2.4404643889687757e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |