| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.67373142203852, | |
| "global_step": 110000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5.885815185403178e-07, | |
| "loss": 0.6708, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.1771630370806356e-06, | |
| "loss": 0.6457, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.7657445556209538e-06, | |
| "loss": 0.6149, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 2.3543260741612712e-06, | |
| "loss": 0.5516, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.942907592701589e-06, | |
| "loss": 0.5014, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 3.5314891112419075e-06, | |
| "loss": 0.484, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.120070629782225e-06, | |
| "loss": 0.4428, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.7086521483225425e-06, | |
| "loss": 0.4323, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 5.297233666862861e-06, | |
| "loss": 0.4234, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 5.885815185403178e-06, | |
| "loss": 0.402, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 6.474396703943496e-06, | |
| "loss": 0.4142, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.062978222483815e-06, | |
| "loss": 0.39, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.651559741024132e-06, | |
| "loss": 0.3842, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 8.24014125956445e-06, | |
| "loss": 0.3747, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 8.828722778104768e-06, | |
| "loss": 0.37, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 9.417304296645085e-06, | |
| "loss": 0.3688, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.0005885815185404e-05, | |
| "loss": 0.3656, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.0594467333725723e-05, | |
| "loss": 0.3488, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.1183048852266041e-05, | |
| "loss": 0.3532, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.1771630370806357e-05, | |
| "loss": 0.3553, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.2360211889346675e-05, | |
| "loss": 0.3597, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.2948793407886992e-05, | |
| "loss": 0.3427, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3537374926427311e-05, | |
| "loss": 0.35, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.412595644496763e-05, | |
| "loss": 0.342, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.4714537963507947e-05, | |
| "loss": 0.3304, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.5303119482048264e-05, | |
| "loss": 0.3326, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.589170100058858e-05, | |
| "loss": 0.3273, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.64802825191289e-05, | |
| "loss": 0.3226, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.706886403766922e-05, | |
| "loss": 0.329, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.7657445556209536e-05, | |
| "loss": 0.3096, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.8246027074749856e-05, | |
| "loss": 0.3196, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.883460859329017e-05, | |
| "loss": 0.3228, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.942319011183049e-05, | |
| "loss": 0.3164, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.999925173503938e-05, | |
| "loss": 0.2999, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.996183848700825e-05, | |
| "loss": 0.3205, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.9924425238977122e-05, | |
| "loss": 0.3053, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.9887011990945994e-05, | |
| "loss": 0.2995, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.984959874291487e-05, | |
| "loss": 0.313, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.981218549488374e-05, | |
| "loss": 0.2996, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.977477224685261e-05, | |
| "loss": 0.3289, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.9737358998821485e-05, | |
| "loss": 0.3015, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.9699945750790357e-05, | |
| "loss": 0.3029, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.966253250275923e-05, | |
| "loss": 0.3075, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.96251192547281e-05, | |
| "loss": 0.2972, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.9587706006696973e-05, | |
| "loss": 0.2984, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.9550292758665845e-05, | |
| "loss": 0.2894, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.9512879510634717e-05, | |
| "loss": 0.2857, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.9475466262603592e-05, | |
| "loss": 0.2812, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.943805301457246e-05, | |
| "loss": 0.2872, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.9400639766541333e-05, | |
| "loss": 0.289, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.9363226518510205e-05, | |
| "loss": 0.2791, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.932581327047908e-05, | |
| "loss": 0.2912, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.9288400022447952e-05, | |
| "loss": 0.2872, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.9250986774416824e-05, | |
| "loss": 0.2766, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.9213573526385692e-05, | |
| "loss": 0.2819, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.9176160278354568e-05, | |
| "loss": 0.2795, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.913874703032344e-05, | |
| "loss": 0.2677, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.910133378229231e-05, | |
| "loss": 0.2488, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.9063920534261183e-05, | |
| "loss": 0.256, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.9026507286230055e-05, | |
| "loss": 0.2489, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.8989094038198927e-05, | |
| "loss": 0.2525, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.89516807901678e-05, | |
| "loss": 0.2483, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.8914267542136674e-05, | |
| "loss": 0.2528, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.8876854294105543e-05, | |
| "loss": 0.2481, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 1.8839441046074415e-05, | |
| "loss": 0.2517, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.880202779804329e-05, | |
| "loss": 0.2514, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.8764614550012162e-05, | |
| "loss": 0.2464, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.8727201301981034e-05, | |
| "loss": 0.2586, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.8689788053949906e-05, | |
| "loss": 0.2507, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.8652374805918778e-05, | |
| "loss": 0.2609, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.861496155788765e-05, | |
| "loss": 0.2368, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.857754830985652e-05, | |
| "loss": 0.2473, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.8540135061825394e-05, | |
| "loss": 0.2379, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.8502721813794265e-05, | |
| "loss": 0.2431, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.8465308565763137e-05, | |
| "loss": 0.2521, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.842789531773201e-05, | |
| "loss": 0.2473, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.8390482069700885e-05, | |
| "loss": 0.2496, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.8353068821669757e-05, | |
| "loss": 0.2529, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.8315655573638625e-05, | |
| "loss": 0.2484, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.8278242325607497e-05, | |
| "loss": 0.2393, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.8240829077576372e-05, | |
| "loss": 0.2394, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.8203415829545244e-05, | |
| "loss": 0.2265, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.8166002581514116e-05, | |
| "loss": 0.2435, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.8128589333482988e-05, | |
| "loss": 0.2513, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.809117608545186e-05, | |
| "loss": 0.2478, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.8053762837420732e-05, | |
| "loss": 0.2601, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.8016349589389604e-05, | |
| "loss": 0.2371, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.7978936341358476e-05, | |
| "loss": 0.2504, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.7941523093327348e-05, | |
| "loss": 0.2414, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.790410984529622e-05, | |
| "loss": 0.2296, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.7866696597265095e-05, | |
| "loss": 0.2413, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 1.7829283349233967e-05, | |
| "loss": 0.2447, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 1.779187010120284e-05, | |
| "loss": 0.2392, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1.7754456853171707e-05, | |
| "loss": 0.2515, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 1.7717043605140583e-05, | |
| "loss": 0.2383, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.7679630357109454e-05, | |
| "loss": 0.2522, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.7642217109078326e-05, | |
| "loss": 0.244, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.7604803861047198e-05, | |
| "loss": 0.2368, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 1.756739061301607e-05, | |
| "loss": 0.2416, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 1.7529977364984942e-05, | |
| "loss": 0.2428, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 1.7492564116953814e-05, | |
| "loss": 0.2387, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 1.745515086892269e-05, | |
| "loss": 0.2363, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.7417737620891558e-05, | |
| "loss": 0.2433, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.738032437286043e-05, | |
| "loss": 0.2395, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.7342911124829302e-05, | |
| "loss": 0.237, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.7305497876798177e-05, | |
| "loss": 0.2382, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.726808462876705e-05, | |
| "loss": 0.2306, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.723067138073592e-05, | |
| "loss": 0.2356, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7193258132704793e-05, | |
| "loss": 0.2473, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7155844884673665e-05, | |
| "loss": 0.2428, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7118431636642537e-05, | |
| "loss": 0.2302, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.708101838861141e-05, | |
| "loss": 0.2235, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.704360514058028e-05, | |
| "loss": 0.2271, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.7006191892549152e-05, | |
| "loss": 0.2229, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6968778644518024e-05, | |
| "loss": 0.2, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.69313653964869e-05, | |
| "loss": 0.1905, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.689395214845577e-05, | |
| "loss": 0.1908, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.685653890042464e-05, | |
| "loss": 0.2062, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6819125652393512e-05, | |
| "loss": 0.1926, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6781712404362387e-05, | |
| "loss": 0.1867, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.674429915633126e-05, | |
| "loss": 0.1892, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.670688590830013e-05, | |
| "loss": 0.1853, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.6669472660269003e-05, | |
| "loss": 0.1972, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.6632059412237875e-05, | |
| "loss": 0.1916, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.6594646164206747e-05, | |
| "loss": 0.1836, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.655723291617562e-05, | |
| "loss": 0.1855, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.651981966814449e-05, | |
| "loss": 0.2007, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.6482406420113363e-05, | |
| "loss": 0.1857, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.6444993172082235e-05, | |
| "loss": 0.2067, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.6407579924051106e-05, | |
| "loss": 0.2096, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.6370166676019982e-05, | |
| "loss": 0.2039, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.6332753427988854e-05, | |
| "loss": 0.1985, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.6295340179957722e-05, | |
| "loss": 0.1927, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.6257926931926597e-05, | |
| "loss": 0.2015, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.622051368389547e-05, | |
| "loss": 0.1822, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.618310043586434e-05, | |
| "loss": 0.1957, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.6145687187833213e-05, | |
| "loss": 0.1936, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.6108273939802085e-05, | |
| "loss": 0.1896, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.6070860691770957e-05, | |
| "loss": 0.199, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.603344744373983e-05, | |
| "loss": 0.1954, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.5996034195708704e-05, | |
| "loss": 0.1918, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.5958620947677573e-05, | |
| "loss": 0.1902, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.5921207699646445e-05, | |
| "loss": 0.201, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 1.5883794451615317e-05, | |
| "loss": 0.1888, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 1.5846381203584192e-05, | |
| "loss": 0.1932, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 1.5808967955553064e-05, | |
| "loss": 0.2016, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 1.5771554707521936e-05, | |
| "loss": 0.199, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 1.5734141459490804e-05, | |
| "loss": 0.1854, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 1.569672821145968e-05, | |
| "loss": 0.1935, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 1.565931496342855e-05, | |
| "loss": 0.1902, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.5621901715397423e-05, | |
| "loss": 0.1831, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.5584488467366295e-05, | |
| "loss": 0.1978, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.5547075219335167e-05, | |
| "loss": 0.1938, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.550966197130404e-05, | |
| "loss": 0.1911, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 1.547224872327291e-05, | |
| "loss": 0.1847, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 1.5434835475241786e-05, | |
| "loss": 0.1919, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 1.5397422227210655e-05, | |
| "loss": 0.1944, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 1.5360008979179527e-05, | |
| "loss": 0.1809, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 1.5322595731148402e-05, | |
| "loss": 0.1986, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.5285182483117274e-05, | |
| "loss": 0.1927, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.5247769235086144e-05, | |
| "loss": 0.1955, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.5210355987055016e-05, | |
| "loss": 0.1909, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 1.517294273902389e-05, | |
| "loss": 0.2026, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.5135529490992762e-05, | |
| "loss": 0.1922, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.5098116242961634e-05, | |
| "loss": 0.1892, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.5060702994930506e-05, | |
| "loss": 0.1962, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.502328974689938e-05, | |
| "loss": 0.1987, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.4985876498868251e-05, | |
| "loss": 0.2, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 1.4948463250837121e-05, | |
| "loss": 0.1784, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.4911050002805995e-05, | |
| "loss": 0.1998, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.4873636754774867e-05, | |
| "loss": 0.1794, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 1.4836223506743739e-05, | |
| "loss": 0.1545, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 1.479881025871261e-05, | |
| "loss": 0.1596, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 1.4761397010681484e-05, | |
| "loss": 0.1522, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.4723983762650356e-05, | |
| "loss": 0.1633, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 1.4686570514619227e-05, | |
| "loss": 0.1421, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 1.4649157266588102e-05, | |
| "loss": 0.1506, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 1.4611744018556972e-05, | |
| "loss": 0.1563, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 1.4574330770525844e-05, | |
| "loss": 0.1557, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 1.4536917522494716e-05, | |
| "loss": 0.1641, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 1.449950427446359e-05, | |
| "loss": 0.1545, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.4462091026432461e-05, | |
| "loss": 0.1625, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 1.4424677778401333e-05, | |
| "loss": 0.165, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 1.4387264530370207e-05, | |
| "loss": 0.157, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 1.4349851282339077e-05, | |
| "loss": 0.1485, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 1.4312438034307949e-05, | |
| "loss": 0.1571, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.4275024786276821e-05, | |
| "loss": 0.1608, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 1.4237611538245695e-05, | |
| "loss": 0.162, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 1.4200198290214567e-05, | |
| "loss": 0.154, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 1.4162785042183438e-05, | |
| "loss": 0.1536, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 1.4125371794152309e-05, | |
| "loss": 0.164, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 1.4087958546121184e-05, | |
| "loss": 0.1655, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 1.4050545298090054e-05, | |
| "loss": 0.1606, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 1.4013132050058926e-05, | |
| "loss": 0.1533, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 1.39757188020278e-05, | |
| "loss": 0.1515, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 1.3938305553996672e-05, | |
| "loss": 0.1624, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.3900892305965544e-05, | |
| "loss": 0.1467, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 1.3863479057934415e-05, | |
| "loss": 0.1582, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 1.3826065809903289e-05, | |
| "loss": 0.163, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.378865256187216e-05, | |
| "loss": 0.1561, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 1.3751239313841031e-05, | |
| "loss": 0.1558, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 1.3713826065809905e-05, | |
| "loss": 0.1686, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 1.3676412817778777e-05, | |
| "loss": 0.161, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 1.3638999569747649e-05, | |
| "loss": 0.1574, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 1.360158632171652e-05, | |
| "loss": 0.1591, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.3564173073685394e-05, | |
| "loss": 0.1618, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 1.3526759825654266e-05, | |
| "loss": 0.1545, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 1.3489346577623136e-05, | |
| "loss": 0.1626, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 1.345193332959201e-05, | |
| "loss": 0.1588, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 1.3414520081560882e-05, | |
| "loss": 0.1538, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 1.3377106833529754e-05, | |
| "loss": 0.1557, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 1.3339693585498626e-05, | |
| "loss": 0.1554, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 1.33022803374675e-05, | |
| "loss": 0.1639, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 1.3264867089436371e-05, | |
| "loss": 0.1519, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 1.3227453841405241e-05, | |
| "loss": 0.1671, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 1.3190040593374113e-05, | |
| "loss": 0.1614, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 1.3152627345342987e-05, | |
| "loss": 0.1523, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 1.3115214097311859e-05, | |
| "loss": 0.1564, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 1.307780084928073e-05, | |
| "loss": 0.1662, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 1.3040387601249604e-05, | |
| "loss": 0.1667, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 1.3002974353218476e-05, | |
| "loss": 0.1631, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 1.2965561105187348e-05, | |
| "loss": 0.1561, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 1.2928147857156219e-05, | |
| "loss": 0.159, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 1.2890734609125092e-05, | |
| "loss": 0.1618, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 1.2853321361093964e-05, | |
| "loss": 0.1538, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 1.2815908113062836e-05, | |
| "loss": 0.1586, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 1.277849486503171e-05, | |
| "loss": 0.16, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 1.2741081617000581e-05, | |
| "loss": 0.1483, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 1.2703668368969453e-05, | |
| "loss": 0.1201, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 1.2666255120938324e-05, | |
| "loss": 0.1278, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 1.2628841872907199e-05, | |
| "loss": 0.1348, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 1.2591428624876069e-05, | |
| "loss": 0.1238, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 1.2554015376844941e-05, | |
| "loss": 0.1215, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 1.2516602128813815e-05, | |
| "loss": 0.132, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 1.2479188880782687e-05, | |
| "loss": 0.1244, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 1.2441775632751559e-05, | |
| "loss": 0.1235, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 1.240436238472043e-05, | |
| "loss": 0.1312, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 1.2366949136689304e-05, | |
| "loss": 0.1226, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 1.2329535888658174e-05, | |
| "loss": 0.1292, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 1.2292122640627046e-05, | |
| "loss": 0.1255, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 1.2254709392595918e-05, | |
| "loss": 0.1258, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 1.2217296144564792e-05, | |
| "loss": 0.1292, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 1.2179882896533664e-05, | |
| "loss": 0.1298, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.2142469648502536e-05, | |
| "loss": 0.1254, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 1.210505640047141e-05, | |
| "loss": 0.1241, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 1.2067643152440281e-05, | |
| "loss": 0.1302, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 1.2030229904409151e-05, | |
| "loss": 0.1309, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 1.1992816656378023e-05, | |
| "loss": 0.1182, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 1.1955403408346897e-05, | |
| "loss": 0.1331, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 1.1917990160315769e-05, | |
| "loss": 0.1289, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 1.188057691228464e-05, | |
| "loss": 0.1149, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 1.1843163664253514e-05, | |
| "loss": 0.1201, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 1.1805750416222386e-05, | |
| "loss": 0.1218, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 1.1768337168191256e-05, | |
| "loss": 0.1278, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 1.1730923920160128e-05, | |
| "loss": 0.1275, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 1.1693510672129002e-05, | |
| "loss": 0.1379, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 1.1656097424097874e-05, | |
| "loss": 0.1298, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 1.1618684176066746e-05, | |
| "loss": 0.1375, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 1.1581270928035618e-05, | |
| "loss": 0.1271, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 1.1543857680004491e-05, | |
| "loss": 0.1487, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 1.1506444431973363e-05, | |
| "loss": 0.1341, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 1.1469031183942233e-05, | |
| "loss": 0.1314, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 1.1431617935911107e-05, | |
| "loss": 0.1187, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 1.1394204687879979e-05, | |
| "loss": 0.1308, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 1.1356791439848851e-05, | |
| "loss": 0.1357, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 1.1319378191817723e-05, | |
| "loss": 0.1346, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 1.1281964943786596e-05, | |
| "loss": 0.1302, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 1.1244551695755468e-05, | |
| "loss": 0.1318, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 1.120713844772434e-05, | |
| "loss": 0.1406, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 1.1169725199693214e-05, | |
| "loss": 0.1287, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 1.1132311951662084e-05, | |
| "loss": 0.1296, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 1.1094898703630956e-05, | |
| "loss": 0.1239, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.1057485455599828e-05, | |
| "loss": 0.1289, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 1.1020072207568702e-05, | |
| "loss": 0.1371, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 1.0982658959537573e-05, | |
| "loss": 0.1371, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 1.0945245711506445e-05, | |
| "loss": 0.1197, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 1.0907832463475319e-05, | |
| "loss": 0.1316, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 1.0870419215444191e-05, | |
| "loss": 0.1275, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 1.0833005967413061e-05, | |
| "loss": 0.1287, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 1.0795592719381933e-05, | |
| "loss": 0.1266, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 1.0758179471350807e-05, | |
| "loss": 0.1275, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 1.0720766223319679e-05, | |
| "loss": 0.1244, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 1.068335297528855e-05, | |
| "loss": 0.1453, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.0645939727257422e-05, | |
| "loss": 0.1343, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 1.0608526479226296e-05, | |
| "loss": 0.114, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 1.0571113231195166e-05, | |
| "loss": 0.1038, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 1.0533699983164038e-05, | |
| "loss": 0.1064, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 1.0496286735132912e-05, | |
| "loss": 0.0928, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 1.0458873487101784e-05, | |
| "loss": 0.1079, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 1.0421460239070656e-05, | |
| "loss": 0.0976, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 1.0384046991039528e-05, | |
| "loss": 0.1086, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 1.0346633743008401e-05, | |
| "loss": 0.105, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 1.0309220494977273e-05, | |
| "loss": 0.1086, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 1.0271807246946143e-05, | |
| "loss": 0.0972, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 1.0234393998915017e-05, | |
| "loss": 0.1086, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 1.0196980750883889e-05, | |
| "loss": 0.1151, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 1.015956750285276e-05, | |
| "loss": 0.1076, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 1.0122154254821633e-05, | |
| "loss": 0.1061, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 1.0084741006790506e-05, | |
| "loss": 0.1087, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 1.0047327758759378e-05, | |
| "loss": 0.1081, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 1.0009914510728248e-05, | |
| "loss": 0.1051, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 9.972501262697122e-06, | |
| "loss": 0.1096, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 9.935088014665994e-06, | |
| "loss": 0.1067, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 9.897674766634866e-06, | |
| "loss": 0.1106, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 9.86026151860374e-06, | |
| "loss": 0.1019, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 9.82284827057261e-06, | |
| "loss": 0.1107, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 9.785435022541483e-06, | |
| "loss": 0.1045, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 9.748021774510355e-06, | |
| "loss": 0.1101, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 9.710608526479227e-06, | |
| "loss": 0.1001, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 9.673195278448099e-06, | |
| "loss": 0.1093, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 9.635782030416971e-06, | |
| "loss": 0.1091, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 9.598368782385845e-06, | |
| "loss": 0.1073, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 9.560955534354715e-06, | |
| "loss": 0.1175, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 9.523542286323588e-06, | |
| "loss": 0.1018, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 9.48612903829246e-06, | |
| "loss": 0.1015, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 9.448715790261332e-06, | |
| "loss": 0.1159, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 9.411302542230204e-06, | |
| "loss": 0.1104, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 9.373889294199076e-06, | |
| "loss": 0.1105, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 9.33647604616795e-06, | |
| "loss": 0.1037, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 9.299062798136822e-06, | |
| "loss": 0.103, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 9.261649550105694e-06, | |
| "loss": 0.1129, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 9.224236302074565e-06, | |
| "loss": 0.1005, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 9.186823054043437e-06, | |
| "loss": 0.1082, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 9.14940980601231e-06, | |
| "loss": 0.1157, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 9.111996557981181e-06, | |
| "loss": 0.1139, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 9.074583309950053e-06, | |
| "loss": 0.1101, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 9.037170061918927e-06, | |
| "loss": 0.1139, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 8.999756813887799e-06, | |
| "loss": 0.1107, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 8.96234356585667e-06, | |
| "loss": 0.1095, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 8.924930317825543e-06, | |
| "loss": 0.1127, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 8.887517069794414e-06, | |
| "loss": 0.1118, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 8.850103821763288e-06, | |
| "loss": 0.1042, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 8.812690573732158e-06, | |
| "loss": 0.1112, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 8.775277325701032e-06, | |
| "loss": 0.1116, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 8.737864077669904e-06, | |
| "loss": 0.1139, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 8.700450829638776e-06, | |
| "loss": 0.1082, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 8.663037581607648e-06, | |
| "loss": 0.1056, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 8.62562433357652e-06, | |
| "loss": 0.102, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 8.588211085545393e-06, | |
| "loss": 0.1026, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 8.550797837514263e-06, | |
| "loss": 0.1103, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 8.513384589483137e-06, | |
| "loss": 0.1147, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 8.475971341452009e-06, | |
| "loss": 0.0773, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 8.43855809342088e-06, | |
| "loss": 0.0812, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 8.401144845389754e-06, | |
| "loss": 0.0801, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 8.363731597358625e-06, | |
| "loss": 0.0884, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 8.326318349327498e-06, | |
| "loss": 0.0914, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 8.28890510129637e-06, | |
| "loss": 0.0868, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 8.251491853265242e-06, | |
| "loss": 0.0948, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 8.214078605234114e-06, | |
| "loss": 0.0808, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 8.176665357202986e-06, | |
| "loss": 0.092, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 8.139252109171858e-06, | |
| "loss": 0.0841, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 8.10183886114073e-06, | |
| "loss": 0.0951, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 8.064425613109603e-06, | |
| "loss": 0.0928, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 8.027012365078475e-06, | |
| "loss": 0.0935, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 7.989599117047347e-06, | |
| "loss": 0.0927, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 7.952185869016219e-06, | |
| "loss": 0.0923, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 7.914772620985091e-06, | |
| "loss": 0.0801, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 7.877359372953963e-06, | |
| "loss": 0.0937, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 7.839946124922837e-06, | |
| "loss": 0.0865, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 7.802532876891707e-06, | |
| "loss": 0.0871, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 7.76511962886058e-06, | |
| "loss": 0.0786, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 7.727706380829452e-06, | |
| "loss": 0.0934, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 7.690293132798324e-06, | |
| "loss": 0.0838, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 7.652879884767196e-06, | |
| "loss": 0.097, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 7.615466636736069e-06, | |
| "loss": 0.0885, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 7.578053388704941e-06, | |
| "loss": 0.0919, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 7.540640140673813e-06, | |
| "loss": 0.0822, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 7.5032268926426856e-06, | |
| "loss": 0.0837, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 7.465813644611558e-06, | |
| "loss": 0.0879, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 7.428400396580429e-06, | |
| "loss": 0.0927, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 7.390987148549302e-06, | |
| "loss": 0.0929, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 7.353573900518174e-06, | |
| "loss": 0.0871, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 7.316160652487047e-06, | |
| "loss": 0.0886, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 7.278747404455918e-06, | |
| "loss": 0.0887, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 7.241334156424791e-06, | |
| "loss": 0.0924, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 7.203920908393663e-06, | |
| "loss": 0.0971, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 7.166507660362535e-06, | |
| "loss": 0.0922, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 7.129094412331407e-06, | |
| "loss": 0.0866, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 7.091681164300279e-06, | |
| "loss": 0.0822, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 7.054267916269152e-06, | |
| "loss": 0.102, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 7.016854668238023e-06, | |
| "loss": 0.091, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 6.979441420206896e-06, | |
| "loss": 0.0937, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 6.942028172175768e-06, | |
| "loss": 0.0795, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 6.9046149241446405e-06, | |
| "loss": 0.0917, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 6.8672016761135115e-06, | |
| "loss": 0.0987, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 6.829788428082384e-06, | |
| "loss": 0.0946, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 6.792375180051257e-06, | |
| "loss": 0.0915, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 6.754961932020129e-06, | |
| "loss": 0.0889, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 6.717548683989002e-06, | |
| "loss": 0.0884, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 6.680135435957873e-06, | |
| "loss": 0.0854, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 6.642722187926746e-06, | |
| "loss": 0.0847, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 6.6053089398956175e-06, | |
| "loss": 0.0907, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 6.5678956918644894e-06, | |
| "loss": 0.0955, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 6.530482443833361e-06, | |
| "loss": 0.095, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 6.493069195802234e-06, | |
| "loss": 0.0948, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 6.455655947771107e-06, | |
| "loss": 0.0774, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 6.418242699739978e-06, | |
| "loss": 0.0973, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 6.380829451708851e-06, | |
| "loss": 0.0976, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 6.343416203677723e-06, | |
| "loss": 0.0674, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 6.306002955646595e-06, | |
| "loss": 0.0735, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 6.2685897076154665e-06, | |
| "loss": 0.0777, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 6.231176459584339e-06, | |
| "loss": 0.0688, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 6.193763211553212e-06, | |
| "loss": 0.0721, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 6.156349963522084e-06, | |
| "loss": 0.0787, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 6.118936715490956e-06, | |
| "loss": 0.0755, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 6.081523467459828e-06, | |
| "loss": 0.072, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 6.0441102194287005e-06, | |
| "loss": 0.0695, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 6.0066969713975724e-06, | |
| "loss": 0.0713, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 5.969283723366444e-06, | |
| "loss": 0.0771, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 5.931870475335316e-06, | |
| "loss": 0.0695, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 5.894457227304189e-06, | |
| "loss": 0.0676, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 5.857043979273062e-06, | |
| "loss": 0.0766, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 5.819630731241933e-06, | |
| "loss": 0.0766, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 5.782217483210806e-06, | |
| "loss": 0.0807, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 5.7448042351796775e-06, | |
| "loss": 0.0834, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 5.70739098714855e-06, | |
| "loss": 0.0789, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 5.669977739117421e-06, | |
| "loss": 0.0691, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 5.632564491086294e-06, | |
| "loss": 0.0777, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 5.595151243055166e-06, | |
| "loss": 0.0725, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 5.557737995024039e-06, | |
| "loss": 0.0788, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 5.520324746992911e-06, | |
| "loss": 0.075, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 5.482911498961783e-06, | |
| "loss": 0.0742, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 5.4454982509306554e-06, | |
| "loss": 0.0666, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 5.4080850028995265e-06, | |
| "loss": 0.0688, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 5.370671754868399e-06, | |
| "loss": 0.0747, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 5.333258506837271e-06, | |
| "loss": 0.0741, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 5.295845258806144e-06, | |
| "loss": 0.0657, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 5.258432010775017e-06, | |
| "loss": 0.0788, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 5.221018762743888e-06, | |
| "loss": 0.0791, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 5.1836055147127605e-06, | |
| "loss": 0.0752, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 5.1461922666816325e-06, | |
| "loss": 0.0762, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 5.108779018650505e-06, | |
| "loss": 0.0771, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 5.071365770619376e-06, | |
| "loss": 0.0727, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 5.033952522588249e-06, | |
| "loss": 0.0843, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 4.996539274557121e-06, | |
| "loss": 0.0741, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 4.959126026525993e-06, | |
| "loss": 0.0721, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 4.921712778494866e-06, | |
| "loss": 0.0814, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 4.884299530463738e-06, | |
| "loss": 0.0703, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 4.8468862824326095e-06, | |
| "loss": 0.0745, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 4.809473034401481e-06, | |
| "loss": 0.0788, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 4.772059786370354e-06, | |
| "loss": 0.0721, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 4.734646538339226e-06, | |
| "loss": 0.0689, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 4.697233290308099e-06, | |
| "loss": 0.0651, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 4.659820042276971e-06, | |
| "loss": 0.0775, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 4.622406794245843e-06, | |
| "loss": 0.069, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 4.584993546214715e-06, | |
| "loss": 0.0807, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 4.547580298183587e-06, | |
| "loss": 0.0817, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 4.510167050152459e-06, | |
| "loss": 0.0771, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 4.472753802121331e-06, | |
| "loss": 0.0683, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 4.435340554090204e-06, | |
| "loss": 0.0704, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 4.397927306059076e-06, | |
| "loss": 0.0852, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 4.360514058027948e-06, | |
| "loss": 0.0773, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 4.323100809996821e-06, | |
| "loss": 0.0694, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 4.2856875619656925e-06, | |
| "loss": 0.0771, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 4.2482743139345644e-06, | |
| "loss": 0.0702, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 4.210861065903436e-06, | |
| "loss": 0.0533, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 4.173447817872308e-06, | |
| "loss": 0.0666, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 4.136034569841181e-06, | |
| "loss": 0.0566, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 4.098621321810054e-06, | |
| "loss": 0.0611, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 4.061208073778926e-06, | |
| "loss": 0.0589, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "learning_rate": 4.023794825747798e-06, | |
| "loss": 0.0602, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 3.9863815777166695e-06, | |
| "loss": 0.0615, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 3.948968329685542e-06, | |
| "loss": 0.0682, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 3.911555081654414e-06, | |
| "loss": 0.0678, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 3.874141833623286e-06, | |
| "loss": 0.0586, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 3.836728585592159e-06, | |
| "loss": 0.0641, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 3.799315337561031e-06, | |
| "loss": 0.0586, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 3.761902089529903e-06, | |
| "loss": 0.0684, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 3.724488841498775e-06, | |
| "loss": 0.0594, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 3.687075593467647e-06, | |
| "loss": 0.061, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 3.6496623454365193e-06, | |
| "loss": 0.0587, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 3.6122490974053913e-06, | |
| "loss": 0.0675, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 3.5748358493742636e-06, | |
| "loss": 0.0621, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 3.5374226013431355e-06, | |
| "loss": 0.0556, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 3.5000093533120083e-06, | |
| "loss": 0.0728, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 3.46259610528088e-06, | |
| "loss": 0.0554, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 3.4251828572497525e-06, | |
| "loss": 0.0634, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 3.3877696092186245e-06, | |
| "loss": 0.063, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 3.350356361187497e-06, | |
| "loss": 0.0613, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 3.3129431131563687e-06, | |
| "loss": 0.0755, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 3.275529865125241e-06, | |
| "loss": 0.0616, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 3.238116617094113e-06, | |
| "loss": 0.0513, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 3.2007033690629857e-06, | |
| "loss": 0.0609, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 3.1632901210318577e-06, | |
| "loss": 0.0625, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 3.12587687300073e-06, | |
| "loss": 0.057, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 3.088463624969602e-06, | |
| "loss": 0.0655, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 3.0510503769384743e-06, | |
| "loss": 0.0543, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 3.013637128907346e-06, | |
| "loss": 0.077, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 2.9762238808762185e-06, | |
| "loss": 0.0487, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 2.9388106328450904e-06, | |
| "loss": 0.0655, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 2.9013973848139628e-06, | |
| "loss": 0.0655, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 2.863984136782835e-06, | |
| "loss": 0.0596, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 2.8265708887517075e-06, | |
| "loss": 0.0594, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 2.7891576407205794e-06, | |
| "loss": 0.0737, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 2.7517443926894517e-06, | |
| "loss": 0.0616, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 2.7143311446583236e-06, | |
| "loss": 0.0531, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 2.676917896627196e-06, | |
| "loss": 0.0717, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "learning_rate": 2.639504648596068e-06, | |
| "loss": 0.0643, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 2.60209140056494e-06, | |
| "loss": 0.0515, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 2.5646781525338126e-06, | |
| "loss": 0.0612, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 2.527264904502685e-06, | |
| "loss": 0.0562, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 2.489851656471557e-06, | |
| "loss": 0.0605, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 2.452438408440429e-06, | |
| "loss": 0.0621, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 2.415025160409301e-06, | |
| "loss": 0.0639, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 2.3776119123781734e-06, | |
| "loss": 0.0565, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 2.3401986643470454e-06, | |
| "loss": 0.0609, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 2.3027854163159177e-06, | |
| "loss": 0.0613, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 2.26537216828479e-06, | |
| "loss": 0.0673, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 2.227958920253662e-06, | |
| "loss": 0.0598, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 2.190545672222534e-06, | |
| "loss": 0.0649, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 2.1531324241914066e-06, | |
| "loss": 0.0615, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 2.1157191761602786e-06, | |
| "loss": 0.0538, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 2.0783059281291505e-06, | |
| "loss": 0.0462, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 2.040892680098023e-06, | |
| "loss": 0.0542, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 2.003479432066895e-06, | |
| "loss": 0.0529, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 1.966066184035767e-06, | |
| "loss": 0.0585, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 1.9286529360046394e-06, | |
| "loss": 0.0462, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "learning_rate": 1.8912396879735116e-06, | |
| "loss": 0.0496, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 1.8538264399423839e-06, | |
| "loss": 0.0477, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 1.816413191911256e-06, | |
| "loss": 0.048, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 1.7789999438801282e-06, | |
| "loss": 0.0591, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 1.7415866958490003e-06, | |
| "loss": 0.0445, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 1.7041734478178726e-06, | |
| "loss": 0.0508, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 1.6667601997867448e-06, | |
| "loss": 0.0534, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 1.6293469517556169e-06, | |
| "loss": 0.046, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 1.591933703724489e-06, | |
| "loss": 0.0454, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 1.554520455693361e-06, | |
| "loss": 0.0601, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 1.5171072076622335e-06, | |
| "loss": 0.0543, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 1.4796939596311056e-06, | |
| "loss": 0.0587, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 1.4422807115999775e-06, | |
| "loss": 0.0526, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 1.4048674635688497e-06, | |
| "loss": 0.065, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 1.3674542155377222e-06, | |
| "loss": 0.0531, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "learning_rate": 1.3300409675065941e-06, | |
| "loss": 0.0607, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 1.2926277194754663e-06, | |
| "loss": 0.0425, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 1.2552144714443384e-06, | |
| "loss": 0.0541, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 1.2178012234132107e-06, | |
| "loss": 0.0592, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 1.1803879753820829e-06, | |
| "loss": 0.0494, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 1.142974727350955e-06, | |
| "loss": 0.0548, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 1.1055614793198273e-06, | |
| "loss": 0.0439, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 1.0681482312886995e-06, | |
| "loss": 0.0543, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 1.0307349832575716e-06, | |
| "loss": 0.0604, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 9.933217352264437e-07, | |
| "loss": 0.0546, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "learning_rate": 9.559084871953159e-07, | |
| "loss": 0.0576, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 9.184952391641881e-07, | |
| "loss": 0.0441, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 8.810819911330602e-07, | |
| "loss": 0.05, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 8.436687431019325e-07, | |
| "loss": 0.0523, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "learning_rate": 8.062554950708046e-07, | |
| "loss": 0.053, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 7.688422470396768e-07, | |
| "loss": 0.0456, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 7.314289990085489e-07, | |
| "loss": 0.0508, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 6.940157509774212e-07, | |
| "loss": 0.0415, | |
| "step": 110000 | |
| } | |
| ], | |
| "max_steps": 113710, | |
| "num_train_epochs": 10, | |
| "total_flos": 2.6986684097812992e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |