bart-base-qqp / trainer_state.json
ZhangYunchenY
[Model] bart-base-qqp
fff6bd9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.67373142203852,
"global_step": 110000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 5.885815185403178e-07,
"loss": 0.6708,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 1.1771630370806356e-06,
"loss": 0.6457,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 1.7657445556209538e-06,
"loss": 0.6149,
"step": 600
},
{
"epoch": 0.07,
"learning_rate": 2.3543260741612712e-06,
"loss": 0.5516,
"step": 800
},
{
"epoch": 0.09,
"learning_rate": 2.942907592701589e-06,
"loss": 0.5014,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 3.5314891112419075e-06,
"loss": 0.484,
"step": 1200
},
{
"epoch": 0.12,
"learning_rate": 4.120070629782225e-06,
"loss": 0.4428,
"step": 1400
},
{
"epoch": 0.14,
"learning_rate": 4.7086521483225425e-06,
"loss": 0.4323,
"step": 1600
},
{
"epoch": 0.16,
"learning_rate": 5.297233666862861e-06,
"loss": 0.4234,
"step": 1800
},
{
"epoch": 0.18,
"learning_rate": 5.885815185403178e-06,
"loss": 0.402,
"step": 2000
},
{
"epoch": 0.19,
"learning_rate": 6.474396703943496e-06,
"loss": 0.4142,
"step": 2200
},
{
"epoch": 0.21,
"learning_rate": 7.062978222483815e-06,
"loss": 0.39,
"step": 2400
},
{
"epoch": 0.23,
"learning_rate": 7.651559741024132e-06,
"loss": 0.3842,
"step": 2600
},
{
"epoch": 0.25,
"learning_rate": 8.24014125956445e-06,
"loss": 0.3747,
"step": 2800
},
{
"epoch": 0.26,
"learning_rate": 8.828722778104768e-06,
"loss": 0.37,
"step": 3000
},
{
"epoch": 0.28,
"learning_rate": 9.417304296645085e-06,
"loss": 0.3688,
"step": 3200
},
{
"epoch": 0.3,
"learning_rate": 1.0005885815185404e-05,
"loss": 0.3656,
"step": 3400
},
{
"epoch": 0.32,
"learning_rate": 1.0594467333725723e-05,
"loss": 0.3488,
"step": 3600
},
{
"epoch": 0.33,
"learning_rate": 1.1183048852266041e-05,
"loss": 0.3532,
"step": 3800
},
{
"epoch": 0.35,
"learning_rate": 1.1771630370806357e-05,
"loss": 0.3553,
"step": 4000
},
{
"epoch": 0.37,
"learning_rate": 1.2360211889346675e-05,
"loss": 0.3597,
"step": 4200
},
{
"epoch": 0.39,
"learning_rate": 1.2948793407886992e-05,
"loss": 0.3427,
"step": 4400
},
{
"epoch": 0.4,
"learning_rate": 1.3537374926427311e-05,
"loss": 0.35,
"step": 4600
},
{
"epoch": 0.42,
"learning_rate": 1.412595644496763e-05,
"loss": 0.342,
"step": 4800
},
{
"epoch": 0.44,
"learning_rate": 1.4714537963507947e-05,
"loss": 0.3304,
"step": 5000
},
{
"epoch": 0.46,
"learning_rate": 1.5303119482048264e-05,
"loss": 0.3326,
"step": 5200
},
{
"epoch": 0.47,
"learning_rate": 1.589170100058858e-05,
"loss": 0.3273,
"step": 5400
},
{
"epoch": 0.49,
"learning_rate": 1.64802825191289e-05,
"loss": 0.3226,
"step": 5600
},
{
"epoch": 0.51,
"learning_rate": 1.706886403766922e-05,
"loss": 0.329,
"step": 5800
},
{
"epoch": 0.53,
"learning_rate": 1.7657445556209536e-05,
"loss": 0.3096,
"step": 6000
},
{
"epoch": 0.55,
"learning_rate": 1.8246027074749856e-05,
"loss": 0.3196,
"step": 6200
},
{
"epoch": 0.56,
"learning_rate": 1.883460859329017e-05,
"loss": 0.3228,
"step": 6400
},
{
"epoch": 0.58,
"learning_rate": 1.942319011183049e-05,
"loss": 0.3164,
"step": 6600
},
{
"epoch": 0.6,
"learning_rate": 1.999925173503938e-05,
"loss": 0.2999,
"step": 6800
},
{
"epoch": 0.62,
"learning_rate": 1.996183848700825e-05,
"loss": 0.3205,
"step": 7000
},
{
"epoch": 0.63,
"learning_rate": 1.9924425238977122e-05,
"loss": 0.3053,
"step": 7200
},
{
"epoch": 0.65,
"learning_rate": 1.9887011990945994e-05,
"loss": 0.2995,
"step": 7400
},
{
"epoch": 0.67,
"learning_rate": 1.984959874291487e-05,
"loss": 0.313,
"step": 7600
},
{
"epoch": 0.69,
"learning_rate": 1.981218549488374e-05,
"loss": 0.2996,
"step": 7800
},
{
"epoch": 0.7,
"learning_rate": 1.977477224685261e-05,
"loss": 0.3289,
"step": 8000
},
{
"epoch": 0.72,
"learning_rate": 1.9737358998821485e-05,
"loss": 0.3015,
"step": 8200
},
{
"epoch": 0.74,
"learning_rate": 1.9699945750790357e-05,
"loss": 0.3029,
"step": 8400
},
{
"epoch": 0.76,
"learning_rate": 1.966253250275923e-05,
"loss": 0.3075,
"step": 8600
},
{
"epoch": 0.77,
"learning_rate": 1.96251192547281e-05,
"loss": 0.2972,
"step": 8800
},
{
"epoch": 0.79,
"learning_rate": 1.9587706006696973e-05,
"loss": 0.2984,
"step": 9000
},
{
"epoch": 0.81,
"learning_rate": 1.9550292758665845e-05,
"loss": 0.2894,
"step": 9200
},
{
"epoch": 0.83,
"learning_rate": 1.9512879510634717e-05,
"loss": 0.2857,
"step": 9400
},
{
"epoch": 0.84,
"learning_rate": 1.9475466262603592e-05,
"loss": 0.2812,
"step": 9600
},
{
"epoch": 0.86,
"learning_rate": 1.943805301457246e-05,
"loss": 0.2872,
"step": 9800
},
{
"epoch": 0.88,
"learning_rate": 1.9400639766541333e-05,
"loss": 0.289,
"step": 10000
},
{
"epoch": 0.9,
"learning_rate": 1.9363226518510205e-05,
"loss": 0.2791,
"step": 10200
},
{
"epoch": 0.91,
"learning_rate": 1.932581327047908e-05,
"loss": 0.2912,
"step": 10400
},
{
"epoch": 0.93,
"learning_rate": 1.9288400022447952e-05,
"loss": 0.2872,
"step": 10600
},
{
"epoch": 0.95,
"learning_rate": 1.9250986774416824e-05,
"loss": 0.2766,
"step": 10800
},
{
"epoch": 0.97,
"learning_rate": 1.9213573526385692e-05,
"loss": 0.2819,
"step": 11000
},
{
"epoch": 0.98,
"learning_rate": 1.9176160278354568e-05,
"loss": 0.2795,
"step": 11200
},
{
"epoch": 1.0,
"learning_rate": 1.913874703032344e-05,
"loss": 0.2677,
"step": 11400
},
{
"epoch": 1.02,
"learning_rate": 1.910133378229231e-05,
"loss": 0.2488,
"step": 11600
},
{
"epoch": 1.04,
"learning_rate": 1.9063920534261183e-05,
"loss": 0.256,
"step": 11800
},
{
"epoch": 1.06,
"learning_rate": 1.9026507286230055e-05,
"loss": 0.2489,
"step": 12000
},
{
"epoch": 1.07,
"learning_rate": 1.8989094038198927e-05,
"loss": 0.2525,
"step": 12200
},
{
"epoch": 1.09,
"learning_rate": 1.89516807901678e-05,
"loss": 0.2483,
"step": 12400
},
{
"epoch": 1.11,
"learning_rate": 1.8914267542136674e-05,
"loss": 0.2528,
"step": 12600
},
{
"epoch": 1.13,
"learning_rate": 1.8876854294105543e-05,
"loss": 0.2481,
"step": 12800
},
{
"epoch": 1.14,
"learning_rate": 1.8839441046074415e-05,
"loss": 0.2517,
"step": 13000
},
{
"epoch": 1.16,
"learning_rate": 1.880202779804329e-05,
"loss": 0.2514,
"step": 13200
},
{
"epoch": 1.18,
"learning_rate": 1.8764614550012162e-05,
"loss": 0.2464,
"step": 13400
},
{
"epoch": 1.2,
"learning_rate": 1.8727201301981034e-05,
"loss": 0.2586,
"step": 13600
},
{
"epoch": 1.21,
"learning_rate": 1.8689788053949906e-05,
"loss": 0.2507,
"step": 13800
},
{
"epoch": 1.23,
"learning_rate": 1.8652374805918778e-05,
"loss": 0.2609,
"step": 14000
},
{
"epoch": 1.25,
"learning_rate": 1.861496155788765e-05,
"loss": 0.2368,
"step": 14200
},
{
"epoch": 1.27,
"learning_rate": 1.857754830985652e-05,
"loss": 0.2473,
"step": 14400
},
{
"epoch": 1.28,
"learning_rate": 1.8540135061825394e-05,
"loss": 0.2379,
"step": 14600
},
{
"epoch": 1.3,
"learning_rate": 1.8502721813794265e-05,
"loss": 0.2431,
"step": 14800
},
{
"epoch": 1.32,
"learning_rate": 1.8465308565763137e-05,
"loss": 0.2521,
"step": 15000
},
{
"epoch": 1.34,
"learning_rate": 1.842789531773201e-05,
"loss": 0.2473,
"step": 15200
},
{
"epoch": 1.35,
"learning_rate": 1.8390482069700885e-05,
"loss": 0.2496,
"step": 15400
},
{
"epoch": 1.37,
"learning_rate": 1.8353068821669757e-05,
"loss": 0.2529,
"step": 15600
},
{
"epoch": 1.39,
"learning_rate": 1.8315655573638625e-05,
"loss": 0.2484,
"step": 15800
},
{
"epoch": 1.41,
"learning_rate": 1.8278242325607497e-05,
"loss": 0.2393,
"step": 16000
},
{
"epoch": 1.42,
"learning_rate": 1.8240829077576372e-05,
"loss": 0.2394,
"step": 16200
},
{
"epoch": 1.44,
"learning_rate": 1.8203415829545244e-05,
"loss": 0.2265,
"step": 16400
},
{
"epoch": 1.46,
"learning_rate": 1.8166002581514116e-05,
"loss": 0.2435,
"step": 16600
},
{
"epoch": 1.48,
"learning_rate": 1.8128589333482988e-05,
"loss": 0.2513,
"step": 16800
},
{
"epoch": 1.5,
"learning_rate": 1.809117608545186e-05,
"loss": 0.2478,
"step": 17000
},
{
"epoch": 1.51,
"learning_rate": 1.8053762837420732e-05,
"loss": 0.2601,
"step": 17200
},
{
"epoch": 1.53,
"learning_rate": 1.8016349589389604e-05,
"loss": 0.2371,
"step": 17400
},
{
"epoch": 1.55,
"learning_rate": 1.7978936341358476e-05,
"loss": 0.2504,
"step": 17600
},
{
"epoch": 1.57,
"learning_rate": 1.7941523093327348e-05,
"loss": 0.2414,
"step": 17800
},
{
"epoch": 1.58,
"learning_rate": 1.790410984529622e-05,
"loss": 0.2296,
"step": 18000
},
{
"epoch": 1.6,
"learning_rate": 1.7866696597265095e-05,
"loss": 0.2413,
"step": 18200
},
{
"epoch": 1.62,
"learning_rate": 1.7829283349233967e-05,
"loss": 0.2447,
"step": 18400
},
{
"epoch": 1.64,
"learning_rate": 1.779187010120284e-05,
"loss": 0.2392,
"step": 18600
},
{
"epoch": 1.65,
"learning_rate": 1.7754456853171707e-05,
"loss": 0.2515,
"step": 18800
},
{
"epoch": 1.67,
"learning_rate": 1.7717043605140583e-05,
"loss": 0.2383,
"step": 19000
},
{
"epoch": 1.69,
"learning_rate": 1.7679630357109454e-05,
"loss": 0.2522,
"step": 19200
},
{
"epoch": 1.71,
"learning_rate": 1.7642217109078326e-05,
"loss": 0.244,
"step": 19400
},
{
"epoch": 1.72,
"learning_rate": 1.7604803861047198e-05,
"loss": 0.2368,
"step": 19600
},
{
"epoch": 1.74,
"learning_rate": 1.756739061301607e-05,
"loss": 0.2416,
"step": 19800
},
{
"epoch": 1.76,
"learning_rate": 1.7529977364984942e-05,
"loss": 0.2428,
"step": 20000
},
{
"epoch": 1.78,
"learning_rate": 1.7492564116953814e-05,
"loss": 0.2387,
"step": 20200
},
{
"epoch": 1.79,
"learning_rate": 1.745515086892269e-05,
"loss": 0.2363,
"step": 20400
},
{
"epoch": 1.81,
"learning_rate": 1.7417737620891558e-05,
"loss": 0.2433,
"step": 20600
},
{
"epoch": 1.83,
"learning_rate": 1.738032437286043e-05,
"loss": 0.2395,
"step": 20800
},
{
"epoch": 1.85,
"learning_rate": 1.7342911124829302e-05,
"loss": 0.237,
"step": 21000
},
{
"epoch": 1.86,
"learning_rate": 1.7305497876798177e-05,
"loss": 0.2382,
"step": 21200
},
{
"epoch": 1.88,
"learning_rate": 1.726808462876705e-05,
"loss": 0.2306,
"step": 21400
},
{
"epoch": 1.9,
"learning_rate": 1.723067138073592e-05,
"loss": 0.2356,
"step": 21600
},
{
"epoch": 1.92,
"learning_rate": 1.7193258132704793e-05,
"loss": 0.2473,
"step": 21800
},
{
"epoch": 1.93,
"learning_rate": 1.7155844884673665e-05,
"loss": 0.2428,
"step": 22000
},
{
"epoch": 1.95,
"learning_rate": 1.7118431636642537e-05,
"loss": 0.2302,
"step": 22200
},
{
"epoch": 1.97,
"learning_rate": 1.708101838861141e-05,
"loss": 0.2235,
"step": 22400
},
{
"epoch": 1.99,
"learning_rate": 1.704360514058028e-05,
"loss": 0.2271,
"step": 22600
},
{
"epoch": 2.01,
"learning_rate": 1.7006191892549152e-05,
"loss": 0.2229,
"step": 22800
},
{
"epoch": 2.02,
"learning_rate": 1.6968778644518024e-05,
"loss": 0.2,
"step": 23000
},
{
"epoch": 2.04,
"learning_rate": 1.69313653964869e-05,
"loss": 0.1905,
"step": 23200
},
{
"epoch": 2.06,
"learning_rate": 1.689395214845577e-05,
"loss": 0.1908,
"step": 23400
},
{
"epoch": 2.08,
"learning_rate": 1.685653890042464e-05,
"loss": 0.2062,
"step": 23600
},
{
"epoch": 2.09,
"learning_rate": 1.6819125652393512e-05,
"loss": 0.1926,
"step": 23800
},
{
"epoch": 2.11,
"learning_rate": 1.6781712404362387e-05,
"loss": 0.1867,
"step": 24000
},
{
"epoch": 2.13,
"learning_rate": 1.674429915633126e-05,
"loss": 0.1892,
"step": 24200
},
{
"epoch": 2.15,
"learning_rate": 1.670688590830013e-05,
"loss": 0.1853,
"step": 24400
},
{
"epoch": 2.16,
"learning_rate": 1.6669472660269003e-05,
"loss": 0.1972,
"step": 24600
},
{
"epoch": 2.18,
"learning_rate": 1.6632059412237875e-05,
"loss": 0.1916,
"step": 24800
},
{
"epoch": 2.2,
"learning_rate": 1.6594646164206747e-05,
"loss": 0.1836,
"step": 25000
},
{
"epoch": 2.22,
"learning_rate": 1.655723291617562e-05,
"loss": 0.1855,
"step": 25200
},
{
"epoch": 2.23,
"learning_rate": 1.651981966814449e-05,
"loss": 0.2007,
"step": 25400
},
{
"epoch": 2.25,
"learning_rate": 1.6482406420113363e-05,
"loss": 0.1857,
"step": 25600
},
{
"epoch": 2.27,
"learning_rate": 1.6444993172082235e-05,
"loss": 0.2067,
"step": 25800
},
{
"epoch": 2.29,
"learning_rate": 1.6407579924051106e-05,
"loss": 0.2096,
"step": 26000
},
{
"epoch": 2.3,
"learning_rate": 1.6370166676019982e-05,
"loss": 0.2039,
"step": 26200
},
{
"epoch": 2.32,
"learning_rate": 1.6332753427988854e-05,
"loss": 0.1985,
"step": 26400
},
{
"epoch": 2.34,
"learning_rate": 1.6295340179957722e-05,
"loss": 0.1927,
"step": 26600
},
{
"epoch": 2.36,
"learning_rate": 1.6257926931926597e-05,
"loss": 0.2015,
"step": 26800
},
{
"epoch": 2.37,
"learning_rate": 1.622051368389547e-05,
"loss": 0.1822,
"step": 27000
},
{
"epoch": 2.39,
"learning_rate": 1.618310043586434e-05,
"loss": 0.1957,
"step": 27200
},
{
"epoch": 2.41,
"learning_rate": 1.6145687187833213e-05,
"loss": 0.1936,
"step": 27400
},
{
"epoch": 2.43,
"learning_rate": 1.6108273939802085e-05,
"loss": 0.1896,
"step": 27600
},
{
"epoch": 2.44,
"learning_rate": 1.6070860691770957e-05,
"loss": 0.199,
"step": 27800
},
{
"epoch": 2.46,
"learning_rate": 1.603344744373983e-05,
"loss": 0.1954,
"step": 28000
},
{
"epoch": 2.48,
"learning_rate": 1.5996034195708704e-05,
"loss": 0.1918,
"step": 28200
},
{
"epoch": 2.5,
"learning_rate": 1.5958620947677573e-05,
"loss": 0.1902,
"step": 28400
},
{
"epoch": 2.52,
"learning_rate": 1.5921207699646445e-05,
"loss": 0.201,
"step": 28600
},
{
"epoch": 2.53,
"learning_rate": 1.5883794451615317e-05,
"loss": 0.1888,
"step": 28800
},
{
"epoch": 2.55,
"learning_rate": 1.5846381203584192e-05,
"loss": 0.1932,
"step": 29000
},
{
"epoch": 2.57,
"learning_rate": 1.5808967955553064e-05,
"loss": 0.2016,
"step": 29200
},
{
"epoch": 2.59,
"learning_rate": 1.5771554707521936e-05,
"loss": 0.199,
"step": 29400
},
{
"epoch": 2.6,
"learning_rate": 1.5734141459490804e-05,
"loss": 0.1854,
"step": 29600
},
{
"epoch": 2.62,
"learning_rate": 1.569672821145968e-05,
"loss": 0.1935,
"step": 29800
},
{
"epoch": 2.64,
"learning_rate": 1.565931496342855e-05,
"loss": 0.1902,
"step": 30000
},
{
"epoch": 2.66,
"learning_rate": 1.5621901715397423e-05,
"loss": 0.1831,
"step": 30200
},
{
"epoch": 2.67,
"learning_rate": 1.5584488467366295e-05,
"loss": 0.1978,
"step": 30400
},
{
"epoch": 2.69,
"learning_rate": 1.5547075219335167e-05,
"loss": 0.1938,
"step": 30600
},
{
"epoch": 2.71,
"learning_rate": 1.550966197130404e-05,
"loss": 0.1911,
"step": 30800
},
{
"epoch": 2.73,
"learning_rate": 1.547224872327291e-05,
"loss": 0.1847,
"step": 31000
},
{
"epoch": 2.74,
"learning_rate": 1.5434835475241786e-05,
"loss": 0.1919,
"step": 31200
},
{
"epoch": 2.76,
"learning_rate": 1.5397422227210655e-05,
"loss": 0.1944,
"step": 31400
},
{
"epoch": 2.78,
"learning_rate": 1.5360008979179527e-05,
"loss": 0.1809,
"step": 31600
},
{
"epoch": 2.8,
"learning_rate": 1.5322595731148402e-05,
"loss": 0.1986,
"step": 31800
},
{
"epoch": 2.81,
"learning_rate": 1.5285182483117274e-05,
"loss": 0.1927,
"step": 32000
},
{
"epoch": 2.83,
"learning_rate": 1.5247769235086144e-05,
"loss": 0.1955,
"step": 32200
},
{
"epoch": 2.85,
"learning_rate": 1.5210355987055016e-05,
"loss": 0.1909,
"step": 32400
},
{
"epoch": 2.87,
"learning_rate": 1.517294273902389e-05,
"loss": 0.2026,
"step": 32600
},
{
"epoch": 2.88,
"learning_rate": 1.5135529490992762e-05,
"loss": 0.1922,
"step": 32800
},
{
"epoch": 2.9,
"learning_rate": 1.5098116242961634e-05,
"loss": 0.1892,
"step": 33000
},
{
"epoch": 2.92,
"learning_rate": 1.5060702994930506e-05,
"loss": 0.1962,
"step": 33200
},
{
"epoch": 2.94,
"learning_rate": 1.502328974689938e-05,
"loss": 0.1987,
"step": 33400
},
{
"epoch": 2.95,
"learning_rate": 1.4985876498868251e-05,
"loss": 0.2,
"step": 33600
},
{
"epoch": 2.97,
"learning_rate": 1.4948463250837121e-05,
"loss": 0.1784,
"step": 33800
},
{
"epoch": 2.99,
"learning_rate": 1.4911050002805995e-05,
"loss": 0.1998,
"step": 34000
},
{
"epoch": 3.01,
"learning_rate": 1.4873636754774867e-05,
"loss": 0.1794,
"step": 34200
},
{
"epoch": 3.03,
"learning_rate": 1.4836223506743739e-05,
"loss": 0.1545,
"step": 34400
},
{
"epoch": 3.04,
"learning_rate": 1.479881025871261e-05,
"loss": 0.1596,
"step": 34600
},
{
"epoch": 3.06,
"learning_rate": 1.4761397010681484e-05,
"loss": 0.1522,
"step": 34800
},
{
"epoch": 3.08,
"learning_rate": 1.4723983762650356e-05,
"loss": 0.1633,
"step": 35000
},
{
"epoch": 3.1,
"learning_rate": 1.4686570514619227e-05,
"loss": 0.1421,
"step": 35200
},
{
"epoch": 3.11,
"learning_rate": 1.4649157266588102e-05,
"loss": 0.1506,
"step": 35400
},
{
"epoch": 3.13,
"learning_rate": 1.4611744018556972e-05,
"loss": 0.1563,
"step": 35600
},
{
"epoch": 3.15,
"learning_rate": 1.4574330770525844e-05,
"loss": 0.1557,
"step": 35800
},
{
"epoch": 3.17,
"learning_rate": 1.4536917522494716e-05,
"loss": 0.1641,
"step": 36000
},
{
"epoch": 3.18,
"learning_rate": 1.449950427446359e-05,
"loss": 0.1545,
"step": 36200
},
{
"epoch": 3.2,
"learning_rate": 1.4462091026432461e-05,
"loss": 0.1625,
"step": 36400
},
{
"epoch": 3.22,
"learning_rate": 1.4424677778401333e-05,
"loss": 0.165,
"step": 36600
},
{
"epoch": 3.24,
"learning_rate": 1.4387264530370207e-05,
"loss": 0.157,
"step": 36800
},
{
"epoch": 3.25,
"learning_rate": 1.4349851282339077e-05,
"loss": 0.1485,
"step": 37000
},
{
"epoch": 3.27,
"learning_rate": 1.4312438034307949e-05,
"loss": 0.1571,
"step": 37200
},
{
"epoch": 3.29,
"learning_rate": 1.4275024786276821e-05,
"loss": 0.1608,
"step": 37400
},
{
"epoch": 3.31,
"learning_rate": 1.4237611538245695e-05,
"loss": 0.162,
"step": 37600
},
{
"epoch": 3.32,
"learning_rate": 1.4200198290214567e-05,
"loss": 0.154,
"step": 37800
},
{
"epoch": 3.34,
"learning_rate": 1.4162785042183438e-05,
"loss": 0.1536,
"step": 38000
},
{
"epoch": 3.36,
"learning_rate": 1.4125371794152309e-05,
"loss": 0.164,
"step": 38200
},
{
"epoch": 3.38,
"learning_rate": 1.4087958546121184e-05,
"loss": 0.1655,
"step": 38400
},
{
"epoch": 3.39,
"learning_rate": 1.4050545298090054e-05,
"loss": 0.1606,
"step": 38600
},
{
"epoch": 3.41,
"learning_rate": 1.4013132050058926e-05,
"loss": 0.1533,
"step": 38800
},
{
"epoch": 3.43,
"learning_rate": 1.39757188020278e-05,
"loss": 0.1515,
"step": 39000
},
{
"epoch": 3.45,
"learning_rate": 1.3938305553996672e-05,
"loss": 0.1624,
"step": 39200
},
{
"epoch": 3.46,
"learning_rate": 1.3900892305965544e-05,
"loss": 0.1467,
"step": 39400
},
{
"epoch": 3.48,
"learning_rate": 1.3863479057934415e-05,
"loss": 0.1582,
"step": 39600
},
{
"epoch": 3.5,
"learning_rate": 1.3826065809903289e-05,
"loss": 0.163,
"step": 39800
},
{
"epoch": 3.52,
"learning_rate": 1.378865256187216e-05,
"loss": 0.1561,
"step": 40000
},
{
"epoch": 3.54,
"learning_rate": 1.3751239313841031e-05,
"loss": 0.1558,
"step": 40200
},
{
"epoch": 3.55,
"learning_rate": 1.3713826065809905e-05,
"loss": 0.1686,
"step": 40400
},
{
"epoch": 3.57,
"learning_rate": 1.3676412817778777e-05,
"loss": 0.161,
"step": 40600
},
{
"epoch": 3.59,
"learning_rate": 1.3638999569747649e-05,
"loss": 0.1574,
"step": 40800
},
{
"epoch": 3.61,
"learning_rate": 1.360158632171652e-05,
"loss": 0.1591,
"step": 41000
},
{
"epoch": 3.62,
"learning_rate": 1.3564173073685394e-05,
"loss": 0.1618,
"step": 41200
},
{
"epoch": 3.64,
"learning_rate": 1.3526759825654266e-05,
"loss": 0.1545,
"step": 41400
},
{
"epoch": 3.66,
"learning_rate": 1.3489346577623136e-05,
"loss": 0.1626,
"step": 41600
},
{
"epoch": 3.68,
"learning_rate": 1.345193332959201e-05,
"loss": 0.1588,
"step": 41800
},
{
"epoch": 3.69,
"learning_rate": 1.3414520081560882e-05,
"loss": 0.1538,
"step": 42000
},
{
"epoch": 3.71,
"learning_rate": 1.3377106833529754e-05,
"loss": 0.1557,
"step": 42200
},
{
"epoch": 3.73,
"learning_rate": 1.3339693585498626e-05,
"loss": 0.1554,
"step": 42400
},
{
"epoch": 3.75,
"learning_rate": 1.33022803374675e-05,
"loss": 0.1639,
"step": 42600
},
{
"epoch": 3.76,
"learning_rate": 1.3264867089436371e-05,
"loss": 0.1519,
"step": 42800
},
{
"epoch": 3.78,
"learning_rate": 1.3227453841405241e-05,
"loss": 0.1671,
"step": 43000
},
{
"epoch": 3.8,
"learning_rate": 1.3190040593374113e-05,
"loss": 0.1614,
"step": 43200
},
{
"epoch": 3.82,
"learning_rate": 1.3152627345342987e-05,
"loss": 0.1523,
"step": 43400
},
{
"epoch": 3.83,
"learning_rate": 1.3115214097311859e-05,
"loss": 0.1564,
"step": 43600
},
{
"epoch": 3.85,
"learning_rate": 1.307780084928073e-05,
"loss": 0.1662,
"step": 43800
},
{
"epoch": 3.87,
"learning_rate": 1.3040387601249604e-05,
"loss": 0.1667,
"step": 44000
},
{
"epoch": 3.89,
"learning_rate": 1.3002974353218476e-05,
"loss": 0.1631,
"step": 44200
},
{
"epoch": 3.9,
"learning_rate": 1.2965561105187348e-05,
"loss": 0.1561,
"step": 44400
},
{
"epoch": 3.92,
"learning_rate": 1.2928147857156219e-05,
"loss": 0.159,
"step": 44600
},
{
"epoch": 3.94,
"learning_rate": 1.2890734609125092e-05,
"loss": 0.1618,
"step": 44800
},
{
"epoch": 3.96,
"learning_rate": 1.2853321361093964e-05,
"loss": 0.1538,
"step": 45000
},
{
"epoch": 3.98,
"learning_rate": 1.2815908113062836e-05,
"loss": 0.1586,
"step": 45200
},
{
"epoch": 3.99,
"learning_rate": 1.277849486503171e-05,
"loss": 0.16,
"step": 45400
},
{
"epoch": 4.01,
"learning_rate": 1.2741081617000581e-05,
"loss": 0.1483,
"step": 45600
},
{
"epoch": 4.03,
"learning_rate": 1.2703668368969453e-05,
"loss": 0.1201,
"step": 45800
},
{
"epoch": 4.05,
"learning_rate": 1.2666255120938324e-05,
"loss": 0.1278,
"step": 46000
},
{
"epoch": 4.06,
"learning_rate": 1.2628841872907199e-05,
"loss": 0.1348,
"step": 46200
},
{
"epoch": 4.08,
"learning_rate": 1.2591428624876069e-05,
"loss": 0.1238,
"step": 46400
},
{
"epoch": 4.1,
"learning_rate": 1.2554015376844941e-05,
"loss": 0.1215,
"step": 46600
},
{
"epoch": 4.12,
"learning_rate": 1.2516602128813815e-05,
"loss": 0.132,
"step": 46800
},
{
"epoch": 4.13,
"learning_rate": 1.2479188880782687e-05,
"loss": 0.1244,
"step": 47000
},
{
"epoch": 4.15,
"learning_rate": 1.2441775632751559e-05,
"loss": 0.1235,
"step": 47200
},
{
"epoch": 4.17,
"learning_rate": 1.240436238472043e-05,
"loss": 0.1312,
"step": 47400
},
{
"epoch": 4.19,
"learning_rate": 1.2366949136689304e-05,
"loss": 0.1226,
"step": 47600
},
{
"epoch": 4.2,
"learning_rate": 1.2329535888658174e-05,
"loss": 0.1292,
"step": 47800
},
{
"epoch": 4.22,
"learning_rate": 1.2292122640627046e-05,
"loss": 0.1255,
"step": 48000
},
{
"epoch": 4.24,
"learning_rate": 1.2254709392595918e-05,
"loss": 0.1258,
"step": 48200
},
{
"epoch": 4.26,
"learning_rate": 1.2217296144564792e-05,
"loss": 0.1292,
"step": 48400
},
{
"epoch": 4.27,
"learning_rate": 1.2179882896533664e-05,
"loss": 0.1298,
"step": 48600
},
{
"epoch": 4.29,
"learning_rate": 1.2142469648502536e-05,
"loss": 0.1254,
"step": 48800
},
{
"epoch": 4.31,
"learning_rate": 1.210505640047141e-05,
"loss": 0.1241,
"step": 49000
},
{
"epoch": 4.33,
"learning_rate": 1.2067643152440281e-05,
"loss": 0.1302,
"step": 49200
},
{
"epoch": 4.34,
"learning_rate": 1.2030229904409151e-05,
"loss": 0.1309,
"step": 49400
},
{
"epoch": 4.36,
"learning_rate": 1.1992816656378023e-05,
"loss": 0.1182,
"step": 49600
},
{
"epoch": 4.38,
"learning_rate": 1.1955403408346897e-05,
"loss": 0.1331,
"step": 49800
},
{
"epoch": 4.4,
"learning_rate": 1.1917990160315769e-05,
"loss": 0.1289,
"step": 50000
},
{
"epoch": 4.41,
"learning_rate": 1.188057691228464e-05,
"loss": 0.1149,
"step": 50200
},
{
"epoch": 4.43,
"learning_rate": 1.1843163664253514e-05,
"loss": 0.1201,
"step": 50400
},
{
"epoch": 4.45,
"learning_rate": 1.1805750416222386e-05,
"loss": 0.1218,
"step": 50600
},
{
"epoch": 4.47,
"learning_rate": 1.1768337168191256e-05,
"loss": 0.1278,
"step": 50800
},
{
"epoch": 4.49,
"learning_rate": 1.1730923920160128e-05,
"loss": 0.1275,
"step": 51000
},
{
"epoch": 4.5,
"learning_rate": 1.1693510672129002e-05,
"loss": 0.1379,
"step": 51200
},
{
"epoch": 4.52,
"learning_rate": 1.1656097424097874e-05,
"loss": 0.1298,
"step": 51400
},
{
"epoch": 4.54,
"learning_rate": 1.1618684176066746e-05,
"loss": 0.1375,
"step": 51600
},
{
"epoch": 4.56,
"learning_rate": 1.1581270928035618e-05,
"loss": 0.1271,
"step": 51800
},
{
"epoch": 4.57,
"learning_rate": 1.1543857680004491e-05,
"loss": 0.1487,
"step": 52000
},
{
"epoch": 4.59,
"learning_rate": 1.1506444431973363e-05,
"loss": 0.1341,
"step": 52200
},
{
"epoch": 4.61,
"learning_rate": 1.1469031183942233e-05,
"loss": 0.1314,
"step": 52400
},
{
"epoch": 4.63,
"learning_rate": 1.1431617935911107e-05,
"loss": 0.1187,
"step": 52600
},
{
"epoch": 4.64,
"learning_rate": 1.1394204687879979e-05,
"loss": 0.1308,
"step": 52800
},
{
"epoch": 4.66,
"learning_rate": 1.1356791439848851e-05,
"loss": 0.1357,
"step": 53000
},
{
"epoch": 4.68,
"learning_rate": 1.1319378191817723e-05,
"loss": 0.1346,
"step": 53200
},
{
"epoch": 4.7,
"learning_rate": 1.1281964943786596e-05,
"loss": 0.1302,
"step": 53400
},
{
"epoch": 4.71,
"learning_rate": 1.1244551695755468e-05,
"loss": 0.1318,
"step": 53600
},
{
"epoch": 4.73,
"learning_rate": 1.120713844772434e-05,
"loss": 0.1406,
"step": 53800
},
{
"epoch": 4.75,
"learning_rate": 1.1169725199693214e-05,
"loss": 0.1287,
"step": 54000
},
{
"epoch": 4.77,
"learning_rate": 1.1132311951662084e-05,
"loss": 0.1296,
"step": 54200
},
{
"epoch": 4.78,
"learning_rate": 1.1094898703630956e-05,
"loss": 0.1239,
"step": 54400
},
{
"epoch": 4.8,
"learning_rate": 1.1057485455599828e-05,
"loss": 0.1289,
"step": 54600
},
{
"epoch": 4.82,
"learning_rate": 1.1020072207568702e-05,
"loss": 0.1371,
"step": 54800
},
{
"epoch": 4.84,
"learning_rate": 1.0982658959537573e-05,
"loss": 0.1371,
"step": 55000
},
{
"epoch": 4.85,
"learning_rate": 1.0945245711506445e-05,
"loss": 0.1197,
"step": 55200
},
{
"epoch": 4.87,
"learning_rate": 1.0907832463475319e-05,
"loss": 0.1316,
"step": 55400
},
{
"epoch": 4.89,
"learning_rate": 1.0870419215444191e-05,
"loss": 0.1275,
"step": 55600
},
{
"epoch": 4.91,
"learning_rate": 1.0833005967413061e-05,
"loss": 0.1287,
"step": 55800
},
{
"epoch": 4.92,
"learning_rate": 1.0795592719381933e-05,
"loss": 0.1266,
"step": 56000
},
{
"epoch": 4.94,
"learning_rate": 1.0758179471350807e-05,
"loss": 0.1275,
"step": 56200
},
{
"epoch": 4.96,
"learning_rate": 1.0720766223319679e-05,
"loss": 0.1244,
"step": 56400
},
{
"epoch": 4.98,
"learning_rate": 1.068335297528855e-05,
"loss": 0.1453,
"step": 56600
},
{
"epoch": 5.0,
"learning_rate": 1.0645939727257422e-05,
"loss": 0.1343,
"step": 56800
},
{
"epoch": 5.01,
"learning_rate": 1.0608526479226296e-05,
"loss": 0.114,
"step": 57000
},
{
"epoch": 5.03,
"learning_rate": 1.0571113231195166e-05,
"loss": 0.1038,
"step": 57200
},
{
"epoch": 5.05,
"learning_rate": 1.0533699983164038e-05,
"loss": 0.1064,
"step": 57400
},
{
"epoch": 5.07,
"learning_rate": 1.0496286735132912e-05,
"loss": 0.0928,
"step": 57600
},
{
"epoch": 5.08,
"learning_rate": 1.0458873487101784e-05,
"loss": 0.1079,
"step": 57800
},
{
"epoch": 5.1,
"learning_rate": 1.0421460239070656e-05,
"loss": 0.0976,
"step": 58000
},
{
"epoch": 5.12,
"learning_rate": 1.0384046991039528e-05,
"loss": 0.1086,
"step": 58200
},
{
"epoch": 5.14,
"learning_rate": 1.0346633743008401e-05,
"loss": 0.105,
"step": 58400
},
{
"epoch": 5.15,
"learning_rate": 1.0309220494977273e-05,
"loss": 0.1086,
"step": 58600
},
{
"epoch": 5.17,
"learning_rate": 1.0271807246946143e-05,
"loss": 0.0972,
"step": 58800
},
{
"epoch": 5.19,
"learning_rate": 1.0234393998915017e-05,
"loss": 0.1086,
"step": 59000
},
{
"epoch": 5.21,
"learning_rate": 1.0196980750883889e-05,
"loss": 0.1151,
"step": 59200
},
{
"epoch": 5.22,
"learning_rate": 1.015956750285276e-05,
"loss": 0.1076,
"step": 59400
},
{
"epoch": 5.24,
"learning_rate": 1.0122154254821633e-05,
"loss": 0.1061,
"step": 59600
},
{
"epoch": 5.26,
"learning_rate": 1.0084741006790506e-05,
"loss": 0.1087,
"step": 59800
},
{
"epoch": 5.28,
"learning_rate": 1.0047327758759378e-05,
"loss": 0.1081,
"step": 60000
},
{
"epoch": 5.29,
"learning_rate": 1.0009914510728248e-05,
"loss": 0.1051,
"step": 60200
},
{
"epoch": 5.31,
"learning_rate": 9.972501262697122e-06,
"loss": 0.1096,
"step": 60400
},
{
"epoch": 5.33,
"learning_rate": 9.935088014665994e-06,
"loss": 0.1067,
"step": 60600
},
{
"epoch": 5.35,
"learning_rate": 9.897674766634866e-06,
"loss": 0.1106,
"step": 60800
},
{
"epoch": 5.36,
"learning_rate": 9.86026151860374e-06,
"loss": 0.1019,
"step": 61000
},
{
"epoch": 5.38,
"learning_rate": 9.82284827057261e-06,
"loss": 0.1107,
"step": 61200
},
{
"epoch": 5.4,
"learning_rate": 9.785435022541483e-06,
"loss": 0.1045,
"step": 61400
},
{
"epoch": 5.42,
"learning_rate": 9.748021774510355e-06,
"loss": 0.1101,
"step": 61600
},
{
"epoch": 5.43,
"learning_rate": 9.710608526479227e-06,
"loss": 0.1001,
"step": 61800
},
{
"epoch": 5.45,
"learning_rate": 9.673195278448099e-06,
"loss": 0.1093,
"step": 62000
},
{
"epoch": 5.47,
"learning_rate": 9.635782030416971e-06,
"loss": 0.1091,
"step": 62200
},
{
"epoch": 5.49,
"learning_rate": 9.598368782385845e-06,
"loss": 0.1073,
"step": 62400
},
{
"epoch": 5.51,
"learning_rate": 9.560955534354715e-06,
"loss": 0.1175,
"step": 62600
},
{
"epoch": 5.52,
"learning_rate": 9.523542286323588e-06,
"loss": 0.1018,
"step": 62800
},
{
"epoch": 5.54,
"learning_rate": 9.48612903829246e-06,
"loss": 0.1015,
"step": 63000
},
{
"epoch": 5.56,
"learning_rate": 9.448715790261332e-06,
"loss": 0.1159,
"step": 63200
},
{
"epoch": 5.58,
"learning_rate": 9.411302542230204e-06,
"loss": 0.1104,
"step": 63400
},
{
"epoch": 5.59,
"learning_rate": 9.373889294199076e-06,
"loss": 0.1105,
"step": 63600
},
{
"epoch": 5.61,
"learning_rate": 9.33647604616795e-06,
"loss": 0.1037,
"step": 63800
},
{
"epoch": 5.63,
"learning_rate": 9.299062798136822e-06,
"loss": 0.103,
"step": 64000
},
{
"epoch": 5.65,
"learning_rate": 9.261649550105694e-06,
"loss": 0.1129,
"step": 64200
},
{
"epoch": 5.66,
"learning_rate": 9.224236302074565e-06,
"loss": 0.1005,
"step": 64400
},
{
"epoch": 5.68,
"learning_rate": 9.186823054043437e-06,
"loss": 0.1082,
"step": 64600
},
{
"epoch": 5.7,
"learning_rate": 9.14940980601231e-06,
"loss": 0.1157,
"step": 64800
},
{
"epoch": 5.72,
"learning_rate": 9.111996557981181e-06,
"loss": 0.1139,
"step": 65000
},
{
"epoch": 5.73,
"learning_rate": 9.074583309950053e-06,
"loss": 0.1101,
"step": 65200
},
{
"epoch": 5.75,
"learning_rate": 9.037170061918927e-06,
"loss": 0.1139,
"step": 65400
},
{
"epoch": 5.77,
"learning_rate": 8.999756813887799e-06,
"loss": 0.1107,
"step": 65600
},
{
"epoch": 5.79,
"learning_rate": 8.96234356585667e-06,
"loss": 0.1095,
"step": 65800
},
{
"epoch": 5.8,
"learning_rate": 8.924930317825543e-06,
"loss": 0.1127,
"step": 66000
},
{
"epoch": 5.82,
"learning_rate": 8.887517069794414e-06,
"loss": 0.1118,
"step": 66200
},
{
"epoch": 5.84,
"learning_rate": 8.850103821763288e-06,
"loss": 0.1042,
"step": 66400
},
{
"epoch": 5.86,
"learning_rate": 8.812690573732158e-06,
"loss": 0.1112,
"step": 66600
},
{
"epoch": 5.87,
"learning_rate": 8.775277325701032e-06,
"loss": 0.1116,
"step": 66800
},
{
"epoch": 5.89,
"learning_rate": 8.737864077669904e-06,
"loss": 0.1139,
"step": 67000
},
{
"epoch": 5.91,
"learning_rate": 8.700450829638776e-06,
"loss": 0.1082,
"step": 67200
},
{
"epoch": 5.93,
"learning_rate": 8.663037581607648e-06,
"loss": 0.1056,
"step": 67400
},
{
"epoch": 5.94,
"learning_rate": 8.62562433357652e-06,
"loss": 0.102,
"step": 67600
},
{
"epoch": 5.96,
"learning_rate": 8.588211085545393e-06,
"loss": 0.1026,
"step": 67800
},
{
"epoch": 5.98,
"learning_rate": 8.550797837514263e-06,
"loss": 0.1103,
"step": 68000
},
{
"epoch": 6.0,
"learning_rate": 8.513384589483137e-06,
"loss": 0.1147,
"step": 68200
},
{
"epoch": 6.02,
"learning_rate": 8.475971341452009e-06,
"loss": 0.0773,
"step": 68400
},
{
"epoch": 6.03,
"learning_rate": 8.43855809342088e-06,
"loss": 0.0812,
"step": 68600
},
{
"epoch": 6.05,
"learning_rate": 8.401144845389754e-06,
"loss": 0.0801,
"step": 68800
},
{
"epoch": 6.07,
"learning_rate": 8.363731597358625e-06,
"loss": 0.0884,
"step": 69000
},
{
"epoch": 6.09,
"learning_rate": 8.326318349327498e-06,
"loss": 0.0914,
"step": 69200
},
{
"epoch": 6.1,
"learning_rate": 8.28890510129637e-06,
"loss": 0.0868,
"step": 69400
},
{
"epoch": 6.12,
"learning_rate": 8.251491853265242e-06,
"loss": 0.0948,
"step": 69600
},
{
"epoch": 6.14,
"learning_rate": 8.214078605234114e-06,
"loss": 0.0808,
"step": 69800
},
{
"epoch": 6.16,
"learning_rate": 8.176665357202986e-06,
"loss": 0.092,
"step": 70000
},
{
"epoch": 6.17,
"learning_rate": 8.139252109171858e-06,
"loss": 0.0841,
"step": 70200
},
{
"epoch": 6.19,
"learning_rate": 8.10183886114073e-06,
"loss": 0.0951,
"step": 70400
},
{
"epoch": 6.21,
"learning_rate": 8.064425613109603e-06,
"loss": 0.0928,
"step": 70600
},
{
"epoch": 6.23,
"learning_rate": 8.027012365078475e-06,
"loss": 0.0935,
"step": 70800
},
{
"epoch": 6.24,
"learning_rate": 7.989599117047347e-06,
"loss": 0.0927,
"step": 71000
},
{
"epoch": 6.26,
"learning_rate": 7.952185869016219e-06,
"loss": 0.0923,
"step": 71200
},
{
"epoch": 6.28,
"learning_rate": 7.914772620985091e-06,
"loss": 0.0801,
"step": 71400
},
{
"epoch": 6.3,
"learning_rate": 7.877359372953963e-06,
"loss": 0.0937,
"step": 71600
},
{
"epoch": 6.31,
"learning_rate": 7.839946124922837e-06,
"loss": 0.0865,
"step": 71800
},
{
"epoch": 6.33,
"learning_rate": 7.802532876891707e-06,
"loss": 0.0871,
"step": 72000
},
{
"epoch": 6.35,
"learning_rate": 7.76511962886058e-06,
"loss": 0.0786,
"step": 72200
},
{
"epoch": 6.37,
"learning_rate": 7.727706380829452e-06,
"loss": 0.0934,
"step": 72400
},
{
"epoch": 6.38,
"learning_rate": 7.690293132798324e-06,
"loss": 0.0838,
"step": 72600
},
{
"epoch": 6.4,
"learning_rate": 7.652879884767196e-06,
"loss": 0.097,
"step": 72800
},
{
"epoch": 6.42,
"learning_rate": 7.615466636736069e-06,
"loss": 0.0885,
"step": 73000
},
{
"epoch": 6.44,
"learning_rate": 7.578053388704941e-06,
"loss": 0.0919,
"step": 73200
},
{
"epoch": 6.46,
"learning_rate": 7.540640140673813e-06,
"loss": 0.0822,
"step": 73400
},
{
"epoch": 6.47,
"learning_rate": 7.5032268926426856e-06,
"loss": 0.0837,
"step": 73600
},
{
"epoch": 6.49,
"learning_rate": 7.465813644611558e-06,
"loss": 0.0879,
"step": 73800
},
{
"epoch": 6.51,
"learning_rate": 7.428400396580429e-06,
"loss": 0.0927,
"step": 74000
},
{
"epoch": 6.53,
"learning_rate": 7.390987148549302e-06,
"loss": 0.0929,
"step": 74200
},
{
"epoch": 6.54,
"learning_rate": 7.353573900518174e-06,
"loss": 0.0871,
"step": 74400
},
{
"epoch": 6.56,
"learning_rate": 7.316160652487047e-06,
"loss": 0.0886,
"step": 74600
},
{
"epoch": 6.58,
"learning_rate": 7.278747404455918e-06,
"loss": 0.0887,
"step": 74800
},
{
"epoch": 6.6,
"learning_rate": 7.241334156424791e-06,
"loss": 0.0924,
"step": 75000
},
{
"epoch": 6.61,
"learning_rate": 7.203920908393663e-06,
"loss": 0.0971,
"step": 75200
},
{
"epoch": 6.63,
"learning_rate": 7.166507660362535e-06,
"loss": 0.0922,
"step": 75400
},
{
"epoch": 6.65,
"learning_rate": 7.129094412331407e-06,
"loss": 0.0866,
"step": 75600
},
{
"epoch": 6.67,
"learning_rate": 7.091681164300279e-06,
"loss": 0.0822,
"step": 75800
},
{
"epoch": 6.68,
"learning_rate": 7.054267916269152e-06,
"loss": 0.102,
"step": 76000
},
{
"epoch": 6.7,
"learning_rate": 7.016854668238023e-06,
"loss": 0.091,
"step": 76200
},
{
"epoch": 6.72,
"learning_rate": 6.979441420206896e-06,
"loss": 0.0937,
"step": 76400
},
{
"epoch": 6.74,
"learning_rate": 6.942028172175768e-06,
"loss": 0.0795,
"step": 76600
},
{
"epoch": 6.75,
"learning_rate": 6.9046149241446405e-06,
"loss": 0.0917,
"step": 76800
},
{
"epoch": 6.77,
"learning_rate": 6.8672016761135115e-06,
"loss": 0.0987,
"step": 77000
},
{
"epoch": 6.79,
"learning_rate": 6.829788428082384e-06,
"loss": 0.0946,
"step": 77200
},
{
"epoch": 6.81,
"learning_rate": 6.792375180051257e-06,
"loss": 0.0915,
"step": 77400
},
{
"epoch": 6.82,
"learning_rate": 6.754961932020129e-06,
"loss": 0.0889,
"step": 77600
},
{
"epoch": 6.84,
"learning_rate": 6.717548683989002e-06,
"loss": 0.0884,
"step": 77800
},
{
"epoch": 6.86,
"learning_rate": 6.680135435957873e-06,
"loss": 0.0854,
"step": 78000
},
{
"epoch": 6.88,
"learning_rate": 6.642722187926746e-06,
"loss": 0.0847,
"step": 78200
},
{
"epoch": 6.89,
"learning_rate": 6.6053089398956175e-06,
"loss": 0.0907,
"step": 78400
},
{
"epoch": 6.91,
"learning_rate": 6.5678956918644894e-06,
"loss": 0.0955,
"step": 78600
},
{
"epoch": 6.93,
"learning_rate": 6.530482443833361e-06,
"loss": 0.095,
"step": 78800
},
{
"epoch": 6.95,
"learning_rate": 6.493069195802234e-06,
"loss": 0.0948,
"step": 79000
},
{
"epoch": 6.97,
"learning_rate": 6.455655947771107e-06,
"loss": 0.0774,
"step": 79200
},
{
"epoch": 6.98,
"learning_rate": 6.418242699739978e-06,
"loss": 0.0973,
"step": 79400
},
{
"epoch": 7.0,
"learning_rate": 6.380829451708851e-06,
"loss": 0.0976,
"step": 79600
},
{
"epoch": 7.02,
"learning_rate": 6.343416203677723e-06,
"loss": 0.0674,
"step": 79800
},
{
"epoch": 7.04,
"learning_rate": 6.306002955646595e-06,
"loss": 0.0735,
"step": 80000
},
{
"epoch": 7.05,
"learning_rate": 6.2685897076154665e-06,
"loss": 0.0777,
"step": 80200
},
{
"epoch": 7.07,
"learning_rate": 6.231176459584339e-06,
"loss": 0.0688,
"step": 80400
},
{
"epoch": 7.09,
"learning_rate": 6.193763211553212e-06,
"loss": 0.0721,
"step": 80600
},
{
"epoch": 7.11,
"learning_rate": 6.156349963522084e-06,
"loss": 0.0787,
"step": 80800
},
{
"epoch": 7.12,
"learning_rate": 6.118936715490956e-06,
"loss": 0.0755,
"step": 81000
},
{
"epoch": 7.14,
"learning_rate": 6.081523467459828e-06,
"loss": 0.072,
"step": 81200
},
{
"epoch": 7.16,
"learning_rate": 6.0441102194287005e-06,
"loss": 0.0695,
"step": 81400
},
{
"epoch": 7.18,
"learning_rate": 6.0066969713975724e-06,
"loss": 0.0713,
"step": 81600
},
{
"epoch": 7.19,
"learning_rate": 5.969283723366444e-06,
"loss": 0.0771,
"step": 81800
},
{
"epoch": 7.21,
"learning_rate": 5.931870475335316e-06,
"loss": 0.0695,
"step": 82000
},
{
"epoch": 7.23,
"learning_rate": 5.894457227304189e-06,
"loss": 0.0676,
"step": 82200
},
{
"epoch": 7.25,
"learning_rate": 5.857043979273062e-06,
"loss": 0.0766,
"step": 82400
},
{
"epoch": 7.26,
"learning_rate": 5.819630731241933e-06,
"loss": 0.0766,
"step": 82600
},
{
"epoch": 7.28,
"learning_rate": 5.782217483210806e-06,
"loss": 0.0807,
"step": 82800
},
{
"epoch": 7.3,
"learning_rate": 5.7448042351796775e-06,
"loss": 0.0834,
"step": 83000
},
{
"epoch": 7.32,
"learning_rate": 5.70739098714855e-06,
"loss": 0.0789,
"step": 83200
},
{
"epoch": 7.33,
"learning_rate": 5.669977739117421e-06,
"loss": 0.0691,
"step": 83400
},
{
"epoch": 7.35,
"learning_rate": 5.632564491086294e-06,
"loss": 0.0777,
"step": 83600
},
{
"epoch": 7.37,
"learning_rate": 5.595151243055166e-06,
"loss": 0.0725,
"step": 83800
},
{
"epoch": 7.39,
"learning_rate": 5.557737995024039e-06,
"loss": 0.0788,
"step": 84000
},
{
"epoch": 7.4,
"learning_rate": 5.520324746992911e-06,
"loss": 0.075,
"step": 84200
},
{
"epoch": 7.42,
"learning_rate": 5.482911498961783e-06,
"loss": 0.0742,
"step": 84400
},
{
"epoch": 7.44,
"learning_rate": 5.4454982509306554e-06,
"loss": 0.0666,
"step": 84600
},
{
"epoch": 7.46,
"learning_rate": 5.4080850028995265e-06,
"loss": 0.0688,
"step": 84800
},
{
"epoch": 7.48,
"learning_rate": 5.370671754868399e-06,
"loss": 0.0747,
"step": 85000
},
{
"epoch": 7.49,
"learning_rate": 5.333258506837271e-06,
"loss": 0.0741,
"step": 85200
},
{
"epoch": 7.51,
"learning_rate": 5.295845258806144e-06,
"loss": 0.0657,
"step": 85400
},
{
"epoch": 7.53,
"learning_rate": 5.258432010775017e-06,
"loss": 0.0788,
"step": 85600
},
{
"epoch": 7.55,
"learning_rate": 5.221018762743888e-06,
"loss": 0.0791,
"step": 85800
},
{
"epoch": 7.56,
"learning_rate": 5.1836055147127605e-06,
"loss": 0.0752,
"step": 86000
},
{
"epoch": 7.58,
"learning_rate": 5.1461922666816325e-06,
"loss": 0.0762,
"step": 86200
},
{
"epoch": 7.6,
"learning_rate": 5.108779018650505e-06,
"loss": 0.0771,
"step": 86400
},
{
"epoch": 7.62,
"learning_rate": 5.071365770619376e-06,
"loss": 0.0727,
"step": 86600
},
{
"epoch": 7.63,
"learning_rate": 5.033952522588249e-06,
"loss": 0.0843,
"step": 86800
},
{
"epoch": 7.65,
"learning_rate": 4.996539274557121e-06,
"loss": 0.0741,
"step": 87000
},
{
"epoch": 7.67,
"learning_rate": 4.959126026525993e-06,
"loss": 0.0721,
"step": 87200
},
{
"epoch": 7.69,
"learning_rate": 4.921712778494866e-06,
"loss": 0.0814,
"step": 87400
},
{
"epoch": 7.7,
"learning_rate": 4.884299530463738e-06,
"loss": 0.0703,
"step": 87600
},
{
"epoch": 7.72,
"learning_rate": 4.8468862824326095e-06,
"loss": 0.0745,
"step": 87800
},
{
"epoch": 7.74,
"learning_rate": 4.809473034401481e-06,
"loss": 0.0788,
"step": 88000
},
{
"epoch": 7.76,
"learning_rate": 4.772059786370354e-06,
"loss": 0.0721,
"step": 88200
},
{
"epoch": 7.77,
"learning_rate": 4.734646538339226e-06,
"loss": 0.0689,
"step": 88400
},
{
"epoch": 7.79,
"learning_rate": 4.697233290308099e-06,
"loss": 0.0651,
"step": 88600
},
{
"epoch": 7.81,
"learning_rate": 4.659820042276971e-06,
"loss": 0.0775,
"step": 88800
},
{
"epoch": 7.83,
"learning_rate": 4.622406794245843e-06,
"loss": 0.069,
"step": 89000
},
{
"epoch": 7.84,
"learning_rate": 4.584993546214715e-06,
"loss": 0.0807,
"step": 89200
},
{
"epoch": 7.86,
"learning_rate": 4.547580298183587e-06,
"loss": 0.0817,
"step": 89400
},
{
"epoch": 7.88,
"learning_rate": 4.510167050152459e-06,
"loss": 0.0771,
"step": 89600
},
{
"epoch": 7.9,
"learning_rate": 4.472753802121331e-06,
"loss": 0.0683,
"step": 89800
},
{
"epoch": 7.91,
"learning_rate": 4.435340554090204e-06,
"loss": 0.0704,
"step": 90000
},
{
"epoch": 7.93,
"learning_rate": 4.397927306059076e-06,
"loss": 0.0852,
"step": 90200
},
{
"epoch": 7.95,
"learning_rate": 4.360514058027948e-06,
"loss": 0.0773,
"step": 90400
},
{
"epoch": 7.97,
"learning_rate": 4.323100809996821e-06,
"loss": 0.0694,
"step": 90600
},
{
"epoch": 7.99,
"learning_rate": 4.2856875619656925e-06,
"loss": 0.0771,
"step": 90800
},
{
"epoch": 8.0,
"learning_rate": 4.2482743139345644e-06,
"loss": 0.0702,
"step": 91000
},
{
"epoch": 8.02,
"learning_rate": 4.210861065903436e-06,
"loss": 0.0533,
"step": 91200
},
{
"epoch": 8.04,
"learning_rate": 4.173447817872308e-06,
"loss": 0.0666,
"step": 91400
},
{
"epoch": 8.06,
"learning_rate": 4.136034569841181e-06,
"loss": 0.0566,
"step": 91600
},
{
"epoch": 8.07,
"learning_rate": 4.098621321810054e-06,
"loss": 0.0611,
"step": 91800
},
{
"epoch": 8.09,
"learning_rate": 4.061208073778926e-06,
"loss": 0.0589,
"step": 92000
},
{
"epoch": 8.11,
"learning_rate": 4.023794825747798e-06,
"loss": 0.0602,
"step": 92200
},
{
"epoch": 8.13,
"learning_rate": 3.9863815777166695e-06,
"loss": 0.0615,
"step": 92400
},
{
"epoch": 8.14,
"learning_rate": 3.948968329685542e-06,
"loss": 0.0682,
"step": 92600
},
{
"epoch": 8.16,
"learning_rate": 3.911555081654414e-06,
"loss": 0.0678,
"step": 92800
},
{
"epoch": 8.18,
"learning_rate": 3.874141833623286e-06,
"loss": 0.0586,
"step": 93000
},
{
"epoch": 8.2,
"learning_rate": 3.836728585592159e-06,
"loss": 0.0641,
"step": 93200
},
{
"epoch": 8.21,
"learning_rate": 3.799315337561031e-06,
"loss": 0.0586,
"step": 93400
},
{
"epoch": 8.23,
"learning_rate": 3.761902089529903e-06,
"loss": 0.0684,
"step": 93600
},
{
"epoch": 8.25,
"learning_rate": 3.724488841498775e-06,
"loss": 0.0594,
"step": 93800
},
{
"epoch": 8.27,
"learning_rate": 3.687075593467647e-06,
"loss": 0.061,
"step": 94000
},
{
"epoch": 8.28,
"learning_rate": 3.6496623454365193e-06,
"loss": 0.0587,
"step": 94200
},
{
"epoch": 8.3,
"learning_rate": 3.6122490974053913e-06,
"loss": 0.0675,
"step": 94400
},
{
"epoch": 8.32,
"learning_rate": 3.5748358493742636e-06,
"loss": 0.0621,
"step": 94600
},
{
"epoch": 8.34,
"learning_rate": 3.5374226013431355e-06,
"loss": 0.0556,
"step": 94800
},
{
"epoch": 8.35,
"learning_rate": 3.5000093533120083e-06,
"loss": 0.0728,
"step": 95000
},
{
"epoch": 8.37,
"learning_rate": 3.46259610528088e-06,
"loss": 0.0554,
"step": 95200
},
{
"epoch": 8.39,
"learning_rate": 3.4251828572497525e-06,
"loss": 0.0634,
"step": 95400
},
{
"epoch": 8.41,
"learning_rate": 3.3877696092186245e-06,
"loss": 0.063,
"step": 95600
},
{
"epoch": 8.42,
"learning_rate": 3.350356361187497e-06,
"loss": 0.0613,
"step": 95800
},
{
"epoch": 8.44,
"learning_rate": 3.3129431131563687e-06,
"loss": 0.0755,
"step": 96000
},
{
"epoch": 8.46,
"learning_rate": 3.275529865125241e-06,
"loss": 0.0616,
"step": 96200
},
{
"epoch": 8.48,
"learning_rate": 3.238116617094113e-06,
"loss": 0.0513,
"step": 96400
},
{
"epoch": 8.5,
"learning_rate": 3.2007033690629857e-06,
"loss": 0.0609,
"step": 96600
},
{
"epoch": 8.51,
"learning_rate": 3.1632901210318577e-06,
"loss": 0.0625,
"step": 96800
},
{
"epoch": 8.53,
"learning_rate": 3.12587687300073e-06,
"loss": 0.057,
"step": 97000
},
{
"epoch": 8.55,
"learning_rate": 3.088463624969602e-06,
"loss": 0.0655,
"step": 97200
},
{
"epoch": 8.57,
"learning_rate": 3.0510503769384743e-06,
"loss": 0.0543,
"step": 97400
},
{
"epoch": 8.58,
"learning_rate": 3.013637128907346e-06,
"loss": 0.077,
"step": 97600
},
{
"epoch": 8.6,
"learning_rate": 2.9762238808762185e-06,
"loss": 0.0487,
"step": 97800
},
{
"epoch": 8.62,
"learning_rate": 2.9388106328450904e-06,
"loss": 0.0655,
"step": 98000
},
{
"epoch": 8.64,
"learning_rate": 2.9013973848139628e-06,
"loss": 0.0655,
"step": 98200
},
{
"epoch": 8.65,
"learning_rate": 2.863984136782835e-06,
"loss": 0.0596,
"step": 98400
},
{
"epoch": 8.67,
"learning_rate": 2.8265708887517075e-06,
"loss": 0.0594,
"step": 98600
},
{
"epoch": 8.69,
"learning_rate": 2.7891576407205794e-06,
"loss": 0.0737,
"step": 98800
},
{
"epoch": 8.71,
"learning_rate": 2.7517443926894517e-06,
"loss": 0.0616,
"step": 99000
},
{
"epoch": 8.72,
"learning_rate": 2.7143311446583236e-06,
"loss": 0.0531,
"step": 99200
},
{
"epoch": 8.74,
"learning_rate": 2.676917896627196e-06,
"loss": 0.0717,
"step": 99400
},
{
"epoch": 8.76,
"learning_rate": 2.639504648596068e-06,
"loss": 0.0643,
"step": 99600
},
{
"epoch": 8.78,
"learning_rate": 2.60209140056494e-06,
"loss": 0.0515,
"step": 99800
},
{
"epoch": 8.79,
"learning_rate": 2.5646781525338126e-06,
"loss": 0.0612,
"step": 100000
},
{
"epoch": 8.81,
"learning_rate": 2.527264904502685e-06,
"loss": 0.0562,
"step": 100200
},
{
"epoch": 8.83,
"learning_rate": 2.489851656471557e-06,
"loss": 0.0605,
"step": 100400
},
{
"epoch": 8.85,
"learning_rate": 2.452438408440429e-06,
"loss": 0.0621,
"step": 100600
},
{
"epoch": 8.86,
"learning_rate": 2.415025160409301e-06,
"loss": 0.0639,
"step": 100800
},
{
"epoch": 8.88,
"learning_rate": 2.3776119123781734e-06,
"loss": 0.0565,
"step": 101000
},
{
"epoch": 8.9,
"learning_rate": 2.3401986643470454e-06,
"loss": 0.0609,
"step": 101200
},
{
"epoch": 8.92,
"learning_rate": 2.3027854163159177e-06,
"loss": 0.0613,
"step": 101400
},
{
"epoch": 8.94,
"learning_rate": 2.26537216828479e-06,
"loss": 0.0673,
"step": 101600
},
{
"epoch": 8.95,
"learning_rate": 2.227958920253662e-06,
"loss": 0.0598,
"step": 101800
},
{
"epoch": 8.97,
"learning_rate": 2.190545672222534e-06,
"loss": 0.0649,
"step": 102000
},
{
"epoch": 8.99,
"learning_rate": 2.1531324241914066e-06,
"loss": 0.0615,
"step": 102200
},
{
"epoch": 9.01,
"learning_rate": 2.1157191761602786e-06,
"loss": 0.0538,
"step": 102400
},
{
"epoch": 9.02,
"learning_rate": 2.0783059281291505e-06,
"loss": 0.0462,
"step": 102600
},
{
"epoch": 9.04,
"learning_rate": 2.040892680098023e-06,
"loss": 0.0542,
"step": 102800
},
{
"epoch": 9.06,
"learning_rate": 2.003479432066895e-06,
"loss": 0.0529,
"step": 103000
},
{
"epoch": 9.08,
"learning_rate": 1.966066184035767e-06,
"loss": 0.0585,
"step": 103200
},
{
"epoch": 9.09,
"learning_rate": 1.9286529360046394e-06,
"loss": 0.0462,
"step": 103400
},
{
"epoch": 9.11,
"learning_rate": 1.8912396879735116e-06,
"loss": 0.0496,
"step": 103600
},
{
"epoch": 9.13,
"learning_rate": 1.8538264399423839e-06,
"loss": 0.0477,
"step": 103800
},
{
"epoch": 9.15,
"learning_rate": 1.816413191911256e-06,
"loss": 0.048,
"step": 104000
},
{
"epoch": 9.16,
"learning_rate": 1.7789999438801282e-06,
"loss": 0.0591,
"step": 104200
},
{
"epoch": 9.18,
"learning_rate": 1.7415866958490003e-06,
"loss": 0.0445,
"step": 104400
},
{
"epoch": 9.2,
"learning_rate": 1.7041734478178726e-06,
"loss": 0.0508,
"step": 104600
},
{
"epoch": 9.22,
"learning_rate": 1.6667601997867448e-06,
"loss": 0.0534,
"step": 104800
},
{
"epoch": 9.23,
"learning_rate": 1.6293469517556169e-06,
"loss": 0.046,
"step": 105000
},
{
"epoch": 9.25,
"learning_rate": 1.591933703724489e-06,
"loss": 0.0454,
"step": 105200
},
{
"epoch": 9.27,
"learning_rate": 1.554520455693361e-06,
"loss": 0.0601,
"step": 105400
},
{
"epoch": 9.29,
"learning_rate": 1.5171072076622335e-06,
"loss": 0.0543,
"step": 105600
},
{
"epoch": 9.3,
"learning_rate": 1.4796939596311056e-06,
"loss": 0.0587,
"step": 105800
},
{
"epoch": 9.32,
"learning_rate": 1.4422807115999775e-06,
"loss": 0.0526,
"step": 106000
},
{
"epoch": 9.34,
"learning_rate": 1.4048674635688497e-06,
"loss": 0.065,
"step": 106200
},
{
"epoch": 9.36,
"learning_rate": 1.3674542155377222e-06,
"loss": 0.0531,
"step": 106400
},
{
"epoch": 9.37,
"learning_rate": 1.3300409675065941e-06,
"loss": 0.0607,
"step": 106600
},
{
"epoch": 9.39,
"learning_rate": 1.2926277194754663e-06,
"loss": 0.0425,
"step": 106800
},
{
"epoch": 9.41,
"learning_rate": 1.2552144714443384e-06,
"loss": 0.0541,
"step": 107000
},
{
"epoch": 9.43,
"learning_rate": 1.2178012234132107e-06,
"loss": 0.0592,
"step": 107200
},
{
"epoch": 9.45,
"learning_rate": 1.1803879753820829e-06,
"loss": 0.0494,
"step": 107400
},
{
"epoch": 9.46,
"learning_rate": 1.142974727350955e-06,
"loss": 0.0548,
"step": 107600
},
{
"epoch": 9.48,
"learning_rate": 1.1055614793198273e-06,
"loss": 0.0439,
"step": 107800
},
{
"epoch": 9.5,
"learning_rate": 1.0681482312886995e-06,
"loss": 0.0543,
"step": 108000
},
{
"epoch": 9.52,
"learning_rate": 1.0307349832575716e-06,
"loss": 0.0604,
"step": 108200
},
{
"epoch": 9.53,
"learning_rate": 9.933217352264437e-07,
"loss": 0.0546,
"step": 108400
},
{
"epoch": 9.55,
"learning_rate": 9.559084871953159e-07,
"loss": 0.0576,
"step": 108600
},
{
"epoch": 9.57,
"learning_rate": 9.184952391641881e-07,
"loss": 0.0441,
"step": 108800
},
{
"epoch": 9.59,
"learning_rate": 8.810819911330602e-07,
"loss": 0.05,
"step": 109000
},
{
"epoch": 9.6,
"learning_rate": 8.436687431019325e-07,
"loss": 0.0523,
"step": 109200
},
{
"epoch": 9.62,
"learning_rate": 8.062554950708046e-07,
"loss": 0.053,
"step": 109400
},
{
"epoch": 9.64,
"learning_rate": 7.688422470396768e-07,
"loss": 0.0456,
"step": 109600
},
{
"epoch": 9.66,
"learning_rate": 7.314289990085489e-07,
"loss": 0.0508,
"step": 109800
},
{
"epoch": 9.67,
"learning_rate": 6.940157509774212e-07,
"loss": 0.0415,
"step": 110000
}
],
"max_steps": 113710,
"num_train_epochs": 10,
"total_flos": 2.6986684097812992e+17,
"trial_name": null,
"trial_params": null
}