{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9865470852017937,
  "eval_steps": 500,
  "global_step": 55,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017937219730941704,
      "grad_norm": 0.6536183953285217,
      "learning_rate": 4.995922759815339e-05,
      "loss": 0.8371,
      "num_input_tokens_seen": 2097152,
      "step": 1
    },
    {
      "epoch": 0.03587443946188341,
      "grad_norm": 0.517680823802948,
      "learning_rate": 4.9837043383713753e-05,
      "loss": 0.7804,
      "num_input_tokens_seen": 4194304,
      "step": 2
    },
    {
      "epoch": 0.053811659192825115,
      "grad_norm": 0.4423481225967407,
      "learning_rate": 4.963384589619233e-05,
      "loss": 0.7695,
      "num_input_tokens_seen": 6291456,
      "step": 3
    },
    {
      "epoch": 0.07174887892376682,
      "grad_norm": 0.39828750491142273,
      "learning_rate": 4.935029792355834e-05,
      "loss": 0.7419,
      "num_input_tokens_seen": 8388608,
      "step": 4
    },
    {
      "epoch": 0.08968609865470852,
      "grad_norm": 0.31201115250587463,
      "learning_rate": 4.898732434036244e-05,
      "loss": 0.7166,
      "num_input_tokens_seen": 10485760,
      "step": 5
    },
    {
      "epoch": 0.10762331838565023,
      "grad_norm": 0.2536958158016205,
      "learning_rate": 4.854610909098812e-05,
      "loss": 0.7194,
      "num_input_tokens_seen": 12582912,
      "step": 6
    },
    {
      "epoch": 0.12556053811659193,
      "grad_norm": 0.2193588763475418,
      "learning_rate": 4.802809132787125e-05,
      "loss": 0.6975,
      "num_input_tokens_seen": 14680064,
      "step": 7
    },
    {
      "epoch": 0.14349775784753363,
      "grad_norm": 0.18916621804237366,
      "learning_rate": 4.743496071728396e-05,
      "loss": 0.7168,
      "num_input_tokens_seen": 16777216,
      "step": 8
    },
    {
      "epoch": 0.16143497757847533,
      "grad_norm": 0.1561172604560852,
      "learning_rate": 4.6768651927994434e-05,
      "loss": 0.6707,
      "num_input_tokens_seen": 18874368,
      "step": 9
    },
    {
      "epoch": 0.17937219730941703,
      "grad_norm": 0.12857139110565186,
      "learning_rate": 4.6031338320779534e-05,
      "loss": 0.6769,
      "num_input_tokens_seen": 20971520,
      "step": 10
    },
    {
      "epoch": 0.19730941704035873,
      "grad_norm": 0.11340289562940598,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.6873,
      "num_input_tokens_seen": 23068672,
      "step": 11
    },
    {
      "epoch": 0.21524663677130046,
      "grad_norm": 0.10658581554889679,
      "learning_rate": 4.4353540265977064e-05,
      "loss": 0.6643,
      "num_input_tokens_seen": 25165824,
      "step": 12
    },
    {
      "epoch": 0.23318385650224216,
      "grad_norm": 0.08937722444534302,
      "learning_rate": 4.341852844691012e-05,
      "loss": 0.6849,
      "num_input_tokens_seen": 27262976,
      "step": 13
    },
    {
      "epoch": 0.25112107623318386,
      "grad_norm": 0.07756289094686508,
      "learning_rate": 4.242343921638234e-05,
      "loss": 0.6461,
      "num_input_tokens_seen": 29360128,
      "step": 14
    },
    {
      "epoch": 0.26905829596412556,
      "grad_norm": 0.07581546157598495,
      "learning_rate": 4.137151834863213e-05,
      "loss": 0.6623,
      "num_input_tokens_seen": 31457280,
      "step": 15
    },
    {
      "epoch": 0.28699551569506726,
      "grad_norm": 0.07386067509651184,
      "learning_rate": 4.0266196990885955e-05,
      "loss": 0.6751,
      "num_input_tokens_seen": 33554432,
      "step": 16
    },
    {
      "epoch": 0.30493273542600896,
      "grad_norm": 0.06293580681085587,
      "learning_rate": 3.911108047166924e-05,
      "loss": 0.6472,
      "num_input_tokens_seen": 35651584,
      "step": 17
    },
    {
      "epoch": 0.32286995515695066,
      "grad_norm": 0.06199085712432861,
      "learning_rate": 3.790993654097405e-05,
      "loss": 0.6728,
      "num_input_tokens_seen": 37748736,
      "step": 18
    },
    {
      "epoch": 0.34080717488789236,
      "grad_norm": 0.060734592378139496,
      "learning_rate": 3.6666683080641846e-05,
      "loss": 0.7017,
      "num_input_tokens_seen": 39845888,
      "step": 19
    },
    {
      "epoch": 0.35874439461883406,
      "grad_norm": 0.05623164027929306,
      "learning_rate": 3.5385375325047166e-05,
      "loss": 0.6502,
      "num_input_tokens_seen": 41943040,
      "step": 20
    },
    {
      "epoch": 0.37668161434977576,
      "grad_norm": 0.0574677549302578,
      "learning_rate": 3.4070192633766025e-05,
      "loss": 0.6476,
      "num_input_tokens_seen": 44040192,
      "step": 21
    },
    {
      "epoch": 0.39461883408071746,
      "grad_norm": 0.05185185372829437,
      "learning_rate": 3.272542485937369e-05,
      "loss": 0.6411,
      "num_input_tokens_seen": 46137344,
      "step": 22
    },
    {
      "epoch": 0.4125560538116592,
      "grad_norm": 0.05139186978340149,
      "learning_rate": 3.135545835483718e-05,
      "loss": 0.6428,
      "num_input_tokens_seen": 48234496,
      "step": 23
    },
    {
      "epoch": 0.4304932735426009,
      "grad_norm": 0.050159115344285965,
      "learning_rate": 2.996476166614364e-05,
      "loss": 0.6661,
      "num_input_tokens_seen": 50331648,
      "step": 24
    },
    {
      "epoch": 0.4484304932735426,
      "grad_norm": 0.04851464927196503,
      "learning_rate": 2.8557870956832132e-05,
      "loss": 0.6378,
      "num_input_tokens_seen": 52428800,
      "step": 25
    },
    {
      "epoch": 0.4663677130044843,
      "grad_norm": 0.04896726831793785,
      "learning_rate": 2.7139375211970996e-05,
      "loss": 0.6532,
      "num_input_tokens_seen": 54525952,
      "step": 26
    },
    {
      "epoch": 0.484304932735426,
      "grad_norm": 0.04698600620031357,
      "learning_rate": 2.5713901269842404e-05,
      "loss": 0.6403,
      "num_input_tokens_seen": 56623104,
      "step": 27
    },
    {
      "epoch": 0.5022421524663677,
      "grad_norm": 0.048034097999334335,
      "learning_rate": 2.42860987301576e-05,
      "loss": 0.6248,
      "num_input_tokens_seen": 58720256,
      "step": 28
    },
    {
      "epoch": 0.5201793721973094,
      "grad_norm": 0.044828303158283234,
      "learning_rate": 2.2860624788029013e-05,
      "loss": 0.6583,
      "num_input_tokens_seen": 60817408,
      "step": 29
    },
    {
      "epoch": 0.5381165919282511,
      "grad_norm": 0.04563640430569649,
      "learning_rate": 2.1442129043167874e-05,
      "loss": 0.6579,
      "num_input_tokens_seen": 62914560,
      "step": 30
    },
    {
      "epoch": 0.5560538116591929,
      "grad_norm": 0.044318560510873795,
      "learning_rate": 2.003523833385637e-05,
      "loss": 0.6659,
      "num_input_tokens_seen": 65011712,
      "step": 31
    },
    {
      "epoch": 0.5739910313901345,
      "grad_norm": 0.04331167787313461,
      "learning_rate": 1.8644541645162834e-05,
      "loss": 0.6423,
      "num_input_tokens_seen": 67108864,
      "step": 32
    },
    {
      "epoch": 0.5919282511210763,
      "grad_norm": 0.04475367069244385,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 0.6509,
      "num_input_tokens_seen": 69206016,
      "step": 33
    },
    {
      "epoch": 0.6098654708520179,
      "grad_norm": 0.045547887682914734,
      "learning_rate": 1.5929807366233977e-05,
      "loss": 0.6551,
      "num_input_tokens_seen": 71303168,
      "step": 34
    },
    {
      "epoch": 0.6278026905829597,
      "grad_norm": 0.043985530734062195,
      "learning_rate": 1.4614624674952842e-05,
      "loss": 0.6232,
      "num_input_tokens_seen": 73400320,
      "step": 35
    },
    {
      "epoch": 0.6457399103139013,
      "grad_norm": 0.0414094403386116,
      "learning_rate": 1.3333316919358157e-05,
      "loss": 0.6137,
      "num_input_tokens_seen": 75497472,
      "step": 36
    },
    {
      "epoch": 0.6636771300448431,
      "grad_norm": 0.041019294410943985,
      "learning_rate": 1.2090063459025955e-05,
      "loss": 0.6426,
      "num_input_tokens_seen": 77594624,
      "step": 37
    },
    {
      "epoch": 0.6816143497757847,
      "grad_norm": 0.04383592680096626,
      "learning_rate": 1.0888919528330777e-05,
      "loss": 0.6512,
      "num_input_tokens_seen": 79691776,
      "step": 38
    },
    {
      "epoch": 0.6995515695067265,
      "grad_norm": 0.040539514273405075,
      "learning_rate": 9.733803009114045e-06,
      "loss": 0.6269,
      "num_input_tokens_seen": 81788928,
      "step": 39
    },
    {
      "epoch": 0.7174887892376681,
      "grad_norm": 0.04238974675536156,
      "learning_rate": 8.628481651367876e-06,
      "loss": 0.6201,
      "num_input_tokens_seen": 83886080,
      "step": 40
    },
    {
      "epoch": 0.7354260089686099,
      "grad_norm": 0.04115669056773186,
      "learning_rate": 7.576560783617668e-06,
      "loss": 0.642,
      "num_input_tokens_seen": 85983232,
      "step": 41
    },
    {
      "epoch": 0.7533632286995515,
      "grad_norm": 0.04178008437156677,
      "learning_rate": 6.5814715530898745e-06,
      "loss": 0.648,
      "num_input_tokens_seen": 88080384,
      "step": 42
    },
    {
      "epoch": 0.7713004484304933,
      "grad_norm": 0.04329155012965202,
      "learning_rate": 5.646459734022938e-06,
      "loss": 0.6442,
      "num_input_tokens_seen": 90177536,
      "step": 43
    },
    {
      "epoch": 0.7892376681614349,
      "grad_norm": 0.043740272521972656,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 0.6488,
      "num_input_tokens_seen": 92274688,
      "step": 44
    },
    {
      "epoch": 0.8071748878923767,
      "grad_norm": 0.04263562709093094,
      "learning_rate": 3.968661679220468e-06,
      "loss": 0.65,
      "num_input_tokens_seen": 94371840,
      "step": 45
    },
    {
      "epoch": 0.8251121076233184,
      "grad_norm": 0.041693028062582016,
      "learning_rate": 3.2313480720055745e-06,
      "loss": 0.6584,
      "num_input_tokens_seen": 96468992,
      "step": 46
    },
    {
      "epoch": 0.8430493273542601,
      "grad_norm": 0.04151754826307297,
      "learning_rate": 2.565039282716045e-06,
      "loss": 0.6392,
      "num_input_tokens_seen": 98566144,
      "step": 47
    },
    {
      "epoch": 0.8609865470852018,
      "grad_norm": 0.04260968416929245,
      "learning_rate": 1.97190867212875e-06,
      "loss": 0.6524,
      "num_input_tokens_seen": 100663296,
      "step": 48
    },
    {
      "epoch": 0.8789237668161435,
      "grad_norm": 0.04022514820098877,
      "learning_rate": 1.4538909090118846e-06,
      "loss": 0.6276,
      "num_input_tokens_seen": 102760448,
      "step": 49
    },
    {
      "epoch": 0.8968609865470852,
      "grad_norm": 0.039072513580322266,
      "learning_rate": 1.0126756596375686e-06,
      "loss": 0.6282,
      "num_input_tokens_seen": 104857600,
      "step": 50
    },
    {
      "epoch": 0.9147982062780269,
      "grad_norm": 0.03952722251415253,
      "learning_rate": 6.497020764416633e-07,
      "loss": 0.6344,
      "num_input_tokens_seen": 106954752,
      "step": 51
    },
    {
      "epoch": 0.9327354260089686,
      "grad_norm": 0.04045777767896652,
      "learning_rate": 3.6615410380767544e-07,
      "loss": 0.6464,
      "num_input_tokens_seen": 109051904,
      "step": 52
    },
    {
      "epoch": 0.9506726457399103,
      "grad_norm": 0.03984501212835312,
      "learning_rate": 1.6295661628624447e-07,
      "loss": 0.6253,
      "num_input_tokens_seen": 111149056,
      "step": 53
    },
    {
      "epoch": 0.968609865470852,
      "grad_norm": 0.040761884301900864,
      "learning_rate": 4.07724018466088e-08,
      "loss": 0.6375,
      "num_input_tokens_seen": 113246208,
      "step": 54
    },
    {
      "epoch": 0.9865470852017937,
      "grad_norm": 0.04142209142446518,
      "learning_rate": 0.0,
      "loss": 0.6419,
      "num_input_tokens_seen": 115343360,
      "step": 55
    },
    {
      "epoch": 0.9865470852017937,
      "num_input_tokens_seen": 115343360,
      "step": 55,
      "total_flos": 5.104238176512246e+18,
      "train_loss": 0.6637221011248502,
      "train_runtime": 9208.1472,
      "train_samples_per_second": 3.097,
      "train_steps_per_second": 0.006
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 55,
  "num_input_tokens_seen": 115343360,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.104238176512246e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}