#!/bin/bash
#
# Launches ./peptide/rectify_train.py on the v3 rectified peptide datasets
# with NCCL/UCX communication settings exported for the training process.
#
# All paths are relative, so run this script from the directory that
# contains ./peptide.

set -euo pipefail

# --- CPU threading -----------------------------------------------------------
# Number of OpenMP threads available to each process.
export OMP_NUM_THREADS=64

# --- NCCL settings -----------------------------------------------------------
# Enable NVLink SHARP (NVLS) collectives.
export NCCL_NVLS_ENABLE=1

# Allow adaptive routing on the InfiniBand fabric.
export NCCL_IB_ADAPTIVE_ROUTING=1

# InfiniBand service level used for NCCL traffic.
# NOTE(review): must match the fabric's QoS configuration -- confirm.
export NCCL_IB_SL=1

# Use two queue pairs per connection; with SPLIT_DATA_ON_QPS=0 messages are
# not split across the queue pairs.
export NCCL_IB_QPS_PER_CONNECTION=2
export NCCL_IB_SPLIT_DATA_ON_QPS=0

# HCAs NCCL may use for InfiniBand traffic.
# NOTE(review): device names are host-specific -- verify against the target
# nodes (e.g. with `ibv_devices`).
export NCCL_IB_HCA=mlx5_15,mlx5_10,mlx5_14,mlx5_13,mlx5_8,mlx5_7,mlx5_9,mlx5_4

# Network interface NCCL uses for its socket-based communication.
export NCCL_SOCKET_IFNAME=bond0

# Restrict NCCL to the ring algorithm.
export NCCL_ALGO=RING

# --- UCX settings ------------------------------------------------------------
# Restrict UCX to the RC (reliable connection) transport.
export UCX_TLS=rc

# --- Training ----------------------------------------------------------------
# The train and validation datasets both live under
# ./peptide/rectified_datasets/v3.
# NOTE(review): --version is 3 while the resume checkpoint is named
# PepReDi_v2.pt; presumably an intentional warm start from v2 -- confirm.
python ./peptide/rectify_train.py \
  --train_dataset_path ./peptide/rectified_datasets/v3/train \
  --val_dataset_path ./peptide/rectified_datasets/v3/validation \
  --version 3 \
  --model_dim 512 \
  --n_heads 8 \
  --n_layers 6 \
  --vocab_size 24 \
  --seq_len 100 \
  --epochs 50 \
  --learning_rate 1e-4 \
  --weight_decay 2e-5 \
  --label_smoothing 0.0 \
  --checkpoint_dir ./peptide/ckpt \
  --tc_batches 20 \
  --tc_k_samples 50 \
  --resume_from_checkpoint ./peptide/ckpt/PepReDi_v2.pt