| DATA: | |
| dataset: multi | |
| data_root: sample_dataset | |
| wav_path: wav | |
| vertices_path: npy | |
| template_file: templates.pkl | |
| train_subjects: Arabic English French German Greek Italian Portuguese Russian Spanish Korean Mandarin Japanese | |
| NETWORK: | |
| arch: stage2 | |
| in_dim: 15069 | |
| hidden_size: 1024 | |
| num_hidden_layers: 6 | |
| num_attention_heads: 8 | |
| intermediate_size: 1536 | |
| window_size: 1 | |
| quant_factor: 0 | |
| face_quan_num: 16 | |
| neg: 0.2 | |
| autoencoder: stage1_vocaset | |
| INaffine: False | |
| style_emb_method: nnemb # onehot or nnemb | |
| VQuantizer: | |
| n_embed: 256 | |
| zquant_dim: 64 | |
| PREDICTOR: | |
| feature_dim: 1024 | |
| vertice_dim: 15069 | |
| device: cuda | |
| period: 25 | |
| vqvae_pretrained_path: checkpoints/stage1.pth.tar | |
| wav2vec2model_path: facebook/wav2vec2-large-xlsr-53 | |
| teacher_forcing: True | |
| num_layers: 6 | |
| n_head: 4 # not used | |
| DEMO: | |
| model_path: checkpoints/stage2.pth.tar | |
| #condition: False # if False, the waveform file provides the cue for the language type | |
| condition: English | |
| subject: id | |
| demo_wav_dir_path: demo/input/ | |
| demo_output_path: demo/output/ | |
| fps: 25 | |
| background_black: True # choose the background color of your rendered video |