Spaces:
Build error
Build error
# @package _group_
#
# Training configuration composed through Hydra/OmegaConf.
# Conventions used in this file:
#   ???        - mandatory value; must be supplied as an override at launch.
#   ${a.b.c}   - interpolation; resolves to the value stored at that config path.

common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  tensorboard_logdir: tblog

checkpoint:
  save_dir: ???  # mandatory override
  save_interval: 4
  keep_last_epochs: 4
  save_interval_updates: 20000
  keep_interval_updates: -1
  keep_interval_updates_pattern: 50000
  # no_epoch_checkpoints: true

distributed_training:
  ddp_backend: no_c10d
  distributed_backend: 'nccl'
  distributed_world_size: 8
  nprocs_per_node: 8
  find_unused_parameters: true

task:
  _name: denoising
  data: ???  # mandatory override
  mask: 0.15

dataset:
  num_workers: 6
  max_tokens: 1400000
  skip_invalid_size_inputs_valid_test: true
  # Validation cadence is tied to the checkpoint cadence through interpolation,
  # so changing checkpoint.save_interval(_updates) changes both.
  validate_interval: ${checkpoint.save_interval}
  validate_interval_updates: ${checkpoint.save_interval_updates}
  required_batch_size_multiple: 1

criterion:
  _name: sc2t
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights: [10,]
  label_smoothing: 0.1
  text_weight: 0.1

optimization:
  max_update: 400000
  lr: [0.0005]
  clip_norm: 10.0

optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01

lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000

model:
  _name: stbert
  label_rate: ???  # mandatory override
  skip_masked: false
  skip_nomask: false
  mask_prob: 0.80
  extractor_mode: default
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  final_dim: 256
  encoder_layers: 6
  encoder_attention_heads: 8
  decoder_layerdrop: 0.05
  dropout_input: 0.1
  dropout_features: 0.1
  dropout: 0.1
  attention_dropout: 0.1
  feature_grad_mult: 0.1
  untie_final_proj: true
  activation_dropout: 0.0
  use_rel_pos_enc: true
  add_code_encoder: true
  add_adaptor: false
  # Nested transformer settings; most values mirror the enclosing model section.
  # NOTE(review): activation_fn, adaptive_input, checkpoint_activations,
  # quant_noise_pq and encoder_layerdrop are referenced below but are not set
  # anywhere in this file - presumably they come from the model's built-in
  # defaults; confirm they resolve before launching.
  text_transformer:
    activation_fn: ${model.activation_fn}
    dropout: ${model.dropout}
    attention_dropout: ${model.attention_dropout}
    activation_dropout: ${model.activation_dropout}
    adaptive_input: ${model.adaptive_input}
    max_source_positions: 3000
    checkpoint_activations: ${model.checkpoint_activations}
    no_scale_embedding: false
    layernorm_embedding: false
    quant_noise:
      pq: ${model.quant_noise_pq}
    encoder:
      embed_dim: 768
      ffn_embed_dim: 3072
      layers: 6
      attention_heads: 8
      normalize_before: false
      learned_pos: true
      layerdrop: ${model.encoder_layerdrop}

hydra:
  job:
    config:
      # Controls how command-line overrides are rendered into
      # ${hydra.job.override_dirname}; the keys listed under exclude_keys are
      # left out of that rendered name.
      override_dirname:
        kv_sep: '-'
        item_sep: '__'
        exclude_keys:
          - run
          - task.data
          - task.label_dir
  run:
    dir: ???  # mandatory override
  sweep:
    dir: ???  # mandatory override
    # One sub-directory per job, named from the config name and its overrides.
    subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}