from transformers import PretrainedConfig


class LIMEConfig(PretrainedConfig):
    model_type = "lime"

    def __init__(
        self,
        vocab_size=50000,
        d_model=1536,
        num_encoder_layers=0,
        num_decoder_layers=32,
        num_heads=24,
        dff=6144,
        dropout_rate=0.0,
        max_position_embeddings=512,
        pad_token_id=0,
        eos_token_id=1,
        use_encoder=False,
        use_flash=True,
        multiple_of=256,
        **kwargs
    ):
        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            **kwargs
        )
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.num_encoder_layers = num_encoder_layers
        self.num_decoder_layers = num_decoder_layers
        self.num_heads = num_heads
        self.dff = dff
        self.dropout_rate = dropout_rate
        self.max_position_embeddings = max_position_embeddings
        self.pad_token_id = pad_token_id
        self.eos_token_id = eos_token_id
        self.use_encoder = use_encoder
        self.use_flash = use_flash
        self.multiple_of = multiple_of
        # Flags consumed by the Transformers library.
        self.is_decoder = True
        self.is_encoder_decoder = False
        self.tie_word_embeddings = True
        self.use_cache = False
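
A quick usage sketch, not taken from the source: it instantiates the config, round-trips it through the standard save_pretrained/from_pretrained helpers, and optionally registers it with AutoConfig so the "lime" model_type can be resolved automatically. The overridden values and the "lime-config" directory are placeholders for illustration.

from transformers import AutoConfig

# Instantiate with a couple of overridden hyperparameters (placeholder values).
config = LIMEConfig(d_model=2048, num_decoder_layers=40)

# Round-trip through disk; save_pretrained writes config.json into the directory.
config.save_pretrained("lime-config")
reloaded = LIMEConfig.from_pretrained("lime-config")
assert reloaded.d_model == 2048 and reloaded.num_decoder_layers == 40

# Optional: register the config so AutoConfig can map model_type="lime" to LIMEConfig.
AutoConfig.register("lime", LIMEConfig)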