Spaces:
Running
Running
# Model mappings: short tokenizer alias -> Hugging Face Hub repository id.
# Built from an ordered list of pairs so the insertion order of the aliases
# is explicit and stays exactly as listed here.
MODEL_MAP = dict(
    [
        ("llama-2", "meta-llama/Llama-2-7b-hf"),
        ("llama-3", "meta-llama/Llama-3.2-1B"),
        ("gemma-2", "google/gemma-2-2b"),
        ("qwen3", "Qwen/Qwen3-0.6B"),
        ("qwen2.5", "Qwen/Qwen2.5-0.5B"),
        ("bert", "bert-base-uncased"),
        ("bloom", "bigscience/bloom-560m"),
        ("aya-expanse", "CohereForAI/aya-expanse-8b"),
        ("comma", "common-pile/comma-v0.1-2t"),
        ("tokenmonster", "alasdairforsythe/tokenmonster"),
        ("byt5", "google/byt5-small"),
        ("phi-3", "microsoft/Phi-3-mini-4k-instruct"),
        ("xglm", "facebook/xglm-564M"),
        ("tekken", "mistralai/tekken"),
        ("mbert", "google-bert/bert-base-multilingual-cased"),
    ]
)
# Hub repo ids used by the "phi-3", "tekken", "xglm" and "mbert" entries of MODEL_MAP:
# "microsoft/Phi-3-mini-4k-instruct" "mistralai/tekken" "facebook/xglm-564M" "google-bert/bert-base-multilingual-cased"
# Static metadata for each tokenizer alias.
#
# Every entry carries the same three fields:
#   name       - human-readable label for the tokenizer
#   vocab_size - number of entries in the tokenizer vocabulary
#   encoding   - tokenization scheme label ("" where it has not been filled in)
#
# Entries marked "corrected" were changed to agree with the published
# tokenizer documentation for the corresponding checkpoint.
TOKENIZER_INFO = {
    "gpt-4": {"name": "GPT-4", "vocab_size": 100277, "encoding": "BPE"},
    "gpt-4o": {"name": "GPT-4o", "vocab_size": 199997, "encoding": "BPE"},
    "gpt-2": {"name": "GPT-2", "vocab_size": 50257, "encoding": "BPE"},
    "llama-2": {"name": "LLaMA-2", "vocab_size": 32000, "encoding": "SentencePiece"},
    # corrected: Llama 3 dropped SentencePiece for a tiktoken-style BPE
    # tokenizer with 128,000 regular tokens plus 256 reserved special tokens.
    "llama-3": {"name": "LLaMA-3", "vocab_size": 128256, "encoding": "BPE"},
    "gemma-2": {"name": "Gemma-2", "vocab_size": 256000, "encoding": "SentencePiece"},
    "qwen3": {"name": "Qwen3", "vocab_size": 151936, "encoding": "BPE"},
    "qwen2.5": {"name": "Qwen2.5", "vocab_size": 151936, "encoding": "BPE"},
    "bert": {"name": "BERT", "vocab_size": 30522, "encoding": "WordPiece"},
    "bloom": {"name": "BLOOM", "vocab_size": 250680, "encoding": "BPE"},
    # corrected: the Cohere tokenizer is byte-level BPE, not SentencePiece.
    "aya-expanse": {
        "name": "Aya Expanse",
        "vocab_size": 256000,
        "encoding": "BPE",
    },
    # NOTE(review): vocab_size repeats the GPT-2 value and encoding is empty;
    # confirm both against the common-pile/comma-v0.1-2t tokenizer.
    "comma": {"name": "Comma AI", "vocab_size": 50257, "encoding": ""},
    "byte-level": {"name": "Byte-Level BPE", "vocab_size": 50000, "encoding": "BPE"},
    # NOTE(review): encoding left empty; vocab_size depends on which
    # TokenMonster vocabulary is loaded - confirm.
    "tokenmonster": {"name": "TokenMonster", "vocab_size": 32000, "encoding": ""},
    # corrected: ByT5 operates on raw UTF-8 bytes with no learned merges;
    # its vocabulary is 256 byte values + 3 special tokens + 125 sentinel ids.
    "byt5": {"name": "Byt5", "vocab_size": 384, "encoding": "Byte-level"},
    "phi-3": {"name": "Phi-3", "vocab_size": 32064, "encoding": "BPE"},
    # corrected: the XGLM tokenizer is SentencePiece-based.
    "xglm": {"name": "XGLM", "vocab_size": 256008, "encoding": "SentencePiece"},
    # corrected: Tekken has a 2**17 = 131072 entry vocabulary; 32768 is the
    # size of the older SentencePiece-based Mistral tokenizer.
    "tekken": {"name": "Tekken", "vocab_size": 131072, "encoding": "BPE"},
    "mbert": {"name": "mBERT", "vocab_size": 119547, "encoding": "WordPiece"},
}