ValueError: GGUF model is not supported yet.
#1 opened by Gia-Minh
Hello, I have downloaded the Qwen3 1.7B GGUF model via git clone and am running it locally. I have updated to the latest version of the transformers library but still encounter the following error:
ValueError: GGUF model with architecture qwen3 is not supported yet.
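For reference, the same error can be reproduced with just the GGUF load calls below (a minimal sketch, assuming the repository was cloned into a local "Qwen3-1.7B-GGUF" directory containing the Q8_0 file, as in my full script further down):

from transformers import AutoTokenizer, AutoModelForCausalLM

# Minimal reproduction sketch: one of these calls raises the ValueError above.
# Assumes the model repo was cloned into ./Qwen3-1.7B-GGUF.
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen3-1.7B-GGUF", gguf_file="Qwen3-1.7B-Q8_0.gguf"
)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen3-1.7B-GGUF", gguf_file="Qwen3-1.7B-Q8_0.gguf"
)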
Here is my source code:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

_model = None
_tokenizer = None

def get_shared_model_and_tokenizer():
    global _model, _tokenizer  # Reuse the module-level cache across calls
    if _model is None or _tokenizer is None:
        # Check if a GPU is available, otherwise fall back to CPU
        DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
        if DEVICE == "cuda":
            print("Using GPU for the model.")
            # Path to the AWQ model
            MODEL_NAME = "Qwen3-4B-AWQ"
            # Load tokenizer from the model
            _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            # Load model with quantization and optimization
            _model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                torch_dtype=torch.float16 if "AWQ" in MODEL_NAME else torch.bfloat16,
                device_map="auto"
            ).eval()
            # Optimize inference with torch.compile() (requires PyTorch 2.0+)
            _model = torch.compile(_model)
        elif DEVICE == "cpu":
            print("Using CPU for the model.")
            # Path to the GGUF model
            MODEL_NAME = "Qwen3-1.7B-GGUF"
            # GGUF filename
            GGUF_FILE = "Qwen3-1.7B-Q8_0.gguf"
            # Load tokenizer from the GGUF file
            _tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME,
                gguf_file=GGUF_FILE
            )
            # Load model from the GGUF file
            _model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                gguf_file=GGUF_FILE,
                torch_dtype=torch.float32,  # GGUF weights are dequantized to fp32
                device_map="auto"
            ).eval()  # Set model to inference mode
    return _model, _tokenizer
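For context, this is roughly how the helper is used afterwards (a sketch only; the prompt text is a placeholder):

# Hypothetical usage of the helper above; the prompt is only a placeholder.
model, tokenizer = get_shared_model_and_tokenizer()
inputs = tokenizer("Hello, who are you?", return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))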