Spaces: Running on Zero
Upload 15 files
Browse files
- app.py +10 -4
- ovis_image/model/tokenizer.py +3 -2
app.py
CHANGED
|
@@ -57,15 +57,21 @@ autoencoder = load_ae(
|
|
| 57 |
autoencoder.eval()
|
| 58 |
|
| 59 |
print("init ovis")
|
| 60 |
-
ovis_path = hf_hub_download(
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
subfolder="Ovis2.5-2B",
|
| 63 |
token=hf_token,
|
| 64 |
)
|
| 65 |
-
ovis_tokenizer = build_ovis_tokenizer(ovis_path)
|
| 66 |
ovis_encoder = OvisEmbedder(
|
| 67 |
-
model_path=
|
| 68 |
random_init=False,
|
|
|
|
|
|
|
| 69 |
low_cpu_mem_usage=True,
|
| 70 |
torch_dtype=torch.bfloat16,
|
| 71 |
).to(device=device, dtype=_dtype)
|
|
|
|
| 57 |
autoencoder.eval()
|
| 58 |
|
| 59 |
print("init ovis")
|
| 60 |
+
# ovis_path = hf_hub_download(
|
| 61 |
+
# repo_id="AIDC-AI/Ovis-Image-7B",
|
| 62 |
+
# subfolder="Ovis2.5-2B",
|
| 63 |
+
# token=hf_token,
|
| 64 |
+
# )
|
| 65 |
+
ovis_tokenizer = build_ovis_tokenizer(
|
| 66 |
+
"AIDC-AI/Ovis-Image-7B",
|
| 67 |
subfolder="Ovis2.5-2B",
|
| 68 |
token=hf_token,
|
| 69 |
)
|
|
|
|
| 70 |
ovis_encoder = OvisEmbedder(
|
| 71 |
+
model_path="AIDC-AI/Ovis-Image-7B",
|
| 72 |
random_init=False,
|
| 73 |
+
subfolder="Ovis2.5-2B",
|
| 74 |
+
token=hf_token,
|
| 75 |
low_cpu_mem_usage=True,
|
| 76 |
torch_dtype=torch.bfloat16,
|
| 77 |
).to(device=device, dtype=_dtype)
|
ovis_image/model/tokenizer.py
CHANGED
|
@@ -25,7 +25,7 @@ class OvisTokenizer:
|
|
| 25 |
**hf_kwargs
|
| 26 |
):
|
| 27 |
super().__init__()
|
| 28 |
-
self._tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 29 |
self.system_prompt = "Describe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background: "
|
| 30 |
self.user_prompt_begin_id = 28
|
| 31 |
self._max_length = max_length + self.user_prompt_begin_id
|
|
@@ -64,11 +64,12 @@ class OvisTokenizer:
|
|
| 64 |
return self._tokenizer.decode(t, skip_special_tokens=False)
|
| 65 |
|
| 66 |
|
| 67 |
-
def build_ovis_tokenizer(tokenizer_path):
|
| 68 |
max_ovis_encoding_len = 256
|
| 69 |
ovis_tokenizer = OvisTokenizer(
|
| 70 |
tokenizer_path,
|
| 71 |
max_length=max_ovis_encoding_len,
|
|
|
|
| 72 |
)
|
| 73 |
return ovis_tokenizer
|
| 74 |
|
|
|
|
| 25 |
**hf_kwargs
|
| 26 |
):
|
| 27 |
super().__init__()
|
| 28 |
+
self._tokenizer = AutoTokenizer.from_pretrained(model_path, **hf_kwargs)
|
| 29 |
self.system_prompt = "Describe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background: "
|
| 30 |
self.user_prompt_begin_id = 28
|
| 31 |
self._max_length = max_length + self.user_prompt_begin_id
|
|
|
|
| 64 |
return self._tokenizer.decode(t, skip_special_tokens=False)
|
| 65 |
|
| 66 |
|
| 67 |
+
def build_ovis_tokenizer(tokenizer_path, **hf_kwargs):
|
| 68 |
max_ovis_encoding_len = 256
|
| 69 |
ovis_tokenizer = OvisTokenizer(
|
| 70 |
tokenizer_path,
|
| 71 |
max_length=max_ovis_encoding_len,
|
| 72 |
+
**hf_kwargs,
|
| 73 |
)
|
| 74 |
return ovis_tokenizer
|
| 75 |
|