# Ovis-Image-7B / app.py
# Hugging Face Space file view, last changed by linoyts (HF Staff).
# Commit b05c6dd (verified): "small ux change: add random seed option"
# File size: 5.79 kB
import os
import torch
import gradio as gr
import spaces
import random
import numpy as np
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download
from diffusers.utils import logging
from PIL import Image
from ovis_image.model.tokenizer import build_ovis_tokenizer
from ovis_image.model.autoencoder import load_ae
from ovis_image.model.hf_embedder import OvisEmbedder
from ovis_image.model.model import OvisImageModel
from ovis_image.sampling import generate_image
from ovis_image import ovis_image_configs
# Only error-level messages from the diffusers logger are shown.
logging.set_verbosity_error()

# Everything is placed on CUDA unconditionally; no CPU fallback is attempted.
device = "cuda"
# Precision used when moving the models to the device and when sampling.
_dtype = torch.bfloat16

# Upper bound for sampling seeds: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Hugging Face access token, or None when HF_TOKEN is not set.
hf_token = os.environ.get("HF_TOKEN")
# --- Image generation model ---
print("init ovis_image")
model_config = ovis_image_configs["ovis-image-7b"]
ovis_image = OvisImageModel(model_config)

# Download the checkpoint file and read its tensors.
ovis_image_path = hf_hub_download(
    "AIDC-AI/Ovis-Image-7B",
    "ovis_image.safetensors",
    token=hf_token,
)
model_state_dict = load_file(ovis_image_path)

# Copy the weights into the model and log any key mismatch that was reported.
load_report = ovis_image.load_state_dict(model_state_dict)
missing_keys, unexpected_keys = load_report
print(f"Load Missing Keys {missing_keys}")
print(f"Load Unexpected Keys {unexpected_keys}")

# Move to the target device/precision, then switch to evaluation mode.
ovis_image = ovis_image.to(device=device, dtype=_dtype)
ovis_image.eval()
# --- Autoencoder ---
print("init vae")

# The autoencoder weights live in the same Hub repository as the image model.
vae_path = hf_hub_download(
    "AIDC-AI/Ovis-Image-7B",
    "ae.safetensors",
    token=hf_token,
)

# Build the autoencoder from the downloaded weights (random_init=False)
# on the shared device and dtype, then switch it to evaluation mode.
autoencoder = load_ae(vae_path, model_config.autoencoder_params, device=device, dtype=_dtype, random_init=False)
autoencoder.eval()
# --- Tokenizer and text encoder ---
print("init ovis")

# Both the tokenizer and the embedder are built from the "AIDC-AI/Ovis2.5-2B"
# identifier directly; no explicit hf_hub_download call is made here.
ovis_tokenizer = build_ovis_tokenizer("AIDC-AI/Ovis2.5-2B")

ovis_encoder = OvisEmbedder(
    model_path="AIDC-AI/Ovis2.5-2B",
    random_init=False,
    low_cpu_mem_usage=True,
    torch_dtype=_dtype,  # _dtype is torch.bfloat16, the shared precision
)
# Place the encoder on the same device and dtype as the other models.
ovis_encoder = ovis_encoder.to(device=device, dtype=_dtype)
@spaces.GPU(duration=75)
def generate(prompt, img_height=1024, img_width=1024, seed=42, randomize_seed=True, steps=50, guidance_scale=5.0):
    """Generate one image for ``prompt`` and return it as a ``uint8`` array.

    Args:
        prompt: Text description of the image to generate.
        img_height: Requested image height, forwarded to the sampler.
        img_width: Requested image width, forwarded to the sampler.
        seed: Seed forwarded to the sampler; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``
            instead of using ``seed``.
        steps: Number of denoising steps, forwarded as ``denoising_steps``.
        guidance_scale: Guidance strength, forwarded as ``cfg_scale``.

    Returns:
        The first image of the generated batch as a ``uint8`` NumPy array,
        with axis 1 of the sampler output moved to the last position
        (presumably height x width x channels -- confirm against
        ``generate_image``).
    """
    # Gradio documents slider values as floats. Sizes, step counts and seeds
    # are discrete quantities, so normalise them to int before they are
    # logged and forwarded.
    img_height = int(img_height)
    img_width = int(img_width)
    steps = int(steps)
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    print(f'inference with prompt : {prompt}, size: {img_height}x{img_width}, seed : {seed}, step : {steps}, cfg : {guidance_scale}')

    image = generate_image(
        # Run on whatever device the model weights currently live on.
        device=next(ovis_image.parameters()).device,
        dtype=_dtype,
        model=ovis_image,
        prompt=prompt,
        autoencoder=autoencoder,
        ovis_tokenizer=ovis_tokenizer,
        ovis_encoder=ovis_encoder,
        img_height=img_height,
        img_width=img_width,
        denoising_steps=steps,
        cfg_scale=guidance_scale,
        seed=seed,
    )

    # Convert the tensor batch into a uint8 array that gr.Image can display.
    # NOTE(review): values are clamped to [-1, 1] but then scaled by 255 as if
    # they were in [0, 1]; negative values would not map to valid pixel
    # intensities. Confirm the output range of generate_image.
    image = image.clamp(-1, 1)
    image = image.cpu().permute(0, 2, 3, 1).float().numpy()
    image = (image * 255).round().astype("uint8")
    return image[0]
# Prompts offered as one-click examples in the UI.
examples = [
    "Solar punk vehicle in a bustling city",
    "An anthropomorphic cat riding a Harley Davidson in Arizona with sunglasses and a leather jacket",
    "An elderly woman poses for a high fashion photoshoot in colorful, patterned clothes with a cyberpunk 2077 vibe",
]

# Page-level CSS: centre the main column and cap its width.
css = (
    "\n"
    "#col-container {\n"
    "margin: 0 auto;\n"
    "max-width: 520px;\n"
    "}\n"
)
# Gradio UI: a prompt box with a Run button, the result image, a collapsed
# panel of sampling settings, and clickable example prompts.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Title plus links to the code and model pages.
        gr.Markdown(
            "# Ovis-Image\n"
            "[[code](https://github.com/AIDC-AI/Ovis-Image)] "
            "[[model](https://huggingface.co/AIDC-AI/Ovis-Image-7B)]\n"
        )

        with gr.Row():
            prompt = gr.Text(label="Prompt", show_label=False, max_lines=1, placeholder="Enter your prompt here", container=False)
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            with gr.Row():
                img_height = gr.Slider(label="Image Height", minimum=256, maximum=2048, step=32, value=1024)
                img_width = gr.Slider(label="Image Width", minimum=256, maximum=2048, step=32, value=1024)

            with gr.Row():
                guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=14, step=0.1, value=5.0)
                num_inference_steps = gr.Slider(label="Number of inference steps", minimum=1, maximum=100, step=1, value=50)

            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

        # Examples supply only the prompt, so generate() runs with its own
        # default values for every other argument.
        gr.Examples(
            examples=examples,
            fn=generate,
            inputs=[prompt],
            outputs=[result],
            cache_examples="lazy",
        )

    # Clicking Run and submitting the prompt box both trigger generation.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=generate,
        inputs=[prompt, img_height, img_width, seed, randomize_seed, num_inference_steps, guidance_scale],
        outputs=[result],
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()