# Ovis-Image-7B / app_diffusers.py
# Flourish: "Update app_diffusers.py" (commit d146910, verified)
import os
import torch
import gradio as gr
import spaces
import random
import numpy as np
from diffusers.utils import logging
from PIL import Image
from diffusers import OvisImagePipeline
# Silence diffusers logging below the error level.
logging.set_verbosity_error()

# Largest signed 32-bit integer; used as the upper bound of the seed slider.
MAX_SEED = np.iinfo(np.int32).max

# Single source of truth for where and in which precision the pipeline runs.
# These constants were previously declared but ignored (the literals were
# repeated in the calls below), so editing them had no effect.
# NOTE(review): a CPU fallback ("cuda" if torch.cuda.is_available() else "cpu")
# was commented out here; presumably the app only targets CUDA -- confirm.
device = "cuda"
_dtype = torch.bfloat16

# Optional Hugging Face access token; None when HF_TOKEN is not set.
hf_token = os.getenv("HF_TOKEN")

# Load the model once at import time, then move it to the target device.
pipe = OvisImagePipeline.from_pretrained(
    "AIDC-AI/Ovis-Image-7B",
    token=hf_token,
    torch_dtype=_dtype,
)
pipe.to(device)
@spaces.GPU(duration=75)
def generate(prompt, img_height=1024, img_width=1024, seed=42, steps=50, guidance_scale=5.0):
    """Generate one image for ``prompt`` with the module-level ``pipe``.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        img_height: Output height in pixels; coerced to ``int``.
        img_width: Output width in pixels; coerced to ``int``.
        seed: Seed for the ``torch.Generator``; coerced to ``int``.
        steps: Forwarded as ``num_inference_steps``; coerced to ``int``.
        guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    # UI sliders may hand over floats (Gradio documents Slider values as
    # float), while manual_seed() requires an int and the size / step
    # arguments are integer quantities. Normalise before use.
    img_height = int(img_height)
    img_width = int(img_width)
    seed = int(seed)
    steps = int(steps)

    print(f'inference with prompt : {prompt}, size: {img_height}x{img_width}, seed : {seed}, step : {steps}, cfg : {guidance_scale}')

    # A freshly seeded generator per call: the same seed gives the same
    # starting random state.
    generator = torch.Generator().manual_seed(seed)

    result = pipe(
        prompt,
        negative_prompt="",  # always called with an empty negative prompt
        height=img_height,
        width=img_width,
        num_inference_steps=steps,
        true_cfg_scale=guidance_scale,
        generator=generator,
    )
    return result.images[0]
# Sample prompts offered in the UI: two English, two Chinese.
examples = [
    # Dreamlike photo scene.
    (
        "Child lying on bed with balloons, streamers, and plush toys, surreal dreamlike scene. "
        "Close-up, centered on the child and surrounding objects. "
        "Soft, diffused lighting."
    ),
    # Text rendering as 3D balloon letters.
    (
        'A trendy, playful 3D render of the text "OVIS-IMAGE" designed as glossy, inflated balloon letters. '
        'The typography is bubbly and rounded. '
        'The material is a shiny, reflective plastic in vibrant Alibaba Orange. '
        'The text is floating in a bright, clean white studio space with soft pastel lighting. '
        'There are subtle reflections of the studio softboxes on the curves of the balloons. '
        'High-fashion retail aesthetic, pop art style, C4D render, cute and energetic.'
    ),
    # Chinese prompt: classroom whiteboard with mixed English / Chinese text.
    "一张写实风格的现代化教室场景摄影图。画面焦点集中在前方墙壁正中央的一块洁白明亮的白板上,白板上用清晰、工整的黑色马克笔手写体写着多行文字:最上方是巨大的英文标题“OVIS-IMAGE”,紧接着下方依次分行写着中文说明:“7B文生图模型”、“无需专有模型即可实现双语渲染”、“可在消费级显卡部署”。白板下方的铝合金笔槽内摆放着几支直立或横躺的彩色白板笔(蓝、红、黄、绿)。前景是两位背对镜头的学生(呈虚化状态,营造景深感),坐在木质课桌前,右侧课桌上还放着一块折叠的棕色毛巾。左侧隐约可见部分绿色黑板,顶部有一盏明亮的长条日光灯提供照明。整体光线柔和自然,构图对称,对焦精准在白板文字上,营造出真实的教育和技术演示氛围。",
    # Chinese prompt: macro photograph of a ladybug on a leaf.
    "微距摄影,一只鲜艳的红色瓢虫停留在翠绿的叶子上,特写镜头。叶子表面分布着晶莹剔透的水珠(晨露),瓢虫红色的甲壳光滑如镜,反射着柔和的自然光,能够清晰看到瓢虫腿部和触角的细微纹理。背景是梦幻的绿色虚化(散景),突显主体,极度逼真,8k分辨率,高清晰度,电影级质感。",
]
# Stylesheet handed to gr.Blocks: centres the element with id "col-container"
# and caps its width.
css = (
    "\n"
    "#col-container {\n"
    "margin: 0 auto;\n"
    "max-width: 520px;\n"
    "}\n"
)
# Gradio page: header text, prompt box with run button, result image,
# collapsible generation settings, and clickable example prompts.
# NOTE(review): the nesting below is reconstructed from a listing that had lost
# its indentation. Confirm that the seed slider sits directly under the
# accordion and that gr.on belongs at the Blocks level.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            "# Ovis-Image\n"
            "Built upon [Ovis-U1](https://huggingface.co/spaces/AIDC-AI/Ovis-U1-3B), Ovis-Image is a 7B text-to-image model specifically optimized for high-quality text rendering under stringent computational constraints.\n"
            "[[code](https://github.com/AIDC-AI/Ovis-Image)] [[model](https://huggingface.co/AIDC-AI/Ovis-Image-7B)]\n"
        )

        # Prompt input and run button.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt here",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            # Height and width sliders share range, step and default value.
            size_slider_kwargs = dict(minimum=256, maximum=2048, step=32, value=1024)
            with gr.Row():
                img_height = gr.Slider(label="Image Height", **size_slider_kwargs)
                img_width = gr.Slider(label="Image Width", **size_slider_kwargs)

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1,
                    maximum=14,
                    step=0.1,
                    value=5.0,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=50,
                )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )

        # Examples pass only the prompt; generate() uses its defaults for
        # every other argument.
        gr.Examples(
            examples=examples,
            fn=generate,
            inputs=[prompt],
            outputs=[result],
            cache_examples="lazy",
        )

    # Clicking "Run" and submitting the prompt box both trigger generation.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=generate,
        inputs=[prompt, img_height, img_width, seed, num_inference_steps, guidance_scale],
        outputs=[result],
    )
# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()