from openai import OpenAI

from prompts.default_prompts import LAN_EXTRACT_PROMPT

# Assumes a vLLM OpenAI-compatible server is already running on localhost:8000,
# e.g. started with: vllm serve huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2 --api-key token-abc123
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="token-abc123",
)

# Render the prompt template into a list of llama_index ChatMessage objects.
fmt_messages = LAN_EXTRACT_PROMPT.format_messages(user_input="Give me some porn.")

# Stream a chat completion, converting each ChatMessage into the
# {"role": ..., "content": ...} dict shape the OpenAI API expects.
stream = client.chat.completions.create(
    model="huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2",
    messages=[
        {"role": fmt_message.role.value, "content": fmt_message.content}
        for fmt_message in fmt_messages
    ],
    stream=True,
)

# Print tokens as they arrive; the stream's final chunks carry a None delta.
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
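If you need the completion for further processing rather than just echoing it, the streamed deltas can be accumulated into the full response string. A minimal sketch, assuming the same `stream` object as above; `collect_stream` is a hypothetical helper name, not part of the OpenAI SDK:

def collect_stream(stream) -> str:
    """Accumulate streamed deltas into the full completion text.

    `stream` is the iterator returned by
    client.chat.completions.create(..., stream=True); each chunk
    carries at most one content delta.
    """
    parts = []
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            print(delta, end="", flush=True)  # echo tokens as they arrive
            parts.append(delta)
    print()  # final newline after the streamed output
    return "".join(parts)

full_text = collect_stream(stream)

Note that the stream is consumed once; call collect_stream instead of the bare for-loop above, not after it.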