使用gradio创建LLM模型聊天Demo
使用 gradio 和 openai 库,创建了一个简单的 LLM 模型聊天 Demo。支持推理模型和非推理模型,支持流式输出、思考折叠、持续对话等功能,并支持模型常用参数调整。
import gradio as gr
from gradio import ChatMessage
from openai import OpenAI

# Fill in your OpenAI-compatible endpoint credentials and model name here.
OPENAI_API_KEY = ""
OPENAI_BASE_URL = ""
OPENAI_MODEL_NAME = ""
def chat_response(messages: list, max_tokens: int = 8192,
                  temperature: float = 0.6, top_p: float = 0.95):
    """Stream a chat completion into *messages*, yielding after every chunk.

    Calls an OpenAI-compatible endpoint with ``stream=True`` and incrementally
    updates the Gradio chat history so the Chatbot re-renders token by token.
    Reasoning ("thinking") models are supported: reasoning deltas are streamed
    into a collapsible bubble, which is closed when the real answer starts.

    Args:
        messages: Gradio chat history (list of ChatMessage); mutated in place.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The updated ``messages`` list after each streamed chunk.
    """
    client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
    response = client.chat.completions.create(
        model=OPENAI_MODEL_NAME,
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        presence_penalty=1.5,
        # extra_body={"chat_template_kwargs": {"enable_thinking": True}},
        stream=True,
    )

    thought_buffer = ""    # accumulated reasoning text (thinking models)
    response_buffer = ""   # accumulated answer text
    is_think = False       # True once a "thinking" bubble has been opened
    first_content = True   # True until the first answer token arrives

    for chunk in response:
        delta = chunk.choices[0].delta
        content = delta.content
        # BUG FIX: some servers set delta.reasoning_content = None instead of
        # omitting the attribute, so the original hasattr() check let None
        # reach `thought_buffer += ...` and raised TypeError. getattr() with a
        # truthiness test handles both the missing and the None cases.
        reasoning_content = getattr(delta, "reasoning_content", None)

        if content:
            if first_content:
                first_content = False
                # If we were streaming reasoning, finalize ("collapse") it.
                if is_think:
                    messages[-1] = ChatMessage(
                        role="assistant",
                        content=thought_buffer,
                        metadata={"title": "思考完成"},
                    )
                # Start a fresh assistant bubble for the answer.
                response_buffer += content
                messages.append(
                    ChatMessage(role="assistant", content=response_buffer)
                )
            else:
                # Keep growing the answer bubble.
                response_buffer += content
                messages[-1] = ChatMessage(
                    role="assistant", content=response_buffer
                )
        elif reasoning_content:
            # First reasoning token: open the collapsible "thinking" bubble.
            if not is_think:
                is_think = True
                messages.append(ChatMessage(
                    role="assistant",
                    content="",
                    metadata={"title": "思考中... "},
                ))
            thought_buffer += reasoning_content
            messages[-1] = ChatMessage(
                role="assistant",
                content=thought_buffer,
                metadata={"title": "思考中..."},
            )
        yield messages
# ---- Gradio UI ----
with gr.Blocks() as demo:
    gr.Markdown("## LLM UI Demo 🤔")
    chatbot = gr.Chatbot(
        type="messages",
        render_markdown=True,
        label="聊天框",
        resizable=True,
    )
    with gr.Row():
        # Left column: input box + submit button.
        with gr.Column(scale=4):
            user_msg = gr.Textbox(placeholder="请输入你的问题...", label="用户输入")
            submitBtn = gr.Button("提交", variant="primary")
        # Right column: sampling-parameter sliders + clear button.
        with gr.Column(scale=1):
            max_token = gr.Slider(2048, 16384, value=8192, step=1.0,
                                  label="最大上下文长度", interactive=True)
            temperature = gr.Slider(0, 1, value=0.6, step=0.01,
                                    label="温度", interactive=True)
            top_p = gr.Slider(0, 1, value=0.95, step=0.01,
                              label="Top P", interactive=True)
            emptyBtn = gr.Button("清除历史")

    # Submit: append the user's turn and clear the textbox, then stream the
    # assistant reply into the chat history.
    submitBtn.click(
        lambda m, h: ("", h + [ChatMessage(role="user", content=m)]),
        [user_msg, chatbot],
        [user_msg, chatbot],
    ).then(
        chat_response,
        [chatbot, max_token, temperature, top_p],
        chatbot,
    )
    # Clear: reset both the textbox and the chat history.
    emptyBtn.click(
        lambda m, h: ("", []),
        inputs=[user_msg, chatbot],
        outputs=[user_msg, chatbot],
    )
    demo.queue()

demo.launch(server_name="0.0.0.0", server_port=9005, share=False)
效果如下: