
AI: Deploying QwQ 32B on Linux, with a Web UI Client (Reference)

Linux deployment

1. Install Ollama from the Linux command line:
curl -fsSL https://ollama.com/install.sh | sh
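
After the script finishes, confirm the binary is on your PATH (a quick sanity check; Ollama prints its version):

ollama --version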

2. Set Ollama to run automatically at system startup (recommended). Create a dedicated system user and group, then add your own user to it:

sudo useradd -r -s /bin/false -U -m -d /usr/share/ollama ollama

sudo usermod -a -G ollama $(whoami)
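
Creating the user alone does not register a service. On a manual install you also need a systemd unit; the install script from step 1 normally creates this for you, but if it is missing, the unit below mirrors the one in the Ollama Linux install docs. Save it as /etc/systemd/system/ollama.service:

[Unit]
Description=Ollama Service
After=network-online.target

[Service]
ExecStart=/usr/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3

[Install]
WantedBy=default.target

Then reload systemd and enable the service at boot:

sudo systemctl daemon-reload
sudo systemctl enable ollama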



3. Start Ollama and verify it is running, then launch the model:

sudo systemctl start ollama

ollama run qwq:32b
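
To confirm the service is healthy, check its status and send a quick request to Ollama's local HTTP API (it listens on 127.0.0.1:11434 by default). Note that the 32B model is large: expect a download on the order of 20 GB, and a machine with enough GPU or system memory to hold it:

sudo systemctl status ollama
curl http://localhost:11434/api/generate -d '{"model": "qwq:32b", "prompt": "Hello", "stream": false}'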

Web UI client: python_client.py

The following Gradio client (model-gallery/deploy/llm/vLLM/python_client.py) provides a chat web UI on top of an OpenAI-compatible endpoint, such as a QwQ-32B service deployed with vLLM on PAI-EAS:
import gradio as gr
from argparse import ArgumentParser
import json
from openai import OpenAI
from openai import NOT_GIVEN


def _get_args():
    parser = ArgumentParser()

    parser.add_argument("--eas_endpoint", type=str, required=True)
    parser.add_argument("--eas_token", type=str, required=True)
    parser.add_argument(
        "--share",
        action="store_true",
        default=False,
        help="Create a publicly shareable link for the interface.",
    )
    parser.add_argument(
        "--inbrowser",
        action="store_true",
        default=False,
        help="Automatically launch the interface in a new tab on the default browser.",
    )
    parser.add_argument(
        "--server-port", type=int, default=7860, help="Demo server port."
    )
    parser.add_argument(
        "--server-name", type=str, default="127.0.0.1", help="Demo server name."
    )

    args = parser.parse_args()
    return args


css = """
.checkbox {
    max-width: 2.5em;
    min-width: 2.5em !important;
    display:flex;
    align-items:center;
}
"""


def _launch_ui(model_name, client, args):
    def _post_process(text):
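        # Escape QwQ's <think>...</think> reasoning tags so the chain-of-thought
        # is shown literally in the chat UI instead of being parsed as HTML.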
        return text.replace("<think>", "&lt;think&gt;").replace(
            "</think>", "&lt;/think&gt;"
        )

    def _transform_messages(history, max_rounds, apply_max_rounds, system_prompt):
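        # Convert Gradio's [[query, response], ...] pair history into OpenAI
        # chat messages, keeping at most the last `max_rounds` rounds, e.g.
        # [["hi", "hello"], ["new question", None]] becomes a system message
        # (if set) followed by user/assistant/user entries.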
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        if not apply_max_rounds:
            begin_index = 0
        else:
            begin_index = max(0, len(history) - max_rounds)

        for i in range(begin_index, len(history)):
            query, response = history[i]
            messages.append({"role": "user", "content": query})
            messages.append({"role": "assistant", "content": response})
        messages.pop()  # pop the None assistant response
        return messages

    def predict(
        _chatbot,
        max_completion_tokens,
        top_p,
        apply_top_p,
        temperature,
        apply_temperature,
        use_stream,
        max_rounds,
        apply_max_rounds,
        system_prompt,
    ):
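        # Send the accumulated chat history to the OpenAI-compatible endpoint
        # and stream (or return in one shot) the assistant reply into the chatbot.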
        chat_query = _chatbot[-1][0]
        if len(chat_query) == 0:
            _chatbot.pop()
            return _chatbot
        messages = _transform_messages(
            _chatbot, max_rounds, apply_max_rounds, system_prompt
        )
        print(f"Messages: {json.dumps(messages, ensure_ascii=False, indent=2)}")
        gen = client.chat.completions.create(
            messages=messages,
            model=model_name,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p if apply_top_p else NOT_GIVEN,
            temperature=temperature if apply_temperature else NOT_GIVEN,
            stream=use_stream,
        )
        print("Response:", end="")
        if use_stream:
            generated_text = ""
            for chunk in gen:
                # delta.content can be None (e.g. the initial role-only chunk),
                # so fall back to an empty string before appending.
                delta = chunk.choices[0].delta.content or ""
                generated_text += _post_process(delta)
                print(delta, end="")
                _chatbot[-1] = (chat_query, generated_text)
                yield _chatbot
        else:
            generated_text = _post_process(gen.choices[0].message.content)
            print(gen.choices[0].message.content, end="")
            _chatbot[-1] = (chat_query, generated_text)
            yield _chatbot
        print()

    def add_text(history, text):
        history = history if history is not None else []
        history.append([text, None])  # [user_query, bot_response]
        return history, None

    def clear_history(history):
        if history:
            history.clear()
        return []

    with gr.Blocks(analytics_enabled=False, css=css) as demo:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("""<h2><center>ChatLLM-WebUI</center></h2>""")
                gr.Markdown(f"""<h3><center>{model_name}</center></h3>""")
                with gr.Row():
                    with gr.Column(variant="panel"):
                        model_argument = gr.Accordion("Model Arguments")
                        with model_argument:
                            with gr.Row():
                                max_completion_tokens = gr.Slider(
                                    minimum=10,
                                    maximum=10240,
                                    step=10,
                                    label="max_completion_tokens",
                                    value=512,
                                )
                            with gr.Row():
                                apply_top_p = gr.Checkbox(
                                    label="", value=False, elem_classes="checkbox"
                                )
                                top_p = gr.Slider(
                                    minimum=0.0,
                                    maximum=1.0,
                                    step=0.01,
                                    label="top_p",
                                    value=0,
                                )
                            with gr.Row():
                                apply_temperature = gr.Checkbox(
                                    label="", value=True, elem_classes="checkbox"
                                )
                                temperature = gr.Slider(
                                    minimum=0.0,
                                    maximum=2.0,
                                    step=0.01,
                                    label="temperature",
                                    value=0.7,
                                )

                            with gr.Row():
                                use_stream_chat = gr.Checkbox(
                                    label="use_stream_chat", value=True
                                )

                        with gr.Row():
                            max_rounds = gr.Slider(
                                minimum=1,
                                maximum=100,
                                step=1,
                                label="max_rounds",
                                value=10,
                            )
                            apply_max_rounds = gr.Checkbox(
                                label="", value=True, elem_classes="checkbox"
                            )

                        with gr.Row():
                            system_prompt = gr.Textbox(
                                label="System Prompt",
                                lines=4,
                                value="You are a helpful assistant.",
                            )
                            clear_prompt_btn = gr.Button("Clear Prompt")

            with gr.Column(scale=4):
                chatbot = gr.Chatbot(elem_id="chat-box", show_label=False, height=560)
                with gr.Row():
                    query = gr.Textbox(label="Input", lines=3)

                with gr.Row():
                    submit_btn = gr.Button("submit", elem_id="c_generate")
                    clear_history_btn = gr.Button("clear history")

        submit_btn.click(add_text, [chatbot, query], [chatbot, query]).then(
            predict,
            [
                chatbot,
                max_completion_tokens,
                top_p,
                apply_top_p,
                temperature,
                apply_temperature,
                use_stream_chat,
                max_rounds,
                apply_max_rounds,
                system_prompt,
            ],
            [chatbot],
            show_progress=True,
        )
        clear_history_btn.click(clear_history, [chatbot], [chatbot], show_progress=True)
        clear_prompt_btn.click(lambda: "", None, [system_prompt])

    demo.queue().launch(
        share=args.share,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        server_name=args.server_name,
    )


def main():
    args = _get_args()
    openai_api_key = args.eas_token
    if not args.eas_endpoint.endswith("/"):
        args.eas_endpoint += "/"
    openai_api_base = f"{args.eas_endpoint}v1"
    client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)
    models = client.models.list()
    model = models.data[0].id
    _launch_ui(model, client, args)


if __name__ == "__main__":
    main()
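
To launch the web UI, point the client at the base URL of your deployment. The endpoint and token values below are placeholders; on PAI-EAS they come from the service's invocation page. The script appends /v1 and uses the first model the server reports, so any OpenAI-compatible backend should also work (presumably including a local Ollama server at http://localhost:11434 with an arbitrary token, though that is an assumption, not something the article tests):

python python_client.py \
    --eas_endpoint <YOUR_EAS_ENDPOINT> \
    --eas_token <YOUR_EAS_TOKEN> \
    --server-name 127.0.0.1 \
    --server-port 7860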


webui_client.py is identical to python_client.py above.

Reference 1:

https://pai.console.aliyun.com/?regionId=cn-shanghai&spm=5176.12818093_47.resourceCenter.4.3be916d0Ymh6PS&workspaceId=695161#/quick-start/models/QwQ-32B-AWQ/intro


Reference 2:

https://www.cnblogs.com/shenhuanjie/p/18661830/how-to-install-ollama-and-enable-the-service-1nixot
