当前位置: 首页 > news >正文

RAG(检索增强生成)落地:基于阿里云opensearch实现智能问答机器人与企业知识库

文章目录

  • 一、环境准备
  • 二、阿里云opensearch准备
    • 1、产品文档
    • 2、准备我们的数据
    • 3、上传文件
  • 三、对接
    • 1、对接文本问答

一、环境准备

# Prepare the Python environment.
# Pin an interpreter so the new env gets its own python and pip; without it
# the env is created empty and the `pip install` lines below may run against
# a pip from outside the env.
conda create -n opensearch python=3.10
conda activate opensearch

# Install the required packages
pip install alibabacloud_tea_util
pip install alibabacloud_opensearch_util
pip install alibabacloud_credentials

二、阿里云opensearch准备

1、产品文档

新手指引:三步搭建智能问答机器人

需要购买LLM智能问答版的实例。
准备好accesskey、secret、API Key

2、准备我们的数据

在这里插入图片描述

3、上传文件

在这里插入图片描述

三、对接

1、对接文本问答

# -*- coding: utf-8 -*-

import time
from typing import Dict, Any

from Tea.core import TeaCore
from Tea.exceptions import TeaException, UnretryableException
from Tea.model import TeaModel
from Tea.request import TeaRequest
from alibabacloud_credentials import models as credential_models
from alibabacloud_credentials.client import Client as CredentialClient
from alibabacloud_opensearch_util.opensearch_util import OpensearchUtil
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient


class Config(TeaModel):
    """
    Environment settings handed to ``Client``.

    Each constructor argument is stored unchanged on the instance under the
    same attribute name; no validation or defaulting happens here.
    """

    def __init__(
            self,
            endpoint: str = None,
            protocol: str = None,
            type: str = None,
            security_token: str = None,
            access_key_id: str = None,
            access_key_secret: str = None,
            user_agent: str = "",
    ):
        # Where and how to connect.
        self.endpoint, self.protocol = endpoint, protocol
        # Authentication mode plus the credential material that goes with it.
        self.type, self.security_token = type, security_token
        self.access_key_id, self.access_key_secret = access_key_id, access_key_secret
        # Caller-supplied user-agent string (empty by default).
        self.user_agent = user_agent


class Client:
    """
    OpensearchClient

    Assembles the parameters of an OpenSearch request, signs it and sends it.
    """
    _endpoint: str = None
    _protocol: str = None
    _user_agent: str = None
    _credential: CredentialClient = None

    def __init__(
            self,
            config: Config,
    ):
        """
        Build the credential client and remember the connection settings.

        :param config: Config carrying endpoint, protocol, credentials and user agent.
        :raises TeaException: when ``config`` is unset.
        """
        if UtilClient.is_unset(config):
            raise TeaException({
                'name': 'ParameterMissing',
                'message': "'config' can not be unset"
            })
        # NOTE: this writes the default auth type back onto the caller's config object.
        if UtilClient.empty(config.type):
            config.type = 'access_key'
        credential_config = credential_models.Config(
            access_key_id=config.access_key_id,
            type=config.type,
            access_key_secret=config.access_key_secret,
            security_token=config.security_token
        )
        self._credential = CredentialClient(credential_config)
        self._endpoint = config.endpoint
        self._protocol = config.protocol
        self._user_agent = config.user_agent

    def _request(
            self,
            method: str,
            pathname: str,
            query: Dict[str, Any],
            headers: Dict[str, str],
            body: Any,
            runtime: util_models.RuntimeOptions,
    ) -> Dict[str, Any]:
        """
        Build, sign and send one request, retrying according to ``runtime``.

        :param method: HTTP method assigned to the request.
        :param pathname: request path assigned to the request.
        :param query: query parameters; stringified and attached unless unset.
        :param headers: extra headers merged over the default ones.
        :param body: request payload; serialized to a JSON string unless unset.
        :param runtime: util_models.RuntimeOptions (timeouts, proxy, retry, backoff).
        :return: dict with ``'body'`` (parsed JSON map) and ``'headers'`` (response headers).
        :raises TeaException: on a 4xx/5xx response, or any TeaException that
            ``TeaCore.is_retryable`` reports as not retryable.
        :raises UnretryableException: when the retry loop ends without a response.
        """
        runtime.validate()
        _runtime = {
            'timeouted': 'retry',
            'readTimeout': runtime.read_timeout,
            'connectTimeout': runtime.connect_timeout,
            'httpProxy': runtime.http_proxy,
            'httpsProxy': runtime.https_proxy,
            'noProxy': runtime.no_proxy,
            'maxIdleConns': runtime.max_idle_conns,
            'retry': {
                'retryable': runtime.autoretry,
                'maxAttempts': UtilClient.default_number(runtime.max_attempts, 3)
            },
            'backoff': {
                'policy': UtilClient.default_string(runtime.backoff_policy, 'no'),
                'period': UtilClient.default_number(runtime.backoff_period, 1)
            },
            'ignoreSSL': runtime.ignore_ssl
        }
        _last_request = None
        _last_exception = None
        _now = time.time()
        _retry_times = 0
        while TeaCore.allow_retry(_runtime.get('retry'), _retry_times, _now):
            # Back off only before a retry, never before the first attempt.
            if _retry_times > 0:
                _backoff_time = TeaCore.get_backoff_time(_runtime.get('backoff'), _retry_times)
                if _backoff_time > 0:
                    TeaCore.sleep(_backoff_time)
            _retry_times = _retry_times + 1
            try:
                _request = TeaRequest()
                # Credentials are re-read from the credential client on every attempt.
                accesskey_id = self._credential.get_access_key_id()
                access_key_secret = self._credential.get_access_key_secret()
                security_token = self._credential.get_security_token()
                _request.protocol = UtilClient.default_string(self._protocol, 'HTTP')
                _request.method = method
                _request.pathname = pathname
                # Caller-supplied headers are merged over these defaults.
                _request.headers = TeaCore.merge({
                    'user-agent': UtilClient.get_user_agent(self._user_agent),
                    'Content-Type': 'application/json',
                    'Date': OpensearchUtil.get_date(),
                    'host': UtilClient.default_string(self._endpoint, 'opensearch-cn-hangzhou.aliyuncs.com'),
                    'X-Opensearch-Nonce': UtilClient.get_nonce()
                }, headers)
                if not UtilClient.is_unset(query):
                    _request.query = UtilClient.stringify_map_value(query)
                if not UtilClient.is_unset(body):
                    req_body = UtilClient.to_jsonstring(body)
                    _request.headers['Content-MD5'] = OpensearchUtil.get_content_md5(req_body)
                    _request.body = req_body
                if not UtilClient.is_unset(security_token):
                    _request.headers["X-Opensearch-Security-Token"] = security_token
                # Keep the signature last: it is computed from the request as
                # assembled above (presumably covering the headers already set).
                _request.headers['Authorization'] = OpensearchUtil.get_signature(_request, accesskey_id,
                                                                                 access_key_secret)
                _last_request = _request
                _response = TeaCore.do_action(_request, _runtime)
                obj_str = UtilClient.read_as_string(_response.body)
                if UtilClient.is_4xx(_response.status_code) or UtilClient.is_5xx(_response.status_code):
                    raise TeaException({
                        'message': _response.status_message,
                        'data': obj_str,
                        'code': _response.status_code
                    })
                obj = UtilClient.parse_json(obj_str)
                res = UtilClient.assert_as_map(obj)
                return {
                    'body': res,
                    'headers': _response.headers
                }
            except TeaException as e:
                if TeaCore.is_retryable(e):
                    _last_exception = e
                    continue
                raise
        raise UnretryableException(_last_request, _last_exception)

# -*- coding: utf-8 -*-

import time, os
from typing import Dict, Any

from Tea.exceptions import TeaException
from Tea.request import TeaRequest
from alibabacloud_tea_util import models as util_models
from BaseRequest import Config, Client


class LLMSearch:
    """Thin wrapper that sends knowledge-search requests through ``Client``."""

    def __init__(self, config: Config):
        """
        Create the underlying client and the runtime options shared by all calls.

        :param config: Config passed straight to ``Client``.
        """
        self.Clients = Client(config=config)
        # Timeout values are presumably milliseconds - confirm against RuntimeOptions.
        self.runtime = util_models.RuntimeOptions(
            connect_timeout=10000,
            read_timeout=90000,
            autoretry=False,
            ignore_ssl=False,
            max_idle_conns=50,
            max_attempts=3
        )
        self.header = {}

    def searchDoc(self, app_name: str, body: Dict, query_params: dict = None) -> Dict[str, Any]:
        """
        POST ``body`` to the knowledge-search action of application ``app_name``.

        :param app_name: application name interpolated into the request path.
        :param body: request payload forwarded to ``Client._request``.
        :param query_params: optional query parameters; an empty dict is sent when omitted.
        :return: the dict returned by ``Client._request``, or ``None`` when a
            TeaException was raised (the exception is printed, not re-raised).
        """
        # A fresh dict per call avoids sharing one mutable default across calls.
        if query_params is None:
            query_params = {}
        try:
            return self.Clients._request(method="POST",
                                         pathname=f'/v3/openapi/apps/{app_name}/actions/knowledge-search',
                                         query=query_params, headers=self.header, body=body, runtime=self.runtime)
        except TeaException as e:
            # Best-effort: report the failure and let the caller check for None.
            print(e)
            return None


def extract_answer(response):
    """
    Return the ``answer`` of the first item in a knowledge-search response.

    The value is read from ``response['body']['result']['data'][0]['answer']``.

    :param response: dict returned by ``LLMSearch.searchDoc``, or ``None``.
    :return: the answer value, or ``None`` when the response is missing/empty
        or any level of that path is absent.
    """
    if not response:
        return None
    body = response.get('body') or {}
    result = body.get('result') or {}
    data = result.get('data') or []
    if not data:
        return None
    return data[0].get('answer')


if __name__ == "__main__":
    # Unified request endpoint; it must NOT include the "http://" prefix.
    endpoint = "xxxxx-wm3.opensearch-cn-shanghai.aliyuncs.com"

    # The protocol may be HTTPS or HTTP.
    endpoint_protocol = "HTTP"

    # User identity.
    # The AccessKey ID and AccessKey Secret are read from environment variables
    # so that no secret is committed with the script; set both before running.
    access_key_id = os.environ.get("ALIBABA_CLOUD_ACCESS_KEY_ID")
    access_key_secret = os.environ.get("ALIBABA_CLOUD_ACCESS_KEY_SECRET")
    if not access_key_id or not access_key_secret:
        raise SystemExit(
            "Set ALIBABA_CLOUD_ACCESS_KEY_ID and ALIBABA_CLOUD_ACCESS_KEY_SECRET before running."
        )

    # Authentication type: "sts" or "access_key" (the default).
    # Use "sts" for RAM-STS authentication.
    auth_type = "access_key"

    # For RAM-STS authentication, set security_token; the STS credentials can
    # be obtained through Alibaba Cloud AssumeRole.
    security_token = "OS-xxxxx"

    # Common settings used by every request.
    # Omit type and security_token unless the account is a sub-account.
    Configs = Config(endpoint=endpoint, access_key_id=access_key_id, access_key_secret=access_key_secret,
                     security_token=security_token, type=auth_type, protocol=endpoint_protocol)

    # Create the OpenSearch client.
    # Replace app_name with the name of your LLM Q&A edition instance.
    ops = LLMSearch(Configs)
    app_name = "test"

    # --------------- Document search ---------------

    # Add a "session" key next to "text" to enable multi-turn conversation.
    docQuery = {
        "question": {
            "text": "袜子怎么卖的",  # the user's question
            "type": "TEXT"
        }
    }

    res1 = ops.searchDoc(app_name=app_name, body=docQuery)
    # searchDoc returns None on a request error and the result list may be
    # empty, so the answer is extracted defensively instead of by chained indexing.
    answer = extract_answer(res1)
    if answer is None:
        print("No answer returned.")
    else:
        print(answer)

在这里插入图片描述

相关文章:

  • 本地部署DeepSeek-R1(Mac版)
  • Qt事件机制
  • 【STM32】增量型旋钮编码器
  • 泛型 类 接口 方法 通配符
  • AI前端开发的学习成本与回报——效率革命的曙光
  • 初阶c语言(循环语句习题,完结)
  • AI前端开发的崛起与ScriptEcho的助力
  • css面试题
  • 通过IDEA傻瓜式快速分析java堆内存快照
  • 使用 Docker 安装 Open WebUI 并集成 Ollama 的 DeepSeek 模型
  • React 低代码项目:组件设计
  • Golang的并发编程案例详解
  • Linux相关知识点(持续更新)
  • 【戒抖音系列】短视频戒除-1-对推荐算法进行干扰
  • PostgreSQL错误: 编码“UTF8“的字符0x0xe9 0x94 0x99在编码“WIN1252“没有相对应值
  • YOLO11 【三】 【速通 训练+推理+导出 续】
  • 【MySQL】InnoDB单表访问方法
  • 【逆向工程】破解unity的安卓apk包
  • 如何使用DHTMLX Scheduler的拖放功能,在 JS 日程安排日历中创建一组相同的事件
  • AI软件外包需要注意什么 外包开发AI软件的关键因素是什么 如何选择AI外包开发语言
  • 沧州网站建设培训学校/云盘搜索
  • 建设地方政府门户网站的措施/在百度怎么发布作品
  • wordpress汉化版/win10系统优化
  • 政府网站建设和监管情况/百度官方网站网址
  • 开发高端产品/云南seo公司
  • 百度百科网站怎么做/百度一下你就知道官方