当前位置：首页 > news >正文

10. 工具（Tools）集成：连接API、数据库与外部服务的桥梁

news 2025/7/12 16:21:50

引言：大模型的"手脚"革命

2025年某跨国零售集团通过LangChain工具集成，将30+业务系统接入AI助手，采购决策时效从3天缩短至2小时。基于LangChain最新文档，本文将揭示工具系统全新架构，并手把手教你用动态工具绑定+权限沙箱，让大模型安全高效地操作现实世界系统。

一、LangChain工具类型全景

二、四大核心工具开发模式

2.1 API工具：动态Headers绑定

import os

from fastapi.params import Query
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain.tools import tool
import requests
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_ollama import ChatOllama

os.environ["API_KEY"] = "your_api_key"

@tool
def inventory_check(
    sku: str = Query(...,
                   description="商品唯一标识码，由字母数字组成（如：IPHONE15-BK）"),
    warehouse: str = Query(...,
                         description="仓库编号，格式为'WH'+两位数字（如：WH02）")
):
    """
    实时库存查询工具 - 需同时提供以下两个参数:

    功能说明：
    1. 精确识别用户提到的商品SKU编号，必须包含型号和颜色代码
    2. 检测用户指定的仓库位置，优先使用用户明示的仓库编号
    3. 当参数缺失时需分情况询问：
       - 缺少SKU时追问："请问您需要查询哪个具体型号的商品？"
       - 缺少仓库时建议："当前可查询北京仓(WH01)、上海仓(WH02)，您要查哪个？"

    参数交互指南：
    ✅ 正确案例：
       用户说："查看上海仓库里IPHONE15-WHITE的库存"
       → 解析：sku="IPHONE15-WHITE", warehouse="WH02"

    ❌ 常见错误处理：
       1. 用户说："帮我查下库存" → 响应："请同时提供商品型号和仓库位置"
       2. 用户说："WH03有货吗" → 响应："需要知道具体商品型号才能查询"

    特殊场景处理：
    1. 当用户使用口语化名称（如"黑色手机"）时，需要引导用户提供标准SKU
    2. 接收到模糊仓库名称（如"华北仓"）时，应提示可用编码列表
    3. 遇到拼写错误（WHO1）应自动修正为WH01后验证

    响应示例：
    {
        "stock": 150,
        "sku": "IPHONE15-BK",
        "warehouse": "WH01"
    }
    """
    response = requests.get(
        f"https://api.example.com/inventory?sku={sku}&warehouse={warehouse}",
        headers={"Authorization": f"Bearer {os.getenv('API_KEY')}"}
    )
    return response.json()["stock"]

# 配置带格式要求的提示模板
prompt = ChatPromptTemplate.from_messages([
    ("system", """你是一个智能助手，用中文回答用户问题。"""),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

tools = [inventory_check]
# 创建智能体实例
agent = create_tool_calling_agent(
    llm=ChatOllama(model="qwen2.5"),
    tools=tools,
    prompt=prompt
)

# 配置执行器（增强错误处理）
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,  # 开启详细日志模式
    handle_parsing_errors=True,  # 自动处理解析错误
    max_iterations=3  # 限制最大迭代次数
)

# 测试用例（实际生产环境应移除）
result = agent_executor.invoke({
    "input": "请查询WH02仓库IPHONE15-PRO-256G的库存"
})
print("执行结果:", result)

测试桩：

from fastapi import FastAPI, HTTPException, Header, Depends, Query

app = FastAPI(title="Inventory API")


# 依赖项验证API Key
async def verify_api_key(authorization: str = Header(None)):
    if not authorization:
        raise HTTPException(status_code=401, detail="Missing Authorization header")

    try:
        scheme, token = authorization.split()
        if scheme.lower() != "bearer":
            raise ValueError
    except ValueError:
        raise HTTPException(status_code=401, detail="Invalid Authorization format")

    # if token != os.getenv("API_KEY"):
    #     raise HTTPException(status_code=403, detail="Invalid API Key")
    return True


@app.get("/inventory")
async def check_inventory(
        sku: str = Query(..., min_length=3, examples="ABC123"),
        warehouse: str = Query(..., min_length=2, examples="WH01"),
        _: bool = Depends(verify_api_key)
):
    """
    查询实时库存接口

    参数：
    - sku: 商品SKU编号（至少3个字符）
    - warehouse: 仓库代码（至少2个字符）

    返回：
    - JSON包含sku、仓库和库存量
    """
    return {
        "sku": sku,
        "warehouse": warehouse,
        "stock": 10
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

输出为：

> Entering new AgentExecutor chain...

Invoking: `inventory_check` with `{'sku': 'IPHONE15-PRO-256G', 'warehouse': 'WH02'}`


10当前上海仓库(WH02)中，IPHONE15-PRO-256G的库存数量为10台。

> Finished chain.
执行结果: {'input': '请查询WH02仓库IPHONE15-PRO-256G的库存', 'output': '当前上海仓库(WH02)中，IPHONE15-PRO-256G的库存数量为10台。'}

2.2 数据库工具：SQL安全沙箱

from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_community.tools import Tool
from langchain_ollama import ChatOllama

# 创建数据库连接（使用 PyMySQL 驱动）
db = SQLDatabase.from_uri(
    "mysql+pymysql://user:password@localhost:3306/crm",
    include_tables=["customers", "orders"],  # 允许访问的表
    sample_rows_in_table_info=2,  # 表结构采样行数
    max_string_length=500,        # 字符串截断长度
    view_support=True             # 支持视图查询
)

llm = ChatOllama(model="qwen2.5")
# 创建增强版工具包
toolkit = SQLDatabaseToolkit(
    db=db, llm=llm,
)

from langchain import hub
from langgraph.prebuilt import create_react_agent


prompt_template = hub.pull("langchain-ai/sql-agent-system-prompt")
system_message = prompt_template.format(dialect="SQLite", top_k=5)

agent_executor = create_react_agent(
    llm, toolkit.get_tools(), state_modifier=system_message
)

# 场景1：客户订单查询
result = agent_executor.invoke({"input": "ID为1的客户最近3个月订单金额总和是多少？"})
print(result)
print("---------")

# 场景2：数据校验
schema = agent_executor.invoke({"input": "请列出orders表所有字段及注释"})
print(schema)

输出为：

{'messages': [AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:01.6559609Z', 'done': True, 'done_reason': 'stop', 'total_duration': 125961962200, 'load_duration': 59620066500, 'prompt_eval_count': 702, 'prompt_eval_duration': 61369866500, 'eval_count': 18, 'eval_duration': 4970551200, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-eea1954a-21d7-4230-a810-fd9666d6be05-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': 'cb2548bd-5dbf-42a2-b316-54c993acd62d', 'type': 'tool_call'}], usage_metadata={'input_tokens': 702, 'output_tokens': 18, 'total_tokens': 720}), ToolMessage(content='customers, orders', name='sql_db_list_tables', id='6a7a629b-f851-4569-8769-756d0fcc33e3', tool_call_id='cb2548bd-5dbf-42a2-b316-54c993acd62d'), AIMessage(content=' RonaldoDB has two tables: customers and orders. Let\'s explore the schema of these tables.\n<tool_call>\n{"name": "sql_db_schema", "arguments": "customers, orders"}\n</tool_call>', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:16.300491Z', 'done': True, 'done_reason': 'stop', 'total_duration': 14482071200, 'load_duration': 1504567200, 'prompt_eval_count': 741, 'prompt_eval_duration': 2029415700, 'eval_count': 39, 'eval_duration': 10810406500, 'message': Message(role='assistant', content=' RonaldoDB has two tables: customers and orders. Let\'s explore the schema of these tables.\n<tool_call>\n{"name": "sql_db_schema", "arguments": "customers, orders"}\n</tool_call>', images=None, tool_calls=None)}, id='run-2e5facc5-f598-45e2-884e-602472198de5-0', usage_metadata={'input_tokens': 741, 'output_tokens': 39, 'total_tokens': 780})]}
---------
{'messages': [AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:21.827269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 5522579100, 'load_duration': 20329500, 'prompt_eval_count': 702, 'prompt_eval_duration': 302476500, 'eval_count': 18, 'eval_duration': 5199773100, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-0e74210c-e0f0-4a53-a4fe-fa54d600d5e4-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': '2cbdb3b5-f6e9-40c7-9a6c-51b23971649b', 'type': 'tool_call'}], usage_metadata={'input_tokens': 702, 'output_tokens': 18, 'total_tokens': 720}), ToolMessage(content='customers, orders', name='sql_db_list_tables', id='bf19148a-9211-438f-90d1-39b8e4971a7a', tool_call_id='2cbdb3b5-f6e9-40c7-9a6c-51b23971649b'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:31.49173Z', 'done': True, 'done_reason': 'stop', 'total_duration': 9659962700, 'load_duration': 24766500, 'prompt_eval_count': 741, 'prompt_eval_duration': 2361401100, 'eval_count': 24, 'eval_duration': 7262794300, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-6e538d8a-9863-44c8-af43-074c9f3d2099-0', tool_calls=[{'name': 'sql_db_schema', 'args': {'table_names': 'customers,orders'}, 'id': '33e20b40-ec35-4e5a-8d34-e9df191d59f2', 'type': 'tool_call'}], usage_metadata={'input_tokens': 741, 'output_tokens': 24, 'total_tokens': 765}), ToolMessage(content="\nCREATE TABLE customers (\n\tid INTEGER NOT NULL AUTO_INCREMENT, \n\tname VARCHAR(255), \n\tcreate_time DATETIME, \n\tPRIMARY KEY (id)\n)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci COMMENT='客户表' DEFAULT CHARSET=utf8mb4\n\n/*\n2 rows from customers table:\nid\tname\tcreate_time\n1\t李雷\tNone\n2\t韩梅梅\tNone\n*/\n\n\nCREATE TABLE orders (\n\tid INTEGER NOT NULL COMMENT 'id' AUTO_INCREMENT, \n\tcustomer_id INTEGER COMMENT '客户id', \n\tfee DECIMAL(10, 2) COMMENT '金额', \n\tcreate_time DATETIME, \n\tPRIMARY KEY (id)\n)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci COMMENT='订单表' DEFAULT CHARSET=utf8mb4\n\n/*\n2 rows from orders table:\nid\tcustomer_id\tfee\tcreate_time\n1\t1\t11.20\t2024-11-02 12:00:00\n2\t1\t20.00\t2025-01-05 15:00:00\n*/", name='sql_db_schema', id='b5369f98-de95-4960-8c51-e9721ae1da0b', tool_call_id='33e20b40-ec35-4e5a-8d34-e9df191d59f2'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:50:56.2460969Z', 'done': True, 'done_reason': 'stop', 'total_duration': 84640614900, 'load_duration': 56681300, 'prompt_eval_count': 1030, 'prompt_eval_duration': 23995596900, 'eval_count': 206, 'eval_duration': 60558135500, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-9b11eb06-5588-408e-99d9-3b44ab5de3b6-0', tool_calls=[{'name': 'sql_db_query_checker', 'args': {'query': 'SELECT c.name AS customer_name, o.fee AS order_fee FROM customers c JOIN orders o ON c.id = o.customer_id WHERE o.fee > 15.00 LIMIT 5'}, 'id': '308f8f47-b1fd-4b33-a128-56a39e8e2714', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1030, 'output_tokens': 206, 'total_tokens': 1236}), ToolMessage(content="The provided SQL query does not contain any of the common mistakes you mentioned:\n\n- It does not use `NOT IN` with NULL values.\n- It does not use `UNION` when `UNION ALL` should have been used.\n- It uses `>` which is correct for an exclusive range.\n- There are no data type mismatches in the predicates.\n- The identifiers (table and column names) are not quoted, but this is typically fine unless there are reserved words or special characters in the names.\n- There are no function calls with incorrect arguments.\n- It uses the proper columns for joins.\n\nTherefore, the query can be used as-is. Here it is reproduced:\n\n```sql\nSELECT c.name AS customer_name, o.fee AS order_fee \nFROM customers c \nJOIN orders o ON c.id = o.customer_id \nWHERE o.fee > 15.00 \nLIMIT 5;\n```\n\nIf you need to quote the identifiers (which is generally a good practice), here's how it would look:\n\n```sql\nSELECT `c.name` AS customer_name, `o.fee` AS order_fee \nFROM `customers` c \nJOIN `orders` o ON c.id = o.customer_id \nWHERE o.fee > 15.00 \nLIMIT 5;\n```\n\nHowever, the first query is correct and generally fine to use without quotes unless your database setup requires them.", name='sql_db_query_checker', id='d718c11d-12b7-4499-b0c3-de14a890dc98', tool_call_id='308f8f47-b1fd-4b33-a128-56a39e8e2714'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:55:31.7370847Z', 'done': True, 'done_reason': 'stop', 'total_duration': 174669424300, 'load_duration': 19545600, 'prompt_eval_count': 1400, 'prompt_eval_duration': 124359644500, 'eval_count': 154, 'eval_duration': 50253279400, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-f7aa14de-0e39-4420-833c-87080cf6dda0-0', tool_calls=[{'name': 'sql_db_query', 'args': {'query': 'SELECT c.name AS customer_name, o.fee AS order_fee FROM customers c JOIN orders o ON c.id = o.customer_id WHERE o.fee > 15.00 LIMIT 5'}, 'id': 'a2bb2bd1-7109-4ae5-81ab-41a7e1eeb4bc', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1400, 'output_tokens': 154, 'total_tokens': 1554}), ToolMessage(content="[('李雷', Decimal('20.00')), ('李雷', Decimal('21.00'))]", name='sql_db_query', id='460e6ff4-0be5-48a0-b9ae-d8a5a2a41d02', tool_call_id='a2bb2bd1-7109-4ae5-81ab-41a7e1eeb4bc'), AIMessage(content='The query returned the following results:\n\n- Customer: 李雷, Order Fee: 20.00\n- Customer: 李雷, Order Fee: 21.00\n\nBased on this data, here are the customers and their orders that have a fee greater than 15.00:\n\n1. **Customer:** 李雷, **Order Fee:** 20.00\n2. **Customer:** 李雷, **Order Fee:** 21.00\n\nThese results indicate that only customer "李雷" has placed orders with fees greater than 15.00, with the highest fee being 21.00.', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:56:25.9152145Z', 'done': True, 'done_reason': 'stop', 'total_duration': 54142390300, 'load_duration': 19204800, 'prompt_eval_count': 1507, 'prompt_eval_duration': 9807790900, 'eval_count': 143, 'eval_duration': 44258881800, 'message': Message(role='assistant', content='The query returned the following results:\n\n- Customer: 李雷, Order Fee: 20.00\n- Customer: 李雷, Order Fee: 21.00\n\nBased on this data, here are the customers and their orders that have a fee greater than 15.00:\n\n1. **Customer:** 李雷, **Order Fee:** 20.00\n2. **Customer:** 李雷, **Order Fee:** 21.00\n\nThese results indicate that only customer "李雷" has placed orders with fees greater than 15.00, with the highest fee being 21.00.', images=None, tool_calls=None)}, id='run-53ca8cba-f749-462d-9f69-656ebbab8450-0', usage_metadata={'input_tokens': 1507, 'output_tokens': 143, 'total_tokens': 1650})]}
#结果不理想，使用专业SQL的大模型效果会更好

2.3 自定义工具：Python函数封装

from pydantic import BaseModel, Field
from langchain.tools import tool

# 假设的运费计算参数（实际项目中可能从数据库读取）
base_rate = {"A": 50.0, "B": 40.0, "C": 30.0}
price_per_kg = {"A": 10.0, "B": 8.0, "C": 6.0}

# 内部函数示例实现
def get_shipping_zip_zone(destination: str) -> str:
    """根据邮编前三位判断区域（示例逻辑）"""
    prefix = destination[:3]
    if prefix < "200":
        return "A"
    elif prefix < "400":
        return "B"
    else:
        return "C"

# 输入参数模型
class ShippingInput(BaseModel):
    weight: float = Field(..., gt=0, description="包裹重量（千克）")
    destination: str = Field(..., pattern="^[0-9]{6}$", description="6位数字目的地邮编")

# 注册为 Langchain 工具
@tool(args_schema=ShippingInput, return_direct=True)
def shipping_calculator(weight: float, destination: str) -> str:
    """运费计算工具：根据重量和邮编计算精确运费"""
    zone = get_shipping_zip_zone(destination)
    total = base_rate[zone] + weight * price_per_kg[zone]
    return f"目的地 {destination}（区域 {zone}）运费：¥{total:.2f}"

print(shipping_calculator.run({"weight": 2.5, "destination": "123456"}))
# 输出：目的地 123456（区域 A）运费：¥75.00

2.4 插件市场：Tavily Search集成

from langchain_community.tools import TavilySearchResults

# 设置Tavily API密钥（实际开发建议使用环境变量管理密钥）
os.environ["TAVILY_API_KEY"] = "tvly-dev-xxxxxx"

def search(query: str) -> str:
    """
    实时信息搜索工具
    参数:
        query (str): 搜索关键词
    返回:
        str: 搜索结果摘要
    """
    print("查询关键词为：" + query)  # 调试日志：显示搜索关键词
    tool = TavilySearchResults(
        max_results=5,
        search_depth="advanced",
        include_answer=True,
    )
    response = tool.invoke(query)
    return response
#完整例子参考第8篇文章