10. 工具(Tools)集成:连接API、数据库与外部服务的桥梁
引言:大模型的"手脚"革命
2025年某跨国零售集团通过LangChain工具集成,将30+业务系统接入AI助手,采购决策时效从3天缩短至2小时。基于LangChain最新文档,本文将揭示工具系统全新架构,并手把手教你用动态工具绑定+权限沙箱,让大模型安全高效地操作现实世界系统。
一、LangChain工具类型全景
二、四大核心工具开发模式
2.1 API工具:动态Headers绑定
import os
from fastapi.params import Query
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain.tools import tool
import requests
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_ollama import ChatOllama
# Demo placeholder credential, read back via os.getenv('API_KEY') when the
# inventory tool builds its Authorization header. Replace with a real key
# supplied from outside the source code before using this for anything real.
os.environ["API_KEY"] = "your_api_key"
# LangChain tool that looks up the live stock level of one SKU in one warehouse.
#
# Parameters:
#   sku       -- product identifier, e.g. "IPHONE15-BK"
#   warehouse -- warehouse code, e.g. "WH02"
# Returns the "stock" field of the JSON body returned by the inventory API.
# Raises requests.HTTPError on a non-2xx reply and requests.Timeout when the
# API does not answer within 5 seconds.
#
# NOTE: the docstring below is intentionally kept verbatim (in Chinese): the
# @tool decorator uses it as the tool description shown to the model, so it is
# effectively part of the prompt rather than ordinary documentation.
@tool
def inventory_check(
    sku: str = Query(
        ...,
        description="商品唯一标识码,由字母数字组成(如:IPHONE15-BK)",
    ),
    warehouse: str = Query(
        ...,
        description="仓库编号,格式为'WH'+两位数字(如:WH02)",
    ),
):
    """
    实时库存查询工具 - 需同时提供以下两个参数:
    功能说明:
    1. 精确识别用户提到的商品SKU编号,必须包含型号和颜色代码
    2. 检测用户指定的仓库位置,优先使用用户明示的仓库编号
    3. 当参数缺失时需分情况询问:
    - 缺少SKU时追问:"请问您需要查询哪个具体型号的商品?"
    - 缺少仓库时建议:"当前可查询北京仓(WH01)、上海仓(WH02),您要查哪个?"
    参数交互指南:
    ✅ 正确案例:
    用户说:"查看上海仓库里IPHONE15-WHITE的库存"
    → 解析:sku="IPHONE15-WHITE", warehouse="WH02"
    ❌ 常见错误处理:
    1. 用户说:"帮我查下库存" → 响应:"请同时提供商品型号和仓库位置"
    2. 用户说:"WH03有货吗" → 响应:"需要知道具体商品型号才能查询"
    特殊场景处理:
    1. 当用户使用口语化名称(如"黑色手机")时,需要引导用户提供标准SKU
    2. 接收到模糊仓库名称(如"华北仓")时,应提示可用编码列表
    3. 遇到拼写错误(WHO1)应自动修正为WH01后验证
    响应示例:
    {
    "stock": 150,
    "sku": "IPHONE15-BK",
    "warehouse": "WH01"
    }
    """
    response = requests.get(
        "https://api.example.com/inventory",
        # Let requests URL-encode the values; the arguments are model-generated
        # text and may contain characters that would corrupt a hand-built URL.
        params={"sku": sku, "warehouse": warehouse},
        headers={"Authorization": f"Bearer {os.getenv('API_KEY')}"},
        # Never let a stalled upstream hang the agent loop.
        timeout=5,
    )
    # Surface HTTP errors (401/404/5xx) explicitly instead of failing later
    # with a confusing KeyError on the error payload.
    response.raise_for_status()
    return response.json()["stock"]
# Prompt layout: fixed system instruction, the user's question, then the
# scratchpad slot where the agent's intermediate tool calls are inserted.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是一个智能助手,用中文回答用户问题。"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# The only tool exposed to the model in this example.
tools = [inventory_check]

# Tool-calling agent backed by a local Ollama model.
agent = create_tool_calling_agent(
    llm=ChatOllama(model="qwen2.5"), tools=tools, prompt=prompt
)

# Executor that drives the agent loop.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=3,  # cap the number of agent iterations
    handle_parsing_errors=True,  # recover from unparsable model output
    verbose=True,  # print the intermediate steps
)

# Smoke test (should be removed in a real production module).
result = agent_executor.invoke(
    {"input": "请查询WH02仓库IPHONE15-PRO-256G的库存"}
)
print("执行结果:", result)
测试桩:
from fastapi import FastAPI, HTTPException, Header, Depends, Query
# FastAPI application object; the /inventory route below is registered on it.
app = FastAPI(title="Inventory API")
# Dependency that validates the shape of the Authorization header.
async def verify_api_key(authorization: str = Header(None)):
    """Check that the request carries an ``Authorization: Bearer <token>`` header.

    Args:
        authorization: Raw value of the Authorization header, or None when
            the header is absent.

    Returns:
        True when the header is present and well-formed.

    Raises:
        HTTPException: 401 if the header is missing, or if it is not exactly
            two whitespace-separated parts with a "bearer" scheme
            (case-insensitive).
    """
    if not authorization:
        raise HTTPException(status_code=401, detail="Missing Authorization header")

    parts = authorization.split()
    if len(parts) != 2 or parts[0].lower() != "bearer":
        raise HTTPException(status_code=401, detail="Invalid Authorization format")

    # The token value itself is deliberately NOT compared against a configured
    # key in this stub: any well-formed bearer token is accepted.
    return True
@app.get("/inventory")
async def check_inventory(
    # `examples` must be a list of example values, not a bare string.
    sku: str = Query(..., min_length=3, examples=["ABC123"]),
    warehouse: str = Query(..., min_length=2, examples=["WH01"]),
    _: bool = Depends(verify_api_key),
):
    """Return the stock level for a SKU in a warehouse.

    Args:
        sku: Product SKU code (at least 3 characters).
        warehouse: Warehouse code (at least 2 characters).

    Returns:
        A JSON object echoing ``sku`` and ``warehouse`` together with a
        ``stock`` count. This stub always reports a stock of 10.
    """
    return {
        "sku": sku,
        "warehouse": warehouse,
        "stock": 10,
    }
# Start the stub server with uvicorn when this file is executed directly.
if __name__ == "__main__":
    import uvicorn

    # NOTE(review): 0.0.0.0 listens on every network interface, and the stub
    # does not check the token value; consider 127.0.0.1 for local testing.
    uvicorn.run(app, host="0.0.0.0", port=8000)
输出为:
> Entering new AgentExecutor chain...
Invoking: `inventory_check` with `{'sku': 'IPHONE15-PRO-256G', 'warehouse': 'WH02'}`
10当前上海仓库(WH02)中,IPHONE15-PRO-256G的库存数量为10台。
> Finished chain.
执行结果: {'input': '请查询WH02仓库IPHONE15-PRO-256G的库存', 'output': '当前上海仓库(WH02)中,IPHONE15-PRO-256G的库存数量为10台。'}
2.2 数据库工具:SQL安全沙箱
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_community.tools import Tool
from langchain_ollama import ChatOllama
# Open the database connection through the PyMySQL driver.
db = SQLDatabase.from_uri(
    "mysql+pymysql://user:password@localhost:3306/crm",
    include_tables=["customers", "orders"],  # tables the agent may access
    sample_rows_in_table_info=2,  # sample rows included in the table info
    max_string_length=500,  # truncate long string values
    view_support=True,  # allow querying views
)

# Local Ollama chat model shared by the toolkit and the agent.
llm = ChatOllama(model="qwen2.5")

# Toolkit bundling the SQL tools bound to this database and model.
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
from langchain import hub
from langgraph.prebuilt import create_react_agent
# Fetch the shared system prompt for SQL agents from the LangChain hub.
prompt_template = hub.pull("langchain-ai/sql-agent-system-prompt")
# Tell the model which SQL dialect it is really talking to. The connection
# above is MySQL, so a hard-coded "SQLite" would steer the model toward the
# wrong syntax; db.dialect reports the dialect of the connected engine.
system_message = prompt_template.format(dialect=db.dialect, top_k=5)
# NOTE(review): newer langgraph releases rename `state_modifier` to `prompt`;
# confirm against the installed version.
agent_executor = create_react_agent(
    llm, toolkit.get_tools(), state_modifier=system_message
)

# The prebuilt ReAct agent reads its input from the "messages" state key.
# Passing {"input": ...} leaves the message list empty, so the model never
# sees the question.

# Scenario 1: customer order query
result = agent_executor.invoke(
    {"messages": [("user", "ID为1的客户最近3个月订单金额总和是多少?")]}
)
print(result)
print("---------")
# Scenario 2: schema inspection
schema = agent_executor.invoke(
    {"messages": [("user", "请列出orders表所有字段及注释")]}
)
print(schema)
输出为:
{'messages': [AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:01.6559609Z', 'done': True, 'done_reason': 'stop', 'total_duration': 125961962200, 'load_duration': 59620066500, 'prompt_eval_count': 702, 'prompt_eval_duration': 61369866500, 'eval_count': 18, 'eval_duration': 4970551200, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-eea1954a-21d7-4230-a810-fd9666d6be05-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': 'cb2548bd-5dbf-42a2-b316-54c993acd62d', 'type': 'tool_call'}], usage_metadata={'input_tokens': 702, 'output_tokens': 18, 'total_tokens': 720}), ToolMessage(content='customers, orders', name='sql_db_list_tables', id='6a7a629b-f851-4569-8769-756d0fcc33e3', tool_call_id='cb2548bd-5dbf-42a2-b316-54c993acd62d'), AIMessage(content=' RonaldoDB has two tables: customers and orders. Let\'s explore the schema of these tables.\n<tool_call>\n{"name": "sql_db_schema", "arguments": "customers, orders"}\n</tool_call>', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:16.300491Z', 'done': True, 'done_reason': 'stop', 'total_duration': 14482071200, 'load_duration': 1504567200, 'prompt_eval_count': 741, 'prompt_eval_duration': 2029415700, 'eval_count': 39, 'eval_duration': 10810406500, 'message': Message(role='assistant', content=' RonaldoDB has two tables: customers and orders. Let\'s explore the schema of these tables.\n<tool_call>\n{"name": "sql_db_schema", "arguments": "customers, orders"}\n</tool_call>', images=None, tool_calls=None)}, id='run-2e5facc5-f598-45e2-884e-602472198de5-0', usage_metadata={'input_tokens': 741, 'output_tokens': 39, 'total_tokens': 780})]}
---------
{'messages': [AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:21.827269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 5522579100, 'load_duration': 20329500, 'prompt_eval_count': 702, 'prompt_eval_duration': 302476500, 'eval_count': 18, 'eval_duration': 5199773100, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-0e74210c-e0f0-4a53-a4fe-fa54d600d5e4-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': '2cbdb3b5-f6e9-40c7-9a6c-51b23971649b', 'type': 'tool_call'}], usage_metadata={'input_tokens': 702, 'output_tokens': 18, 'total_tokens': 720}), ToolMessage(content='customers, orders', name='sql_db_list_tables', id='bf19148a-9211-438f-90d1-39b8e4971a7a', tool_call_id='2cbdb3b5-f6e9-40c7-9a6c-51b23971649b'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:49:31.49173Z', 'done': True, 'done_reason': 'stop', 'total_duration': 9659962700, 'load_duration': 24766500, 'prompt_eval_count': 741, 'prompt_eval_duration': 2361401100, 'eval_count': 24, 'eval_duration': 7262794300, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-6e538d8a-9863-44c8-af43-074c9f3d2099-0', tool_calls=[{'name': 'sql_db_schema', 'args': {'table_names': 'customers,orders'}, 'id': '33e20b40-ec35-4e5a-8d34-e9df191d59f2', 'type': 'tool_call'}], usage_metadata={'input_tokens': 741, 'output_tokens': 24, 'total_tokens': 765}), ToolMessage(content="\nCREATE TABLE customers (\n\tid INTEGER NOT NULL AUTO_INCREMENT, \n\tname VARCHAR(255), \n\tcreate_time DATETIME, \n\tPRIMARY KEY (id)\n)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci COMMENT='客户表' DEFAULT CHARSET=utf8mb4\n\n/*\n2 rows from customers table:\nid\tname\tcreate_time\n1\t李雷\tNone\n2\t韩梅梅\tNone\n*/\n\n\nCREATE TABLE orders (\n\tid INTEGER NOT NULL COMMENT 'id' AUTO_INCREMENT, \n\tcustomer_id INTEGER COMMENT '客户id', \n\tfee 
DECIMAL(10, 2) COMMENT '金额', \n\tcreate_time DATETIME, \n\tPRIMARY KEY (id)\n)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci COMMENT='订单表' DEFAULT CHARSET=utf8mb4\n\n/*\n2 rows from orders table:\nid\tcustomer_id\tfee\tcreate_time\n1\t1\t11.20\t2024-11-02 12:00:00\n2\t1\t20.00\t2025-01-05 15:00:00\n*/", name='sql_db_schema', id='b5369f98-de95-4960-8c51-e9721ae1da0b', tool_call_id='33e20b40-ec35-4e5a-8d34-e9df191d59f2'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:50:56.2460969Z', 'done': True, 'done_reason': 'stop', 'total_duration': 84640614900, 'load_duration': 56681300, 'prompt_eval_count': 1030, 'prompt_eval_duration': 23995596900, 'eval_count': 206, 'eval_duration': 60558135500, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-9b11eb06-5588-408e-99d9-3b44ab5de3b6-0', tool_calls=[{'name': 'sql_db_query_checker', 'args': {'query': 'SELECT c.name AS customer_name, o.fee AS order_fee FROM customers c JOIN orders o ON c.id = o.customer_id WHERE o.fee > 15.00 LIMIT 5'}, 'id': '308f8f47-b1fd-4b33-a128-56a39e8e2714', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1030, 'output_tokens': 206, 'total_tokens': 1236}), ToolMessage(content="The provided SQL query does not contain any of the common mistakes you mentioned:\n\n- It does not use `NOT IN` with NULL values.\n- It does not use `UNION` when `UNION ALL` should have been used.\n- It uses `>` which is correct for an exclusive range.\n- There are no data type mismatches in the predicates.\n- The identifiers (table and column names) are not quoted, but this is typically fine unless there are reserved words or special characters in the names.\n- There are no function calls with incorrect arguments.\n- It uses the proper columns for joins.\n\nTherefore, the query can be used as-is. 
Here it is reproduced:\n\n```sql\nSELECT c.name AS customer_name, o.fee AS order_fee \nFROM customers c \nJOIN orders o ON c.id = o.customer_id \nWHERE o.fee > 15.00 \nLIMIT 5;\n```\n\nIf you need to quote the identifiers (which is generally a good practice), here's how it would look:\n\n```sql\nSELECT `c.name` AS customer_name, `o.fee` AS order_fee \nFROM `customers` c \nJOIN `orders` o ON c.id = o.customer_id \nWHERE o.fee > 15.00 \nLIMIT 5;\n```\n\nHowever, the first query is correct and generally fine to use without quotes unless your database setup requires them.", name='sql_db_query_checker', id='d718c11d-12b7-4499-b0c3-de14a890dc98', tool_call_id='308f8f47-b1fd-4b33-a128-56a39e8e2714'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:55:31.7370847Z', 'done': True, 'done_reason': 'stop', 'total_duration': 174669424300, 'load_duration': 19545600, 'prompt_eval_count': 1400, 'prompt_eval_duration': 124359644500, 'eval_count': 154, 'eval_duration': 50253279400, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-f7aa14de-0e39-4420-833c-87080cf6dda0-0', tool_calls=[{'name': 'sql_db_query', 'args': {'query': 'SELECT c.name AS customer_name, o.fee AS order_fee FROM customers c JOIN orders o ON c.id = o.customer_id WHERE o.fee > 15.00 LIMIT 5'}, 'id': 'a2bb2bd1-7109-4ae5-81ab-41a7e1eeb4bc', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1400, 'output_tokens': 154, 'total_tokens': 1554}), ToolMessage(content="[('李雷', Decimal('20.00')), ('李雷', Decimal('21.00'))]", name='sql_db_query', id='460e6ff4-0be5-48a0-b9ae-d8a5a2a41d02', tool_call_id='a2bb2bd1-7109-4ae5-81ab-41a7e1eeb4bc'), AIMessage(content='The query returned the following results:\n\n- Customer: 李雷, Order Fee: 20.00\n- Customer: 李雷, Order Fee: 21.00\n\nBased on this data, here are the customers and their orders that have a fee greater than 15.00:\n\n1. **Customer:** 李雷, **Order Fee:** 20.00\n2. 
**Customer:** 李雷, **Order Fee:** 21.00\n\nThese results indicate that only customer "李雷" has placed orders with fees greater than 15.00, with the highest fee being 21.00.', additional_kwargs={}, response_metadata={'model': 'qwen2.5', 'created_at': '2025-04-08T02:56:25.9152145Z', 'done': True, 'done_reason': 'stop', 'total_duration': 54142390300, 'load_duration': 19204800, 'prompt_eval_count': 1507, 'prompt_eval_duration': 9807790900, 'eval_count': 143, 'eval_duration': 44258881800, 'message': Message(role='assistant', content='The query returned the following results:\n\n- Customer: 李雷, Order Fee: 20.00\n- Customer: 李雷, Order Fee: 21.00\n\nBased on this data, here are the customers and their orders that have a fee greater than 15.00:\n\n1. **Customer:** 李雷, **Order Fee:** 20.00\n2. **Customer:** 李雷, **Order Fee:** 21.00\n\nThese results indicate that only customer "李雷" has placed orders with fees greater than 15.00, with the highest fee being 21.00.', images=None, tool_calls=None)}, id='run-53ca8cba-f749-462d-9f69-656ebbab8450-0', usage_metadata={'input_tokens': 1507, 'output_tokens': 143, 'total_tokens': 1650})]}
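#结果不理想:上面输出的消息列表中没有 HumanMessage,说明用户问题并未传给智能体——create_react_agent 需要以 {"messages": [("user", "问题")]} 的形式传入输入。此外,使用专业SQL的大模型效果会更好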
2.3 自定义工具:Python函数封装
from pydantic import BaseModel, Field
from langchain.tools import tool
# Hypothetical shipping-rate parameters (a real project might load these from
# a database). Both tables are keyed by shipping zone ("A", "B", "C").
base_rate = {"A": 50.0, "B": 40.0, "C": 30.0}  # flat fee per zone
price_per_kg = {"A": 10.0, "B": 8.0, "C": 6.0}  # fee per kilogram per zone
# Example implementation of the internal zone lookup.
def get_shipping_zip_zone(destination: str) -> str:
    """Map a postal code to a shipping zone using its first three characters.

    This is illustrative logic: prefixes below "200" are zone "A", prefixes
    below "400" are zone "B", everything else is zone "C".

    Args:
        destination: Postal code string; only the first three characters are
            inspected.

    Returns:
        "A", "B" or "C".
    """
    # The comparison is on strings (lexicographic). For three-digit numeric
    # prefixes this orders the same way as comparing the numbers.
    prefix = destination[:3]
    if prefix < "200":
        return "A"
    if prefix < "400":
        return "B"
    return "C"
# Input schema for the shipping tool. The Field constraints reject
# non-positive weights and anything that is not exactly six digits.
# The description strings are part of the schema and are kept verbatim.
class ShippingInput(BaseModel):
    # Parcel weight in kilograms; must be strictly greater than 0.
    weight: float = Field(..., gt=0, description="包裹重量(千克)")
    # Destination postal code: exactly six ASCII digits.
    destination: str = Field(
        ..., pattern=r"^[0-9]{6}$", description="6位数字目的地邮编"
    )
# Register the calculator as a LangChain tool.
#
# Computes the shipping fee as base_rate[zone] + weight * price_per_kg[zone],
# where the zone is derived from the destination postal code, and returns a
# formatted message containing the destination, zone and fee (two decimals).
# Arguments are validated against ShippingInput; return_direct=True is set on
# the tool decorator.
#
# NOTE: the one-line docstring is kept verbatim (in Chinese) because @tool
# uses it as the tool description presented to the model.
@tool(args_schema=ShippingInput, return_direct=True)
def shipping_calculator(weight: float, destination: str) -> str:
    """运费计算工具:根据重量和邮编计算精确运费"""
    zone = get_shipping_zip_zone(destination)
    total = base_rate[zone] + weight * price_per_kg[zone]
    return f"目的地 {destination}(区域 {zone})运费:¥{total:.2f}"
# Demo call: 2.5 kg to postal code 123456 (zone A): 50.0 + 2.5 * 10.0 = 75.00.
print(shipping_calculator.run({"weight": 2.5, "destination": "123456"}))
# Output: 目的地 123456(区域 A)运费:¥75.00
2.4 插件市场:Tavily Search集成
from langchain_community.tools import TavilySearchResults
# Set the Tavily API key (for real development, manage the key through
# environment variables instead of hard-coding it here).
os.environ["TAVILY_API_KEY"] = "tvly-dev-xxxxxx"
def search(query: str) -> str:
    """Run a real-time web search through Tavily.

    Args:
        query: Search keywords.

    Returns:
        Whatever ``TavilySearchResults.invoke`` returns for the query.
        NOTE(review): the signature says ``str``, but this tool presumably
        yields a list of result dicts on success -- confirm and adjust the
        annotation or post-process the results.
    """
    print("查询关键词为:" + query)  # debug log: show the search keywords
    # Named `tavily` rather than `tool` so it cannot be confused with the
    # `tool` decorator imported from langchain.tools elsewhere in this file.
    tavily = TavilySearchResults(
        max_results=5,
        search_depth="advanced",
        include_answer=True,
    )
    return tavily.invoke(query)
#完整例子参考第8篇文章
三、企业级实战:智能采购系统
3.1 安全架构设计
3.2 性能优化技巧
- 批量执行:对多个SKU库存查询启用并行处理
- 缓存策略:高频数据设置TTL缓存
- 流量控制:限制每分钟API调用次数
四、避坑指南:生产环境七大陷阱
- 权限逃逸:未验证用户上下文中的角色声明
- SQL注入:即使使用工具包也要参数化查询
- 敏感信息泄露:错误日志打印完整响应体
- 版本锁定:插件市场工具需固定版本号
- 僵尸工具:定期清理长期未使用的工具
- 超时失控:设置全局超时(建议API工具<5s)
- 监控缺失:未记录工具调用指标
下期预告
《输出解析(Output Parsers):从自由文本到结构化数据》
- 揭秘:如何让大模型的"自由发挥"变成规整数据?
- 实战:合同关键信息自动抽取系统
- 陷阱:解析失败导致的业务流程中断
工具系统是大模型落地应用的最后一公里。记住:强大的工具需要配以严谨的安全设计,方能释放AI的真正潜力!