当前位置: 首页 > news >正文

华为云Flexus+DeepSeek征文 | 华为云ModelArts Studio大模型与企业AI会议纪要场景的对接方案

一、方案架构与核心代码

  1. 基础环境配置

华为云ModelArts SDK初始化

from modelarts.session import Session
from modelarts.estimator import Estimator

# Authenticated ModelArts session. The literal values below are placeholders;
# load real credentials from the environment or a secret store rather than
# committing them to source control.
session = Session(
    access_key='your_access_key',
    secret_key='your_secret_key',
    project_id='your_project_id',
    region='cn-north-4',
)

# Create the Notebook development environment.
estimator = Estimator(
    modelarts_session=session,
    train_instance_type='ml.p3.large',
    train_instance_count=1,
    framework_type='PyTorch-1.8',
    framework_version='py3',
    log_url='obs://your-bucket/logs/',
)
  2. 会议音频处理模块
import numpy as np
from huaweicloud_sis.client.rasr_client import RasrClient
from huaweicloud_sis.bean.rasr_request import RasrRequest# 华为云语音识别服务初始化
def init_speech_client():
    """Build a Huawei Cloud speech-recognition client.

    Returns:
        A ``RasrClient`` constructed from the access key, secret key, region
        and project ID defined in this function.
    """
    # Placeholder credentials; replace with values loaded from configuration.
    ak = 'your_ak'
    sk = 'your_sk'
    region = 'cn-north-4'
    project_id = 'your_project_id'
    return RasrClient(ak, sk, region, project_id)


# Audio-to-text processing
def audio_to_text(audio_path):
    """Transcribe an audio file with the speech-recognition service.

    Args:
        audio_path: Path of the audio file to read (opened in binary mode).

    Returns:
        Whatever ``get_result()`` yields on the recognition response.

    Raises:
        OSError: If the audio file cannot be opened or read.
    """
    client = init_speech_client()

    request = RasrRequest()
    request.set_audio_format('wav')  # wav, mp3 and other formats are supported
    request.set_property('speaker_diarization', 'true')  # enable speaker separation
    request.add_word('公司术语')  # register a custom vocabulary entry

    with open(audio_path, 'rb') as f:
        audio_data = f.read()
    request.set_data(audio_data)

    # NOTE(review): the method name suggests short clips only; confirm the
    # service's duration limit before sending full-length meeting recordings.
    result = client.short_audio_recognize(request)
    return result.get_result()


# Example usage
transcript = audio_to_text('meeting_audio.wav')

二、大模型集成代码

  1. Flexus+DeepSeek模型加载
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM# 加载华为云ModelArts上的预训练模型
# NOTE(review): from_pretrained is documented for Hub ids and local
# directories; confirm an obs:// URL is resolvable here or sync the model to
# a local directory first.
model_path = 'obs://your-bucket/models/flexus-deepseek-meeting/'

# trust_remote_code=True runs Python code shipped with the checkpoint, so only
# point model_path at a source you control.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Prompt template for meeting-minutes generation
# Filled with str.format using the fields meeting_topic, participants,
# meeting_time and transcript. Every "### " heading must start its own line:
# the output parser looks for "### <title>" followed by a newline.
MEETING_PROMPT = """
作为专业的会议纪要助手,请根据以下会议转录内容生成结构化会议纪要:

会议主题: {meeting_topic}
参会人员: {participants}
会议时间: {meeting_time}
转录内容:
{transcript}

请按照以下格式输出:
### 会议摘要
- 主要讨论点1
- 主要讨论点2

### 决策事项
1. 事项描述 (负责人: xxx, 截止时间: yyyy-mm-dd)

### 待办事项
- [ ] 任务1 (负责人: xxx)
- [ ] 任务2 (负责人: xxx)
"""
  2. 会议纪要生成核心逻辑
def generate_meeting_minutes(transcript, meeting_info):
    """Generate structured meeting minutes from a transcript.

    Args:
        transcript: Meeting transcript text inserted into the prompt.
        meeting_info: Mapping with the keys ``'topic'``, ``'participants'``
            (an iterable of strings) and ``'time'``.

    Returns:
        The dict produced by ``post_process_output``.

    Raises:
        KeyError: If ``meeting_info`` lacks one of the required keys.
    """
    # Pre-processing: fill the prompt template.
    inputs = MEETING_PROMPT.format(
        meeting_topic=meeting_info['topic'],
        participants=", ".join(meeting_info['participants']),
        meeting_time=meeting_info['time'],
        transcript=transcript,
    )

    # Inference. The model is loaded with device_map="auto", so send the
    # inputs to the model's own device rather than assuming CUDA.
    input_ids = tokenizer.encode(inputs, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids,
        # max_length would also count the prompt tokens, leaving little or no
        # room to generate for a long transcript; budget new tokens instead.
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
    )

    # Post-processing: decode only the newly generated tokens. The returned
    # sequence starts with the prompt, whose format example would otherwise be
    # parsed as if it were model output.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return post_process_output(generated_text)


def post_process_output(text):
    """Extract the structured sections from generated minutes.

    Sections are introduced by ``### <title>`` lines; titles containing
    "摘要", "决策" or "待办" map to the summary, decisions and action items.

    Args:
        text: Generated minutes text.

    Returns:
        Dict with the keys ``'summary'`` (list of str), ``'decisions'`` and
        ``'action_items'`` (lists of dicts). Sections that are absent stay
        empty lists.
    """
    import re

    pattern = r'### (.*?)\n(.*?)(?=###|$)'
    sections = re.findall(pattern, text, re.DOTALL)

    result = {'summary': [], 'decisions': [], 'action_items': []}
    for title, content in sections:
        if '摘要' in title:
            # Split only on hyphens that open a line, so items containing a
            # hyphen ("Q3-Q4", dates) stay in one piece. The first element is
            # whatever precedes the first bullet and is discarded.
            bullets = re.split(r'^[ \t]*-[ \t]*', content, flags=re.MULTILINE)[1:]
            result['summary'] = [item.strip() for item in bullets]
        elif '决策' in title:
            result['decisions'] = parse_decisions(content)
        elif '待办' in title:
            result['action_items'] = parse_action_items(content)
    return result


def parse_decisions(content):
    """Parse the decisions section.

    Args:
        content: Section body with one decision per line, formatted as
            ``N. <description> (负责人: <owner>, 截止时间: <deadline>)``.

    Returns:
        List of dicts with the keys ``'description'``, ``'owner'`` and
        ``'deadline'``. Lines that do not match the format are skipped.
    """
    import re  # local import: this function previously relied on a name that was never in scope

    decisions = []
    for line in content.split('\n'):
        line = line.strip()  # tolerate indented list items
        if not line:
            continue
        match = re.match(r'\d+\. (.*?) \(负责人: (.*?), 截止时间: (.*?)\)', line)
        if match:
            desc, owner, deadline = match.groups()
            decisions.append({
                'description': desc,
                'owner': owner,
                'deadline': deadline,
            })
    return decisions


def parse_action_items(content):
    """Parse the action-items section.

    Args:
        content: Section body with one task per line, formatted as
            ``- [ ] <task> (负责人: <owner>)``; a checked box ``[x]`` is
            accepted as well.

    Returns:
        List of dicts with the keys ``'task'`` and ``'owner'``. Lines that do
        not match the format are skipped.
    """
    import re

    items = []
    for line in content.split('\n'):
        match = re.match(
            r'-\s*\[[ xX]?\]\s*(.*?)\s*\(负责人:\s*(.*?)\)', line.strip()
        )
        if match:
            task, owner = match.groups()
            items.append({'task': task, 'owner': owner})
    return items

三、企业系统对接代码

  1. 与华为云会议服务集成
from huaweicloudsdkcore.auth.credentials import BasicCredentials
from huaweicloudsdkmeeting.v1 import *# 初始化会议服务客户端
def init_meeting_client():
    """Build a client for the Huawei Cloud meeting service.

    Returns:
        The object returned by the ``MeetingClient`` builder, configured with
        the credentials and region set in this function.
    """
    # Placeholder credentials; replace with values loaded from configuration.
    credentials = BasicCredentials('your_ak', 'your_sk', 'your_project_id')
    return (
        MeetingClient.new_builder()
        .with_credentials(credentials)
        .with_region(MeetingRegion.value_of('cn-north-4'))
        .build()
    )


# Fetch the recording files of a meeting
def get_meeting_recordings(meeting_id):
    """List the recordings of a meeting.

    Args:
        meeting_id: Identifier assigned to the request's ``conference_id``.

    Returns:
        The ``recordings`` attribute of the service response.
    """
    client = init_meeting_client()
    request = ListRecordingsRequest()
    request.conference_id = meeting_id
    response = client.list_recordings(request)
    return response.recordings


# Download a meeting recording
def download_recording(recording_id, save_path):
    """Download a recording and write it to a local file.

    Args:
        recording_id: Identifier assigned to the request's ``recording_id``.
        save_path: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the destination file cannot be opened or written.
    """
    client = init_meeting_client()
    request = DownloadRecordingRequest()
    request.recording_id = recording_id
    response = client.download_recording(request, stream=True)

    # Write in 8 KiB chunks so the whole recording is never held in memory.
    with open(save_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
  2. 与企业知识库对接
    import requests
    from datetime import datetime

将会议纪要存入企业知识库

def save_to_knowledge_base(meeting_minutes, meeting_info):
    """Store generated meeting minutes in the enterprise knowledge base.

    Args:
        meeting_minutes: Minutes content; sent as the document ``content``.
        meeting_info: Mapping with the keys ``'topic'``, ``'participants'``
            and ``'time'``; ``'projects'`` is optional and defaults to ``[]``.

    Returns:
        The JSON-decoded body of the knowledge-base response.

    Raises:
        KeyError: If ``meeting_info`` lacks a required key.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    payload = {
        "document_type": "meeting_minutes",
        "title": f"{meeting_info['topic']}会议纪要",
        "content": meeting_minutes,
        "metadata": {
            "participants": meeting_info['participants'],
            "meeting_time": meeting_info['time'],
            "created_at": datetime.now().isoformat(),
            "related_projects": meeting_info.get('projects', []),
        },
        "tags": ["auto-generated", "meeting"],
    }

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer your_kb_token",  # placeholder token
    }
    response = requests.post(
        "https://your-kb-api/api/v1/documents",
        json=payload,
        headers=headers,
        # requests waits indefinitely by default; bound the wait so a stalled
        # knowledge-base API cannot hang the whole workflow.
        timeout=30,
    )
    return response.json()

四、完整工作流示例

主处理流程

def process_meeting(meeting_id):
    """Run the end-to-end flow: recording -> transcript -> minutes -> knowledge base.

    Args:
        meeting_id: Identifier of the meeting to process.

    Returns:
        Dict with ``'status'`` set to ``'success'`` and ``'minutes_url'`` taken
        from the ``'url'`` field of the knowledge-base response.

    Raises:
        Exception: If the meeting has no recordings.
    """
    import os
    import tempfile

    # 1. Fetch the recordings from the meeting service.
    recordings = get_meeting_recordings(meeting_id)
    if not recordings:
        raise Exception("未找到会议录音")

    # 2. Download the most recent recording.
    latest_recording = max(recordings, key=lambda x: x.create_time)

    # A private temporary directory keeps the caller-supplied meeting_id out of
    # the file path, avoids collisions between concurrent runs, and removes
    # the audio file once transcription is done.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, "recording.wav")
        download_recording(latest_recording.id, audio_path)

        # 3. Speech to text.
        transcript = audio_to_text(audio_path)

    # 4. Fetch the meeting metadata.
    meeting_info = get_meeting_info(meeting_id)  # implementation omitted

    # 5. Generate the minutes.
    minutes = generate_meeting_minutes(transcript, meeting_info)

    # 6. Save to the knowledge base.
    save_result = save_to_knowledge_base(minutes, meeting_info)

    # 7. Notify the participants.
    notify_participants(meeting_info['participants'], save_result['url'])

    return {
        'status': 'success',
        'minutes_url': save_result['url'],
    }

示例调用

# Run the workflow only when the file is executed as a script.
if __name__ == "__main__":
    result = process_meeting("meeting123")
    print(f"会议纪要处理完成,访问地址: {result['minutes_url']}")
五、模型训练与优化代码

  1. 领域适配微调
    from modelarts.train import TrainingJob

创建微调训练任务

def fine_tune_model(train_data_path):
    """Create and start a fine-tuning training job.

    Args:
        train_data_path: Location of the training data, passed as the
            ``data_url`` of the job's single ``obs`` input.

    Returns:
        The ``TrainingJob`` object after ``create()`` and ``run()`` were called.
    """
    job = TrainingJob(
        name='flexus-deepseek-meeting-ft',
        algorithm='PyTorch-1.8',
        inputs=[
            {
                'data_url': train_data_path,
                'type': 'obs',
            }
        ],
        outputs=[
            {
                'train_url': 'obs://your-bucket/output/',
                'type': 'obs',
            }
        ],
        parameters={
            'learning_rate': 5e-5,
            'epochs': 3,
            'batch_size': 8,
            'max_seq_length': 2048,
        },
        code_dir='obs://your-bucket/code/',
        boot_file='train.py',
        instance_type='ml.p3.8xlarge',
        instance_count=2,
    )

    job.create()
    job.run()
    return job
  2. 评估脚本示例

train.py

import torch
from transformers import Trainer, TrainingArguments

自定义评估指标

def compute_metrics(eval_pred):
    """Compute the custom evaluation metrics for generated minutes.

    Args:
        eval_pred: Pair ``(predictions, labels)`` of token-id batches.

    Returns:
        Dict with the keys ``'key_info_accuracy'`` and ``'format_score'``.
    """
    predictions, labels = eval_pred
    # NOTE(review): this assumes both arrays hold valid token ids. Confirm that
    # predictions are ids rather than raw logits and that any -100 loss-mask
    # values in labels are replaced before decoding.
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Accuracy of key-information extraction.
    key_info_acc = calculate_key_info_accuracy(decoded_preds, decoded_labels)

    # Share of outputs that follow the required format.
    format_score = calculate_format_score(decoded_preds)

    return {
        'key_info_accuracy': key_info_acc,
        'format_score': format_score,
    }

训练配置

# Training configuration: evaluate and checkpoint once per epoch, log every
# 50 steps, train in fp16, and disable external experiment reporting.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=50,
    learning_rate=5e-5,
    fp16=True,
    report_to="none",
)

# model, train_dataset and eval_dataset must be defined before this point.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()
六、部署与API服务

  1. ModelArts在线服务部署
    from modelarts.deploy import Predictor

创建在线推理服务

def deploy_model(model_path):
    """Create an online inference service for the model.

    Args:
        model_path: Location of the model to deploy.

    Returns:
        The ``Predictor`` object after ``create()`` was called.
    """
    predictor = Predictor(
        name='meeting-minutes-service',
        model_path=model_path,
        inference_spec='inference.py',
        instance_type='ml.p2.large',
        instance_count=1,
        framework='PyTorch-1.8',
        framework_version='py3',
        wait=True,
    )
    predictor.create()
    return predictor

inference.py示例

from flask import Flask, request, jsonify

# Flask takes the import name of the application module.
app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """Generate meeting minutes for the JSON payload of a POST request.

    The body must be a JSON object with the keys ``transcript`` and
    ``meeting_info``.

    Returns:
        The generated minutes as a JSON response, or a JSON error with
        status 400 when a required field is missing.
    """
    data = request.json
    # Reject incomplete payloads explicitly rather than failing with a
    # KeyError, which would surface as an opaque server error.
    if (
        not isinstance(data, dict)
        or 'transcript' not in data
        or 'meeting_info' not in data
    ):
        return jsonify({"error": "transcript and meeting_info are required"}), 400

    minutes = generate_meeting_minutes(data['transcript'], data['meeting_info'])
    return jsonify(minutes)

# Start the server only when the file is executed as a script.
# Host 0.0.0.0 listens on all network interfaces.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
2. API调用示例
import requests

def call_meeting_minutes_api(transcript, meeting_info):
    """Call the deployed meeting-minutes prediction endpoint.

    Args:
        transcript: Meeting transcript text.
        meeting_info: Meeting metadata sent alongside the transcript.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    url = "https://your-endpoint/predict"
    headers = {
        "Content-Type": "application/json",
        "X-Auth-Token": "your_api_token",  # placeholder token
    }
    payload = {
        "transcript": transcript,
        "meeting_info": meeting_info,
    }

    # requests waits indefinitely by default; generation can be slow, so use a
    # generous but finite limit.
    response = requests.post(url, json=payload, headers=headers, timeout=120)
    return response.json()

使用示例

# Example call; assumes `transcript` was produced earlier (e.g. by audio_to_text).
result = call_meeting_minutes_api(transcript, {
    "topic": "Q3产品规划会议",
    "participants": ["张三", "李四", "王五"],
    "time": "2023-09-15 14:00",
})
七、安全与权限控制

华为云IAM权限验证装饰器

def iam_required(permission):
    """Build a decorator that guards a handler with an IAM permission check.

    The wrapped handler reads the ``X-Auth-Token`` request header and passes it
    with ``permission`` to ``verify_iam_token``. If verification fails, it
    returns a JSON error with status 403 instead of calling the handler.

    Args:
        permission: Permission name the token must grant.

    Returns:
        A decorator to apply to a request handler.
    """
    from functools import wraps  # local import: wraps was never brought into scope

    def decorator(f):
        @wraps(f)  # keep the handler's name and docstring on the wrapper
        def decorated_function(*args, **kwargs):
            token = request.headers.get('X-Auth-Token')
            if not verify_iam_token(token, permission):
                return jsonify({"error": "Unauthorized"}), 403
            return f(*args, **kwargs)
        return decorated_function
    return decorator

数据加密处理

from huaweicloudsdkcore.auth.encryption_signer import EncryptionSigner

def encrypt_sensitive_data(data):
    """Encrypt sensitive data before it is stored.

    Args:
        data: Value passed to the signer's ``encrypt`` method.

    Returns:
        Whatever ``EncryptionSigner.encrypt`` returns for ``data``.
    """
    # Placeholder key; load the real key from a key-management service.
    signer = EncryptionSigner('your_encryption_key')
    return signer.encrypt(data)

使用示例

@iam_required('MeetingMinutes.Write')
def save_minutes():
    """Encrypt the ``content`` field of the request's JSON body.

    Persisting the encrypted value is left to the implementer; as written the
    function returns ``None``.
    """
    data = request.json
    encrypted_data = encrypt_sensitive_data(data['content'])
    # Storage handling goes here...
本方案提供了从音频处理、大模型集成到企业系统对接的完整代码实现,开发者可根据实际需求调整参数和流程。建议在实际部署前进行充分的测试和性能优化,特别是针对企业特定的会议场景和术语进行模型微调。

http://www.dtcms.com/a/263565.html

相关文章:

  • 数据库事务全面指南:概念、语法、机制与最佳实践
  • C++ 快速回顾(五)
  • 【冷知识】Spring Boot 配置文件外置
  • SpringBoot -- 自动配置原理
  • Bessel位势方程求解步骤
  • STL简介+string模拟实现
  • 「Java案例」计算矩形面积
  • 大数据(3)-Hive
  • 【算法】动态规划:1137. 第 N 个泰波那契数
  • 初等变换 线性代数
  • C++ STL之string类
  • Windows11系统中安装docker并配置docker镜像到pycharm中
  • EA自动交易完全指南:从策略设计到实盘部署
  • SpringBoot 启动入口深度解析:main方法执行全流程
  • Android Telephony 网络状态中的 NAS 信息
  • 反射,枚举和lambda表达式
  • 《垒球百科》老年俱乐部有哪些项目·垒球1号位
  • 从零到一通过Web技术开发一个五子棋
  • 【MySQL基础】MySQL索引全面解析:从原理到实践
  • 人形机器人_双足行走动力学:MIT机器人跌落自恢复算法及应用
  • 使用Verilog设计模块输出中位数,尽可能较少资源使用
  • 本周股指想法
  • 产品背景知识——API、SDK、Library、Framework、Protocol
  • 10.【C语言学习笔记】指针(二)
  • Python 数据分析与机器学习入门 (八):用 Scikit-Learn 跑通第一个机器学习模型
  • stm32达到什么程度叫精通?
  • 百度文心大模型4.5系列正式开源,开源会给百度带来什么?
  • docker-compose部署Nacos、Seata、MySQL
  • API接口安全-1:身份认证之传统Token VS JWT
  • 【甲方安全建设】SDL基线建设及审计评估