
36. Optimization Methods

1. Gradient Descent

%matplotlib inline
import numpy as np
import torch
from d2l import torch as d2l
##########################################################################
# Plotting helper
def show_trace(results, f):
    n = max(abs(min(results)), abs(max(results)))
    f_line = torch.arange(-n, n, 0.01)
    d2l.set_figsize()
    d2l.plot([f_line, results], [[f(x) for x in f_line], [f(x) for x in results]],
             'x', 'f(x)', fmts=['-', '-o'])

# Objective function
def f(x):
    return x ** 2

# Gradient (derivative) of the objective function
def f_grad(x):
    return 2 * x

def gd(eta, f_grad):
    x = 10.0
    results = [x]
    for i in range(10):
        x = x - eta * f_grad(x)
        results.append(float(x))
        print(f'epoch {i}: x={x:.5f}')
    return results
##########################################################################
results = gd(0.2, f_grad)
show_trace(results, f)
##########################################################################
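Gradient descent repeatedly applies x ← x − η·f′(x). The learning rate η trades speed against stability: for f(x) = x² the update is x ← (1 − 2η)x, so any η > 1 makes |1 − 2η| > 1 and the iterates diverge. A minimal sketch reusing gd and show_trace from above (the rates 0.05 and 1.1 are illustrative choices, not from the original):

# Too small a learning rate: progress toward x = 0 is slow
show_trace(gd(0.05, f_grad), f)
# Too large a learning rate: x overshoots and oscillates outward
show_trace(gd(1.1, f_grad), f)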

2. Stochastic Gradient Descent (SGD)

%matplotlib inline
import numpy as np
import torch
from d2l import torch as d2l

def f(x1, x2):  # objective function
    return x1 ** 2 + 2 * x2 ** 2

def f_grad(x1, x2):  # gradient of the objective function
    return 2 * x1, 4 * x2

def sgd(x1, x2, s1, s2, f_grad):
    g1, g2 = f_grad(x1, x2)
    # Simulate a noisy gradient
    g1 += np.random.normal(0.0, 1, (1,)).item()
    g2 += np.random.normal(0.0, 1, (1,)).item()
    # Here eta_t should be updated, i.e., decayed, over time
    eta_t = eta * lr
    return (x1 - eta_t * g1, x2 - eta_t * g2, 0, 0)
eta = 0.1
lr = 0.9
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=50, f_grad=f_grad))
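Note that eta_t = eta * lr above is in fact constant from step to step, so the noisy iterates keep wandering around the minimum instead of settling. A common fix is to actually decay the learning rate over time, for example exponentially. A minimal sketch under that assumption (exponential_lr and the global step counter t are illustrative helpers, not part of d2l):

import math

t = 0
def exponential_lr():
    # Decay factor that shrinks with the global step counter t
    global t
    t += 1
    return math.exp(-0.1 * t)

def sgd_decayed(x1, x2, s1, s2, f_grad):
    g1, g2 = f_grad(x1, x2)
    g1 += np.random.normal(0.0, 1, (1,)).item()
    g2 += np.random.normal(0.0, 1, (1,)).item()
    eta_t = eta * exponential_lr()  # now genuinely decays over time
    return (x1 - eta_t * g1, x2 - eta_t * g2, 0, 0)

d2l.show_trace_2d(f, d2l.train_2d(sgd_decayed, steps=50, f_grad=f_grad))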

3. Mini-batch Stochastic Gradient Descent

import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt
from d2l import torch as d2l
np.random.seed(0)
torch.manual_seed(0)
# Define a linear regression network
class LinearRegressionModel(torch.nn.Module):
    def __init__(self):
        super(LinearRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)
# Generate the dataset
# 100 samples
X = torch.randn(100, 1) * 10
y = 2 * X + 1 + torch.randn(100, 1) * 2
# Set the hyperparameters
batch_size = 16
lr = 0.01
num_epochs = 100
model = LinearRegressionModel()
loss_fn = torch.nn.MSELoss()
dataset = torch.utils.data.TensorDataset(X, y)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Training loop
losses = []
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for i, (batch_X, batch_y) in enumerate(data_loader):
        model.zero_grad()
        predictions = model(batch_X)
        loss = loss_fn(predictions, batch_y)
        loss.backward()
        # Update the parameters after every mini-batch
        with torch.no_grad():
            for param in model.parameters():
                param -= lr * param.grad
        epoch_loss += loss.item()
    avg_loss = epoch_loss / len(data_loader)
    losses.append(avg_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")
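Because the data were generated as y = 2x + 1 plus noise, a quick sanity check after training is that the learned weight and bias approach 2 and 1. A minimal check, assuming the loop above has finished:

w = model.linear.weight.item()
b = model.linear.bias.item()
print(f"learned w = {w:.3f} (true 2), learned b = {b:.3f} (true 1)")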
# Plot the results
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
axs[0].plot(range(1, num_epochs + 1), losses, label='Loss')
axs[0].set_xlabel('Epochs')
axs[0].set_ylabel('Loss')
axs[0].set_title('Training Loss')
axs[0].legend()
with torch.no_grad():
    predicted = model(X).numpy()
axs[1].scatter(X.numpy(), y.numpy(), label='True Data')
axs[1].plot(X.numpy(), predicted, label='Fitted Line', color='r')
axs[1].set_xlabel('X')
axs[1].set_ylabel('y')
axs[1].legend()
axs[1].set_title('Model Prediction vs True Data')
plt.tight_layout() 
plt.show()
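The manual update param -= lr * param.grad inside the loop is exactly the step that torch.optim.SGD performs, so the loop can be written more idiomatically with an optimizer. A minimal equivalent sketch, reusing the model, loss_fn, and data_loader defined above:

optimizer = torch.optim.SGD(model.parameters(), lr=lr)
for epoch in range(num_epochs):
    for batch_X, batch_y in data_loader:
        optimizer.zero_grad()
        loss = loss_fn(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()  # applies param -= lr * param.grad to every parameter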

4. Momentum

# Momentum method
%matplotlib inline
import torch
from d2l import torch as d2l

eta = 0.4

def f_2d(x1, x2):
    return 0.1 * x1 ** 2 + 2 * x2 ** 2

def momentum_2d(x1, x2, v1, v2):
    v1 = beta * v1 + 0.2 * x1
    v2 = beta * v2 + 4 * x2
    return x1 - eta * v1, x2 - eta * v2, v1, v2
eta, beta = 0.6, 0.5
d2l.show_trace_2d(f_2d, d2l.train_2d(momentum_2d))
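The update implemented by momentum_2d is v ← βv + g, x ← x − ηv: it accumulates an exponentially weighted average of past gradients, which damps the oscillation along the steep x2 direction. PyTorch's built-in SGD uses the same form via its momentum argument. A minimal sketch on the same objective (the starting point (-5, -2) mirrors d2l.train_2d):

w = torch.tensor([-5.0, -2.0], requires_grad=True)
optimizer = torch.optim.SGD([w], lr=0.6, momentum=0.5)
for step in range(20):
    optimizer.zero_grad()
    loss = f_2d(w[0], w[1])
    loss.backward()
    optimizer.step()
print(w)  # both coordinates should be close to the minimum at (0, 0)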

5. Adam

%matplotlib inline
import torch
from d2l import torch as d2l

def init_adam_states(feature_dim):
    v_w, v_b = torch.zeros((feature_dim, 1)), torch.zeros(1)
    s_w, s_b = torch.zeros((feature_dim, 1)), torch.zeros(1)
    return ((v_w, s_w), (v_b, s_b))

def adam(params, states, hyperparams):
    beta1, beta2, eps = 0.9, 0.999, 1e-6
    for p, (v, s) in zip(params, states):
        with torch.no_grad():
            # v_t = beta1 * v_{t-1} + (1 - beta1) * g_t
            # s_t = beta2 * s_{t-1} + (1 - beta2) * g_t^2
            v[:] = beta1 * v + (1 - beta1) * p.grad
            s[:] = beta2 * s + (1 - beta2) * torch.square(p.grad)
            # v_hat_t = v_t / (1 - beta1^t)
            # s_hat_t = s_t / (1 - beta2^t)
            v_bias_corr = v / (1 - beta1 ** hyperparams['t'])
            s_bias_corr = s / (1 - beta2 ** hyperparams['t'])
            # g'_t = eta * v_hat_t / (sqrt(s_hat_t) + epsilon)
            # x_t = x_{t-1} - g'_t
            p[:] -= hyperparams['lr'] * v_bias_corr / (torch.sqrt(s_bias_corr) + eps)
        p.grad.data.zero_()
    hyperparams['t'] += 1

data_iter, feature_dim = d2l.get_data_ch11(batch_size=10)
d2l.train_ch11(adam, init_adam_states(feature_dim),
               {'lr': 0.01, 't': 1}, data_iter, feature_dim)
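The same update rule is available as torch.optim.Adam (whose default eps is 1e-8 rather than the 1e-6 used above). A minimal sketch using d2l's concise training helper, assuming the same data_iter as above:

trainer = torch.optim.Adam
d2l.train_concise_ch11(trainer, {'lr': 0.01}, data_iter)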
