# Source: adapted from the Bilibili uploader's "Mathematical Foundations of
# Reinforcement Learning (homework)" GridWorld example code
# (updated through DQN, REINFORCE, A2C).
# Worked examples of SGD, full-batch GD, and mini-batch GD:
# First build the full dataset; mini-batches are later sampled from these 1000 points.
# Fit y = a*x^2 + b*x + c to samples of a known quadratic; the goal is to
# recover a=2, b=3, c=5 by gradient descent on the MSE loss.
np.random.seed(0)  # fixed seed so every run draws the same random batches
X = np.linspace(-10, 10, 1000)  # 1000 evenly spaced inputs on [-10, 10]
Y = 2 * X ** 2 + 3*X +5 # ground-truth targets for the dataset
def quadratic_function(X, a, b, c):
    """Evaluate the quadratic a*X**2 + b*X + c (elementwise on arrays)."""
    value = a * X ** 2 + b * X + c
    return value
def loss_function(Y_pred, Y):
    """Mean squared error between predictions and targets."""
    return np.mean((Y_pred - Y) ** 2)


def train(learning_rate, batch_size, note):
    """Fit y = a*x^2 + b*x + c to the module-level (X, Y) data by
    mini-batch gradient descent on the MSE loss.

    Args:
        learning_rate: step size for each gradient update.
        batch_size: samples drawn per training step
            (1 -> SGD, 1000 -> full-batch GD, in between -> mini-batch).
        note: title used for both matplotlib figures.

    Side effects: prints the fitted coefficients and the iteration count,
    then shows two plots (fit vs. target, and the loss curve).

    NOTE(review): the loop stops when the *validation* MSE drops below
    0.01 — with a too-large learning rate the updates diverge and this
    never terminates; confirm callers pass a sane learning rate.
    """
    # Random initial coefficients.
    a = np.random.randn()
    b = np.random.randn()
    c = np.random.randn()

    loss = 1000.0   # sentinel so the loop body runs at least once
    cnt = 0         # iteration counter
    # Training-loss history. A Python list gives O(1) appends; the original
    # per-step np.append made accumulation O(n^2). Leading 0 kept so the
    # plotted curve matches the original.
    losses = [0.0]

    while loss > 0.01:
        cnt += 1
        # Sample a mini-batch of indices with replacement; shape (1, batch_size).
        batch = np.random.randint(0, 1000, size=(1, batch_size))
        x = X[batch]
        y = Y[batch]
        if cnt < 2:  # debug: show the very first batch only
            print(batch)
            print(x)

        # Forward pass and training loss.
        y_pred = quadratic_function(x, a, b, c)
        loss = loss_function(y_pred, y)
        losses.append(loss)

        # Analytic gradients of the MSE with respect to a, b, c.
        grad_a = (2 * (y_pred - y) * x ** 2).mean()
        grad_b = (2 * (y_pred - y) * x).mean()
        grad_c = (2 * (y_pred - y)).mean()
        a -= learning_rate * grad_a
        b -= learning_rate * grad_b
        c -= learning_rate * grad_c

        # Validation on a fresh random batch of 5 points; this value
        # (not the training loss) drives the stopping condition above.
        valid_batch = np.random.randint(0, 1000, size=(1, 5))
        x = X[valid_batch]
        y = Y[valid_batch]
        y_pred = quadratic_function(x, a, b, c)
        loss = loss_function(y_pred, y)

    print("最终系数为:", a, b, c)
    print("最后迭代次数:", cnt)

    # Plot the fitted curve against the target over the full domain.
    y_pred = quadratic_function(X, a, b, c)
    plt.figure(figsize=(8, 3))
    plt.plot(X, Y, label="target")
    plt.plot(X, y_pred, label="predict")
    plt.title(note)
    plt.legend()
    plt.show()

    # Plot the first 150 recorded training losses.
    plt.figure(figsize=(8, 3))
    plt.plot(losses[:150], label='x')
    plt.legend()
    plt.title(note)
    plt.show()