当前位置：首页 > news >正文

强化学习的数学原理-六、随机近似与随机梯度下降

news 2025/10/14 22:08:25

代码来自up主【强化学习的数学原理-作业】GridWorld示例代码（已更新至DQN、REINFORCE、A2C）_哔哩哔哩_bilibili

SGD、GD、MBGD（随机梯度下降、梯度下降、小批量梯度下降）举例：

# Fix the RNG seed so the random draws made later are reproducible.
np.random.seed(0)
# Sample pool: 1000 evenly spaced inputs on [-10, 10]; batches are drawn from it later.
X = np.linspace(-10, 10, num=1000)
# Ground-truth targets, generated by y = 2x^2 + 3x + 5.
Y = 2 * X ** 2 + 3 * X + 5

# Quadratic model; training searches for the a, b, c that minimise the loss.
def quadratic_function(X, a, b, c):
    """Evaluate ``a * X**2 + b * X + c``.

    ``X`` may be a scalar or anything supporting ``**``, ``*`` and ``+``
    element-wise (e.g. a NumPy array); ``a``, ``b`` and ``c`` are the
    quadratic, linear and constant coefficients.
    """
    quadratic_term = a * X ** 2
    linear_term = b * X
    return quadratic_term + linear_term + c

# Loss used for both training and the stopping check.
def loss_function(Y_pred, Y):
    """Return the mean squared error between ``Y_pred`` and ``Y``."""
    residual = Y_pred - Y
    return np.mean(residual ** 2)

def train(learning_rate, batch_size, note, max_iter=None):
    """Fit ``a*x**2 + b*x + c`` to the module-level ``X``/``Y`` samples and plot the result.

    Each iteration draws ``batch_size`` sample indices uniformly at random
    (with replacement), takes one gradient step on the mean squared error of
    that batch, and then re-evaluates the loss on 5 freshly drawn samples.
    Training stops once that 5-sample loss is no greater than 0.01.

    Args:
        learning_rate: Step size applied to every gradient update.
        batch_size: Number of samples drawn per update (1 gives plain SGD).
        note: Title used for both figures.
        max_iter: Optional cap on the number of updates. ``None`` (the
            default) keeps the original behaviour of looping until the
            stopping criterion is met.

    Side effects:
        Prints the first batch (indices and inputs), the final coefficients
        and the iteration count; shows two matplotlib figures (target vs.
        prediction, and the first 150 training-batch losses).
    """
    a = np.random.randn()
    b = np.random.randn()
    c = np.random.randn()
    n_samples = len(X)
    # Any value above the 0.01 threshold works; it only lets the loop start.
    loss = 1000
    cnt = 0
    # A plain list avoids np.append's full copy per iteration, and starting
    # empty keeps a fake leading 0 out of the plotted loss curve.
    results = []
    while loss > 0.01 and (max_iter is None or cnt < max_iter):
        cnt += 1
        # Shape (1, batch_size): indices drawn with replacement.
        batch = np.random.randint(0, n_samples, size=(1, batch_size))

        x = X[batch]
        y = Y[batch]
        if cnt < 2:
            # Show the first sampled batch once, for inspection.
            print(batch)
            print(x)
        y_pred = quadratic_function(x, a, b, c)

        # Record the training-batch loss (before the update) for the curve.
        results.append(loss_function(y_pred, y))

        # Gradients of the batch MSE with respect to a, b and c.
        residual = y_pred - y
        grad_a = (2 * residual * x ** 2).mean()
        grad_b = (2 * residual * x).mean()
        grad_c = (2 * residual).mean()

        a -= learning_rate * grad_a
        b -= learning_rate * grad_b
        c -= learning_rate * grad_c

        # Stopping check: loss of the updated model on 5 random samples.
        valid_batch = np.random.randint(0, n_samples, size=(1, 5))
        x_valid = X[valid_batch]
        y_valid = Y[valid_batch]
        loss = loss_function(quadratic_function(x_valid, a, b, c), y_valid)

    print("最终系数为：", a, b, c)
    print("最后迭代次数：", cnt)

    # Figure 1: fitted curve against the ground truth over the full input range.
    y_pred = quadratic_function(X, a, b, c)
    plt.figure(figsize=(8, 3))
    plt.plot(X, Y, label="target")
    plt.plot(X, y_pred, label="predict")
    plt.title(note)
    plt.legend()
    plt.show()

    # Figure 2: training-batch loss for the first 150 iterations.
    plt.figure(figsize=(8, 3))
    plt.plot(results[:150], label='x')
    plt.legend()
    plt.title(note)
    plt.show()

查看全文

http://www.dtcms.com/a/31561.html