Logistic Regression in MATLAB via Gradient Descent, Stochastic Gradient Descent, and Newton's Method
I. Core Algorithms
1. Data Preparation and Model Initialization
%% Data preparation
% Generate a synthetic two-class data set. (make_classification is a
% Python/scikit-learn function, not MATLAB; two Gaussian clusters give
% an equivalent toy problem.)
rng(0);                                  % for reproducibility
X = [randn(50,2) + 2; randn(50,2) - 2];  % 100 samples, 2 features
y = [ones(50,1); zeros(50,1)];
X = [ones(size(X,1),1) X];               % add intercept term

%% Parameter settings
learning_rate = 0.1;  % learning rate
max_iter = 1000;      % maximum number of iterations
tol = 1e-4;           % convergence tolerance
2. Sigmoid Function
function g = sigmoid(z)
    g = 1.0 ./ (1.0 + exp(-z));
end
3. Cost Function and Gradient
function [J, grad] = computeCost(X, y, theta)
    m = length(y);
    h = sigmoid(X * theta);                              % predicted probabilities
    J = (-1/m) * sum(y .* log(h) + (1-y) .* log(1-h));   % cross-entropy loss
    grad = (1/m) * X' * (h - y);                         % gradient of J w.r.t. theta
end
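For reference, computeCost implements the standard logistic-regression cross-entropy and its gradient, with h_theta(x) = sigmoid(theta' * x):

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[\,y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big], \qquad \nabla J(\theta) = \frac{1}{m}\,X^{\top}(h-y)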
II. Gradient Descent
function theta = gradientDescent(X, y, alpha, max_iter)
    tol = 1e-4;   % convergence tolerance (script variables are not visible here)
    [~, n] = size(X);
    theta = zeros(n,1);
    J_history = zeros(max_iter,1);
    for iter = 1:max_iter
        [J, grad] = computeCost(X, y, theta);
        theta = theta - alpha * grad;   % full-batch update
        J_history(iter) = J;
        % Convergence check: stop when the loss no longer improves
        if iter > 1 && abs(J_history(iter) - J_history(iter-1)) < tol
            break;
        end
    end
end
III. Stochastic Gradient Descent
function theta = stochasticGradientDescent(X, y, alpha, max_epoch)
    [m, n] = size(X);
    theta = zeros(n,1);
    J_history = zeros(max_epoch,1);
    for epoch = 1:max_epoch
        % Shuffle the sample order each epoch
        idx = randperm(m);
        X = X(idx,:);
        y = y(idx);
        for i = 1:m
            xi = X(i,:)';
            yi = y(i);
            % Gradient of a single sample (not the full batch)
            hi = sigmoid(xi' * theta);
            theta = theta - alpha * (hi - yi) * xi;
        end
        % Record the full-data loss after each epoch
        J_history(epoch) = computeCost(X, y, theta);
    end
end
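Unlike batch gradient descent, each inner-loop step applies the gradient of a single sample:

\theta \leftarrow \theta - \alpha\,\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x^{(i)}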
IV. Newton's Method
function theta = newtonMethod(X, y, max_iter)
    tol = 1e-4;   % convergence tolerance (script variables are not visible here)
    [m, n] = size(X);
    theta = zeros(n,1);
    for iter = 1:max_iter
        h = sigmoid(X * theta);
        grad = (1/m) * X' * (h - y);
        % Hessian (with a small ridge term to prevent singularity)
        S = diag(h .* (1-h));
        H = (1/m) * X' * S * X + 1e-4 * eye(n);
        % Newton update: solve H * delta = grad
        theta = theta - H \ grad;
        % Convergence check on the gradient norm
        if norm(grad) < tol
            break;
        end
    end
end
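The update solves the Newton system built from the logistic-regression Hessian, where S = diag(h .* (1-h)):

H = \frac{1}{m}\,X^{\top} S X, \qquad \theta \leftarrow \theta - H^{-1}\,\nabla J(\theta)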
V. Performance Comparison and Visualization
%% Train and compare
theta_gd = gradientDescent(X, y, 0.1, 1000);
theta_sgd = stochasticGradientDescent(X, y, 0.1, 1000);
theta_newton = newtonMethod(X, y, 100);

%% Plot decision boundaries
figure;
hold on;
scatter(X(y==1,2), X(y==1,3), 'r', 'filled');
scatter(X(y==0,2), X(y==0,3), 'b', 'filled');
% Decision boundary of each method: theta(1) + theta(2)*x1 + theta(3)*x2 = 0
x1 = linspace(min(X(:,2)), max(X(:,2)), 100);
x2_gd = (-theta_gd(1) - theta_gd(2)*x1) / theta_gd(3);
x2_sgd = (-theta_sgd(1) - theta_sgd(2)*x1) / theta_sgd(3);
x2_newton = (-theta_newton(1) - theta_newton(2)*x1) / theta_newton(3);
plot(x1, x2_gd, 'g', 'LineWidth', 2);
plot(x1, x2_sgd, 'm', 'LineWidth', 2);
plot(x1, x2_newton, 'k', 'LineWidth', 2);
legend('Positive', 'Negative', 'GD', 'SGD', 'Newton');
title('Decision Boundary Comparison Across Optimization Methods');
VI. Optimizations
- Learning-rate tuning
  - Decay schedule:
    alpha = initial_alpha / (1 + decay_rate * iter)
  - Adaptive methods: combine with AdaGrad or RMSProp
- Regularization
  - L2 regularization: add lambda/2 * sum(theta(2:end).^2) to the cost (a sketch follows this list)
  - L1 regularization: handle sparsity with a subgradient method
- Numerical stability
  - Clip the sigmoid input: z = max(min(z, 30), -30) before applying sigmoid, so exp(-z) cannot overflow
  - Hessian regularization: add a small diagonal term to prevent singularity
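A minimal sketch of the L2 variant (the function name computeCostReg and the lambda argument are illustrative additions, not part of the code above):

function [J, grad] = computeCostReg(X, y, theta, lambda)
    m = length(y);
    h = sigmoid(X * theta);
    % Cross-entropy plus an L2 penalty; the intercept theta(1) is not penalized
    J = (-1/m) * sum(y .* log(h) + (1-y) .* log(1-h)) ...
        + (lambda/2) * sum(theta(2:end).^2);
    grad = (1/m) * X' * (h - y);
    grad(2:end) = grad(2:end) + lambda * theta(2:end);   % matching penalty gradient
end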
VII. Extended Application Example
%% Multiclass extension (One-vs-All)
function models = oneVsAll(X, y, num_classes, method)
    models = cell(num_classes,1);
    for c = 1:num_classes
        % Convert labels to binary: class c vs. the rest
        binary_y = (y == c);
        % Train one classifier per class (hyperparameters as in Section V)
        switch method
            case 'gd'
                models{c} = gradientDescent(X, binary_y, 0.1, 1000);
            case 'sgd'
                models{c} = stochasticGradientDescent(X, binary_y, 0.1, 1000);
            case 'newton'
                models{c} = newtonMethod(X, binary_y, 100);
        end
    end
end
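To classify new samples with the one-vs-all models, pick the class whose classifier assigns the highest probability. A minimal sketch (the helper name predictOneVsAll is illustrative):

function pred = predictOneVsAll(models, X)
    num_classes = numel(models);
    scores = zeros(size(X,1), num_classes);
    for c = 1:num_classes
        scores(:,c) = sigmoid(X * models{c});   % probability of class c
    end
    [~, pred] = max(scores, [], 2);             % choose the highest-scoring class
end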