机器学习三要素
机器学习的三个核心要素是:模型、策略和算法。这三个要素构成了机器学习方法的基本框架。
1. 模型(Model)
模型定义了从输入到输出的映射关系,是机器学习的基础。
常见模型类型:
概率模型:条件概率分布 P(Y|X)
非概率模型:决策函数 Y = f(X)
线性模型:线性回归、逻辑回归
非线性模型:神经网络、决策树
生成模型:朴素贝叶斯、高斯混合模型
判别模型:SVM、条件随机场
# Example: a simple linear model
import numpy as np


class LinearModel:
    """Minimal linear model computing y = X @ weights + bias.

    Parameters are left unset (None) until assigned by a training routine.
    """

    def __init__(self):
        self.weights = None  # weight vector, shape (n_features,)
        self.bias = None     # scalar intercept

    def forward(self, X):
        """Forward pass: return the model's prediction X @ weights + bias."""
        return np.dot(X, self.weights) + self.bias
2. 策略(Strategy)
策略定义了如何评价模型的好坏,即损失函数或风险函数的选择。
常见的损失函数:
回归问题
import numpy as np


# Mean squared error (MSE)
def mean_squared_error(y_true, y_pred):
    """Return the mean of squared differences between targets and predictions."""
    return np.mean((y_true - y_pred) ** 2)


# Mean absolute error (MAE)
def mean_absolute_error(y_true, y_pred):
    """Return the mean of absolute differences between targets and predictions."""
    return np.mean(np.abs(y_true - y_pred))
分类问题
# Cross-entropy loss (binary classification)
def cross_entropy_loss(y_true, y_pred):
    """Binary cross-entropy between labels in {0, 1} and predicted probabilities.

    Predictions are clipped away from exactly 0/1 so log() stays finite.
    """
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))


# Hinge loss (SVM)
def hinge_loss(y_true, y_pred):
    """Hinge loss for labels in {-1, +1}: mean of max(0, 1 - y_true * y_pred)."""
    return np.mean(np.maximum(0, 1 - y_true * y_pred))
正则化策略
# L1 regularization (Lasso)
def l1_regularization(weights, lambda_val):
    """Return the L1 penalty: lambda * sum(|w|)."""
    return lambda_val * np.sum(np.abs(weights))


# L2 regularization (Ridge)
def l2_regularization(weights, lambda_val):
    """Return the L2 penalty: lambda * sum(w^2)."""
    return lambda_val * np.sum(weights ** 2)


# Elastic-net regularization: convex mix of L1 and L2 penalties.
def elastic_net_regularization(weights, lambda_val, alpha=0.5):
    """Return lambda * (alpha * L1 + (1 - alpha) * L2); alpha in [0, 1]."""
    l1_term = alpha * np.sum(np.abs(weights))
    l2_term = (1 - alpha) * np.sum(weights ** 2)
    return lambda_val * (l1_term + l2_term)
3. 算法(Algorithm)
算法是求解最优模型的具体计算方法,即如何找到最小化损失函数的参数。
优化算法:
梯度下降法
class GradientDescent:
    """Batch gradient descent optimizer for a linear model.

    NOTE: the gradient computed below (X.T @ error / n) is specific to a
    linear model with a squared-error-style loss; `loss_function` is used
    only for reporting the loss history.
    """

    def __init__(self, learning_rate=0.01, max_iters=1000):
        self.learning_rate = learning_rate
        self.max_iters = max_iters

    def optimize(self, model, X, y, loss_function):
        """Run batch gradient descent in place on `model`.

        Args:
            model: object with `weights`, `bias` and `forward(X)`.
            X: feature matrix, shape (n_samples, n_features).
            y: target vector, shape (n_samples,).
            loss_function: callable (y_true, y_pred) -> scalar loss.

        Returns:
            List of per-iteration loss values.
        """
        n_samples = X.shape[0]
        losses = []
        for i in range(self.max_iters):
            # Forward pass and loss bookkeeping.
            y_pred = model.forward(X)
            loss = loss_function(y, y_pred)
            losses.append(loss)
            # Gradient of the (scaled) squared error w.r.t. weights and bias.
            error = y_pred - y
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)
            # Parameter update step.
            model.weights -= self.learning_rate * dw
            model.bias -= self.learning_rate * db
            if i % 100 == 0:
                print(f"Iteration {i}, Loss: {loss:.4f}")
        return losses

# 随机梯度下降(SGD)
class StochasticGradientDescent:
    """Mini-batch stochastic gradient descent optimizer for a linear model.

    As with GradientDescent, the gradient is the linear-model squared-error
    gradient; `loss_function` is used only to report per-epoch losses.
    """

    def __init__(self, learning_rate=0.01, max_epochs=100):
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs

    def optimize(self, model, X, y, loss_function, batch_size=32):
        """Run mini-batch SGD in place on `model`.

        Args:
            model: object with `weights`, `bias` and `forward(X)`.
            X: feature matrix, shape (n_samples, n_features).
            y: target vector, shape (n_samples,).
            loss_function: callable (y_true, y_pred) -> scalar loss.
            batch_size: mini-batch size.

        Returns:
            List of average per-epoch loss values.
        """
        n_samples = X.shape[0]
        # FIX: ceil-count the batches (the original `n_samples // batch_size`
        # divides by zero when n_samples < batch_size and ignores the partial
        # final batch when averaging).
        n_batches = (n_samples + batch_size - 1) // batch_size
        losses = []
        for epoch in range(self.max_epochs):
            # Shuffle once per epoch so batches differ between epochs.
            indices = np.random.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]
            epoch_loss = 0
            for i in range(0, n_samples, batch_size):
                # Slice the current mini-batch (last one may be smaller).
                X_batch = X_shuffled[i:i + batch_size]
                y_batch = y_shuffled[i:i + batch_size]
                # Forward pass and loss bookkeeping.
                y_pred = model.forward(X_batch)
                batch_loss = loss_function(y_batch, y_pred)
                epoch_loss += batch_loss
                # Gradient normalized by the actual batch size.
                error = y_pred - y_batch
                batch_size_actual = X_batch.shape[0]
                dw = (1 / batch_size_actual) * np.dot(X_batch.T, error)
                db = (1 / batch_size_actual) * np.sum(error)
                # Parameter update step.
                model.weights -= self.learning_rate * dw
                model.bias -= self.learning_rate * db
            avg_loss = epoch_loss / n_batches
            losses.append(avg_loss)
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {avg_loss:.4f}")
        return losses

# 完整示例:线性回归实现
import numpy as np


class LinearRegression:
    """Linear regression trained with batch gradient descent on MSE loss."""

    def __init__(self):
        self.weights = None  # shape (n_features,) after fit()
        self.bias = None     # scalar intercept after fit()

    def fit(self, X, y, learning_rate=0.01, epochs=1000):
        """Train the linear regression model.

        Args:
            X: feature matrix, shape (n_samples, n_features).
            y: target vector, shape (n_samples,).
            learning_rate: gradient-descent step size.
            epochs: number of full-batch update steps.

        Returns:
            List of per-epoch MSE loss values.
        """
        n_samples, n_features = X.shape
        # Initialize parameters to zero.
        self.weights = np.zeros(n_features)
        self.bias = 0
        losses = []
        # Batch gradient descent on the MSE objective.
        for epoch in range(epochs):
            y_pred = self.predict(X)
            loss = np.mean((y - y_pred) ** 2)
            losses.append(loss)
            # Exact MSE gradients: d/dw mean((y - Xw - b)^2).
            dw = -(2 / n_samples) * np.dot(X.T, (y - y_pred))
            db = -(2 / n_samples) * np.sum(y - y_pred)
            self.weights -= learning_rate * dw
            self.bias -= learning_rate * db
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
        return losses

    def predict(self, X):
        """Return predictions X @ weights + bias."""
        return np.dot(X, self.weights) + self.bias

# Usage example
if __name__ == "__main__":
    # matplotlib is only needed for this demo script, so import it here
    # rather than at module level (keeps library use of this file plot-free).
    import matplotlib.pyplot as plt

    # Generate synthetic data: y = 3x + 2 plus small Gaussian noise.
    np.random.seed(42)
    X = np.random.randn(100, 1)
    y = 3 * X.squeeze() + 2 + np.random.randn(100) * 0.1

    # Train the model.
    model = LinearRegression()
    losses = model.fit(X, y)
    print(f"权重: {model.weights[0]:.4f}, 偏置: {model.bias:.4f}")

    # Plot the fitted line and the loss curve side by side.
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.scatter(X, y, alpha=0.7)
    plt.plot(X, model.predict(X), color='red', linewidth=2)
    plt.title('线性回归拟合')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.subplot(1, 2, 2)
    plt.plot(losses)
    plt.title('损失函数下降曲线')
    plt.xlabel('迭代次数')
    plt.ylabel('损失')
    plt.tight_layout()
    plt.show()

# 三要素的关系总结
模型:确定假设空间,即我们考虑的所有可能的函数集合
策略:定义优化目标,即如何评价模型的好坏
算法:提供计算方法,即如何高效地找到最优模型
这三个要素相互配合,共同构成了完整的机器学习方法。选择合适的模型、合理的策略和高效的算法是机器学习成功的关键。
