当前位置: 首页 > wzjs >正文

淄博周村学校网站建设报价引流推广是什么意思

淄博周村学校网站建设报价,引流推广是什么意思,欢迎访问中国建设银行官方网站,在线客服系统注册目录 数据层面 数据增强 数据正则化 ​数据采样 模型结构层面 简化模型 添加正则化层 早停法(Early Stopping) 训练过程层面 使用交叉验证 使用集成学习 调整学习率 防止过拟合是机器学习中一个非常重要的问题,它可以帮助模型在新…

目录

数据层面

数据增强

数据正则化

​数据采样

模型结构层面

简化模型

添加正则化层

早停法(Early Stopping)

训练过程层面

使用交叉验证

使用集成学习

调整学习率


防止过拟合是机器学习中一个非常重要的问题,它可以帮助模型在新的数据上表现得更好。以下将从数据层面、模型结构层面和训练过程层面对防止过拟合的方法进行分类介绍。

数据层面

数据增强

数据增强通过对训练数据进行变换(如旋转、缩放、裁剪等),增加数据的多样性,从而减少模型对训练数据的过拟合

## Data level
# 1. Data augmentation
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert the images to float32 and scale pixel values by 1/255.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Add a trailing channel axis so the images form a 4D tensor
# (samples, height, width, channels).
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

# Build the augmentation generator.
datagen = ImageDataGenerator(
    rotation_range=20,        # random rotation range, in degrees
    width_shift_range=0.1,    # random horizontal shift range
    height_shift_range=0.1,   # random vertical shift range
    shear_range=0.2,          # shear intensity
    zoom_range=0.2,           # random zoom range
    horizontal_flip=False,    # no horizontal flip: a mirrored digit may be meaningless
    fill_mode='nearest',      # how newly created pixels are filled
)

# Pick a single image to augment and give it a batch axis.
sample_image = x_train[0]
sample_image = np.expand_dims(sample_image, 0)

# Iterator that yields augmented versions of the sample image.
augmented_images = datagen.flow(sample_image, batch_size=1)

# Draw 10 augmented variants in a 2 x 5 grid.
plt.figure(figsize=(10, 6))
for i in range(10):
    augmented_image = next(augmented_images)[0]  # first (only) image of the batch
    plt.subplot(2, 5, i + 1)
    plt.imshow(augmented_image.squeeze(), cmap='gray')  # drop the channel axis for display
    plt.axis('off')
plt.show()

数据正则化

数据正则化(这里实际指数据规范化,即归一化或标准化,与 L1/L2 等模型正则化不同)通过对输入数据进行归一化或标准化,使各特征的取值范围保持一致,让训练过程更加稳定,从而在一定程度上减少模型对数据的过拟合。

# 2. Data normalization / standardization
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import StandardScaler
# NOTE: the two imblearn imports below are not used in this snippet; they are
# kept because the original listing imports them.
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert the images to float32.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Normalization: scale pixel values into [0, 1].
x_train_normalized = x_train / 255.0
x_test_normalized = x_test / 255.0

# Standardization: rescale each feature to zero mean and unit variance.
scaler = StandardScaler()
x_train_reshaped = x_train.reshape(-1, 28 * 28)  # flatten each image to one row
x_test_reshaped = x_test.reshape(-1, 28 * 28)

# Fit on the training set only, then apply the same transform to the test set.
x_train_standardized = scaler.fit_transform(x_train_reshaped)
x_test_standardized = scaler.transform(x_test_reshaped)


def plot_images(images, title):
    """Show the first 10 entries of ``images`` in a single row.

    Args:
        images: Indexable collection of at least 10 2-D arrays that
            ``plt.imshow`` can draw.
        title: Text used as the figure's super-title.
    """
    plt.figure(figsize=(10, 2))
    for i in range(10):
        plt.subplot(1, 10, i + 1)
        plt.imshow(images[i], cmap='gray')
        plt.axis('off')
    plt.suptitle(title)
    plt.show()


# Original images.
plot_images(x_train[:10], "Original Images")

# Normalized images.
plot_images(x_train_normalized[:10], "Normalized Images")

# Standardized images, reshaped back to 28 x 28 for display.
plot_images(x_train_standardized[:10].reshape(-1, 28, 28), "Standardized Images")

 

数据采样

数据采样可以通过欠采样(减少多数类样本)或过采样(增加少数类样本)来平衡数据集,减少模型对多数类的过拟合

# 3. Data sampling
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert the images to float32.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Simulate an imbalanced dataset by keeping only the digits 0 and 1.
x_train_sampled = x_train[y_train < 2]
y_train_sampled = y_train[y_train < 2]

# Both samplers take 2-D input, so flatten each image to one row.
x_train_flat = x_train_sampled.reshape(-1, 28 * 28)

# Over-sampling with SMOTE.
smote = SMOTE(random_state=42)
x_resampled, y_resampled = smote.fit_resample(x_train_flat, y_train_sampled)

# Under-sampling with RandomUnderSampler.
undersampler = RandomUnderSampler(random_state=42)
x_undersampled, y_undersampled = undersampler.fit_resample(x_train_flat, y_train_sampled)


def plot_sampled_images(images, labels, title):
    """Show the first 10 sampled images in a row, each titled with its label.

    Args:
        images: Indexable collection of at least 10 arrays, each reshapeable
            to (28, 28).
        labels: Indexable collection of the matching labels.
        title: Text used as the figure's super-title.
    """
    plt.figure(figsize=(10, 2))
    for i in range(10):
        plt.subplot(1, 10, i + 1)
        plt.imshow(images[i].reshape(28, 28), cmap='gray')
        plt.title(labels[i])
        plt.axis('off')
    plt.suptitle(title)
    plt.show()


# Images after over-sampling.
plot_sampled_images(x_resampled[:10], y_resampled[:10], "Over-sampled Images")

# Images after under-sampling.
plot_sampled_images(x_undersampled[:10], y_undersampled[:10], "Under-sampled Images")

模型结构层面

简化模型

选择更简单的模型结构或减少模型的复杂度,可以有效减少过拟合。例如,减少神经网络的层数或神经元数量

## Model-structure level
# 1. Simplify the model
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping  # not used in this snippet; kept from the original listing

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocessing: scale to [0, 1] and flatten each image to a 784-vector.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# Build the simplified model: one small hidden layer.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(28 * 28,)))  # fewer neurons
model.add(Dense(10, activation='softmax'))

# Compile the model.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train, holding out 20% of the training data for validation.
history_simple = model.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=128,
)


def plot_training_history(history, title):
    """Plot training vs. validation loss and accuracy side by side.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict is
            read for the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
        title: Label appended to both subplot titles.
    """
    plt.figure(figsize=(12, 4))

    # Left panel: training and validation loss.
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Right panel: training and validation accuracy.
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'Training and Validation Accuracy ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()


# Visualize the training run.
plot_training_history(history_simple, "Simple Model")

添加正则化层

在模型中添加正则化层(如 Dropout 或 L1/L2 正则化),可以减少模型对训练数据的依赖

# 2. Add regularization layers
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocessing: scale to [0, 1] and flatten each image to a 784-vector.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# Build a model with L2 weight penalties and Dropout after each hidden layer.
model = Sequential()
model.add(Dense(256, activation='relu', kernel_regularizer=l2(0.01), input_shape=(28 * 28,)))  # L2 regularization
model.add(Dropout(0.5))  # Dropout
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Compile the model.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train, holding out 20% of the training data for validation.
history_regularized = model.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=128,
)


def plot_training_history(history, title):
    """Plot training vs. validation loss and accuracy side by side.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict is
            read for the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
        title: Label appended to both subplot titles.
    """
    plt.figure(figsize=(12, 4))

    # Left panel: training and validation loss.
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Right panel: training and validation accuracy.
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'Training and Validation Accuracy ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()


# Visualize the training run.
plot_training_history(history_regularized, "Regularized Model")

早停法(Early Stopping)

早停法通过在训练过程中监控验证集的损失,当验证集的损失不再下降时停止训练,从而避免过拟合

# 3. Early stopping
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocessing: scale to [0, 1] and flatten each image to a 784-vector.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# Build the model.
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(28 * 28,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping: watch the validation loss with a patience of 5 epochs and
# restore the best weights when training stops.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train, holding out 20% of the training data for validation.
history_early_stopping = model.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=128,
    callbacks=[early_stopping],
)


def plot_training_history(history, title):
    """Plot training vs. validation loss and accuracy side by side.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict is
            read for the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
        title: Label appended to both subplot titles.
    """
    plt.figure(figsize=(12, 4))

    # Left panel: training and validation loss.
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Right panel: training and validation accuracy.
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'Training and Validation Accuracy ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()


# Visualize the training run.
plot_training_history(history_early_stopping, "Early Stopping")

训练过程层面

使用交叉验证

交叉验证可以更好地评估模型的泛化能力,避免模型对特定训练集的过拟合

## Training-process level
# 1. Cross-validation
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocessing: scale to [0, 1] and flatten each image to a 784-vector.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)


def create_model():
    """Build and compile a fresh 784-256-128-10 dense classifier.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    model.add(Dense(256, activation='relu', input_shape=(28 * 28,)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


# 5-fold cross-validation with shuffling and a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []

for fold_no, (train_index, val_index) in enumerate(kf.split(x_train), start=1):
    print(f'Training on fold {fold_no}...')
    x_train_fold, x_val_fold = x_train[train_index], x_train[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

    # A new model per fold, so no weights carry over between folds.
    model = create_model()
    model.fit(x_train_fold, y_train_fold, epochs=10, batch_size=128, verbose=0)

    # scores[0] is the loss and scores[1] the accuracy on the held-out fold.
    scores = model.evaluate(x_val_fold, y_val_fold, verbose=0)
    accuracies.append(scores[1])
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1] * 100}%')

# Mean accuracy across folds (the original article reports 97.5766670703888%).
print(f'Average accuracy: {np.mean(accuracies) * 100}%')

使用集成学习

集成学习通过组合多个模型来提高模型的泛化能力,减少过拟合

# 2. Ensemble learning
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from tensorflow.keras.datasets import mnist
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocessing: scale to [0, 1] and flatten each image to a 784-vector.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# Define the base models.
model1 = LogisticRegression(max_iter=1000, random_state=42)
model2 = SVC(probability=True, random_state=42)  # probability=True is needed for soft voting
model3 = RandomForestClassifier(random_state=42)

# Combine them with soft voting.
ensemble_model = VotingClassifier(
    estimators=[('lr', model1), ('svc', model2), ('rf', model3)],
    voting='soft',
)

# Train the ensemble.
ensemble_model.fit(x_train, y_train)

# Predict on the test set and evaluate (the original article reports 97.21%).
y_pred = ensemble_model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Ensemble model accuracy: {accuracy * 100}%')

调整学习率

适当调整学习率可以避免模型在训练过程中过度拟合训练数据。例如,当验证集损失连续若干轮不再下降时自动降低学习率,使模型以更小的步长收敛。

# 3. Adjust the learning rate
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocessing: scale to [0, 1] and flatten each image to a 784-vector.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# Define and compile the model.
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(28 * 28,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Dynamic learning rate: watch the validation loss with a patience of 3 epochs,
# scale the rate by 0.5 on a plateau, and never go below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=0.00001)

# Train, holding out 20% of the training data for validation.
history = model.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=128,
    callbacks=[reduce_lr],
)


def plot_training_history(history, title):
    """Plot training vs. validation loss and accuracy side by side.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict is
            read for the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
        title: Label appended to both subplot titles.
    """
    plt.figure(figsize=(12, 4))

    # Left panel: training and validation loss.
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Right panel: training and validation accuracy.
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'Training and Validation Accuracy ({title})')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()


# Visualize the training run.
plot_training_history(history, "Dynamic Learning Rate")

http://www.dtcms.com/wzjs/147538.html

相关文章:

  • 做产品推广哪个网站好上海专业seo公司
  • 腾讯云可以做网站吗app开发需要多少费用
  • 利用国外网站文章图片做书营利整合营销理论主要是指
  • 招商加盟的网站应该怎么做注册公司流程和费用
  • wordpress隐藏图片地址seo竞价
  • 如何做淘宝联盟网站的推广seo优化包括哪些
  • seo案例网站百度实名认证
  • 电子商务网站购物车怎么做青岛 google seo
  • 白云区手机版网站建设长尾关键词挖掘精灵官网
  • wordpress dz论坛济南seo公司报价
  • 广西建设监理协会网站大地seo
  • 中国建设监理官方网站提升seo搜索排名
  • dw建立网站之后怎么做优化大师官方免费下载
  • 找项目去哪个网站今天最新军事新闻视频
  • 用wordpress建站一定要先有域名和空间吗北京seo顾问
  • 大神部落 网站建设seo优化技术
  • 动漫网站实现功能十个有创意的线上活动
  • 打开百度搜索网站百度网站排名搜行者seo
  • 重庆教育网站建设惠州seo关键词推广
  • 重庆永川网站建设公司外贸推广平台哪家好
  • 大型电商网站开发方案seo含义
  • 做网站哪个服务商便宜中牟网络推广
  • 电商网站模块介绍十大少儿编程教育品牌
  • 模板网站可以做推广吗sem优化技巧
  • 两学一做电脑答题网站谷粉搜索谷歌搜索
  • 网站开发并发处理抖音seo代理
  • 做推广网站的文章中国足彩网竞彩推荐
  • 广州中企动力网站制作关键对话
  • wordpress 子菜单顺序关键词优化哪家强
  • 做网站要准备的资料怎么做网络广告推广