import numpy as np
import h5py
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical# 中文显示配置
# Candidate fonts so that Chinese text in figures can be rendered.
plt.rcParams["font.family"] = ["SimHei", "SimSun", "Microsoft YaHei"]
# Disable the Unicode minus glyph for axis labels (presumably because the
# fonts above do not provide it -- confirm on the target machine).
plt.rcParams["axes.unicode_minus"] = False


def load_dataset(
    train_path='./train_signs.h5',
    test_path='./test_signs.h5',
    *,
    val_size=0.2,
    random_state=42,
):
    """Load the HDF5 datasets and split them into train/validation/test sets.

    The training file supplies ``train_set_x`` / ``train_set_y``; the test
    file supplies ``test_set_x`` / ``test_set_y`` and ``list_classes``. The
    training samples are then divided into training and validation subsets
    with a stratified split on the training labels.

    Args:
        train_path: Path of the HDF5 file holding the training arrays.
        test_path: Path of the HDF5 file holding the test arrays and the
            class list.
        val_size: Value forwarded to ``train_test_split`` as ``test_size``
            (share of the training data held out for validation).
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible.

    Returns:
        A tuple ``(train_x, train_y, val_x, val_y, test_x, test_y, classes)``.
    """
    with h5py.File(train_path, "r") as f:
        train_x = np.array(f["train_set_x"][:])
        train_y = np.array(f["train_set_y"][:])

    with h5py.File(test_path, "r") as f:
        test_x = np.array(f["test_set_x"][:])
        test_y = np.array(f["test_set_y"][:])
        # The class list lives in the test file; it has to be copied out
        # while the file handle is still open.
        classes = np.array(f["list_classes"][:])

    # Stratified split of the training data into training/validation parts.
    train_x, val_x, train_y, val_y = train_test_split(
        train_x,
        train_y,
        test_size=val_size,
        random_state=random_state,
        stratify=train_y,
    )
    return train_x, train_y, val_x, val_y, test_x, test_y, classes


def preprocess_data(images):
    """Convert images to float32, divide by 255 and ensure three channels.

    Args:
        images: Array-like image data whose last axis is treated as the
            channel axis. Assumes 8-bit pixel values (0-255), in which case
            the result lies in [0, 1].

    Returns:
        A float32 ndarray. When the last axis has length 1 it is repeated
        three times along that axis; otherwise the shape is unchanged.
    """
    images = np.asarray(images).astype('float32') / 255.0
    if images.shape[-1] == 1:
        # Single-channel input: replicate the channel to get 3 channels.
        images = np.repeat(images, 3, axis=-1)
    return images


def create_data_augmenter():
    """Create a lightweight data-augmentation generator.

    Returns:
        An ``ImageDataGenerator`` configured with horizontal flips, width and
        height shifts of 0.03, and ``'nearest'`` fill mode.
    """
    return ImageDataGenerator(
        horizontal_flip=True,
        width_shift_range=0.03,
        height_shift_range=0.03,
        fill_mode='nearest',
    )


# Load and preprocess the data
# Named once so the one-hot width and the batch arithmetic below stay in sync.
NUM_CLASSES = 6
BATCH_SIZE = 32

(
    X_train_orig,
    Y_train_orig,
    X_val_orig,
    Y_val_orig,
    X_test_orig,
    Y_test_orig,
    classes,
) = load_dataset()

# Apply the same preprocessing to every split.
X_train = preprocess_data(X_train_orig)
X_val = preprocess_data(X_val_orig)
X_test = preprocess_data(X_test_orig)

# Convert the labels to one-hot encoding.
Y_train = to_categorical(Y_train_orig, NUM_CLASSES)
Y_val = to_categorical(Y_val_orig, NUM_CLASSES)
Y_test = to_categorical(Y_test_orig, NUM_CLASSES)

# Sanity check: print each label array's shape next to the expected one.
print(f"Y_train形状: {Y_train.shape} (应为 (864,6))")
print(f"Y_val形状: {Y_val.shape} (应为 (216,6))")
print(f"Y_test形状: {Y_test.shape} (应为 (120,6))")

# Data-augmentation setup: only the training split goes through the generator.
datagen = create_data_augmenter()
augmented_train_generator = datagen.flow(
    X_train, Y_train, batch_size=BATCH_SIZE, shuffle=True
)
# Number of full batches per epoch (floor division drops any partial batch).
steps_per_epoch = X_train.shape[0] // BATCH_SIZE