Single-Class Linear Logistic Regression
I. Multi-Layer Neural Network: Single-Class Linear Logistic Regression (1), TF 1.x
1.1 Single-class linear logistic regression, TF 1.x
1.1.1 Single-class linear logistic regression, TF 1.x
'''
Multi-layer neural network 1: single-class linear logistic regression, TF 1.x
This version targets an outdated API and is kept only for reference; see the TF 2.x code below for the improved version.
'''
# 1. Generate the sample set
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from sklearn.utils import shuffle  # needed by generate() below

def generate(sample_size, mean, cov, diff, regression):
    num_classes = 2
    sample_per_class = int(sample_size / 2)
    X0 = np.random.multivariate_normal(mean, cov, sample_per_class)
    Y0 = np.zeros(sample_per_class)
    for ci, d in enumerate(diff):
        # shift the mean by d so the extra class is separable from class 0
        X1 = np.random.multivariate_normal(mean + d, cov, sample_per_class)
        Y1 = (ci + 1) * np.ones(sample_per_class)
        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))
    if regression == False:  # one-hot encoding: label 0 becomes [1, 0]
        class_ind = [Y0 == class_number for class_number in range(num_classes)]
        Y0 = np.asarray(np.column_stack(class_ind), dtype=np.float32)
    X, Y = shuffle(X0, Y0)
    return X, Y

np.random.seed(10)
num_classes = 2
mean = np.random.randn(num_classes)
cov = np.eye(num_classes)
X, Y = generate(1000, mean, cov, [3.0], True)
colors = ['r' if l == 0 else 'b' for l in Y[:]]
plt.scatter(X[:,0],X[:,1],c=colors)
plt.xlabel('Scaled age (in yrs)')
plt.ylabel('Tumor size (in cm)')
plt.show()
input_dim = 2
lab_dim = 1

# 2. Build the network structure
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_labels = tf.placeholder(tf.float32, [None, lab_dim])

# Define the learnable parameters
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")

output = tf.nn.sigmoid(tf.matmul(input_features, W) + b)
cross_entropy = -(input_labels * tf.log(output) + (1.0 - input_labels) * tf.log(1.0 - output))
ser = tf.square(input_labels - output)
loss = tf.reduce_mean(cross_entropy)
err = tf.reduce_mean(ser)
optimizer = tf.train.AdamOptimizer(0.04)
# Prefer Adam: it converges quickly because it adapts the step size per parameter (see the sketch after this code block)
train = optimizer.minimize(loss)

# 3. Set the training parameters and train
maxEpochs = 50
minibatch_size = 25
# Start the session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed the data into the model
    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(len(Y) // minibatch_size):
            x1 = X[i * minibatch_size:(i + 1) * minibatch_size, :]
            y1 = np.reshape(Y[i * minibatch_size:(i + 1) * minibatch_size], [-1, 1])
            _, lossval, outputval, errval = sess.run(
                [train, loss, output, err],
                feed_dict={input_features: x1, input_labels: y1})
            sumerr = sumerr + errval
        print('Epoch:', '%04d' % (epoch + 1),
              "cost=", "{:.9f}".format(lossval),
              'err=', sumerr / (len(Y) // minibatch_size))

    # 4. Visualize the data and the fitted decision boundary
    train_X, train_Y = generate(1000, mean, cov, [3.0], True)
    colors = ['r' if l == 0 else 'b' for l in train_Y[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)
    x = np.linspace(-1, 8, 200)
    # Decision boundary: w0*x + w1*y + b = 0  =>  y = -(w0/w1)*x - b/w1
    y = -x * (sess.run(W)[0] / sess.run(W)[1]) - sess.run(b) / sess.run(W)[1]
    plt.plot(x, y, label='Fitted line')
    plt.legend()
    plt.show()
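The Adam comment above deserves a short aside: unlike plain gradient descent, Adam keeps running estimates of the gradient and of its square and scales every parameter's step by them, which is why it usually converges faster here. Below is a minimal NumPy sketch of a single Adam update, an illustration only and not part of the original script (the learning rate 0.04 matches the code above; the other hyperparameters are the usual defaults).

# Illustrative only: one Adam step in NumPy (not part of the original TF 1.x script)
import numpy as np

def adam_step(theta, grad, m, v, t, lr=0.04, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad        # running mean of the gradient
    v = beta2 * v + (1 - beta2) * grad ** 2   # running mean of the squared gradient
    m_hat = m / (1 - beta1 ** t)              # bias-corrected first moment
    v_hat = v / (1 - beta2 ** t)              # bias-corrected second moment
    theta = theta - lr * m_hat / (np.sqrt(v_hat) + eps)  # per-parameter scaled step
    return theta, m, v

theta = np.array([1.0, 1.0])
m, v = np.zeros(2), np.zeros(2)
theta, m, v = adam_step(theta, np.array([0.5, 0.01]), m, v, t=1)
print(theta)  # both parameters move by roughly lr despite very different gradient sizes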
1.2 Single-class linear logistic regression
# 1.1.2 Single-class linear logistic regression
# Multi-layer neural network 1: single-class linear logistic regression, ported to TF 2.x
# 1. Generate the sample set
import numpy as np
from matplotlib import pyplot as plt
from sklearn.utils import shuffle  # add the missing import

# 1. Generate the sample set
def generate(sample_size, mean, cov, diff, regression):
    num_classes = len(diff) + 1                    # number of classes follows the length of diff
    sample_per_class = int(sample_size / num_classes)

    Xs = []
    Ys = []

    # Base class (class 0)
    X0 = np.random.multivariate_normal(mean, cov, sample_per_class)
    Y0 = np.zeros(sample_per_class)
    Xs.append(X0)
    Ys.append(Y0)

    # Remaining classes
    for ci, d in enumerate(diff):
        # shift the base mean along the x-axis for this class
        class_mean = mean + np.array([d, 0])
        X1 = np.random.multivariate_normal(class_mean, cov, sample_per_class)
        Y1 = (ci + 1) * np.ones(sample_per_class)
        Xs.append(X1)
        Ys.append(Y1)

    # Merge all classes
    X = np.vstack(Xs)
    Y = np.hstack(Ys)

    # Shuffle the data
    X, Y = shuffle(X, Y)

    # One-hot encode if this is not a regression-style label
    if not regression:
        class_ind = [Y == class_number for class_number in range(num_classes)]
        Y = np.asarray(np.column_stack(class_ind), dtype=np.float32)  # (N, num_classes)

    return X, Y


# Fix the random seed for reproducibility
np.random.seed(10)

# Parameters
num_classes = 2
mean = np.random.randn(2)   # 2-D features
cov = np.eye(2)             # 2x2 identity matrix
diff = [3.0]                # offset between the classes

# Generate the data
X, Y = generate(1000, mean, cov, diff, True)

# Visualize the data
plt.figure(figsize=(10, 6))
colors = ['r' if y == 0 else 'b' for y in Y]  # corrected color mapping
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.6)
plt.xlabel('Scaled age (in yrs)')
plt.ylabel('Tumor size (in cm)')
plt.title('Generated Dataset (Two Classes)')
plt.grid(True)
plt.show()
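As a quick, hypothetical sanity check of the generator's one-hot branch (regression=False), which the code above never exercises, one could run the following, assuming the generate function defined above is in scope:

# Hypothetical check of the one-hot path (regression=False)
X_oh, Y_oh = generate(1000, mean, cov, diff, False)
print(X_oh.shape)  # expected (1000, 2): 1000 samples, 2 features
print(Y_oh.shape)  # expected (1000, 2): one row per sample, one column per class
print(Y_oh[:3])    # each row should be [1., 0.] or [0., 1.]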
II. Multi-Layer Neural Network: Single-Class Linear Logistic Regression (2), TF 2.x
2.1 Single-class linear logistic regression
# 1.2.1 Single-class linear logistic regression
# Fully compatible with TF 2.x (via the v1 compatibility layer)
'''
Multi-layer neural network 1: single-class linear logistic regression, TF 2.x
'''
import numpy as np
from matplotlib import pyplot as plt
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # enable TF 1.x compatibility mode
from sklearn.utils import shuffle

# 1. Generate the simulated dataset
def generate(sample_size, mean, cov, diff, regression):
    """Generate a two-class classification dataset."""
    num_classes = len(diff) + 1                    # total number of classes
    sample_per_class = sample_size // num_classes  # samples per class
    Xs, Ys = [], []                                # containers for the per-class data

    # Base class (class 0)
    X0 = np.random.multivariate_normal(mean, cov, sample_per_class)
    Y0 = np.zeros(sample_per_class)
    Xs.append(X0)
    Ys.append(Y0)

    # Remaining classes (class 1, ...)
    for ci, d in enumerate(diff):
        class_mean = mean + np.array([d, 0])       # shift along the x-axis
        X1 = np.random.multivariate_normal(class_mean, cov, sample_per_class)
        Y1 = (ci + 1) * np.ones(sample_per_class)  # label for this class
        Xs.append(X1)
        Ys.append(Y1)

    # Merge and shuffle
    X = np.vstack(Xs)                              # stack feature rows
    Y = np.hstack(Ys)                              # concatenate label vectors
    return shuffle(X, Y)                           # return the shuffled data


# Parameter settings
np.random.seed(10)                   # fix the random seed
input_dim, lab_dim = 2, 1            # input feature dimension and output dimension
num_classes = 2                      # total number of classes
mean = np.random.randn(input_dim)    # mean vector of the base class
cov = np.eye(input_dim)              # covariance matrix (identity)
diff = [3.0]                         # offset between the classes

# Generate and visualize the data
X, Y = generate(1000, mean, cov, diff, True)          # regression-style (0/1) labels
colors = ['r' if y == 0 else 'b' for y in Y]          # color per class
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.6)    # scatter plot
plt.xlabel('Scaled age (in yrs)')
plt.ylabel('Tumor size (in cm)')                      # axis labels
plt.title('Generated Dataset')
plt.grid(True)                                        # title and grid
plt.show()

# 2. Build the neural-network model
input_features = tf.placeholder(tf.float32, [None, input_dim], name='input_features')  # feature placeholder
input_labels = tf.placeholder(tf.float32, [None, lab_dim], name='input_labels')        # label placeholder
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")                 # weight variable
b = tf.Variable(tf.zeros([lab_dim]), name="bias")                                      # bias variable

# Forward pass
logits = tf.matmul(input_features, W) + b        # linear transform
output = tf.nn.sigmoid(logits, name='output')    # sigmoid activation

# 3. Training configuration
# Loss function (binary cross-entropy on the logits)
cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=input_labels, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(0.04)   # gradient-descent optimizer
train = optimizer.minimize(cross_entropy)             # training op

# Training parameters
maxEpochs, minibatch_size = 50, 25            # number of epochs and batch size
num_batches = len(Y) // minibatch_size        # batches per epoch

# 4. Train the model and visualize the data
with tf.Session() as sess:
    # Training
    sess.run(tf.global_variables_initializer())   # initialize variables

    for epoch in range(maxEpochs):
        total_loss = 0
        # Reshuffle the data every epoch
        X_shuffled, Y_shuffled = shuffle(X, Y.reshape(-1, 1))

        # Mini-batch training
        for i in range(num_batches):
            start = i * minibatch_size
            # Run one training step and record the loss
            _, loss_val = sess.run(
                [train, cross_entropy],
                feed_dict={input_features: X_shuffled[start:start + minibatch_size],
                           input_labels: Y_shuffled[start:start + minibatch_size]})
            total_loss += loss_val

        # Report the average loss for this epoch
        print(f'Epoch: {epoch + 1:04d}/{maxEpochs}, Loss: {total_loss / num_batches:.4f}')

    # Visualize the decision boundary
    W_val, b_val = sess.run([W, b])   # trained parameters

    # Build a grid over the feature space
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))

    # Predicted probability at every grid point
    z = np.dot(np.c_[xx.ravel(), yy.ravel()], W_val) + b_val
    z = 1 / (1 + np.exp(-z))          # sigmoid
    z = z.reshape(xx.shape)

    # Plot the decision boundary
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.6)
    plt.title('Original Data')

    plt.subplot(1, 2, 2)
    plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.6)
    plt.contour(xx, yy, z, levels=[0.5], colors='k', linestyles='dashed')            # boundary line
    plt.contourf(xx, yy, z, levels=[0, 0.5, 1], colors=['red', 'blue'], alpha=0.1)   # class regions
    plt.title('Decision Boundary')
    plt.tight_layout()
    plt.show()
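The dashed contour above is drawn where the predicted probability equals 0.5, i.e. where the logit w0*x0 + w1*x1 + b crosses zero. If you prefer the explicit line form used in the TF 1.x section, it can presumably be plotted directly from the trained W_val and b_val left over from the session above (a sketch, assuming the second weight is non-zero):

# sigmoid(w0*x0 + w1*x1 + b) = 0.5  <=>  w0*x0 + w1*x1 + b = 0  <=>  x1 = -(w0/w1)*x0 - b/w1
w0, w1 = W_val[0, 0], W_val[1, 0]
xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 200)
ys = -(w0 / w1) * xs - b_val[0] / w1
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.6)
plt.plot(xs, ys, 'k--', label='Decision boundary (closed form)')
plt.legend()
plt.show()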
III. Multi-Layer Neural Network: Single-Class Linear Logistic Regression (3), TF 2.x
3.1 Single-class linear logistic regression (new)
# 1.3.1 Single-class linear logistic regression (new)
# -*- coding: utf-8 -*-
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Force eager execution + disable the legacy fallback
tf.config.run_functions_eagerly(True)
os.environ['TF_USE_LEGACY_KERAS'] = '0'  # key step: do not use the legacy optimizers

print("Current eager execution status:", tf.executing_eagerly())

# Fix the random seed
np.random.seed(10)
tf.random.set_seed(10)

# === Data generation ===
def generate_samples(sample_size, mean, cov, class_diff, task_type='classification'):
    n_per_class = sample_size // 2
    X0 = np.random.multivariate_normal(mean, cov, n_per_class)
    Y0 = np.zeros(n_per_class)
    X1 = np.random.multivariate_normal(mean + class_diff, cov, n_per_class)
    Y1 = np.ones(n_per_class)

    X = np.vstack([X0, X1])
    Y = np.hstack([Y0, Y1])

    idx = np.random.permutation(len(X))
    X, Y = X[idx], Y[idx]

    if task_type == 'classification':
        Y = Y.reshape(-1, 1).astype(np.float32)
    else:
        Y = Y.astype(np.float32)
    return X.astype(np.float32), Y.astype(np.float32)


# Parameters
input_dim = 2
mean = np.array([0.0, 0.0])
cov = np.eye(2)
class_diff = [3.0, 0.0]
sample_size = 1000

X_train, Y_train = generate_samples(sample_size, mean, cov, class_diff, 'classification')
print(f"Data shapes: X.shape={X_train.shape}, Y.shape={Y_train.shape}")

# Visualization
plt.figure(figsize=(8, 6))
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train.flatten(), cmap='bwr', alpha=0.7)
plt.xlabel('Scaled Age'), plt.ylabel('Tumor Size'), plt.title('Synthetic Dataset')
plt.colorbar(label='Class'), plt.grid(True, alpha=0.3)
plt.show()

# === Model definition ===
class LogisticRegression(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(
            units=1,
            activation='sigmoid',
            input_shape=(input_dim,),
            dtype=tf.float32)

    def call(self, inputs):
        return self.dense(inputs)


model = LogisticRegression()
model.build(input_shape=(None, input_dim))

# === Optimizer fix (the key change!) ===
learning_rate = 0.04
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)  # ✅ pass a plain float directly

# try:
#     model.compile(
#         optimizer=optimizer,
#         loss='binary_crossentropy',
#         metrics=['accuracy'],
#         run_eagerly=True  # force eager mode
#     )
# except Exception as e:
#     print(f"Compilation failed: {e}")
#     raise

print("\nModel structure:")
model.summary()

# # === Training ===
# try:
#     print("\nStarting training...")
#     history = model.fit(
#         x=X_train,
#         y=Y_train,
#         batch_size=25,
#         epochs=50,
#         verbose=1,
#         validation_split=0.2,
#         shuffle=True,
#         callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)]
#     )
#     print(f"\nTraining finished! Final validation accuracy: {history.history['val_accuracy'][-1]:.4f}")
# except Exception as e:
#     print(f"Training failed: {e}")
#     raise

# # === Visualize the decision boundary ===
# def plot_decision_boundary(X, Y, model, title='Decision boundary'):
#     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
#     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
#     xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
#                          np.linspace(y_min, y_max, 200))
#     Z = model.predict(np.c_[xx.ravel(), yy.ravel()], verbose=0)
#     Z = (Z > 0.5).astype(int).reshape(xx.shape)
#
#     plt.figure(figsize=(12, 5))
#     plt.subplot(1, 2, 1)
#     plt.scatter(X[:, 0], X[:, 1], c=Y.flatten(), cmap='bwr', alpha=0.7)
#     plt.title('Original data'), plt.xlabel('Scaled age'), plt.ylabel('Tumor size')
#
#     plt.subplot(1, 2, 2)
#     plt.contourf(xx, yy, Z, alpha=0.3, cmap='bwr')
#     plt.scatter(X[:, 0], X[:, 1], c=Y.flatten(), cmap='bwr', alpha=0.7)
#     plt.title(title), plt.xlabel('Scaled age'), plt.ylabel('Tumor size')
#     plt.tight_layout()
#     plt.show()
#
# plot_decision_boundary(X_train, Y_train, model, 'Logistic regression decision boundary')

# # === Training curves ===
# plt.figure(figsize=(12, 5))
# plt.subplot(1, 2, 1)
# plt.plot(history.history['loss'], label='Training loss', lw=2)
# plt.plot(history.history['val_loss'], label='Validation loss', lw=2)
# plt.title('Loss curves'), plt.xlabel('Epoch'), plt.ylabel('Loss'), plt.legend(), plt.grid(True)
#
# plt.subplot(1, 2, 2)
# plt.plot(history.history['accuracy'], label='Training accuracy', lw=2)
# plt.plot(history.history['val_accuracy'], label='Validation accuracy', lw=2)
# plt.title('Accuracy curves'), plt.xlabel('Epoch'), plt.ylabel('Accuracy'), plt.legend(), plt.grid(True)
# plt.tight_layout()
# plt.show()

# # === Save the model ===
# try:
#     model.save('logistic_regression_model.h5')
#     print("\nModel saved as 'logistic_regression_model.h5'")
#     loaded = tf.keras.models.load_model('logistic_regression_model.h5')
#     print("Model loaded successfully!")
# except Exception as e:
#     print(f"Saving failed: {e}")
3.2 Single-class linear logistic regression (part 1)
# 1.3.2 Single-class linear logistic regression (part 1)
# -*- coding: utf-8 -*-
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Disable legacy mode
os.environ['TF_USE_LEGACY_KERAS'] = '0'
tf.config.run_functions_eagerly(True)
print("Current eager execution status:", tf.executing_eagerly())

# Fix the random seed
np.random.seed(10)
tf.random.set_seed(10)

# === Data generation ===
def generate_samples(sample_size, mean, cov, class_diff):
    n_per_class = sample_size // 2
    X0 = np.random.multivariate_normal(mean, cov, n_per_class)
    Y0 = np.zeros(n_per_class)
    X1 = np.random.multivariate_normal(mean + class_diff, cov, n_per_class)
    Y1 = np.ones(n_per_class)

    X = np.vstack([X0, X1])
    Y = np.concatenate([Y0, Y1]).reshape(-1, 1)

    idx = np.random.permutation(len(X))
    return X[idx], Y[idx]


# Parameters
input_dim = 2
mean = np.array([0.0, 0.0])
cov = np.eye(2)
class_diff = [3.0, 0.0]
sample_size = 1000

X_train, Y_train = generate_samples(sample_size, mean, cov, class_diff)
print(f"Data shapes: X.shape={X_train.shape}, Y.shape={Y_train.shape}")

# === Visualization ===
plt.figure(figsize=(8, 6))
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train.flatten(), cmap='bwr')
plt.xlabel('Scaled Age'), plt.ylabel('Tumor Size'), plt.title('Synthetic Dataset')
plt.colorbar(label='Class'), plt.grid(True)
plt.show()

# === Model definition ===
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(input_dim,))
])

learning_rate = 0.04
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

print("Model structure:")
model.summary()
3.2 Single-class linear logistic regression (part 2)
# 1.3.2 Single-class linear logistic regression (part 2)

# === Model explanation (SHAP values) ===
# The SHAP library can be used for a more detailed model explanation.
# Install it first: pip install shap
# import shap
# explainer = shap.KernelExplainer(model.predict, X_train[:100])
# shap_values = explainer.shap_values(X_train[:100])
# shap.summary_plot(shap_values, X_train[:100])

# === Compile the model ===
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# === Train the model ===
epochs = 100
batch_size = 32

history = model.fit(X_train, Y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=1,
                    validation_split=0.2)  # use 20% of the data for validation

# === Visualize the training results ===
# Plot the training and validation loss curves
plt.figure(figsize=(12, 4))

# Loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Accuracy
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

# === Visualize the decision boundary ===
# Build a grid of points
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# Predict the class probability at each grid point
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the decision boundary and the data points
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, cmap='bwr', alpha=0.6)
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train.flatten(), cmap='bwr', edgecolor='k')
plt.xlabel('Scaled Age')
plt.ylabel('Tumor Size')
plt.title('Decision Boundary')
plt.colorbar(label='Class')
plt.grid(True)
plt.show()

# === Evaluate the model ===
test_loss, test_acc = model.evaluate(X_train[-200:], Y_train[-200:], verbose=0)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")# === 保存模型 ===
model.save('simple_binary_classifier.h5')
print("模型已保存为 simple_binary_classifier.h5")# === 使用模型进行预测 ===
def predict_sample(sample):"""对单个样本进行预测"""prob = model.predict(np.array([sample]))[0][0]class_pred = 1 if prob > 0.5 else 0return class_pred, prob# 示例预测
sample_to_predict = [1.5, 0.5]  # example input
class_pred, prob = predict_sample(sample_to_predict)
print(f"Predicted class: {class_pred}, probability: {prob:.4f}")

# Predict on the held-out slice and compute the accuracy
y_pred = model.predict(X_train[-200:]) > 0.5
accuracy = np.mean((y_pred == Y_train[-200:]).astype(int))
print(f"Test-set accuracy: {accuracy:.4f}")

# === Model weights and bias ===
weights, bias = model.get_weights()
print("模型权重 (W):", weights)
print("模型偏置 (b):", bias)# === 生成混淆矩阵 ===
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplayy_true = Y_train[-200:].flatten()
y_pred = (model.predict(X_train[-200:]) > 0.5).flatten()cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp.plot(cmap='Blues')
plt.title('Confusion Matrix')
plt.show()

# === ROC curve and AUC ===
from sklearn.metrics import roc_curve, auc

fpr, tpr, _ = roc_curve(y_true, model.predict(X_train[-200:]))
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# === Feature-importance analysis ===
# Because the model is linear, the magnitude of each weight reflects the importance of that feature
feature_importance = np.abs(weights.flatten())
print("Feature importance:", feature_importance)
plt.bar(['Feature 1', 'Feature 2'], feature_importance)
plt.title('Feature Importance')
plt.ylabel('Importance')
plt.show()
3.3 Single-class linear logistic regression (brand new)
# 1.3.3 Single-class linear logistic regression (brand new)
# -*- coding: utf-8 -*-
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Fix 1: remove the eager-execution settings that are unnecessary on M1/M2 Macs
# Fix 2: use the new Keras optimizer API
print("TensorFlow version:", tf.__version__)
print("Current eager execution status:", tf.executing_eagerly())

# Fix the random seed
np.random.seed(10)
tf.random.set_seed(10)

# === Data generation ===
def generate_samples(sample_size, mean, cov, class_diff):
    n_per_class = sample_size // 2
    X0 = np.random.multivariate_normal(mean, cov, n_per_class)
    Y0 = np.zeros(n_per_class)
    X1 = np.random.multivariate_normal(mean + class_diff, cov, n_per_class)
    Y1 = np.ones(n_per_class)

    X = np.vstack([X0, X1])
    Y = np.concatenate([Y0, Y1]).reshape(-1, 1)

    idx = np.random.permutation(len(X))
    return X[idx].astype(np.float32), Y[idx].astype(np.float32)


# Parameters
input_dim = 2
mean = np.array([0.0, 0.0])
cov = np.eye(2)
class_diff = [3.0, 0.0]
sample_size = 1000

X_train, Y_train = generate_samples(sample_size, mean, cov, class_diff)
print(f"Data shapes: X.shape={X_train.shape}, Y.shape={Y_train.shape}")

# === Visualization ===
plt.figure(figsize=(8, 6))
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train.flatten(), cmap='bwr')
plt.xlabel('Scaled Age'), plt.ylabel('Tumor Size'), plt.title('Synthetic Dataset')
plt.colorbar(label='Class'), plt.grid(True)
plt.show()

# === Model definition ===
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(input_dim,))
])

# Fix 3: use the new optimizer API
learning_rate = 0.04
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

print("Model structure:")
model.summary()

# Fix 4: drop the SHAP explanation part (optional)
# import shap
# explainer = shap.KernelExplainer(model.predict, X_train[:100])
# shap_values = explainer.shap_values(X_train[:100])
# shap.summary_plot(shap_values, X_train[:100])

# === Compile the model ===
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# === Train the model ===
epochs = 100
batch_size = 32

history = model.fit(X_train, Y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=1,
                    validation_split=0.2)  # use 20% of the data for validation

# === Visualize the training results ===
plt.figure(figsize=(12, 4))

# Loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Accuracy
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

# === Visualize the decision boundary ===
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# Predict the class probability at each grid point
Z = model.predict(np.c_[xx.ravel(), yy.ravel()], verbose=0)
Z = Z.reshape(xx.shape)

plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, cmap='bwr', alpha=0.6)
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train.flatten(), cmap='bwr', edgecolor='k')
plt.xlabel('Scaled Age')
plt.ylabel('Tumor Size')
plt.title('Decision Boundary')
plt.colorbar(label='Class')
plt.grid(True)
plt.show()

# === Evaluate the model ===
test_loss, test_acc = model.evaluate(X_train[-200:], Y_train[-200:], verbose=0)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")# === 保存模型 ===
model.save('simple_binary_classifier.h5')
print("模型已保存为 simple_binary_classifier.h5")# === 使用模型进行预测 ===
def predict_sample(sample):"""对单个样本进行预测"""prob = model.predict(np.array([sample]), verbose=0)[0][0]class_pred = 1 if prob > 0.5 else 0return class_pred, prob# 示例预测
sample_to_predict = [1.5, 0.5]  # example input
class_pred, prob = predict_sample(sample_to_predict)
print(f"Predicted class: {class_pred}, probability: {prob:.4f}")

# Test-set accuracy
y_pred = model.predict(X_train[-200:], verbose=0) > 0.5
accuracy = np.mean((y_pred == Y_train[-200:]).astype(int))
print(f"Test-set accuracy: {accuracy:.4f}")

# === Model weights and bias ===
weights, bias = model.get_weights()
print("模型权重 (W):", weights)
print("模型偏置 (b):", bias)# === 生成混淆矩阵 ===
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplayy_true = Y_train[-200:].flatten()
y_pred = (model.predict(X_train[-200:], verbose=0) > 0.5).flatten()cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp.plot(cmap='Blues')
plt.title('Confusion Matrix')
plt.show()

# === ROC curve and AUC ===
from sklearn.metrics import roc_curve, auc

fpr, tpr, _ = roc_curve(y_true, model.predict(X_train[-200:], verbose=0))
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# === Feature-importance analysis ===
feature_importance = np.abs(weights.flatten())
print("Feature importance:", feature_importance)
plt.bar(['Feature 1', 'Feature 2'], feature_importance)
plt.title('Feature Importance')
plt.ylabel('Importance')
plt.show()
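One small follow-up not covered in the original code: the file saved above ('simple_binary_classifier.h5') can be reloaded later and used for prediction without retraining. A minimal sketch, assuming the model.save call above completed successfully:

# Hypothetical reload of the model saved above
restored = tf.keras.models.load_model('simple_binary_classifier.h5')
prob = restored.predict(np.array([[1.5, 0.5]]), verbose=0)[0][0]
print(f"Restored-model probability for [1.5, 0.5]: {prob:.4f}")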
Putting this together took real effort. If you found it useful, please like, bookmark, or comment to support it; that is what keeps these updates coming. If you spot anything questionable or have suggestions, please leave a comment and point it out, so we can learn and improve together.