
Classification Problems - Machine Learning

  1. Classification: Perceptron

    A simple task: decide whether an image is portrait or landscape from its width x1 and height x2 (y = 1 for landscape, y = -1 for portrait).

    Training data: images1.csv

    x1,x2,y
    153,432,-1
    220,262,-1
    118,214,-1
    474,384,1
    485,411,1
    233,430,-1
    396,361,1
    484,349,1
    429,259,1
    286,220,1
    399,433,-1
    403,340,1
    252,34,1
    497,472,1
    379,416,-1
    76,163,-1
    263,112,1
    26,193,-1
    61,473,-1
    420,253,1
    
    import numpy as np
    import matplotlib.pyplot as plt

    # Read the training data, skipping the header row
    train = np.loadtxt('images1.csv', delimiter=',', skiprows=1)
    # First two columns: width and height
    train_x = train[:, 0:2]
    # Third column: label
    train_y = train[:, 2]

    # plt.plot(train_x[train_y == 1, 0], train_x[train_y == 1, 1], 'o')
    # plt.plot(train_x[train_y == -1, 0], train_x[train_y == -1, 1], 'x')
    # plt.axis('scaled')
    # plt.show()

    # Initialize the weights: w·x = w1*x1 + w2*x2 = 0
    w = np.random.rand(2)

    # Discriminant function
    def f(x):
        if np.dot(w, x) >= 0:
            return 1
        else:
            return -1

    epoch = 10   # number of epochs
    count = 0    # number of updates

    # Learn the weights
    for _ in range(epoch):
        for x, y in zip(train_x, train_y):
            if f(x) != y:
                w = w + y * x
                # Log the update
                count += 1
                print('Update {}: w = {}'.format(count, w))

    # Decision boundary: w·x = w1*x1 + w2*x2 = 0  =>  x2 = -w1/w2 * x1
    x1 = np.arange(0, 500)
    plt.plot(train_x[train_y == 1, 0], train_x[train_y == 1, 1], 'o')
    plt.plot(train_x[train_y == -1, 0], train_x[train_y == -1, 1], 'x')
    plt.plot(x1, -w[0] / w[1] * x1, linestyle='dashed')
    plt.show()

    # Predictions
    print(f([200, 100]))   # 200x100 is landscape, expect 1
    print(f([100, 200]))   # 100x200 is portrait, expect -1

    [Figure: training samples (o = landscape, x = portrait) with the learned perceptron decision boundary shown as a dashed line]
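
    Why the update w = w + y * x inside the training loop helps: for a misclassified sample it changes the score w·x by exactly y * ||x||^2, i.e. it pushes the score toward the sign of the true label. A minimal numerical check (the weight vector and sample below are made up for illustration):

    import numpy as np

    w = np.array([0.1, -0.5])
    x = np.array([200.0, 100.0])   # hypothetical sample with true label y = 1
    y = 1
    print(np.dot(w, x))            # -30.0 -> f(x) = -1, misclassified
    w_new = w + y * x
    print(np.dot(w_new, x))        # 49970.0 = -30.0 + y * (200^2 + 100^2), now positive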

  2. Classification: Logistic Regression

    Training data: images2.csv (the same images as above, but labeled 0/1 instead of -1/1 to match the 0-to-1 output of the sigmoid)

    x1,x2,y
    153,432,0
    220,262,0
    118,214,0
    474,384,1
    485,411,1
    233,430,0
    396,361,1
    484,349,1
    429,259,1
    286,220,1
    399,433,0
    403,340,1
    252,34,1
    497,472,1
    379,416,0
    76,163,0
    263,112,1
    26,193,0
    61,473,0
    420,253,1
    
    import numpy as np
    import matplotlib.pyplot as plt

    # Read the training data
    train = np.loadtxt('images2.csv', delimiter=',', skiprows=1)
    train_x = train[:, 0:2]
    train_y = train[:, 2]

    # Initialize the parameters
    theta = np.random.rand(3)

    # Standardization: axis=0 computes the mean and standard deviation of each column
    mu = train_x.mean(axis=0)
    sigma = train_x.std(axis=0)
    def standardize(x):
        return (x - mu) / sigma
    train_z = standardize(train_x)

    # Add the bias feature x0
    def to_matrix(x):
        # Column of ones with the same number of rows as x
        x0 = np.ones([x.shape[0], 1])
        # Combine into a single matrix
        return np.hstack([x0, x])
    X = to_matrix(train_z)

    # Visualization
    '''
    plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
    plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
    plt.show()
    '''

    # Sigmoid function
    def f(x):
        return 1 / (1 + np.exp(-np.dot(x, theta)))

    ETA = 1e-3     # learning rate
    epoch = 5000   # number of iterations

    # Training loop
    for _ in range(epoch):
        theta = theta - ETA * np.dot(f(X) - train_y, X)

    # Decision boundary: theta.T x = theta0*x0 + theta1*x1 + theta2*x2 = 0
    # => x2 = -(theta0 + theta1*x1) / theta2
    x0 = np.linspace(-2, 2, 100)
    plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
    plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
    plt.plot(x0, -(theta[0] + theta[1] * x0) / theta[2], linestyle='dashed')
    plt.show()

    # Prediction
    # astype(np.int_) converts booleans to integers (True -> 1, False -> 0),
    # so the output is a 0/1 class label
    def classify1(x):
        return (f(x) >= 0.5).astype(np.int_)

    array = classify1(to_matrix(standardize([[200, 100], [100, 200]])))
    print(array)
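
    The update line theta = theta - ETA * np.dot(f(X) - train_y, X) is batch gradient descent on the logistic-regression (cross-entropy) objective: the gradient with respect to each theta_j is the sum over the samples of (f(x) - y) * x_j, and the single np.dot call computes all of these sums at once. A small check of that equivalence (reusing X, train_y, theta and f from the script above):

    import numpy as np

    grad = np.array([np.sum((f(X) - train_y) * X[:, j]) for j in range(X.shape[1])])
    print(np.allclose(grad, np.dot(f(X) - train_y, X)))   # True

    Since f(x) is the sigmoid of theta·x, it lies in (0, 1) and can be read as the estimated probability that y = 1, which is why classify1 thresholds it at 0.5.
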
  3. Classification: A Linearly Non-Separable Problem

    Training data: data3.csv

    x1,x2,y
    0.54508775,2.34541183,0
    0.32769134,13.43066561,0
    4.42748117,14.74150395,0
    2.98189041,-1.81818172,1
    4.02286274,8.90695686,1
    2.26722613,-6.61287392,1
    -2.66447221,5.05453871,1
    -1.03482441,-1.95643469,1
    4.06331548,1.70892541,1
    2.89053966,6.07174283,0
    2.26929206,10.59789814,0
    4.68096051,13.01153161,1
    1.27884366,-9.83826738,1
    -0.1485496,12.99605136,0
    -0.65113893,10.59417745,0
    3.69145079,3.25209182,1
    -0.63429623,11.6135625,0
    0.17589959,5.84139826,0
    0.98204409,-9.41271559,1
    -0.11094911,6.27900499,0
    
    import numpy as np
    import matplotlib.pyplot as plt

    # Read the training data
    train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
    train_x = train[:, 0:2]
    train_y = train[:, 2]

    '''
    plt.plot(train_x[train_y == 1, 0], train_x[train_y == 1, 1], 'o')
    plt.plot(train_x[train_y == 0, 0], train_x[train_y == 0, 1], 'x')
    plt.show()
    '''

    # Initialize the parameters
    theta = np.random.rand(4)
    # Accuracy history
    accuracies = []

    # Standardization
    mu = train_x.mean(axis=0)
    sigma = train_x.std(axis=0)
    def standardize(x):
        return (x - mu) / sigma
    train_z = standardize(train_x)

    # Add x0 (bias) and x3 (= x1 squared)
    def to_matrix(x):
        x0 = np.ones([x.shape[0], 1])
        x3 = x[:, 0, np.newaxis] ** 2
        return np.hstack([x0, x, x3])
    X = to_matrix(train_z)

    # Sigmoid function
    def f(x):
        return 1 / (1 + np.exp(-np.dot(x, theta)))

    ETA = 1e-3     # learning rate
    epoch = 5000   # number of iterations

    def classify1(x):
        return (f(x) >= 0.5).astype(np.int_)

    # Training loop
    for _ in range(epoch):
        theta = theta - ETA * np.dot(f(X) - train_y, X)
        # Record the current accuracy
        result = classify1(X) == train_y
        accuracy = len(result[result == True]) / len(result)
        accuracies.append(accuracy)

    # Decision boundary: theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0
    # => x2 = -(theta0 + theta1*x1 + theta3*x1^2) / theta2
    x1 = np.linspace(-2, 2, 100)
    x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
    plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
    plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
    plt.plot(x1, x2, linestyle='dashed')
    plt.show()

    # Plot the accuracy curve
    # x = np.arange(len(accuracies))
    # plt.plot(x, accuracies)
    # plt.show()
    

    [Figure: standardized training samples with the learned quadratic decision boundary shown as a dashed curve]
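
    This section omits the prediction step shown in section 2, but the same helpers apply; a minimal sketch reusing the standardize, to_matrix and classify1 defined above (the two test points are made up for illustration):

    import numpy as np

    new_points = np.array([[2.0, -5.0], [0.5, 12.0]])       # hypothetical (x1, x2) pairs on the original scale
    print(classify1(to_matrix(standardize(new_points))))    # prints an array of 0/1 labels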

    Because the training set is very small (only 20 samples), the accuracy can only take values that are integer multiples of 0.05, so the accuracy curve looks jagged (see the quick check below the figure):

    [Figure: accuracy over training iterations, changing in steps of 0.05]
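
    A quick check of that claim: with 20 samples, each additional correct prediction raises the accuracy by exactly 1/20, so every value the curve can take is a multiple of 0.05.

    import numpy as np

    n = 20
    print(np.arange(n + 1) / n)   # 0.0, 0.05, 0.10, ..., 1.0 -- the only accuracies a 20-sample set allows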

  4. Classification: Linearly Non-Separable Problem with Stochastic Gradient Descent

    Training data: same as above (data3.csv)

    import numpy as np
    import matplotlib.pyplot as plt

    # Read the training data
    train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
    train_x = train[:, 0:2]
    train_y = train[:, 2]

    '''
    plt.plot(train_x[train_y == 1, 0], train_x[train_y == 1, 1], 'o')
    plt.plot(train_x[train_y == 0, 0], train_x[train_y == 0, 1], 'x')
    plt.show()
    '''

    # Initialize the parameters
    theta = np.random.rand(4)
    accuracies = []   # accuracy history (not used in this version)

    # Standardization
    mu = train_x.mean(axis=0)
    sigma = train_x.std(axis=0)
    def standardize(x):
        return (x - mu) / sigma
    train_z = standardize(train_x)

    # Add x0 (bias) and x3 (= x1 squared)
    def to_matrix(x):
        x0 = np.ones([x.shape[0], 1])
        x3 = x[:, 0, np.newaxis] ** 2
        return np.hstack([x0, x, x3])
    X = to_matrix(train_z)

    # Sigmoid function
    def f(x):
        return 1 / (1 + np.exp(-np.dot(x, theta)))

    ETA = 1e-3     # learning rate
    epoch = 5000   # number of iterations

    def classify1(x):
        return (f(x) >= 0.5).astype(np.int_)

    # Training loop: stochastic gradient descent
    for _ in range(epoch):
        # Shuffle the samples, then update on them one at a time
        p = np.random.permutation(X.shape[0])
        for x, y in zip(X[p, :], train_y[p]):
            theta = theta - ETA * (f(x) - y) * x

    # Decision boundary: theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0
    # => x2 = -(theta0 + theta1*x1 + theta3*x1^2) / theta2
    x1 = np.linspace(-2, 2, 100)
    x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
    plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
    plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
    plt.plot(x1, x2, linestyle='dashed')
    plt.show()
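
    The only change from section 3 is the update step: instead of summing the gradient over all samples once per epoch, stochastic gradient descent shuffles the samples and applies each per-sample term (f(x) - y) * x on its own. A small check that those per-sample terms add up to the batch gradient used in section 3 (reusing X, train_y, theta and f from the script above):

    import numpy as np

    per_sample = [(f(x) - y) * x for x, y in zip(X, train_y)]   # the terms SGD applies one at a time
    batch = np.dot(f(X) - train_y, X)                           # the single batch update of section 3
    print(np.allclose(np.sum(per_sample, axis=0), batch))       # True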
    