Python——鸢尾花和手写数字识别:用线性核和高斯核训练一个 SVM  题目:
在鸢尾花数据集和 MNIST 手写数字数据库上,分别用线性核和高斯核训练一个 SVM,写出实验对比结果。要求:
(1)使用 70% 的数据作为训练集,然后在剩余 30% 的数据上进行测试。
(2)计算分类精确度值。
步骤:
# -*- coding: utf-8 -*-
###鸢尾花线性核、⾼斯核###
import csv
import random

import numpy as np
from sklearn import svm
from sklearn.metrics import classification_report
def loadDataSet(path='E:/pywork/test/sy-5/iris/iris.csv', test_ratio=0.3):
    """Read a numeric CSV file and randomly split its rows into train/test sets.

    Only the first five columns of every non-empty row are kept, and each of
    them is converted with ``float``; the later code treats the first four as
    features and the last one as the class label.

    Args:
        path: CSV file to read. Defaults to the location used by the original
            script.
        test_ratio: Fraction of the rows moved into the test set. The default
            of 0.3 gives the 70/30 split; expected to lie in ``[0, 1]``.

    Returns:
        ``(dataMat, testMat)``: the remaining (training) rows and the randomly
        drawn test rows, each a list of five-float lists.

    Raises:
        IndexError: a non-empty row has fewer than five columns.
        ValueError: one of the first five cells is not a number.
    """
    # The context manager closes the file even if a row fails to parse.
    with open(path, newline='') as handle:
        data0 = [
            [float(line[i]) for i in range(5)]
            for line in csv.reader(handle)
            if line  # csv.reader yields [] for blank lines
        ]

    testMat = []
    for _ in range(int(len(data0) * test_ratio)):
        # randrange never returns len(data0), unlike int(random.uniform(0, n))
        # which can land exactly on the upper bound and raise IndexError.
        testMat.append(data0.pop(random.randrange(len(data0))))
    return data0, testMat
def separate(dataset, n_features=4):
    """Split rows into a feature matrix and a label list.

    Args:
        dataset: Iterable of indexable rows whose last element is the label.
        n_features: Number of leading columns copied into the feature vector
            (default 4, matching the original hard-coded value).

    Returns:
        ``(dataMat, labelMat)``: one feature list per row, and the last
        element of every row.

    Raises:
        IndexError: a row has fewer than ``n_features`` elements.
    """
    dataMat = []
    labelMat = []
    for line in dataset:
        dataMat.append([line[i] for i in range(n_features)])
        labelMat.append(line[-1])
    return dataMat, labelMat
def test(X, model):
    """Score ``model`` on labelled rows and print the fraction it gets right.

    Args:
        X: Iterable of rows; the first four elements of each row are the
            features and the last element is the true label. Both predicted
            and true labels are compared after conversion with ``int``.
        model: Object with a ``predict(rows)`` method that returns one label
            per feature row.

    Returns:
        ``(rightRate, b)``: the accuracy as a float and the list of predicted
        labels as ints. For empty input the result is ``(0.0, [])``.
    """
    rows = list(X)
    features = [[float(line[i]) for i in range(4)] for line in rows]

    if not features:
        # Nothing to score: avoid dividing by zero and do not call predict().
        rightRate = 0.0
        print("the right rate of this test is:%f" % rightRate)
        return rightRate, []

    # One batched call instead of one predict() per row; this also avoids
    # calling int() on a one-element array.
    b = [int(label) for label in model.predict(features)]
    rightCount = sum(
        1 for line, guess in zip(rows, b) if guess == int(line[-1])
    )
    rightRate = float(rightCount) / len(b)
    print("the right rate of this test is:%f" % rightRate)
    return rightRate, b


# The function is called ``test``; tell pytest not to collect it as a test case.
test.__test__ = False
# Draw one random train/test split and reuse it for both kernels so that the
# two results are comparable.
trainSet, testSet = loadDataSet()
trainMat, trainlabels = separate(trainSet)
testMat, testlabels = separate(testSet)


def _fit_and_evaluate(kernel):
    """Fit an SVC with ``kernel`` on the training split and score the test split.

    Returns the classifier, the accuracy and predictions reported by
    ``test``, and the text produced by ``classification_report``.
    """
    classifier = svm.SVC(kernel=kernel)
    classifier.fit(trainMat, trainlabels)
    accuracy, predicted = test(testSet, classifier)
    report = classification_report(testlabels, predicted)
    return classifier, accuracy, predicted, report


clf1, try1, re1, com1 = _fit_and_evaluate('linear')
clf2, try2, re2, com2 = _fit_and_evaluate('rbf')

# Print the summaries only after both models have been evaluated.
for _heading, _accuracy, _report in (('linear:', try1, com1), ('rbf', try2, com2)):
    print(_heading, _accuracy)
    print(_report)
运⾏结果:
# -*- coding: utf-8 -*-
#### 鸢尾花数据集使用 SVM 线性分类(涉及 numpy 的 linspace)####
###导⼊相关的包
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
### Load the data
iris = datasets.load_iris()
# One row per sample, four feature columns (named in iris.feature_names).
X = iris.data
# Class label of every row; the values are 0, 1 or 2.
Y = iris.target
# The data set has 150 rows, 50 per class, stored in class order 0, 1, 2.
# The positional slicing below relies on that ordering.

### Prepare the data
# Keep only the first two features.
X = X[:, :2]
# Labels (and count) of the samples in classes 0 and 1.
Y1 = Y[Y < 2]
y1 = len(Y1)
# Labels (and count) of the samples in class 0.
Y2 = Y[Y < 1]
y2 = len(Y2)
# Keep only the rows of classes 0 and 1.
X = X[:y1, :2]

### Plot the raw data (figure 1)
plt.scatter(X[0:y2, 0], X[0:y2, 1], color='red')  # class 0
# Class 1 starts at row y2; starting at y2 + 1 would drop its first sample.
plt.scatter(X[y2:y1, 0], X[y2:y1, 1], color='blue')  # class 1
plt.title("iris-1")
plt.show()

### Standardise the features
standardScaler = StandardScaler()
# Learn the per-feature mean and variance from the data ...
standardScaler.fit(X)
# ... and use them to rescale X.
X_standard = standardScaler.transform(X)

# A very large C leaves the linear SVM almost unregularised (hard margin).
svc = LinearSVC(C=1e9)
svc.fit(X_standard, Y1)
###画出决策边界(此时得到图2)
def plot_decision_boundary(model, axis):
    """Shade the plane according to the class ``model`` predicts at each point.

    Args:
        model: Fitted estimator whose ``predict`` accepts an ``(n, 2)`` array
            of points and returns one label per point.
        axis: ``[x_min, x_max, y_min, y_max]`` bounds of the region to draw.

    The filled contour is drawn on the current matplotlib figure; the caller
    is responsible for calling ``plt.show()``.
    """
    from matplotlib.colors import ListedColormap

    # 100 grid points per axis unit, e.g. 600 x 600 for axis=[-3, 3, -3, 3].
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    # Flatten both coordinate grids and pair them up: one row per grid point,
    # because predict() expects a 2-D array of samples.
    X_new = np.c_[x0.ravel(), x1.ravel()]
    # For axis=[-3, 3, -3, 3] X_new looks like:
    # [[-3.         -3.        ]
    #  [-2.98998331 -3.        ]
    #  [-2.97996661 -3.        ]
    #  ...
    #  [ 2.97996661  3.        ]
    #  [ 2.98998331  3.        ]
    #  [ 3.          3.        ]]
    y_predict = np.asarray(model.predict(X_new))
    # Back to the grid layout so every prediction lines up with its (x0, x1).
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
# Figure 2: decision regions of the nearly hard-margin model ``svc``.
plot_decision_boundary(svc, axis=[-3, 3, -3, 3])
plt.scatter(X_standard[0:y2, 0], X_standard[0:y2, 1], color='red')
plt.scatter(X_standard[y2:y1, 0], X_standard[y2:y1, 1], color='blue')
plt.title("iris-2")
plt.show()

### Figure 3: a second model, svc2, with a much smaller C
svc2 = LinearSVC(C=0.01)
svc2.fit(X_standard, Y1)
# Show the learned weights and bias of the separating line.
print(svc2.coef_)
print(svc2.intercept_)
plot_decision_boundary(svc2, axis=[-3, 3, -3, 3])
plt.scatter(X_standard[0:y2, 0], X_standard[0:y2, 1], color='red')
plt.scatter(X_standard[y2:y1, 0], X_standard[y2:y1, 1], color='blue')
plt.title("iris-3")
plt.show()
###对分好类的内容基础上添加上下边界(此时得到图4)
def plot_svc_decision_boundary(model, axis):
    """Shade the predicted classes and draw the two margin lines of a linear SVM.

    Args:
        model: Fitted linear classifier exposing ``predict``, ``coef_`` and
            ``intercept_`` for two input features. The margin lines are
            computed by dividing by ``coef_[0][1]``, so that weight must be
            non-zero.
        axis: ``[x_min, x_max, y_min, y_max]`` bounds of the region to draw.

    Everything is drawn on the current matplotlib figure; the caller is
    responsible for calling ``plt.show()``.
    """
    from matplotlib.colors import ListedColormap

    # 100 grid points per axis unit, e.g. 600 x 600 for axis=[-3, 3, -3, 3].
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    # One row per grid point, because predict() expects a 2-D array of samples.
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = np.asarray(model.predict(X_new))
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    plt.contourf(x0, x1, zz, cmap=custom_cmap)

    w = model.coef_[0]
    b = model.intercept_[0]
    index_x = np.linspace(axis[0], axis[1], 100)
    # Decision function: f(x1, x2) = w[0]*x1 + w[1]*x2 + b
    #   upper margin:  w[0]*x1 + w[1]*x2 + b =  1
    #   lower margin:  w[0]*x1 + w[1]*x2 + b = -1
    y_up = (1 - w[0] * index_x - b) / w[1]
    y_down = (-1 - w[0] * index_x - b) / w[1]

    # Keep only the parts of each line that fall inside the vertical bounds.
    up_inside = (y_up <= axis[3]) & (y_up >= axis[2])
    down_inside = (y_down <= axis[3]) & (y_down >= axis[2])
    plt.plot(index_x[up_inside], y_up[up_inside], color="black")
    plt.plot(index_x[down_inside], y_down[down_inside], color="black")
# plot_svc_decision_boundary(svc, axis=[-3, 3, -3, 3])
# plt.scatter(X_standard[0:y2,0], X_standard[0:y2,1],color='red')
# plt.scatter(X_standard[y2:y1,0], X_standard[y2:y1,1],color='blue')
# plt.title("iris-4")
# plt.show()
###修改c值(此时得到图5)
# Decision regions and margin lines of svc2, with the samples on top.
plot_svc_decision_boundary(svc2, axis=[-3, 3, -3, 3])
# Rows [0, y2) are drawn in red, rows [y2, y1) in blue.
for _rows, _colour in ((slice(0, y2), 'red'), (slice(y2, y1), 'blue')):
    plt.scatter(X_standard[_rows, 0], X_standard[_rows, 1], color=_colour)
plt.show()
### Compute the classification accuracy
import pandas as pd
from sklearn.model_selection import train_test_split

# The CSV file has no header row, so the column names are supplied here.
# header=None stops pandas from consuming the first sample as a header.
iris_data = pd.read_csv(
    'iris.csv',
    header=None,
    names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'],
)
print(iris_data.head(5))
# NOTE: the summary is only displayed in an interactive session; as a script
# statement its result is discarded.
iris_data.describe()

all_x = iris_data[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
all_y = iris_data['class']
# 70 % training / 30 % test; the fixed seed makes the split reproducible.
(training_x, testing_x, training_y, testing_y) = train_test_split(
    all_x, all_y, test_size=0.3, random_state=1
)
def show_accuracy(y_hat, y_train, str):
    """Placeholder reporting hook; it intentionally does nothing.

    Args:
        y_hat: Predicted labels (unused).
        y_train: True labels (unused).
        str: Caption such as the name of the data split (unused). The name
            shadows the builtin but is kept so existing callers keep working.

    Returns:
        None.
    """
    return None
###⾼斯核函数
def SVM():
    """Fit an RBF-kernel SVC on the training split and print both accuracies.

    Reads the module-level ``training_x``, ``training_y``, ``testing_x`` and
    ``testing_y``; prints the accuracy on the training and the test split and
    passes the predictions to ``show_accuracy``.
    """
    from sklearn import svm

    classifier = svm.SVC(kernel='rbf', gamma=0.1, decision_function_shape='ovo', C=0.8)
    # The label Series is passed as-is; Series.ravel() is deprecated in pandas
    # and scikit-learn accepts a 1-D Series directly.
    classifier.fit(training_x, training_y)

    print("⾼斯核数据集的准确率:", classifier.score(training_x, training_y))
    y_hat = classifier.predict(training_x)
    show_accuracy(y_hat, training_y, '训练集')

    print("测试集的准确率:", classifier.score(testing_x, testing_y))
    y_hat = classifier.predict(testing_x)
    show_accuracy(y_hat, testing_y, '测试集')


SVM()
###线性核函数
def SVM():
    """Fit a linear-kernel SVC on the training split and print both accuracies.

    Reads the module-level ``training_x``, ``training_y``, ``testing_x`` and
    ``testing_y``; prints the accuracy on the training and the test split and
    passes the predictions to ``show_accuracy``.
    """
    from sklearn import svm

    # No gamma here: it is a coefficient of the rbf/poly/sigmoid kernels and
    # has no effect on a linear kernel.
    classifier = svm.SVC(kernel='linear', decision_function_shape='ovo', C=0.8)
    # The label Series is passed as-is; Series.ravel() is deprecated in pandas
    # and scikit-learn accepts a 1-D Series directly.
    classifier.fit(training_x, training_y)

    print("线性核训练集的准确率:", classifier.score(training_x, training_y))
    y_hat = classifier.predict(training_x)
    show_accuracy(y_hat, training_y, '训练集')

    print("测试集的准确率:", classifier.score(testing_x, testing_y))
    y_hat = classifier.predict(testing_x)
    show_accuracy(y_hat, testing_y, '测试集')


SVM()
运⾏结果:

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。