matplotlib.pyplot绘制决策树的准确率,召回率,ROC,特征重要性
因为训练模型需要返回模型评价指标,但是召回率和ROC不能很好地通过返回值展示,所以决定把相关评价指标绘制成图片。
DecisionTree2Pmml.py
import sys
import os
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, auc, confusion_matrix, f1_score, precision_score, recall_score, roc_curve
import prettytable # 导⼊表格库
import matplotlib.pyplot as plt # 导⼊图形展⽰库
from time import sleep
# 导⼊指标库
import numpy as np
def main(modelName, trainingFilePath, figurePath="E:/data/out/examples.png"):
    """Train a decision tree, plot its evaluation metrics, and export it as PMML.

    The CSV at ``trainingFilePath`` is loaded with pandas. The ``response``
    column is used as the label and every other column as a feature. The data
    is split 80/20 (``random_state=0``), a ``DecisionTreeClassifier`` wrapped
    in a ``PMMLPipeline`` is fitted on the training part, and a 2x2 figure is
    saved containing:

    1. the ROC curve on the test split,
    2. a bar chart of the tree's feature importances,
    3. the test accuracy (as a title on an empty axis),
    4. the text of ``classification_report`` (as a title on an empty axis).

    Finally the fitted pipeline is written to ``modelName`` as PMML.

    Args:
        modelName: Path of the PMML file to write.
        trainingFilePath: Path of the training CSV; must contain a
            ``response`` column.
        figurePath: Path of the PNG to write. Defaults to the location the
            script has always used.
    """
    import pandas
    from sklearn.tree import DecisionTreeClassifier
    from sklearn2pmml import sklearn2pmml
    from sklearn2pmml.pipeline import PMMLPipeline

    print(modelName)
    print(trainingFilePath)

    iris_df = pandas.read_csv(trainingFilePath)
    # Everything except the label column is a feature. The same Index is
    # reused for the bar-chart tick labels so they line up with the columns
    # the tree was trained on.
    feature_columns = iris_df.columns.difference(["response"])
    x = iris_df[feature_columns]
    y = iris_df["response"]
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=0
    )

    cur_tree = DecisionTreeClassifier()
    pipeline = PMMLPipeline([
        ("classifier", cur_tree),
    ])
    pipeline.fit(x_train, y_train)
    predictions = cur_tree.predict(x_test)

    # Accuracy: fraction of test samples predicted correctly, kept to 4 decimals.
    mean = float('%.4f' % np.mean(predictions == y_test))
    # Per-class precision/recall/F1 as a preformatted text table.
    recall_str = classification_report(y_test, predictions)

    # ROC is computed from the second probability column.
    # NOTE(review): this presumably expects a binary ``response`` - confirm
    # against the training data.
    y_score = cur_tree.predict_proba(x_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_score[:, 1])

    names_list = feature_columns.tolist()
    color_list = ['r', 'c', 'b', 'g']
    feature_importance = cur_tree.feature_importances_

    # Keep a handle on the figure so layout, saving and closing all act on
    # this figure rather than on whatever pyplot considers "current".
    fig = plt.figure(figsize=(10, 5))

    # Panel 1: ROC curve with the random-chance diagonal for reference.
    plt.subplot(2, 2, 1)
    plt.plot(fpr, tpr, label='ROC')
    plt.plot([0, 1], [0, 1], linestyle='--', color='k', label='random chance')
    plt.title('ROC')
    plt.xlabel('false positive rate')
    plt.ylabel('true positive rate')
    plt.legend(loc=0)

    # Panel 2: feature importances.
    plt.subplot(2, 2, 2)
    plt.bar(
        np.arange(feature_importance.shape[0]),
        feature_importance,
        tick_label=names_list,
        color=color_list,
    )
    plt.title('feature importance')
    plt.xlabel('features')
    plt.ylabel('importance')
    plt.suptitle('classification result')

    # Panel 3: accuracy, rendered as the title of an otherwise empty axis.
    plt.subplot(2, 2, 3)
    plt.axis('off')
    plt.title('mean:' + str(mean), loc='center')

    # Panel 4: classification report text, rendered the same way.
    plt.subplot(2, 2, 4)
    plt.axis('off')
    plt.title(recall_str, loc='right')

    # Auto-adjust subplot size and spacing; without this the panels are
    # misplaced when the script is invoked from a cmd window.
    fig.tight_layout()
    # Save once, after all four panels exist.
    fig.savefig(figurePath)
    plt.close(fig)

    sklearn2pmml(pipeline, modelName, with_repr=True)
DecisionTree_main.py
# coding=utf-8
import sys
import model_pmml.DecisionTree2Pmml as m
# sys.argv[0] is the script itself, so command-line arguments would start at
# index 1. Earlier invocations, kept for reference:
# m.main(sys.argv[1], sys.argv[2])
# m.main("E:/data/out/DecisionTreeIris.pmml", "E:/data/yanshi/Iris.csv")
pmml_output_path = "E:/data/out/DecisionTreeIris.pmml"
training_csv_path = "E:/data/classification.csv"
m.main(pmml_output_path, training_csv_path)
效果图如下:
附带训练集的csv文件
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论