(一)Python 读取数据方法总结
1、Python 读取数据方法
1.1 读取 csv 文件。用到 pandas 库中的 read_csv() 函数,格式如下:
读取格式:
import pandas as pd
csv_data = pd.read_csv('/路径/文件名.csv')
(注意:安装 pandas 库:pip install pandas;如果使用 Anaconda,则无需另行安装此模块,直接导入即可。)
1.2 读取 txt 文件。格式如下:
读取格式:f = open('/路径/文件名.txt', 'r')
实例1:用梯度上升法(逻辑回归)处理从 txt 文件读取的数据,代码如下:
import numpy as np
import matplotlib.pyplot as plt
def load_dataset(path='testSet.txt'):
    """Load a whitespace-separated, two-feature data set from a text file.

    Each non-empty line holds three fields, for example
    ``-0.017612 14.053064 0``: the features X1 and X2 followed by an
    integer class label.

    :param path: file to read.  NOTE(review): the path in the original
        listing was an empty string, which can never be opened;
        'testSet.txt' is an assumed default -- confirm the real file name.
    :return: ``(data_mat, label_mat)``.  Every row of ``data_mat`` is
        ``[1.0, x1, x2]`` (the leading 1.0 is the constant bias feature) and
        ``label_mat`` is the list of integer labels, in file order.
    :raises OSError: if the file cannot be opened.
    :raises ValueError: if a field cannot be parsed as a number.
    :raises IndexError: if a non-empty line has fewer than three fields.
    """
    data_mat = []
    label_mat = []
    with open(path, 'r') as f:
        # Iterate the file object directly; no need to materialise all lines.
        for line in f:
            line_arr = line.strip().split()
            if not line_arr:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # The first column is the constant 1.0 so weights[0] acts as bias.
            data_mat.append([1., float(line_arr[0]), float(line_arr[1])])
            label_mat.append(int(line_arr[2]))
    return data_mat, label_mat
def sigmoid(inx):
    """Return the logistic (S-shaped) function ``1 / (1 + exp(-inx))``.

    :param inx: a scalar or anything ``np.exp`` accepts (array, matrix);
        the function is applied element-wise.
    :return: value(s) in the open interval (0, 1), same shape as ``inx``.
    """
    return 1.0 / (1 + np.exp(-inx))
def grad_ascent(data_mat, class_labels, alpha=0.001, max_cycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Every iteration uses all samples: the weights move by
    ``alpha * X^T * (labels - sigmoid(X * weights))``.

    :param data_mat: 2-D sequence/array; rows are samples, columns are
        features.
    :param class_labels: 1-D sequence of class labels, one per sample.
    :param alpha: learning rate (step size).
    :param max_cycles: number of full passes over the data.
    :return: ``(weights, weights_history)``.  ``weights`` is an ``(n, 1)``
        ``np.matrix``; ``weights_history`` is a ``(max_cycles, n)`` array
        whose row ``k`` holds the weights after iteration ``k``.
    """
    # np.asmatrix instead of np.mat: the `mat` alias was removed in NumPy 2.0.
    # With matrices, `*` below is a matrix product, not element-wise.
    data_matrix = np.asmatrix(data_mat)
    # Labels become a column vector so they line up with the predictions.
    label_mat = np.asmatrix(class_labels).transpose()
    n = np.shape(data_matrix)[1]
    weights = np.asmatrix(np.ones((n, 1)))
    # Keep every intermediate weight vector so convergence can be plotted.
    weights_history = np.zeros((max_cycles, n))
    for k in range(max_cycles):
        h = sigmoid(data_matrix * weights)  # (m, 1) column of predictions
        error = label_mat - h
        weights = weights + alpha * data_matrix.transpose() * error
        weights_history[k, :] = np.asarray(weights).ravel()
    # NOTE: the original author remarked that this does not really converge
    # within the default number of cycles.
    return weights, weights_history
def stoc_grad_ascent(data_mat, class_labels, iter_counts=20):
    """Fit logistic-regression weights with stochastic gradient ascent.

    The weights are updated after every single sample, visiting the samples
    in their given order on each pass.

    :param data_mat: 2-D sequence/array; rows are samples, columns are
        features.
    :param class_labels: sequence of class labels, indexable by sample.
    :param iter_counts: number of passes over the whole data set.
    :return: ``(weights, weights_history)``.  ``weights`` is a 1-D array of
        length ``n``; ``weights_history`` is an ``(m * iter_counts, n)``
        array with one row per single-sample update.
    """
    # Convert once so row arithmetic is element-wise even when the caller
    # passes a plain list of lists.
    data_arr = np.asarray(data_mat, dtype=float)
    m, n = np.shape(data_arr)
    weights = np.ones(n)
    weights_history = np.zeros((m * iter_counts, n))
    for j in range(iter_counts):
        for i in range(m):
            # Decaying step size (replaces a fixed alpha = 0.01) to reduce
            # oscillation; the 0.01 floor keeps later samples influential.
            alpha = 4 / (1.0 + j + i) + 0.01
            # NOTE: the original listing had a disabled experiment that
            # picked a random sample index here; samples are used in order.
            h = sigmoid(np.sum(data_arr[i] * weights))
            error = class_labels[i] - h
            weights = weights + alpha * error * data_arr[i]
            weights_history[i + j * m, :] = weights
    return weights, weights_history
def plot_data(weights):
    """Scatter-plot the data set and draw the fitted decision boundary.

    The samples are re-read with ``load_dataset()``.  Label-1 samples are
    drawn as red squares, all others as green dots, and the line
    ``weights[0] + weights[1] * x1 + weights[2] * x2 = 0`` is drawn on top.

    :param weights: three weights (bias, X1, X2); may be a matrix, an array
        or a sequence -- it is flattened to a 1-D array first.
    """
    # grad_ascent returns an (n, 1) matrix; flatten it to a plain 1-D array.
    weights = np.squeeze(np.asarray(weights))
    data_mat, label_mat = load_dataset()
    # reshape keeps the array 2-D even when the data set is empty.
    data_arr = np.array(data_mat, dtype=float).reshape(-1, 3)
    is_positive = np.array(label_mat, dtype=int) == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Column 0 is the constant bias feature; columns 1 and 2 are X1 and X2.
    ax.scatter(data_arr[is_positive, 1], data_arr[is_positive, 2],
               s=30, c='red', marker='s')
    ax.scatter(data_arr[~is_positive, 1], data_arr[~is_positive, 2],
               s=30, c='green')
    # Decision boundary: solve w0 + w1 * x + w2 * y = 0 for y.
    x = np.arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
def plot_weights_update(weights_history):
    """Plot how each weight evolves over the training iterations.

    One subplot per weight is stacked vertically; the y-axes are labelled
    'X0', 'X1', ... and the bottom subplot carries the 'iteration' x-label.

    :param weights_history: 2-D array, one row per iteration and one column
        per weight.
    """
    history = np.asarray(weights_history)
    n_weights = history.shape[1]
    fig = plt.figure()
    for idx in range(n_weights):
        # n_weights rows, one column; subplot positions are 1-based.
        ax = fig.add_subplot(n_weights, 1, idx + 1)
        ax.plot(history[:, idx])
        ax.set_ylabel('X%d' % idx)
        if idx == n_weights - 1:
            ax.set_xlabel('iteration')
    plt.show()
if __name__ == '__main__':
    # Load the samples, train with batch gradient ascent, then show how the
    # weights evolved and where the resulting decision boundary lies.
    data_arr, label_mat = load_dataset()
    weights1, weights_history1 = grad_ascent(data_arr, label_mat)
    plot_weights_update(weights_history1)
    plot_data(weights1)
运行结果如下:
python怎么读取excel某一列
1.3 读取 excel 文件。用到 xlrd 库(通过 pip install xlrd 安装,使用 import xlrd 导入模块)。
读取格式:
import xlrd
data = xlrd.open_workbook(r'\路径\文件名.xlsx', formatting_info=True)  # 注意:formatting_info=True 仅适用于 .xls 文件,对 .xlsx 会报错
table = data.sheet_by_name("Sheet3")
or
import pandas as pd
df = pd.read_excel(r"\路径\文件名.xlsx", sheet_name="Sheet3")
实例2,代码如下:
#导⼊需要的库
import numpy as np
from numpy.linalg import inv
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
# Read the data.
dataset = pd.read_excel("data.xlsx")
# Take columns 2..4 as the features (presumably X1..X3 -- confirm against the
# spreadsheet).  Copy so that adding a column does not write into a slice of
# `dataset` (avoids pandas' SettingWithCopyWarning).
temp = dataset.iloc[:, 2:5].copy()
temp['x0'] = 1
# Reorder so the constant column x0 comes first.
X = temp.iloc[:, [3, 0, 1, 2]]
# Column 1 is the target; reshape to a column vector without hard-coding the
# number of rows.
Y = dataset.iloc[:, 1].values.reshape(-1, 1)
clf = linear_model.LinearRegression()
clf.fit(X, Y)  # train the model
plt.xlabel('X3')
plt.ylabel('Y')
plt.scatter(dataset.X3, dataset.Y)  # scatter plot of Y against X3
plt.show()
运行以上代码,结果如下图:
感悟:
“只有你自己才是自己人生的主宰,要懂得“舍弃”和“放下”的重要性。少一点纠缠,才会多一点自在,也唯有这样,你才能真正放过自己。”

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。