(一)、python读取数据方法总结--688IT编程网

（一）、Python读取数据方法总结

1、Python读取数据方法

1.1读取 csv⽂件。⽤到pandas库中的read_csv()函数，格式如下：

读取格式：

import pandas as pd

# Load the CSV file into a DataFrame; replace the placeholder with the real path.
csv_data = pd.read_csv('/路径/⽂件名.csv')

（注意：需先安装pandas库：pip install pandas；使用Anaconda则无需单独安装，直接导入模块即可。）

1.2 读取txt文件。格式如下：

读取格式：f = open('/路径/文件名.txt', 'r')

实例1：用梯度上升法（逻辑回归），代码如下：

import numpy as np

import matplotlib.pyplot as plt

def load_dataset(file_path='testSet.txt'):
    """Load a whitespace-separated sample file.

    Every non-blank line is expected to hold three fields, for example
    ``-0.017612 14.053064 0``: the two feature values X1 and X2 followed
    by an integer class label.

    NOTE(review): the file name of the original ``open()`` call was lost
    (it read ``open('', 'r')``); ``'testSet.txt'`` is an assumed default --
    confirm it against the data file that accompanies this example.

    :param file_path: path of the text file to read.
    :return: ``(data_mat, label_mat)``; ``data_mat`` is a list of
        ``[1.0, x1, x2]`` rows (a constant 1.0 is prepended to every
        sample) and ``label_mat`` is the list of integer labels.
    """
    data_mat = []
    label_mat = []
    with open(file_path, 'r') as f:
        for line in f:
            line_arr = line.strip().split()
            if not line_arr:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            # The first column is the constant 1.0.
            data_mat.append([1., float(line_arr[0]), float(line_arr[1])])
            label_mat.append(int(line_arr[2]))
    return data_mat, label_mat

def sigmoid(inx):
    """Return the logistic (S-shaped) function ``1 / (1 + exp(-inx))``.

    ``np.exp`` works element-wise, so *inx* may be a scalar or a NumPy
    array/matrix; the result has the corresponding form.

    :param inx: input value(s).
    :return: the logistic function evaluated at *inx*.
    """
    return 1.0 / (1 + np.exp(-inx))

def grad_ascent(data_mat, class_labels, alpha=0.001, max_cycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    :param data_mat: 2-D array-like; rows are samples, columns are features.
    :param class_labels: sequence holding one label per sample
        (presumably 0/1 -- confirm against the data file).
    :param alpha: step size (learning rate).
    :param max_cycles: number of full-batch update steps.
    :return: ``(weights, weights_history)``; ``weights`` is an ``(n, 1)``
        ``np.matrix`` and ``weights_history`` is a ``(max_cycles, n)``
        array holding the weights after every update.
    """
    # np.asmatrix rather than np.mat: the np.mat alias was removed in NumPy 2.0.
    data_matrix = np.asmatrix(data_mat)
    # Labels become a column vector so they line up with the predictions.
    label_mat = np.asmatrix(class_labels).transpose()
    n = np.shape(data_matrix)[1]
    weights = np.asmatrix(np.ones((n, 1)))
    weights_history = np.zeros((max_cycles, n))
    for k in range(max_cycles):
        h = sigmoid(data_matrix * weights)  # column vector of predictions
        error = label_mat - h
        weights = weights + alpha * data_matrix.transpose() * error
        weights_history[k, :] = np.asarray(weights).ravel()
    # NOTE: the original author remarked that this does not converge
    # within the default number of cycles.
    return weights, weights_history

def stoc_grad_ascent(data_mat, class_labels, iter_counts=20):
    """Fit logistic-regression weights by stochastic gradient ascent.

    The samples are visited in their stored order, one weight update per
    sample, for ``iter_counts`` passes over the data.

    :param data_mat: 2-D array-like; rows are samples, columns are features.
    :param class_labels: sequence holding one label per sample.
    :param iter_counts: number of passes over the whole data set.
    :return: ``(weights, weights_history)``; ``weights`` is a 1-D array of
        length ``n`` and ``weights_history`` is an ``(m * iter_counts, n)``
        array holding the weights after every single-sample update.
    """
    # Convert once so that row-wise arithmetic also works for nested lists.
    data_arr = np.asarray(data_mat, dtype=float)
    m, n = np.shape(data_arr)
    weights = np.ones(n)
    weights_history = np.zeros((m * iter_counts, n))
    for j in range(iter_counts):
        for i in range(m):
            # Decaying step size to reduce oscillation; the 0.01 floor keeps
            # it from ever reaching zero.
            alpha = 4 / (1.0 + j + i) + 0.01
            h = sigmoid(np.sum(data_arr[i] * weights))
            error = class_labels[i] - h
            weights = weights + alpha * error * data_arr[i]
            weights_history[i + j * m, :] = weights
    return weights, weights_history

def plot_data(weights):
    """Scatter-plot the data set and draw the line defined by *weights*.

    The samples are re-read with ``load_dataset()``; label 1 is drawn as
    red squares, every other label as green dots.

    :param weights: three coefficients ``(w0, w1, w2)``; may be an
        ``np.matrix`` or any array-like that squeezes to one dimension.
    """
    # The weights may arrive as a matrix; flatten them to a plain ndarray.
    weights = np.squeeze(np.asarray(weights))
    data_mat, label_mat = load_dataset()

    xcord1, ycord1, xcord2, ycord2 = [], [], [], []
    for row, label in zip(data_mat, label_mat):
        # Column 0 is the constant term; columns 1 and 2 are X1 and X2.
        if int(label) == 1:
            xcord1.append(row[1])
            ycord1.append(row[2])
        else:
            xcord2.append(row[1])
            ycord2.append(row[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')

    # Line where w0 + w1 * x1 + w2 * x2 == 0, solved for x2.
    x = np.arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

def plot_weights_update(weights_history):
    """Plot how each of the first three weights changes per iteration.

    :param weights_history: 2-D array with one row per recorded update;
        columns 0, 1 and 2 are drawn in three stacked subplots.
    """
    fig = plt.figure()
    ax = None
    for column, label in enumerate(('X0', 'X1', 'X2')):
        # Three rows, one column; subplot positions are 1-based.
        ax = fig.add_subplot(3, 1, column + 1)
        ax.plot(weights_history[:, column])
        ax.set_ylabel(label)
    # Only the bottom subplot carries the shared x-axis label.
    ax.set_xlabel('iteration')
    plt.show()

if __name__ == '__main__':
    data_arr, label_mat = load_dataset()  # load the samples
    weights1, weights_history1 = grad_ascent(data_arr, label_mat)  # batch gradient ascent
    plot_weights_update(weights_history1)  # weight trajectories
    plot_data(weights1)  # samples plus fitted line

运行结果如下：

1.3读取excel⽂件。⽤到xlrd库（pip install xlrd安装，导⼊模块import xlrd）。

读取格式：

import xlrd

# NOTE: xlrd 2.0 and later only read legacy .xls workbooks; opening an
# .xlsx file this way needs xlrd < 2.0. formatting_info=True is omitted
# because xlrd implements it for .xls files only.
data = xlrd.open_workbook(r'\路径\⽂件名.xlsx')
table = data.sheet_by_name("Sheet3")

import pandas as pd

# The keyword is sheet_name; the old spelling "sheetname" was removed from pandas.
df = pd.read_excel(r"\路径\⽂件名.xlsx", sheet_name="Sheet3")

实例2，代码如下：

#导⼊需要的库

import numpy as np

from numpy.linalg import inv

import pandas as pd

import matplotlib.pyplot as plt

from sklearn import linear_model

# Read the data
dataset = pd.read_excel("data.xlsx")

# Take columns 2..4; copy so that adding a column below does not write
# into a slice of `dataset`.
temp = dataset.iloc[:, 2:5].copy()
temp['x0'] = 1  # constant column
X = temp.iloc[:, [3, 0, 1, 2]]  # reorder so that x0 comes first

# Column 1 as a column vector; -1 infers the row count instead of
# hard-coding 19 rows.
Y = dataset.iloc[:, 1].values.reshape(-1, 1)

clf = linear_model.LinearRegression()
clf.fit(X, Y)  # train the model

plt.xlabel('X3')
plt.ylabel('Y')
plt.scatter(dataset.X3, dataset.Y)  # scatter plot of X3 against Y

运⾏以上代码如下图：

感悟：

“只有你自己才是自己人生的主宰，要懂得‘舍弃’和‘放下’的重要性。少一点纠缠，才会多一点自在，也唯有这样，你才能真正放过自己。”

688IT编程网

(一)、python读取数据方法总结

发表评论

推荐文章

随机森林算法介绍及R语言实现

基于随机森林优化的神经网络算法在冬小麦产量预测中的应用研究_百度文 ...

基于正则化贪心森林算法的情感分析方法研究

随机森林算法和grandientboosting算法

基于随机森林的图像分类算法研究

热门文章

随机森林算法的改进方法

基于随机森林算法的风险预警模型研究

Python中的随机森林算法详解

随机森林发展历史

如何使用随机森林进行时间序列数据模式识别(八)

随机森林回归模型原理

如何使用随机森林进行时间序列数据模式识别(六)

如何使用随机森林进行时间序列数据预测(四)

如何使用随机森林进行异常检测(六)

随机森林算法和grandientboosting算法 -回复

随机森林方法总结全面

随机森林算法原理和步骤

随机森林的原理

随机森林重要性

随机森林算法

机器学习中随机森林的原理

随机森林算法原理

使用计算机视觉技术进行动物识别的技巧

基于crf命名实体识别实验总结

transformer预测模型训练方法

最新文章

随机森林算法介绍及R语言实现

基于随机森林优化的神经网络算法在冬小麦产量预测中的应用研究_百度文 ...

基于正则化贪心森林算法的情感分析方法研究

随机森林算法和grandientboosting算法

基于随机森林的图像分类算法研究

随机森林结合直接正交信号校正的模型传递方法

标签列表

688IT编程网

(一)、python读取数据方法总结

发表评论

推荐文章

随机森林算法介绍及R语言实现

基于随机森林优化的神经网络算法在冬小麦产量预测中的应用研究_百度文 ...

基于正则化贪心森林算法的情感分析方法研究

随机森林算法和grandientboosting算法

基于随机森林的图像分类算法研究

热门文章

随机森林算法的改进方法

基于随机森林算法的风险预警模型研究

Python中的随机森林算法详解

随机森林发展历史

如何使用随机森林进行时间序列数据模式识别(八)

随机森林回归模型原理

如何使用随机森林进行时间序列数据模式识别(六)

如何使用随机森林进行时间序列数据预测(四)

如何使用随机森林进行异常检测(六)

随机森林算法和grandientboosting算法 -回复

随机森林方法总结全面

随机森林算法原理和步骤

随机森林的原理

随机森林 重要性

随机森林算法

机器学习中随机森林的原理

随机森林算法原理

使用计算机视觉技术进行动物识别的技巧

基于crf命名实体识别实验总结

transformer预测模型训练方法

最新文章

随机森林算法介绍及R语言实现

基于随机森林优化的神经网络算法在冬小麦产量预测中的应用研究_百度文 ...

基于正则化贪心森林算法的情感分析方法研究

随机森林算法和grandientboosting算法

基于随机森林的图像分类算法研究

随机森林结合直接正交信号校正的模型传递方法

标签列表

随机森林重要性