用Python对Excel进行数据处理与分析操作。本文代码及数据集来自《超简单:用Python让Excel飞起来(实战150例)》
# Sort the data in one worksheet (method 1: pandas only).
import pandas as pd

data = pd.read_excel('销售表.xlsx', sheet_name='总表')
# Sort the rows by the "利润" column in descending order.
data = data.sort_values(by='利润', ascending=False)
# Write the sorted rows to a new workbook, omitting the DataFrame index.
data.to_excel('销售表1.xlsx', sheet_name='总表', index=False)
# Sort the data in one worksheet (method 2: xlwings + pandas).
import xlwings as xw
import pandas as pd

app = xw.App(visible=False, add_book=False)
try:
    workbook = app.books.open('销售表.xlsx')
    worksheet = workbook.sheets['总表']
    # Read the contiguous table that starts at A1 and convert it to a DataFrame.
    data = worksheet.range('A1').expand('table').options(pd.DataFrame).value
    # Sort the DataFrame by the "利润" column in descending order.
    result = data.sort_values(by='利润', ascending=False)
    # Write the sorted data back to the worksheet, starting at cell A1.
    worksheet.range('A1').value = result
    workbook.save('销售表1.xlsx')
    workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Sort the data of every worksheet in one workbook.
# NOTE(review): the file names below appear to contain Kangxi-radical code
# points (likely a text-extraction artifact); confirm they match the real files.
import xlwings as xw
import pandas as pd

app = xw.App(visible=False, add_book=False)
try:
    workbook = app.books.open('各⽉销售数量表.xlsx')
    worksheet = workbook.sheets
    for i in worksheet:  # iterate over every worksheet
        data = i.range('A1').expand('table').options(pd.DataFrame).value
        # Sort by the "销售数量" column in descending order.
        result = data.sort_values(by='销售数量', ascending=False)
        i.range('A1').value = result
    # Save once, after all sheets have been rewritten.
    workbook.save('各⽉销售数量表1.xlsx')
    workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Sort the same-named worksheet in several workbooks.
from pathlib import Path
import xlwings as xw
import pandas as pd

app = xw.App(visible=False, add_book=False)
try:
    folder_path = Path('各地区销售数量')
    # Matches both .xls and .xlsx (and any other .xls* extension) in the folder.
    file_list = folder_path.glob('*.xls*')
    for i in file_list:
        workbook = app.books.open(i)
        worksheet = workbook.sheets['销售数量']  # the worksheet to sort
        data = worksheet.range('A1').expand('table').options(pd.DataFrame).value
        result = data.sort_values(by='销售数量', ascending=False)
        worksheet.range('A1').value = result
        # Save in place: each workbook is overwritten with its sorted data.
        workbook.save()
        workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Filter one worksheet by a single condition.
# NOTE(review): one output name below appears to contain a Kangxi-radical code
# point (likely a text-extraction artifact); confirm the intended file name.
import pandas as pd

data = pd.read_excel('销售表.xlsx', sheet_name='总表')
# Rows whose "产品名称" column equals "离合器".
pro_data = data[data['产品名称']=='离合器']
# Rows whose "销售数量" column is greater than or equal to 100.
num_data = data[data['销售数量']>=100]
# Each filtered result goes to its own workbook, without the DataFrame index.
pro_data.to_excel('离合器.xlsx', sheet_name='离合器', index=False)
num_data.to_excel('销售数量⼤于等于100的记录.xlsx', sheet_name='销售数量⼤于等于100的记录', index=False)
# Filter one worksheet by multiple conditions.
import pandas as pd

data = pd.read_excel('销售表.xlsx', sheet_name='总表')
is_tachometer = data['产品名称']=='转速表'
sold_at_least_50 = data['销售数量']>=50
# condition1 requires both tests to hold (AND); condition2 requires either (OR).
condition1 = is_tachometer & sold_at_least_50
condition2 = is_tachometer | sold_at_least_50
data1 = data[condition1]
data2 = data[condition2]
data1.to_excel('销售表1.xlsx', sheet_name='与条件筛选', index=False)
data2.to_excel('销售表2.xlsx', sheet_name='或条件筛选', index=False)
# Filter the data of every worksheet in one workbook.
# NOTE(review): the input file name appears to contain a Kangxi-radical code
# point (likely a text-extraction artifact); confirm it matches the real file.
import pandas as pd

# sheet_name=None loads every worksheet into a dict keyed by sheet name.
all_data = pd.read_excel('办公⽤品采购表.xlsx', sheet_name=None)
with pd.ExcelWriter('筛选表.xlsx') as workbook:
    for i in all_data:
        data = all_data[i]
        # Keep only the rows whose "采购物品" column equals "办公桌".
        filter_data = data[data['采购物品']=='办公桌']
        # Store each result in a worksheet named after its source sheet.
        filter_data.to_excel(workbook, sheet_name=i, index=False)
# Filter the data of every worksheet in one workbook and merge the results.
# NOTE(review): the input file name appears to contain a Kangxi-radical code
# point (likely a text-extraction artifact); confirm it matches the real file.
import pandas as pd

# sheet_name=None loads every worksheet into a dict keyed by sheet name.
all_data = pd.read_excel('办公⽤品采购表.xlsx', sheet_name=None)
# Filter each sheet, then concatenate once; this avoids re-copying the
# accumulated frame on every iteration and concatenating with an empty frame.
filtered_frames = [
    data[data['采购物品']=='办公桌']
    for data in all_data.values()
]
datas = pd.concat(filtered_frames, axis=0)
datas.to_excel('办公桌.xlsx', sheet_name='办公桌', index=False)
# Group and summarise one worksheet.
# NOTE(review): a column name below appears to contain a Kangxi-radical code
# point (likely a text-extraction artifact); confirm it matches the real header.
import xlwings as xw
import pandas as pd

app = xw.App(visible=False, add_book=False)
try:
    workbook = app.books.open('销售表.xlsx')
    worksheet = workbook.sheets['总表']
    data = worksheet.range('A1').expand('table').options(pd.DataFrame, dtype=float).value
    # Group by the "产品名称" column and sum each group.
    result = data.groupby('产品名称').sum()
    # Add a new worksheet named "分类汇总" to hold the summary.
    worksheet1 = workbook.sheets.add(name='分类汇总')
    # Write only the selected summary columns to the new worksheet.
    worksheet1.range('A1').value = result[['销售数量','销售⾦额']]
    workbook.save('分类汇总表.xlsx')
    workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Sum a column of one worksheet.
# NOTE(review): several literals below appear to contain Kangxi-radical code
# points (likely a text-extraction artifact); confirm they match the real names.
import xlwings as xw
import pandas as pd

app = xw.App(visible=False, add_book=False)
try:
    workbook = app.books.open('办公⽤品采购表.xlsx')
    worksheet = workbook.sheets['1⽉']
    data = worksheet.range('A1').expand('table').options(pd.DataFrame).value
    result = data['采购⾦额'].sum()
    # Write the label "合计" and the total into fixed cells B15 and C15.
    worksheet.range('B15').value ='合计'
    worksheet.range('C15').value = result
    workbook.save('求和表.xlsx')
    workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Sum a column separately for every worksheet of one workbook.
# NOTE(review): several literals below appear to contain Kangxi-radical code
# points (likely a text-extraction artifact); confirm they match the real names.
import xlwings as xw
import pandas as pd

app = xw.App(visible=False, add_book=False)
try:
    workbook = app.books.open('办公⽤品采购表.xlsx')
    worksheet = workbook.sheets
    for i in worksheet:
        # Resolve the table range once and reuse it for data, header and size.
        table = i.range('A1').expand('table')
        data = table.options(pd.DataFrame).value
        result = data['采购⾦额'].sum()
        # 1-based column number of "采购⾦额", looked up in the header row.
        column = table.value[0].index('采购⾦额') + 1
        # Row count of the table; it starts at A1, so this is also its last row.
        row = table.shape[0]
        # Label goes one column to the left of the total, on the row below the table.
        i.range(row + 1, column - 1).value ='合计'
        i.range(row + 1, column).value = result
    workbook.save('求和表.xlsx')
    workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Build a pivot table from one worksheet.
# NOTE(review): a column name below appears to contain a Kangxi-radical code
# point (likely a text-extraction artifact); confirm it matches the real header.
import xlwings as xw
import pandas as pd

app = xw.App(visible=False, add_book=False)
try:
    workbook = app.books.open('销售表.xlsx')
    worksheet = workbook.sheets['总表']
    data = worksheet.range('A1').expand('table').options(pd.DataFrame, dtype=float).value
    # Sum both value columns per product and append a totals row named "合计".
    pivot = pd.pivot_table(
        data,
        values=['销售数量','销售⾦额'],
        index=['产品名称'],
        aggfunc={'销售数量':'sum','销售⾦额':'sum'},
        fill_value=0,
        margins=True,
        margins_name='合计',
    )
    worksheet1 = workbook.sheets.add(name='数据透视表')
    worksheet1.range('A1').value = pivot
    workbook.save('数据透视表.xlsx')
    workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Use correlation coefficients to judge how the columns relate to each other.
import pandas as pd

# Use the "序号" column as the row index so it is not treated as data.
data = pd.read_excel('销售额统计表.xlsx', sheet_name=0, index_col='序号')
# Pairwise correlation matrix of the columns.
result = data.corr()
print(result)
# Use descriptive statistics and a histogram to help set targets.
# NOTE(review): several literals below appear to contain Kangxi-radical code
# points (likely a text-extraction artifact); confirm they match the real names.
import pandas as pd
import matplotlib.pyplot as plt
import xlwings as xw

data = pd.read_excel('员⼯销售业绩表.xlsx', sheet_name=0)
# Count, mean, min, max and the other descriptive statistics of the column.
data_describe = data['销售额(万元)'].astype(float).describe()
# Split the "销售额(万元)" values into 6 equal-width intervals.
data_cut = pd.cut(data['销售额(万元)'],6)
data1 = pd.DataFrame()
# Number of rows that fall into each interval.
data1['计数']= data['销售额(万元)'].groupby(data_cut).count()
# Move the interval index into a regular column and renumber the rows.
data2 = data1.reset_index()
# Convert the interval labels to strings (presumably so they can be written to cells).
data2['销售额(万元)']= data2['销售额(万元)'].apply(lambda x:str(x))

# Create the figure first so the histogram below is drawn onto it.
figure = plt.figure()
n, bins, patches = plt.hist(data['销售额(万元)'], bins=6, edgecolor='black', linewidth=1)
plt.title('员⼯销售业绩频率分析')
plt.xlabel('销售额(万元)')
plt.ylabel('频数')

app = xw.App(visible=False, add_book=False)
try:
    workbook = app.books.open('员⼯销售业绩表.xlsx')
    worksheet = workbook.sheets[0]
    # Descriptive statistics go to E1, the interval counts to H1.
    worksheet.range('E1').value = data_describe
    worksheet.range('H1').value = data2
    # Insert the histogram figure into the worksheet as a picture.
    worksheet.pictures.add(figure, name='图⽚1', update=True, left=400, top=200)
    # Auto-fit row heights and column widths.
    worksheet.autofit()
    workbook.save('描述统计.xlsx')
    workbook.close()
finally:
    # Always shut down the hidden Excel instance, even if a step above fails.
    app.quit()
# Fit a linear regression and judge the goodness of fit.
# NOTE(review): several literals below appear to contain Kangxi-radical code
# points (likely a text-extraction artifact); confirm they match the real names.
import pandas as pd
from sklearn import linear_model

df = pd.read_excel('各⽉销售额与⼴告费⽀出表.xlsx', sheet_name=0)
# Columns used as the independent variables.
x = df[['视频门户⼴告费(万元)','电视台⼴告费(万元)']]
# Column used as the dependent variable.
y = df['销售额(万元)']
model = linear_model.LinearRegression()
model.fit(x, y)
# Score the fitted model on the same data it was trained on.
R2 = model.score(x, y)
print(R2)
# Use the fitted regression equation to predict a future value.
# NOTE(review): several literals below appear to contain Kangxi-radical code
# points (likely a text-extraction artifact); confirm they match the real names.
import pandas as pd
from sklearn import linear_model

df = pd.read_excel('各⽉销售额与⼴告费⽀出表.xlsx', sheet_name=0)
x = df[['视频门户⼴告费(万元)','电视台⼴告费(万元)']]
y = df['销售额(万元)']
model = linear_model.LinearRegression()
model.fit(x, y)
# Coefficients of the two independent variables, in column order.
coef = model.coef_
# Intercept of the fitted equation.
model_intercept = model.intercept_
# The "+" format spec prints the intercept with an explicit sign.
equation =f'y={coef[0]}*x1+{coef[1]}*x2{model_intercept:+}'
print(equation)
# Evaluate the equation for the chosen inputs x1 and x2.
x1 =40
x2 =30
# NOTE: this rebinds y from the training target to the predicted value.
y = coef[0]* x1 + coef[1]* x2 + model_intercept
print(y)

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。