Python 实战之Excel 数据按索引更新
函数名函数功能
get_str_for_cell(cell_value)把传⼊的内容转换为字符串格式,主要
针对浮点型数据。
在python读取Excel单元格时,会把数
值读为浮点数,此处⽅法重新转换为整
数型的字符串
get_saveas_name(origin_name)把传⼊的⽂件名改名为带时间戳,例如
原⽂件名为abc.xls,则返回
abc_2018-11-13-10-10-07.xls。
get_mainpara()获config⽂件中main的sheet页的参数,该
信息是源数据和⽬标数据的定位信息。
get_optionpara()获config⽂件中option的sheet页的参数,
该信息是数据替换的参数,⽐如是否进⾏
force替换。
get_reference_dict(main_paras)根据main参数获取到reference数据的数据
字典。
update_xlsx_file(main_paras,
option_paras, reference_dict)更新destination的表格信息
在⽇常⼯作中,我们经常需要需要批量更新数据,⽐如有个destination表,⾥⾯有⼀列的数据需要被更新,更新的依据为reference
表,python脚本执⾏前和执⾏后的数据列⽰意图如下:
我们使⽤Excel⽂件作为config参数表,reference和destination也使⽤Excel作为数据,其中config参数如下图,python例⼦读取该⽂
件中的参数,获取各个参数的值,从⽽获取源数据和⽬的数据信息。
实现代码设计为⼏个函数,其各个功能如下:
整个功能实现代码如下:
import xlrd
py
import time
import datetime
def get_str_for_cell(cell_value):
if isinstance(cell_value, float):
if cell_value == int(cell_value):
cell_value = int(cell_value)
return str(cell_value)
def get_saveas_name(origin_name):
now_time = w().strftime('%Y-%m-%d-%H-%M-%S')
if origin_name.rfind('.'):
return origin_name.split('.')[0] + "_" + str(now_time) + "." + origin_name.split('.')[1]
def get_mainpara():
main_paras = {}
wb = xlrd.open_workbook("config.xlsx")
ws = wb.sheet_by_name(u"main")
rown = 1; coln = 1
main_paras["ReferenceFileName"] = ws.cell_value(rown,coln)
rown = 2
main_paras["ReferenceSheetName"] = ws.cell_value(rown,coln)
rown = 3
main_paras["ReferenceColumnName"] = ws.cell_value(rown,coln)
rown = 4
main_paras["ReferenceDataColumnName"] = ws.cell_value(rown,coln)
rown = 5
main_paras["DestinationFileName"] = ws.cell_value(rown,coln)
rown = 6
main_paras["DestinationSheetName"] = ws.cell_value(rown,coln)
rown = 7
main_paras["DestinationColumnName"] = ws.cell_value(rown,coln)
rown = 8
main_paras["DestinationDataColumnName"] = ws.cell_value(rown,coln)
wb = xlrd.open_workbook(main_paras["ReferenceFileName"])
ws = wb.sheet_by_name(main_paras["ReferenceSheetName"])
reference_column_index = get_column_index(ws, main_paras["ReferenceColumnName"])
reference_data_column_index = get_column_index(ws, main_paras["ReferenceDataColumnName"]) main_paras["reference_column_index"] = reference_column_index
main_paras["reference_data_column_index"] = reference_data_column_index
wb = xlrd.open_workbook(main_paras["DestinationFileName"])
ws = wb.sheet_by_name(main_paras["DestinationSheetName"])
dest_column_index = get_column_index(ws, main_paras["DestinationColumnName"])
dest_data_column_index = get_column_index(ws, main_paras["DestinationDataColumnName"])
main_paras["dest_column_index"] = dest_column_index
main_paras["dest_data_column_index"] = dest_data_column_index
return main_paras
def get_optionpara():
option_paras = {}
wb = xlrd.open_workbook("config.xlsx")
wb = xlrd.open_workbook("config.xlsx")
ws = wb.sheet_by_name(u"option")
rown = 1; coln = 1
option_paras["ForceReplace"] = ws.cell_value(rown,coln)
return option_paras
def get_column_index(table, column_name):
column_index = -1
for i in ls):
ll_value(0, i) == column_name):
column_index = i
break
return column_index
def get_reference_dict(main_paras):
reference_dict = {}
wb = xlrd.open_workbook(main_paras["ReferenceFileName"])
ws = wb.sheet_by_name(main_paras["ReferenceSheetName"])
reference_column_index = main_paras["reference_column_index"]
reference_data_column_index = main_paras["reference_data_column_index"]
num_rows = ws.nrows
for rown in range(num_rows):
if rown == 0:
continue
reference_dict[get_str_for_ll_value(rown, reference_column_index))] = get_str_for_ll_value(rown, reference_data_column_index)) return reference_dict
def update_xlsx_file(main_paras, option_paras, reference_dict):
rb = xlrd.open_workbook(main_paras["DestinationFileName"], formatting_info = True)
wb = py(rb)
ws_origin = rb.sheet_by_name(main_paras["DestinationSheetName"])
ws = wb.get_sheet(main_paras["DestinationSheetName"])
dest_column_index = main_paras["dest_column_index"]
dest_data_column_index = main_paras["dest_data_column_index"]
writen_count = 0
num_rows = ws
for rown in range(num_rows):
if rown < 5:
continue
key_cell = get_str_for_cell(ll_value(rown, dest_column_index))
if key_cell not in reference_dict:
print("error! can't find the value for key:", key_cell)
continue
data_value = (key_cell)
data_value_old = ll_value(rown, dest_data_column_index)
if data_value == data_value_old:
continue
ws.write(rown, dest_data_column_index, data_value)
ws.write(rown, 1, "M")
writen_count = writen_count + 1
print("totally modified rows:", writen_count)
wb.save(get_saveas_name(main_paras["DestinationFileName"]))
return
print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), "PlanDataReplacer started work, ")
python怎么读取xls文件main_paras = get_mainpara()
print("main_paras:", main_paras)
print("main_paras:", main_paras)
option_paras = get_optionpara()
print("option_paras:", option_paras)
reference_dict = get_reference_dict(main_paras)
print("reference_dict length:", len(reference_dict))
#print(reference_dict)
update_xlsx_file(main_paras, option_paras, reference_dict)
print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), "PlanDataReplacer work complete.") 执⾏后打印信息如下,则说明有479⾏数据已被更新。
如果您喜欢这篇⽂章,别忘了点赞和评论哦!
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论