python处理word表格excel_python读取word表格内容并写入到excel
1.环境 安装
pip install pywin32
pip install python-docx
2.关键代码,根据表格标题搜索内容
# Look up the value stored directly beneath a header cell.
def find_row_value(d, sts):
    """Return the text of the cell directly below every cell equal to ``sts``.

    Despite the name, the value is taken from the *next row* in the same
    column as the matching cell (header on top, value underneath).

    Args:
        d: A python-docx ``Document``-like object exposing ``tables``; each
            table must offer ``rows``, ``columns`` and ``cell(row, col)``.
        sts: Exact cell text to search for (compared with ``==``).

    Returns:
        A list of strings, one per match, in table / row / column scan
        order. Empty when nothing matches.
    """
    values = []
    for table in d.tables:
        col_count = len(table.columns)
        # A match in the last row has no cell beneath it; stopping one row
        # early avoids indexing past the end of the table.
        for row_idx in range(len(table.rows) - 1):
            for col_idx in range(col_count):
                if table.cell(row_idx, col_idx).text == sts:
                    values.append(table.cell(row_idx + 1, col_idx).text)
    return values
# Look up the value stored immediately to the right of a label cell.
def find_cel_value(d, sts):
    """Return the text of the cell to the right of every cell equal to ``sts``.

    Despite the name, the value is taken from the *next column* in the same
    row as the matching cell (label on the left, value on the right).

    Args:
        d: A python-docx ``Document``-like object exposing ``tables``; each
            table must offer ``rows``, ``columns`` and ``cell(row, col)``.
        sts: Exact cell text to search for (compared with ``==``).

    Returns:
        A list of strings, one per match, in table / row / column scan
        order. Empty when nothing matches.
    """
    values = []
    for table in d.tables:
        # A match in the last column has no right-hand neighbour, so only
        # columns 0 .. n-2 are inspected; col + 1 then always stays inside
        # the current row.
        searchable_cols = len(table.columns) - 1
        for row_idx in range(len(table.rows)):
            for col_idx in range(searchable_cols):
                if table.cell(row_idx, col_idx).text == sts:
                    values.append(table.cell(row_idx, col_idx + 1).text)
    return values
3.使用代码如下:
#coding:utf-8
import os
import win32com
import win32api
from win32com.client import Dispatch, constants
from docx import Document
def parse_docx(f, workbook, num):
    """Copy selected table values from a Word document into the workbook.

    The site name and URL are read once from the document; every value found
    next to a "名称" label then produces one spreadsheet row via
    ``write_excel``.

    Args:
        f: Path (or file-like object) accepted by ``docx.Document``.
        workbook: Workbook object handed through to ``write_excel``.
        num: Row number for the first row to write; incremented by one for
            each row written.

    Returns:
        None.
    """
    d = Document(f)

    # The lookup key uses the standard ideograph U+7F51; the look-alike
    # Kangxi radical U+2F79 would not compare equal to text that uses the
    # standard character.
    name = find_row_value(d, "网站名称")
    print(name)
    url = find_row_value(d, "url")
    print(url)
    fx_name = find_cel_value(d, "名称")

    # Fall back to an empty string so a document that lacks the site name or
    # URL table still gets its remaining values recorded instead of failing
    # with IndexError.
    site_name = name[0] if name else ""
    site_url = url[0] if url else ""

    for fx_name_str in fx_name:
        # The sequence number written to the sheet is the row number minus one.
        xuhao = str(num - 1)
        write_excel(workbook, num, site_name, xuhao, site_url, fx_name_str)
        num = num + 1
def write_excel(workbook, i_row, name, xuhao, url, fx_name1):
    """Write one record into columns 1-4 of the first worksheet and save.

    Args:
        workbook: Object exposing ``Worksheets(index)`` and ``Save()``; the
            returned sheet must expose ``Cells(row, col)`` with a writable
            ``value`` attribute.
        i_row: Row number to write to.
        name: Value for column 1 (also echoed in the progress message).
        xuhao: Value for column 2.
        url: Value for column 3.
        fx_name1: Value for column 4.

    Returns:
        None. A ``TypeError`` raised while writing or saving is printed and
        swallowed; in that case no success message is printed.
    """
    try:
        first_sheet = workbook.Worksheets(1)
        record = (name, xuhao, url, fx_name1)
        for col, value in enumerate(record, start=1):
            first_sheet.Cells(i_row, col).value = value
        # Save before reporting success, so the message is only printed for
        # data that actually reached the file.
        workbook.Save()
    except TypeError as e:
        print(e)
        return
    # str.format accepts non-string names, whereas "+" concatenation would
    # raise TypeError for them.
    print('成功写⼊:{} 的信息'.format(name), "这是第{}个".format(i_row))
# Look up the value stored directly beneath a header cell.
def find_row_value(d, sts):
    """Return the text of the cell directly below every cell equal to ``sts``.

    Despite the name, the value is taken from the *next row* in the same
    column as the matching cell (header on top, value underneath).

    Args:
        d: A python-docx ``Document``-like object exposing ``tables``; each
            table must offer ``rows``, ``columns`` and ``cell(row, col)``.
        sts: Exact cell text to search for (compared with ``==``).

    Returns:
        A list of strings, one per match, in table / row / column scan
        order. Empty when nothing matches.
    """
    found = []
    for table in d.tables:
        n_cols = len(table.columns)
        # The last row has nothing beneath it, so it is never searched;
        # this keeps row + 1 inside the table.
        for row in range(len(table.rows) - 1):
            for col in range(n_cols):
                if table.cell(row, col).text == sts:
                    found.append(table.cell(row + 1, col).text)
    return found
# Look up the value stored immediately to the right of a label cell.
def find_cel_value(d, sts):
    """Return the text of the cell to the right of every cell equal to ``sts``.

    Despite the name, the value is taken from the *next column* in the same
    row as the matching cell (label on the left, value on the right).

    Args:
        d: A python-docx ``Document``-like object exposing ``tables``; each
            table must offer ``rows``, ``columns`` and ``cell(row, col)``.
        sts: Exact cell text to search for (compared with ``==``).

    Returns:
        A list of strings, one per match, in table / row / column scan
        order. Empty when nothing matches.
    """
    found = []
    for table in d.tables:
        # The last column has no right-hand neighbour, so it is never
        # searched; this keeps col + 1 inside the current row.
        n_searchable = len(table.columns) - 1
        for row in range(len(table.rows)):
            for col in range(n_searchable):
                if table.cell(row, col).text == sts:
                    found.append(table.cell(row, col + 1).text)
    return found
''' 上述函数主要实现⽂件的读取 '''
if __name__ == "__main__":
    # Script entry point: find the first empty row of the target sheet, then
    # append the values extracted from one Word document.

    # NOTE(review): the Word instance is never used below (python-docx reads
    # the file directly); it is kept only to preserve the original behaviour.
    w = win32com.client.Dispatch('Word.Application')
    try:
        # excel
        excel = win32com.client.Dispatch('Excel.Application')
        try:
            # Suppress modal prompts: with the window hidden, a "save
            # changes?" dialog on Quit() would block with nobody to answer it.
            excel.DisplayAlerts = False
            workbook = excel.Workbooks.Open('D:\\test\\2.xlsx')
            excel.Visible = False

            # Find the row to start writing at: the first row, from row 3
            # on, whose column 2 is empty.
            rember_sheet = workbook.Worksheets(1)
            i = 3
            while rember_sheet.Cells(i, 2).value is not None:
                i += 1
            print("从上次的第",i,"⾏录⼊\n")

            # Input document location (Windows path).
            PATH = "D:\\test"
            doc = "1.docx"
            parse_docx(PATH+'\\'+doc, workbook, i)
        finally:
            # Always shut the automation servers down, even when parsing or
            # writing fails, so no hidden processes are left behind.
            excel.Quit()
    finally:
        w.Quit()
参考链接:
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论