python,比较两个xml
实现的功能是从两个xml读取多个子节点和属性,比较相同子节点是否相同,将不同的内容保存生成csv文件。
# -*- coding: UTF-8 -*-
#作用:查询表一表二的不同内容,表1相对于表2 ,表2相对于表1
#data1生成名字的list2
from __future__ import division
import csv
import re
import os
import math
import sys
from bs4 import BeautifulSoup
from tkinter.filedialog import askopenfilename
import pandas as pd
# Progress bar helper.
def progressbar(cur, total):
    """Redraw a 50-character text progress bar on the current stdout line.

    The line starts with a carriage return so that successive calls
    overwrite each other instead of scrolling.

    Args:
        cur: Amount of work completed so far.
        total: Amount of work that corresponds to 100%. A ``total`` of 0
            is drawn as a finished bar instead of raising
            ZeroDivisionError.
    """
    if total:
        fraction = cur / total
        filled = int(math.floor(cur * 50 / total))
    else:
        # Nothing to do at all: show the bar as complete.
        fraction = 1.0
        filled = 50
    # Keep the bar inside its 50-column frame even when cur exceeds total;
    # the percentage text still shows the real ratio.
    filled = min(filled, 50)
    sys.stdout.write("\r[%-50s] %s" % ("=" * filled, "{:.2%}".format(fraction)))
    sys.stdout.flush()
# Ask the user to pick the two XML files that will be compared.
print('第⼀份xml⽂件')
filename1 = askopenfilename(filetypes=( ("Text file", "*.xml*"),("HTML files", "*.html;*.htm")))
print('第⼆份xml⽂件')
filename2 = askopenfilename(filetypes=( ("Text file", "*.xml*"),("HTML files", "*.html;*.htm")))
print('处理中,⼤约20来秒,处理结果在D盘diff⽂件夹内')
print('处理进度')
progressbar(1, 100)
# Create the output folder if it does not exist yet.
# The existence test in this copy of the file had been reduced to the
# undefined name `ists(...)`; makedirs(exist_ok=True) does the check and the
# creation in one step. The raw string keeps the backslash literal without
# relying on the invalid `\d` escape.
os.makedirs(r"D:\diff", exist_ok=True)
# xml2csv: convert an XML file into a CSV file.
# Column names of the generated CSV; dump_file references this list when it
# writes the file, and the names match the child tags that parse_channel looks
# up on each channel element.
header = ['Name', 'InitialValue', 'BachmannAddress', 'CalibrationExpression', 'IOInitialValue']
def parse_channel(tag):
    """Collect the compared fields of one channel element into a dict.

    Args:
        tag: Parsed element that offers ``find(child_name)`` and returns
            either the child element or ``None``.

    Returns:
        A dict with the keys ``Name``, ``InitialValue``, ``BachmannAddress``,
        ``CalibrationExpression`` and ``IOInitialValue``. Each value is the
        text of the child of that name, or ``""`` when the child is missing.
    """
    fields = (
        "Name",
        "InitialValue",
        "BachmannAddress",
        "CalibrationExpression",
        "IOInitialValue",
    )
    record = {}
    for field in fields:
        child = tag.find(field)
        # NOTE(review): the attribute access in front of each `if ... else ""`
        # was lost in this copy of the file; it is restored as `.text`
        # (presumably what the original read) -- confirm.
        # `is not None` tests for a missing child without depending on how
        # the element type defines truthiness.
        record[field] = child.text if child is not None else ""
    return record
def dump_file(data_list, file_name, columns=None):
    """Write the parsed records to ``file_name`` as a CSV file.

    Args:
        data_list: List of dicts, one per record, keyed by column name.
        file_name: Path of the CSV file to create.
        columns: Column names to write, in order. Defaults to the
            module-level ``header`` list.

    Rows containing a missing value and duplicate rows are removed before
    writing. An empty ``data_list`` produces a file holding only the header
    line.
    """
    if columns is None:
        columns = header
    # Fixing the columns up front keeps an empty input from failing at
    # write time with a KeyError for the missing columns.
    df = pd.DataFrame(data_list, columns=columns)
    # dropna() and drop_duplicates() return new frames; the results were
    # previously discarded, so neither step had any effect.
    df = df.dropna()
    df = df.drop_duplicates()
    df.to_csv(file_name, index=False, columns=columns)
def xml2csv(input_file, output_file):
    """Convert the channel list of an XML file into a CSV file.

    Args:
        input_file: Path of the XML file to read.
        output_file: Path of the CSV file to write.

    Raises:
        ValueError: If the document has no ``Channels`` element.
    """
    # Read the raw bytes inside a with-block so the handle is closed even
    # when parsing fails.
    with open(input_file, 'rb') as xml_file:
        raw_text = xml_file.read()
    soup = BeautifulSoup(raw_text, "xml")
    channels = soup.find("Channels")
    if channels is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no <Channels> element found in {!r}".format(input_file))
    data_list = [parse_channel(channel) for channel in channels.find_all("Channel")]
    dump_file(data_list=data_list, file_name=output_file)
# xml2csv(input_file="l", output_file="post.csv")
def zhaozuzhi(a, b):  # a is the larger collection, b the smaller one
    """Return the rows of ``a`` whose first field occurs in ``b``.

    Args:
        a: Sequence of rows; each row is indexable and its first item is
            the key.
        b: Iterable of keys to keep (must be hashable, e.g. strings).

    Returns:
        A list in the order of ``a`` where every match is wrapped in its own
        one-element list (``[row]``). A row is emitted once per occurrence
        of its key in ``b``, exactly as the nested-loop scan it replaces
        did.
    """
    from collections import Counter

    # Count the keys once so each row needs a single lookup instead of a
    # full scan of b.
    occurrences = Counter(b)
    matches = []
    for row in a:
        matches.extend([row] for _ in range(occurrences[row[0]]))
    return matches
# Take the i-th item of each of the three lists and store them together as one
# row, giving rows shaped like [name, data1, data2].
def walk_list(a, b, c):
    """Combine three parallel lists into a list of three-item rows.

    Args:
        a: First list; its length decides how many rows are produced.
        b: Second list, indexed in step with ``a``.
        c: Third list, indexed in step with ``a``.

    Returns:
        ``[[a[0], b[0], c[0]], [a[1], b[1], c[1]], ...]``.

    Raises:
        IndexError: If ``b`` or ``c`` is shorter than ``a``.
    """
    # Indexing (rather than zip) keeps the IndexError for mismatched
    # lengths instead of silently truncating.
    return [[item, b[i], c[i]] for i, item in enumerate(a)]
# Convert both XML files to CSV.
xml2csv(input_file=filename1, output_file="D:/data1.csv")
xml2csv(input_file=filename2, output_file="D:/data2.csv")
# Read the first CSV: list1 holds its rows, list2 the first field (the name)
# of every row.
progressbar(30, 100)
# The reader construction had been reduced to `ader(f)` in this copy of the
# file; it is restored as csv.reader. The files are written by pandas, whose
# to_csv defaults to UTF-8, so they are read back as UTF-8 rather than with
# the locale encoding. newline='' is what the csv module asks for.
with open(r'D:\data1.csv', 'r', encoding='utf-8', newline='') as f:
    # Blank lines come back as empty rows; skip them so row[0] cannot fail.
    list1 = [row for row in csv.reader(f) if row]
list2 = [row[0] for row in list1]
# Read the second CSV: list3 holds its rows, list4 the names.
progressbar(40, 100)
with open(r'D:\data2.csv', 'r', encoding='utf-8', newline='') as f1:
    list3 = [row for row in csv.reader(f1) if row]
list4 = [row[0] for row in list3]
progressbar(50, 100)
# Sets make each membership test O(1); the comprehensions still walk the
# lists, so order and duplicates are unchanged.
names_in_1 = set(list2)
names_in_2 = set(list4)
# In list2 but not in list4: present in data1, missing from data2 (removed).
ret1 = [i for i in list2 if i not in names_in_2]
# In list4 but not in list2: missing from data1, present in data2 (added).
ret2 = [i for i in list4 if i not in names_in_1]
# In both list2 and list4: the names the two files share.
ret3 = [i for i in list2 if i in names_in_2]
# Write the name-level differences to disk and build temp1.csv / temp2.csv.
# a11 is the list of list1 rows whose name is in ret3 (shared by both files);
# b11 is the corresponding list built from list3. zhaozuzhi wraps every row in
# its own one-element list.
# NOTE(review): every 'D:\' literal in this section looks truncated -- the file
# name after the drive prefix is missing in this copy, and as written the
# backslash escapes the closing quote. The intended file names cannot be
# recovered from here; restore them before running.
# NOTE(review): the loop bodies in this section have lost their indentation.
# Names that are only in the first file (ret1), one per line, each followed by
# a comma.
file = open('D:\', 'w')
for i in  ret1:
file.write(str(i) +','+ '\n')
file.close()
# Names that are only in the second file (ret2), same layout.
file = open('D:\', 'w')
for i in  ret2:
file.write(str(i) +','+ '\n')
file.close()
a11=zhaozuzhi(list1,ret3)
b11=zhaozuzhi(list3,ret3)
# Dump the shared rows of each file; str() of a one-element list holding a row
# renders as "[[...]]".
file = open('D:\', 'w')
for i in  a11:
file.write(str(i) +','+ '\n')
file.close()
file = open('D:\', 'w')
for i in  b11:
file.write(str(i) +','+ '\n')
file.close()
# Presumably reads back the a11 dump written above (path truncated -- confirm)
# and removes the "[[" / "]]" wrappers before saving it as temp1.csv.
lines=open('D:\').readlines()
fp=open('D:\diff/temp1.csv', 'w')
for s in lines:
fp.write( s.replace('[[','').replace(']]',''))  # replace substitutes text, write outputs the line
fp.close()  # close the file
progressbar(70, 100)
# Same clean-up for what is presumably the b11 dump, saved as temp2.csv.
lines=open('D:\').readlines()
fp=open('D:\diff/temp2.csv', 'w')
for s in lines:
fp.write( s.replace('[[','').replace(']]',''))  # replace substitutes text, write outputs the line
fp.close()  # close the file
# Read the cleaned temp files back: a11 gets the rows of temp1.csv and b11 the
# rows of temp2.csv.
# The reader construction had been reduced to `ader(f)` in this copy of the
# file; it is restored as csv.reader. Raw strings keep the backslash in the
# paths literal without relying on the invalid `\d` escape.
with open(r'D:\diff/temp1.csv', 'r') as f:
    a11 = list(csv.reader(f))
with open(r'D:\diff/temp2.csv', 'r') as f:
    b11 = list(csv.reader(f))
# Four empty name lists, one per compared field, that collect the names whose
# value differs between the two files.
name1 = []
name2 = []
name3 = []
name4 = []
# Write the InitialValue differences, producing a txt file. Original author's
# note: what is read here is wrong; it should be the part shared by table 1
# and table 2.
# NOTE(review): the original comment line ends with the text
# "progressbar(80, 100)", which looks like a call that was fused into the
# comment; as part of a comment it never runs.
# NOTE(review): the loop and if bodies in this section have lost their
# indentation.
initiavalue1=[]
initiavalue2=[]
# Compare field 1 of the rows at the same position in a11 and b11 (presumably
# the InitialValue column, going by the order of `header` -- confirm). An
# IndexError is raised if b11 has fewer rows than a11.
for z in range(len(a11)):
if a11[z][1]!=b11[z][1]:
name1.append(a11[z][0])
initiavalue1.append(a11[z][1])
initiavalue2.append(b11[z][1])
# Rows of [name, value in file 1, value in file 2].
a=walk_list(name1,initiavalue1,initiavalue2)
# NOTE(review): both 'D:\' literals below look truncated -- the scratch file
# name is missing in this copy and the backslash escapes the closing quote.
file = open('D:\', 'w')
for i in a:
file.write(str(i) +','+ '\n')
file.close()
# Presumably reads back the scratch file written above and removes the list
# brackets before saving the result as initiavalue.csv.
lines=open('D:\').readlines()
fp=open('D:\diff/initiavalue.csv', 'w')
for s in lines:
fp.write( s.replace('[','').replace(']',''))  # replace substitutes text, write outputs the line
fp.close()  # close the file
# Write the BachmannAddress differences, producing a txt file.
# NOTE(review): the loop and if bodies in this section have lost their
# indentation.
BachmannAddress1=[]
BachmannAddress2=[]
# Compare field 2 of the rows at the same position in a11 and b11 (presumably
# the BachmannAddress column, going by the order of `header` -- confirm). An
# IndexError is raised if b11 has fewer rows than a11.
for i in range(len(a11)):
if a11[i][2]!=b11[i][2]:
name2.append(str(a11[i][0]))
BachmannAddress1.append(str(a11[i][2]))
BachmannAddress2.append(str(b11[i][2]))
# Rows of [name, value in file 1, value in file 2].
b=walk_list(name2,BachmannAddress1,BachmannAddress2)
# NOTE(review): both 'D:\' literals below look truncated -- the scratch file
# name is missing in this copy and the backslash escapes the closing quote.
file = open('D:\', 'w')
for i in  b:
file.write(str(i) +','+ '\n')
file.close()
# Presumably reads back the scratch file written above and removes the list
# brackets before saving the result as BachmannAddress.csv.
lines=open('D:\').readlines()
fp=open('D:\diff/BachmannAddress.csv', 'w')
for s in lines:
fp.write( s.replace('[','').replace(']',''))  # replace substitutes text, write outputs the line. NOTE(review): the original comment continues with "fp.close()  # close the file"; the call appears to have been fused into the comment, so fp is not closed in this section.
# Write the CalibrationExpression differences, producing a txt file.
# NOTE(review): the loop and if bodies in this section have lost their
# indentation.
CalibrationExpression1=[]
CalibrationExpression2=[]
# Compare field 3 of the rows at the same position in a11 and b11 (presumably
# the CalibrationExpression column, going by the order of `header` --
# confirm). An IndexError is raised if b11 has fewer rows than a11.
for i in range(len(a11)):
if a11[i][3]!=b11[i][3]:
name3.append(str(a11[i][0]))
CalibrationExpression1.append(str(a11[i][3]))
CalibrationExpression2.append(str(b11[i][3]))
# Rows of [name, value in file 1, value in file 2].
c=walk_list(name3,CalibrationExpression1,CalibrationExpression2)
# NOTE(review): both 'D:\' literals below look truncated -- the scratch file
# name is missing in this copy and the backslash escapes the closing quote.
file = open('D:\', 'w')
for i in  c:
file.write(str(i) +','+ '\n')
file.close()
# Presumably reads back the scratch file written above and removes the list
# brackets before saving the result as CalibrationExpression.csv.
lines=open('D:\').readlines()
fp=open('D:\diff/CalibrationExpression.csv', 'w')
for s in lines:
fp.write( s.replace('[','').replace(']',''))  # replace substitutes text, write outputs the line. NOTE(review): the original comment continues with "fp.close()  # close the file"; the call appears to have been fused into the comment, so fp is not closed in this section.
# Write the IOInitialValue differences, producing a txt file.
# NOTE(review): the try, loop and if bodies in this section have lost their
# indentation.
# The section is wrapped in try/except: an IndexError raised anywhere inside
# (for example a row without a field 4) abandons the rest of the section
# without any message.
try:
IOInitialValue1=[]
IOInitialValue2=[]
# Compare field 4 of the rows at the same position in a11 and b11 (presumably
# the IOInitialValue column, going by the order of `header` -- confirm).
for i in range(len(a11)):
if a11[i][4]!=b11[i][4]:
name4.append(a11[i][0])
IOInitialValue1.append(str(a11[i][4]))
IOInitialValue2.append(str(b11[i][4]))
# Rows of [name, value in file 1, value in file 2].
d=walk_list(name4,IOInitialValue1,IOInitialValue2)
# NOTE(review): both 'D:\' literals below look truncated -- the scratch file
# name is missing in this copy and the backslash escapes the closing quote.
file = open('D:\', 'w')
for i in  d:
file.write(str(i) +','+ '\n')
file.close()
# Presumably reads back the scratch file written above and removes the list
# brackets before saving the result as IOInitialValue.csv.
lines=open('D:\').readlines()
fp=open('D:\diff/IOInitialValue.csv', 'w')
for s in lines:
fp.write( s.replace('[','').replace(']',''))  # replace substitutes text, write outputs the line. NOTE(review): the original comment continues with "fp.close()  # close the file"; the call appears to have been fused into the comment, so fp is not closed in this section.
except IndexError as e:
# NOTE(review): the error is swallowed; `e` is never used.
pass
# The line that stood here was page text from the source post ("processing XML
# files with Python"), not code; as a bare statement it raised NameError before
# the final progress update could run, so it is kept only as this comment.
# Mark the run as complete.
progressbar(100, 100)

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。