关于对象标注文件的转换(xml转json)
背景介绍
也就是说,该工程的输出是一个个针对头部的方框标注。
因此所需要的label文件是关于当前图片所有目标的像素点位置记录,是一个json文件,如下所示。
我的目标就是在普通图片上实现该文件的输出。
方案说明
采取的方案是用labelImg应用得到xml文件,再从xml文件中提取需要的部分,生成对应的json文件。labelImg可自行搜索下载。
xml文件一般格式
所需要的便是“bndbox”内的数据。
代码(基于Python3)
import os
import json
import xmltodict
import numpy as np
import random
def get(path,txt):
    """Collect the bounding-box coordinates stored in one XML annotation file.

    Args:
        path: Path of the XML file to read. It is expected to have an
            ``annotation`` root whose ``object`` children each contain a
            ``bndbox`` with ``xmin``, ``xmax``, ``ymin`` and ``ymax``.
        txt: List of coordinate strings gathered so far. It is not modified.

    Returns:
        A new list holding the items of ``txt`` followed by, for every
        object in the file, ``xmin, xmax, ymin, ymax`` in that order. Each
        value is the text from the XML with ``'.0'`` appended (still a str).
    """
    with open(path) as f:
        dic = xmltodict.parse(f.read())

    objects = dic["annotation"].get("object")
    if objects is None:
        # No annotated object in this file: nothing to add.
        return list(txt)
    if not isinstance(objects, list):
        # xmltodict yields a single mapping (not a list) when the element
        # occurs only once, so normalise to a list before iterating.
        objects = [objects]

    coords = list(txt)
    for obj in objects:
        bndbox = obj["bndbox"]
        coords.extend(bndbox[key] + '.0' for key in ("xmin", "xmax", "ymin", "ymax"))
    return coords
def makejson(dic,save_path):
    """Write ``dic`` to ``save_path`` as pretty-printed JSON.

    Args:
        dic: JSON-serialisable object to store (the final output structure).
        save_path: Path of the file to create or overwrite.

    Keys are sorted and the output is indented by two spaces.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(save_path, 'w') as f:
        json.dump(dic, f, sort_keys=True, indent=2, separators=(',', ': '))
def readtxt(txtpath,name):
    """Read the entries of one split list file and return them as a list.

    Args:
        txtpath: Prefix of the list file; it is concatenated directly with
            ``name``, so a directory must end with a path separator.
        name: Base name of the list file without ``.txt``
            (the script uses ``'train'``, ``'test'`` and ``'val'``).

    Returns:
        The non-blank lines of ``txtpath + name + '.txt'`` in file order,
        with the trailing newline removed. An empty file gives ``[]``.
    """
    txt = txtpath + name + '.txt'
    # ``with`` guarantees the handle is closed; stripping only the newline
    # keeps the last entry intact even when the file lacks a final newline.
    with open(txt, "r") as f:
        return [line.rstrip('\n') for line in f if line.strip()]
def divide(xmlfilepath,txtsavepath, test_percent = 0.2,val_percent = 0.2,train_percent = 0.6):
    """Split the XML files of a directory into train/test/val name lists.

    The extension-less names are written one per line to
    ``txtsavepath + 'train.txt'``, ``txtsavepath + 'test.txt'`` and
    ``txtsavepath + 'val.txt'``.

    Args:
        xmlfilepath: Directory containing the ``.xml`` annotation files.
        txtsavepath: Prefix for the three list files; it is concatenated
            directly with the file name, so a directory must end with a
            path separator and must already exist.
        test_percent: Fraction of the files assigned to the test list.
        val_percent: Accepted for interface compatibility but not used;
            the val list receives whatever train and test leave over.
        train_percent: Fraction of the files assigned to the train list.
    """
    # os.listdir() returns entries in arbitrary order, so sort to make the
    # split reproducible; entries that are not .xml files are ignored.
    stems = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xmlfilepath)
        if entry.lower().endswith('.xml')
    )
    num = len(stems)
    train_end = int(num * train_percent)
    test_end = train_end + int(num * test_percent)

    splits = {
        'train': stems[:train_end],
        'test': stems[train_end:test_end],
        'val': stems[test_end:],
    }
    for split_name, names in splits.items():
        with open(txtsavepath + split_name + '.txt', 'w') as f:
            f.writelines(stem + '\n' for stem in names)
if __name__ == '__main__':
    # Locations of the inputs and outputs of the conversion.
    xml_data_path = '/home/tjw/Desktop/change/xml_to_json_idl/xml/'  # XML annotation files
    img_paths = '/home/tjw/Desktop/change/xml_to_json_idl/img/'  # prefix written into "image_path"
    save_path = '/home/tjw/Desktop/change/xml_to_json_idl/json/data.json'  # JSON output file
    txtsavepath = '/home/tjw/Desktop/change/xml_to_json_idl/txt/'  # split list files

    # Write the train/test/val name lists, then read them back.
    divide(xml_data_path, txtsavepath)
    txt_train = readtxt(txtsavepath, 'train')
    txt_test = readtxt(txtsavepath, 'test')
    txt_val = readtxt(txtsavepath, 'val')

    # One entry per image of the train list (test/val are read but not
    # converted here).
    dic = []
    for filename in txt_train:
        # Start from an empty list for every file so that boxes of earlier
        # images are not carried over into this one.
        coords = get(xml_data_path + filename + '.xml', [])
        # get() returns a flat sequence ordered xmin, xmax, ymin, ymax per
        # box; regroup it four values at a time. An image without boxes
        # simply gets an empty "rects" list.
        rects = [
            {"x1": coords[k], "x2": coords[k + 1], "y1": coords[k + 2], "y2": coords[k + 3]}
            for k in range(0, len(coords), 4)
        ]
        # Names in the list file already have no extension, so only the
        # image suffix is appended.
        dic.append({"image_path": img_paths + filename + '.png', "rects": rects})

    # NOTE(review): coordinate values are the strings produced by get()
    # (e.g. "12.0"); confirm whether the consumer expects JSON numbers.
    makejson(dic, save_path)
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论