YOLOV5实战训练⾃⼰的数据集,包含数据集转换,训练~ YOLOV5实战检测
import glob
from PIL import Image
import csv
# Convert a CSV annotation file into one Pascal-VOC style XML file per image.
#
# Every CSV data row describes a single bounding box and is read as:
#   row[0]  image path (only the file name without extension is used)
#   row[1]  xmin
#   row[2]  ymin
#   row[3]  xmax
#   row[4]  ymax
#   row[-1] class name
# The first CSV line is treated as a header and skipped.

# Directory the generated XML files are written to (change to your own path).
save_xml_dir = "/home/kemove/shumiao/save_path/Annotations/"
# Directory that holds the source images (change to your own path).
src_img_dir = "/home/kemove/shumiao"
# CSV file holding the annotations.
file_path = "/home/kemove/shumiao/5.26SM.csv"
# Image size written into every XML file; change to match your own dataset.
width = 1920
height = 1080

# Collect all boxes of an image first, so that every XML file is written
# exactly once, even when the rows of one image are not adjacent in the CSV.
rows_by_image = {}
with open(file_path, newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    # Skip the header line; the default avoids StopIteration on an empty file.
    csv_header = next(csv_reader, None)
    for row in csv_reader:
        if not row:
            # Blank line in the CSV: nothing to record.
            continue
        # e.g. ".../images/test/Subset_1_450x450_001.jpg" -> "Subset_1_450x450_001"
        img = row[0].split("/")[-1].split(".")[0]
        rows_by_image.setdefault(img, []).append(row)

for img, rows in rows_by_image.items():
    xml_lines = [
        '<annotation>',
        '    <folder>VOC2007</folder>',
        '    <filename>' + str(img) + '.jpg' + '</filename>',
        '    <path>' + src_img_dir.rstrip('/') + '/' + str(img) + '.jpg' + '</path>',
        '    <source>',
        '        <database>Unknown</database>',
        '    </source>',
        '    <size>',
        '        <width>' + str(width) + '</width>',
        '        <height>' + str(height) + '</height>',
        '        <depth>3</depth>',
        '    </size>',
        '    <segmented>0</segmented>',
    ]
    # One <object> element per CSV row belonging to this image.
    for row in rows:
        xml_lines.extend([
            '    <object>',
            '        <name>' + str(row[-1]) + '</name>',
            '        <pose>Unspecified</pose>',
            '        <truncated>0</truncated>',
            '        <difficult>0</difficult>',
            '        <bndbox>',
            '            <xmin>' + str(row[1]) + '</xmin>',
            '            <ymin>' + str(row[2]) + '</ymin>',
            '            <xmax>' + str(row[3]) + '</xmax>',
            '            <ymax>' + str(row[4]) + '</ymax>',
            '        </bndbox>',
            '    </object>',
        ])
    xml_lines.append('</annotation>')
    with open(save_xml_dir + img + '.xml', 'w') as xml_file:
        xml_file.write('\n'.join(xml_lines))
4. 在 YOLOV5/paper_data 中创建 split_train_val.py,以生成 train.txt、val.txt、test.txt 等包含图片绝对路径的文件。代码如下:
import os
import random
# Randomly split the annotated images into trainval / train / val / test
# lists and write one absolute image path per line into ImageSets/Main/*.txt.

# Fraction of all annotations used for train+val; the remainder goes to test.
trainval_percent = 1.0
# Fraction of all annotations (not of trainval) used for training.
train_percent = 0.9
xmlfilepath = 'Annotations'

# Only real annotation files count; the sort makes a seeded run reproducible.
total_xml = sorted(f for f in os.listdir(xmlfilepath) if f.endswith('.xml'))
num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
# Clamp so that random.sample never asks for more items than trainval holds.
tr = min(int(num * train_percent), tv)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)
# Sets give O(1) membership tests in the loop below.
trainval_set = set(trainval)
train_set = set(train)

# Create ImageSets/ and ImageSets/Main/ if they do not exist yet.
os.makedirs('ImageSets/Main/', exist_ok=True)

with open('ImageSets/Main/trainval.txt', 'w') as ftrainval, \
        open('ImageSets/Main/test.txt', 'w') as ftest, \
        open('ImageSets/Main/train.txt', 'w') as ftrain, \
        open('ImageSets/Main/val.txt', 'w') as fval:
    for i in indices:
        # "xxx.xml" -> absolute path of the matching "xxx.jpg".
        name = '/home/kemove/yolov5/paper_data/images/' + total_xml[i][:-4] + '.jpg' + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)
运行 python split_train_val.py 后,会在 yolov5/paper_data 中生成 ImageSets/Main 两级文件夹,Main 文件夹中包含 trainval.txt、train.txt、val.txt、test.txt,
内容为图片的绝对路径地址:
5.在/yolov5/paper_data中创建voc_label.py⽂件,以⽣成yolo训练需要的txt⽂件。代码如下:
import os
import pickle
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
# Names of the split lists to process (one ImageSets/Main/<name>.txt each).
image_sets = [
    'train',
    'val',
    'test',
]
# Class names; replace "??" with the class name(s) of your own dataset.
classes = [
    "??",
]
def convert(size, box):
    """Convert a pixel bounding box into a normalised centre/size box.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels; note that both x values
            come before the y values.

    Returns:
        A tuple ``((x, y, w, h), is_valid)``. ``x``/``y`` are the box centre
        and ``w``/``h`` the box size, each divided by the image width or
        height. ``is_valid`` is False when any of the four results is 0 or
        when any of the four input coordinates is 0; the offending values
        are printed in that case.
    """
    # Scale factors that map pixel values to fractions of the image size.
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    # Box centre. If the generated txt contains negative numbers, remove
    # the "- 1" from the two lines below.
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    # Box size: max - min on each axis.
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    is_valid = True
    if x == 0 or y == 0 or w == 0 or h == 0:
        print(f'x :{x} y:{y} w:{w} h:{h}')
        is_valid = False
    # NOTE(review): this also rejects boxes that merely touch the left or
    # top image border (xmin == 0 or ymin == 0) -- confirm that is intended.
    if box[0] == 0 or box[1] == 0 or box[2] == 0 or box[3] == 0:
        print(f'box[0] :{box[0]} box[1]:{box[1]} box[2]:{box[2]} box[3]:{box[3]}')
        is_valid = False
    return (x, y, w, h), is_valid
def convert_annotation(image_id):
    """Write the label file for one image from its XML annotation.

    Reads ``Annotations/<stem>.xml`` and writes ``labels/<stem>.txt``, where
    ``<stem>`` is the file name of ``image_id`` without directory and
    extension. Each kept object becomes one line
    ``<class index> <x> <y> <w> <h>`` using the values returned by
    ``convert``. Objects whose name is not in ``classes`` or whose
    ``difficult`` flag is 1 are skipped.

    Args:
        image_id: Image path or name identifying the annotation.

    Returns:
        True when every kept object was converted; False as soon as
        ``convert`` reports an invalid box. In the False case the label
        file keeps only the lines written before the invalid box.
    """
    stem = image_id.split("/").pop().split(".")[0]
    # The context managers close both files on every exit path, including
    # the early "return False" below.
    with open('Annotations/%s.xml' % stem) as in_file, \
            open('labels/%s.txt' % stem, 'w') as out_file:
        print(out_file)
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            print('-------')
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the order (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb, is_valid = convert((w, h), b)
            if not is_valid:
                print(image_id)
                return False
            label_line = str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'
            out_file.write(label_line)
            print("⽂件内容:")
            print(label_line)
    return True
# Create the output directory for the label files if it does not exist yet.
os.makedirs('labels/', exist_ok=True)

# For every split, convert each listed image and write the images that
# converted successfully into <split>.txt in the current directory.
for image_set in image_sets:
    # strip() drops leading/trailing whitespace; split() splits on any
    # whitespace (spaces, newlines, tabs), giving one entry per image.
    with open('ImageSets/Main/%s.txt' % (image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()
    with open('%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            if convert_annotation(image_id):
                list_file.write('%s\n' % (image_id))
            else:
                print(f'存在⽆效值:{image_id}')
运行 python voc_label.py 后,labels 文件夹中会生成内容为“类别 + 坐标”数字的 txt 文件,同时在本级目录下会生成同名的 train.txt、val.txt、test.txt。如下:
6.配置⽂件。在yolov5/model中有⽂件yolov5l.yaml 等,修改部分
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论