Python目标检测数据清洗与标注常用程序记录
1、为一个文件夹的图片改变文件名称
import os
# Script 1: interactively rename every .jpg in one folder to
# "<prefix><zero-padded counter>.jpg".
prefix = input("前缀\n")
startNumber = input("开始的数字\n")
father = r"D:\Desktop\jinggai\jinggai3"
for k, fileName in enumerate(os.listdir(father), int(startNumber)):
    if fileName.endswith("jpg"):
        # NOTE(review): the original `if` line was truncated and the loop body
        # was missing from the source. The rename below is reconstructed from
        # the section heading and from the naming scheme used by the later
        # xml-renaming script (prefix + 5-digit counter) - confirm before use.
        os.rename(
            os.path.join(father, fileName),
            os.path.join(father, prefix + str(k).zfill(5) + ".jpg"),
        )
2、已经为一些图片标注,然后利用标注清除一个文件夹中没有带labels的图片
import os
# Script 2: delete every .jpg in `images` that has no same-named .txt file
# in `labels`, then report what is left.
images = r"D:\Desktop\jinggai\images"
labels = r"D:\Desktop\jinggai\labels"
imgFileNameList = os.listdir(images)
labelFileNameList = os.listdir(labels)
labelFileNameSet = set(labelFileNameList)  # O(1) membership tests in the loop
for fileName in imgFileNameList:
    if fileName.endswith(".jpg"):
        if fileName[:-4] + ".txt" not in labelFileNameSet:
            # NOTE(review): this body was missing from the source; the removal
            # is reconstructed from the section heading ("remove images that
            # have no labels") - confirm before running on real data.
            os.remove(os.path.join(images, fileName))
# Re-list so the printed count reflects the deletions made above.
imgFileNameList = os.listdir(images)
print("当前图⽚剩余多少张==>", len(imgFileNameList))
print("当前图⽚标注剩余多少个⽂件==>", len(labelFileNameList), labelFileNameList)
3、使用算法服务标注数据,将label结果写入到一个文件夹中
# -*- coding: utf-8 -*-
import os
import requests
import cv2
imgpath =r"D:\Desktop\jinggai\jinggai"
labelspath =r"D:\Desktop\jinggai\labels"
class DataConvert():
    """Conversions between bounding-box representations.

    Notation used by the method names:
        x1,y1,x2,y2  (x1,y1) is the top-left corner, (x2,y2) the bottom-right.
        x1,y1,w,h    (x1,y1) is the top-left corner, w/h the box width/height.
        xc,yc,w,h    (xc,yc) is the box centre, w/h the box width/height
                     (the original notes call this the "COCO" label format).
    """

    def __init__(self):
        pass

    @staticmethod
    def cvtx0y0whTox1y1x2y2(x0, y0, w, h, imgShape):
        """Convert a normalised centre box to integer pixel corners.

        Example from the original notes:
        "0.530921 0.666667 0.622368 0.666667" => (167, 169, 639, 507)

        Args:
            x0, y0: normalised box centre.
            w, h: normalised box width and height.
            imgShape: (height, width) or (height, width, channels); only the
                first two entries are read, so grayscale shapes work too.

        Returns:
            (x1, y1, x2, y2) in pixels, each truncated with int().
        """
        height, width = imgShape[:2]
        x1 = int((x0 - w * 0.5) * width)
        y1 = int((y0 - h * 0.5) * height)
        x2 = int((x0 + w * 0.5) * width)
        y2 = int((y0 + h * 0.5) * height)
        return x1, y1, x2, y2

    @staticmethod
    def cvtx1y1x2y2Tox0y0wh(x1, y1, x2, y2, imgShape):
        """Convert pixel corners to a normalised centre box.

        Example from the original notes:
        (167, 169, 639, 507) => "0.530921 0.666667 0.622368 0.666667"

        Args:
            x1, y1: top-left corner in pixels.
            x2, y2: bottom-right corner in pixels.
            imgShape: (height, width) or (height, width, channels); only the
                first two entries are read.

        Returns:
            (x0, y0, w, h) as floats, each divided by the image width/height.
        """
        height, width = imgShape[:2]
        x0 = (x1 + x2) / 2 / width
        y0 = (y1 + y2) / 2 / height
        w = (x2 - x1) / width
        h = (y2 - y1) / height
        return x0, y0, w, h
# Script 3: send every image in `imgpath` to the detection service and append
# one "0 xc yc w h" line per returned box to a same-named .txt in `labelspath`.
files = os.listdir(imgpath)
for file in files:
    filep = os.path.join(imgpath, file)
    # requests needs an explicit scheme; the bare "host:port/path" form in the
    # original is rejected before any request is sent.
    with open(filep, "rb") as imgFile:
        res = requests.post(url="http://172.18.43.32:8012/alg_analyse_bytes/",
                            files={"file": imgFile}).json()
    labelfile = os.path.join(labelspath, file[:-4] + ".txt")
    if len(res["data"]) != 0:
        img = cv2.imread(filep)
        if img is None:
            # cv2.imread returns None for unreadable files; skip rather than
            # crash on `img.shape`.
            continue
        shape = img.shape
        with open(labelfile, "a+") as labelOut:
            for data in res["data"]:
                # presumably data["bbox"] is [x1, y1, x2, y2] in pixels, since
                # it is unpacked into cvtx1y1x2y2Tox0y0wh - verify against the
                # service's response format.
                bboxs0 = list(map(int, data["bbox"]))
                poss = DataConvert.cvtx1y1x2y2Tox0y0wh(*bboxs0, shape)
                poss = [str(round(x, 6)) for x in poss]
                # Text mode already translates "\n" per platform; writing
                # "\r\n" here would produce "\r\r\n" on Windows.
                labelOut.write("0 " + " ".join(poss) + "\n")
4、每张图都必有labels的前提下,让图片文件夹文件数量和label文件夹文件数量一致。
import os
# Script 4: make the image folder and the label folder contain matching files.
images = r"D:\Desktop\jinggai\jinggai_nano\images"
labels = r"D:\Desktop\jinggai\jinggai_nano\labels"
imgFileNameList = os.listdir(images)
labelFileNameList = os.listdir(labels)

# Pass 1: drop images that have no label file.
labelFileNameSet = set(labelFileNameList)
for fileName in imgFileNameList:
    if fileName.endswith(".jpg"):
        if fileName[:-4] + ".txt" not in labelFileNameSet:
            # NOTE(review): this body was missing from the source; the removal
            # is reconstructed from the section heading and mirrors the
            # label-side removal below - confirm before running on real data.
            os.remove(os.path.join(images, fileName))
imgFileNameList = os.listdir(images)
labelFileNameList = os.listdir(labels)

# Pass 2: drop label files that have no image or are (nearly) empty.
imgFileNameSet = set(imgFileNameList)
for fileName in labelFileNameList:
    if fileName.endswith(".txt"):
        labelPath = os.path.join(labels, fileName)
        # Close the handle before a possible os.remove on the same path.
        with open(labelPath, "r") as labelFile:
            labelLines = labelFile.readlines()
        # The length of the list's string form is kept from the original as
        # the "effectively empty" test (str([]) is 2 characters long).
        if (fileName[:-4] + ".jpg" not in imgFileNameSet) or (len(str(labelLines)) < 7):
            os.remove(labelPath)
imgFileNameList = os.listdir(images)
labelFileNameList = os.listdir(labels)
print("当前图⽚剩余多少张==>", len(imgFileNameList))
print("当前图⽚标注剩余多少个⽂件==>", len(labelFileNameList), labelFileNameList)
5、有图片和对应的xml标注:
将图片全部改名,重写到另外一个文件夹;
将对应的xml标注文件改名;
import os
import traceback
import cv2
import numpy as np
# Script 5: re-encode every image under a new sequential name into a target
# folder and rename its xml annotation to the same new stem.
prefix = input("前缀\n")
startNumber = input("开始的数字\n")
father = r"D:\fireandsmoke\images"
father_labels = r"D:\fireandsmoke\annotations"
for k, fileName in enumerate(os.listdir(father), int(startNumber)):
    try:
        newStem = prefix + str(k).zfill(5)
        # Decode from a numpy byte buffer rather than passing the path to cv2.
        img = cv2.imdecode(np.fromfile(os.path.join(father, fileName), dtype=np.uint8), 1)  # img is a matrix
        cv2.imwrite(os.path.join(r"D:\fireandsmoke\tarimg", newStem + ".jpg"), img)
        # NOTE(review): the first half of this call was lost in the source
        # (only the destination argument and the closing "))" survived). The
        # source path is assumed to be the annotation sharing the image's
        # stem - confirm before use.
        os.rename(
            os.path.join(father_labels, os.path.splitext(fileName)[0] + ".xml"),
            os.path.join(father_labels, newStem + ".xml"),
        )
    except Exception:
        # Best effort: report the failure and continue with the next file,
        # without swallowing KeyboardInterrupt/SystemExit as a bare except did.
        traceback.print_exc()
6、xml标记文件转COCO的txt文件。
import os
import xml.etree.ElementTree as ET
xmldir =r"D:\fireandsmoke\annotations"
txtdir =r"D:\fireandsmoke\labels"
txtdir =r"D:\fireandsmoke\labels"
classes =['fire']# 标签名
def convert_annotation(img_id_filename, xml_dir=None, txt_dir=None, class_names=None):
    """Convert one VOC-style xml annotation into a normalised txt label file.

    Each kept object becomes one line "<class_id> <xc> <yc> <w> <h>", with the
    four values divided by the image size and rounded to 6 decimals.

    Args:
        img_id_filename: annotation file name; its last 4 characters are
            stripped to get the shared stem.
        xml_dir: folder holding the xml file (defaults to module-level `xmldir`).
        txt_dir: folder receiving the txt file (defaults to module-level `txtdir`).
        class_names: accepted label names; the index in this list is the class
            id written out (defaults to module-level `classes`).
    """
    xml_dir = xmldir if xml_dir is None else xml_dir
    txt_dir = txtdir if txt_dir is None else txt_dir
    class_names = classes if class_names is None else class_names

    image_id = img_id_filename[:-4]  # file name without extension
    with open(os.path.join(xml_dir, '%s.xml' % image_id), encoding='UTF-8') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    size_width = int(size.find('width').text)  # image width
    size_height = int(size.find('height').text)  # image height

    out_lines = []
    for obj in root.iter('object'):
        # A missing <difficult> tag is treated as "not difficult".
        difficult_node = obj.find('difficult')
        difficult = 0
        if difficult_node is not None and difficult_node.text:
            difficult = int(difficult_node.text)
        cls = obj.find('name').text  # label name
        # Keep only wanted labels that are not flagged as difficult.
        if cls not in class_names or difficult == 1:
            continue
        cls_id = class_names.index(cls)  # first number on the output line
        xmlbox = obj.find('bndbox')
        # Note the order: xmin, xmax, ymin, ymax.
        b = [float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)]
        if size_width == 0 or size_height == 0:
            # Only reported here; nothing in this function deletes the image.
            print("不合理的图,程序会删除这张图", image_id)
            continue
        # Clamp coordinates that exceed the image size.
        b[0] = min(b[0], size_width)
        b[1] = min(b[1], size_width)
        b[2] = min(b[2], size_height)
        b[3] = min(b[3], size_height)
        txt_data = [((b[0] + b[1]) / 2.0 - 1) / size_width,
                    ((b[2] + b[3]) / 2.0 - 1) / size_height,
                    (b[1] - b[0]) / size_width,
                    (b[3] - b[2]) / size_height]
        txt_data = [round(x, 6) for x in txt_data]
        out_lines.append(str(cls_id) + " " + " ".join(str(a) for a in txt_data) + '\n')

    # Written even when empty, as in the original, which opened the file in 'w'
    # mode before parsing.
    with open(os.path.join(txt_dir, '%s.txt' % image_id), 'w') as out_file:
        out_file.writelines(out_lines)
def getClsName(xml_dir=None):
    """Collect and print every distinct object label found in the xml files.

    Args:
        xml_dir: folder to scan (defaults to module-level `xmldir`).

    Returns:
        The set of label names; it is also printed, as before.
    """
    xml_dir = xmldir if xml_dir is None else xml_dir
    clsSet = set()
    for img_id_filename in os.listdir(xml_dir):
        # Skip other entries: the original stripped 4 characters and re-added
        # ".xml", which maps any non-xml file onto a different path.
        if not img_id_filename.endswith('.xml'):
            continue
        with open(os.path.join(xml_dir, img_id_filename), encoding='UTF-8') as in_file:
            root = ET.parse(in_file).getroot()
        for obj in root.iter('object'):
            clsSet.add(obj.find('name').text)  # label name
    print(clsSet)
    return clsSet
# imglist = os.listdir(imgdir)
# imglist = os.listdir(imgdir)
# xmllist = os.listdir(xmldir)
# # 测试图⽚是否损坏
# for img_id in imglist:
# try:
# cv2img = cv2.imread(os.path.join(imgdir, img_id))
# except:
# os.remove(os.path.join(imgdir, img_id))
if __name__ == '__main__':
    # Convert every xml annotation found in `xmldir`.
    xmllist = os.listdir(xmldir)
    for img_id in xmllist:
        # convert_annotation rebuilds the path as "<stem>.xml", so passing a
        # non-xml entry would crash or re-convert a sibling file.
        if img_id.endswith('.xml'):
            convert_annotation(img_id)
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论