关于coco数据集删减类个数的代码--688IT编程网

关于coco数据集删减类个数的代码

以下代码是我从别人那里复制来的，并非自己所写。平常跑代码大多都会用到 COCO 数据集，但随着网络越来越复杂，加上显卡资源有限，常常无法跑完约 11 万张图片的完整数据集，实在太费时。因此我们只减少类别的个数，而不减少所保留类别的图片数量，这样对网络的影响不大。

起初如下所⽰：

第⼀个⾥⾯放了.json⽂件

第⼆第三个分别放了训练集和验证集图⽚

1.把coco数据集转成xml格式

读取出images 和 Annotations 两个⽂件夹

import os
import shutil

import cv2
import matplotlib.pyplot as plt
import skimage.io as io
from PIL import Image, ImageDraw
from pycocotools.coco import COCO
from tqdm import tqdm

# Root folder that receives the COCO -> VOC conversion results
# ("" means the current working directory).
savepath = ""

# Folder that holds the COCO dataset (annotation JSON files and the
# train/val image folders are looked up below it).
dataDir = 'datasets'

# COCO splits to convert, e.g. ['train2014', 'val2014'] or ['val2017'].
datasets_list = ['val2017']

# Names of the categories to keep.
classes_names = [
    'fire hydrant',
    'stop sign',
    'parking meter',
    'bench',
    'bird',
    'cat',
    'dog',
    'horse',
    'sheep',
    'cow',
]

# Output folders for the copied images and the generated XML files.
img_dir = ''.join([savepath, 'images/val2017/'])
anno_dir = ''.join([savepath, 'Annotations/val2017/'])

# VOC-style XML templates, filled in with the % operator.
#
# The extracted text had lost every placeholder and most tags, so
# `headstr % (...)` raised TypeError and the output was not well-formed XML.
# The fields consumed downstream are filename, size/width, size/height,
# object/name, object/difficult and bndbox/{xmin,ymin,xmax,ymax}; the other
# elements are the usual Pascal VOC boilerplate.

# Header: takes (filename, width, height, depth).
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""

# One object entry: takes (class_name, xmin, ymin, xmax, ymax).
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

# Footer: closes the root element opened by headstr.
tailstr = '''\
</annotation>
'''

#if the dir is not exists,make it,else delete it

def mkr(path):
    """Create an empty directory at *path*.

    If *path* already exists it is removed together with everything in it
    and then recreated, so the caller always starts from an empty folder.
    Missing parent directories are created as well (the output folders used
    by this script are nested, e.g. ``images/val2017/``).

    Args:
        path: Directory to (re)create.
    """
    if os.path.exists(path):
        # WARNING: irreversibly deletes any previous conversion results.
        shutil.rmtree(path)
    os.makedirs(path)

# Start from empty output folders: first the images, then the XML files.
for _out_dir in (img_dir, anno_dir):
    mkr(_out_dir)

def id2name(coco):
    """Build a lookup table from COCO category id to category name.

    Args:
        coco: Object exposing ``dataset['categories']``, an iterable of
            dicts that each carry an ``'id'`` and a ``'name'`` key.

    Returns:
        dict mapping each category id to its name.
    """
    return {cat['id']: cat['name'] for cat in coco.dataset['categories']}

def write_xml(anno_path, head, objs, tail):
    """Write one annotation file: header, one entry per object, footer.

    Args:
        anno_path: Path of the XML file to create (overwritten if present).
        head: Already formatted header text.
        objs: Iterable of ``[class_name, xmin, ymin, xmax, ymax]`` records;
            each one is rendered through the module-level ``objstr`` template.
        tail: Footer text.
    """
    # The context manager guarantees the file is flushed and closed; the
    # previous version never closed the handle.
    with open(anno_path, "w", encoding="utf-8") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % tuple(obj[:5]))
        f.write(tail)

def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Copy one image to the output folder and write its XML annotation.

    Args:
        coco: COCO API object (not used here; kept for call compatibility).
        dataset: Name of the split folder below ``dataDir + '/images/'``.
        filename: Image file name, e.g. ``COCO_train2014_000000196610.jpg``.
        objs: ``[class_name, xmin, ymin, xmax, ymax]`` records for the image.
    """
    # e.g. COCO_train2014_000000196610.jpg --> COCO_train2014_000000196610.xml
    # (splitext also copes with extensions that are not three characters).
    anno_path = anno_dir + os.path.splitext(filename)[0] + '.xml'
    img_path = dataDir + '/images/' + dataset + '/' + filename
    dst_imgpath = img_dir + filename

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        print(filename + " could not be read")
        return
    if img.ndim < 3 or img.shape[2] == 1:
        print(filename + " not a RGB image")
        return

    # NOTE(review): the copy step was missing from the extracted text even
    # though dst_imgpath was computed; restored so that the image folder is
    # populated alongside the annotations.
    shutil.copy(img_path, dst_imgpath)

    # img.shape is (height, width, channels); the header wants width first.
    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    write_xml(anno_path, head, objs, tailstr)

def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the wanted bounding boxes of one image, optionally showing them.

    Args:
        coco: COCO API object providing ``getAnnIds`` and ``loadAnns``.
        dataset: Name of the split folder below ``dataDir + '/images/'``.
        img: COCO image record; ``'id'`` and ``'file_name'`` are read.
        classes: Mapping from category id to category name.
        cls_id: Category ids used to filter the annotations.
        show: When true, draw the boxes on the image and display it with
            matplotlib.

    Returns:
        list of ``[class_name, xmin, ymin, xmax, ymax]`` with integer pixel
        coordinates, one entry per annotation whose class is listed in the
        module-level ``classes_names``.
    """
    # Fetch the annotations of this image restricted to the wanted categories.
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)

    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names:
            continue
        print(class_name)
        if 'bbox' not in ann:
            continue
        # bbox is read as [x, y, width, height]; convert to corner form.
        x, y, w, h = ann['bbox']
        objs.append([class_name, int(x), int(y), int(x + w), int(y + h)])

    if show:
        # The image is only opened when it is actually displayed, and it is
        # closed again afterwards (it used to be opened on every call and
        # never closed).
        path = '%s/%s/%s/%s' % (dataDir, 'images', dataset, img['file_name'])
        with Image.open(path) as image:
            draw = ImageDraw.Draw(image)
            for _, xmin, ymin, xmax, ymax in objs:
                draw.rectangle([xmin, ymin, xmax, ymax])
            plt.figure()
            plt.axis('off')
            plt.imshow(image)
            plt.show()

    return objs

for dataset in datasets_list:
    # Location of the annotation JSON,
    # e.g. datasets/images/annotations/instances_val2017.json
    annFile = '{}/images/annotations/instances_{}.json'.format(dataDir, dataset)

    # Initialise the COCO API for this split. While loading it reports
    # "loading annotations", "Done (t=...s)" and "index created!"; after
    # that the JSON is parsed and images are linked to their annotations.
    coco = COCO(annFile)

    # Show all classes in COCO.
    classes = id2name(coco)
    print(classes)

    # Ids of the categories we want to keep.
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)

    # An image that contains several wanted classes is listed once per
    # class; remember what was already exported so it is written only once.
    done_img_ids = set()

    for cls in classes_names:
        # Get the id of this class and all images that contain it.
        cls_id = coco.getCatIds(catNms=[cls])
        img_ids = coco.getImgIds(catIds=cls_id)
        print(cls, len(img_ids))

        for imgId in tqdm(img_ids):
            if imgId in done_img_ids:
                continue
            done_img_ids.add(imgId)

            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            # Collect the boxes of every wanted class, not just `cls`.
            objs = showimg(coco, dataset, img, classes, classes_ids, show=False)
            print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)

2.把xml格式转化为txt

读取出labels的⽂件夹并⽣成与labels同级的txt

最终如下图所⽰：

第⼀个放的是xml

第二个是分类出来的图片，文件夹结构如下：images/train2017，其下是图片

第三个是分类出来的图片对应的 txt 标注信息，文件夹结构如下：labels/train2017；另外还有一个索引文件，记录了每张图片所在的位置

代码如下：

import xml.etree.ElementTree as ET

import pickle

import os

from os import listdir, getcwd

from os.path import join

# Categories to export; the position in this list becomes the class index
# written to the label files. (For a single class use e.g. ['truck'].)
classes = [
    'fire hydrant',
    'stop sign',
    'parking meter',
    'bench',
    'bird',
    'cat',
    'dog',
    'horse',
    'sheep',
    'cow',
]

def convert(size, box, one_based=False):
    """Convert a corner-form pixel box into normalised centre form.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels. Note the order: both
            x values come first, then both y values.
        one_based: Subtract 1 from the centre, as the legacy VOC converter
            did for 1-based VOC pixel coordinates. The default is False
            because the XML files in this pipeline are written from COCO
            boxes without any offset; with the offset, centres were shifted
            by one pixel and could become negative for boxes touching the
            left or top border.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension.
    """
    img_w, img_h = size[0], size[1]
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    shift = 1 if one_based else 0

    x_center = (xmin + xmax) / 2.0 - shift
    y_center = (ymin + ymax) / 2.0 - shift
    box_w = xmax - xmin
    box_h = ymax - ymin

    return (x_center / img_w, y_center / img_h, box_w / img_w, box_h / img_h)

def convert_annotation(image_id, xml_dir='/coco/Annotations/train2017',
                       label_dir='/coco/labels/train2017'):
    """Convert the XML annotation of one image into a txt label file.

    Reads ``<xml_dir>/<image_id>.xml`` and writes ``<label_dir>/<image_id>.txt``
    with one line per kept object: the class index followed by the four
    values returned by ``convert``.

    Objects are skipped when their name is not in the module-level
    ``classes`` list or when they are flagged ``difficult == 1``.

    Args:
        image_id: File name of the image without its extension.
        xml_dir: Folder holding the XML annotations.
        label_dir: Folder receiving the txt labels; it must already exist.
            (The default previously read ``trian2017``, which did not match
            the ``train2017`` folder created by the calling script.)
    """
    # Imported locally so the function does not depend on the module header.
    import xml.etree.ElementTree as ET

    in_path = os.path.join(xml_dir, '%s.xml' % image_id)
    out_path = os.path.join(label_dir, '%s.txt' % image_id)

    root = ET.parse(in_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # `with` closes the label file even if an object entry is malformed.
    with open(out_path, 'w') as out_file:
        for obj in root.iter('object'):
            # A missing <difficult> tag is treated as "not difficult".
            difficult = obj.findtext('difficult', default='0')
            cls = obj.find('name').text
            print(cls)
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

# Folder with the images that were exported for the selected classes.
data_path = '/coco/images/train2017'

img_names = os.listdir(data_path)

# The label folder is needed by convert_annotation(); create it once up
# front instead of re-checking it for every image.
os.makedirs('/coco/labels/train2017', exist_ok=True)

# Index file listing the location of every image, one path per line.
# NOTE(review): the file name was lost in the extracted text (it read
# open('./', 'w'), which tries to open a directory); 'train2017.txt' is an
# assumption - confirm the intended name.
with open('./train2017.txt', 'w') as list_file:
    for img_name in img_names:
        list_file.write('./images/train2017/%s\n' % img_name)
        # Strip the extension to obtain the id shared by image, xml and txt.
        image_id = os.path.splitext(img_name)[0]
        convert_annotation(image_id)

688IT编程网

关于coco数据集删减类个数的代码

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

m函数数字提取

jest断言方法大全

中兴ZXSEC US 管理员手册

keras系列(一):参数设置

Qt从QString中提取出数字

element input 金额千分位格式化

freemaker 参数解析正则

C#正则验证数字

form表单验证正则

scanf正则表达式用法

grafana value的正则表达式

Android平台浮点数运算应用

js-(JS正则表达式验证数字)

判断Python输入是否是整数,字符,或浮点数

c语言 sscanf 正则规则

从文本中提取数值技巧

js将整数转换成两位浮点数的方法

vue正则限制浮点数

8到20的结尾的正则

shell 正则表达式最后一行

最新文章

应用程序的安全检测方法、装置、电子设备和存储介质

VBA之正则表达式(1)--基础篇

代码编辑的辅助方法、装置及电子设备

SHELL查字符串中包含字符的命令

String方法中replace和replaceAll的区别详解(源码分析)

双字节符号正则

标签列表

688IT编程网

关于coco数据集删减类个数的代码

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法 正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

m函数数字提取

jest断言方法大全

中兴ZXSEC US 管理员手册

keras系列(一):参数设置

Qt从QString中提取出数字

element input 金额千分位格式化

freemaker 参数解析正则

C#正则验证数字

form表单验证正则

scanf正则表达式用法

grafana value的正则表达式

Android平台浮点数运算应用

js-(JS正则表达式验证数字)

判断Python输入是否是整数,字符,或浮点数

c语言 sscanf 正则规则

从文本中提取数值技巧

js将整数转换成两位浮点数的方法

vue正则限制浮点数

8到20的结尾的正则

shell 正则表达式 最后一行

最新文章

应用程序的安全检测方法、装置、电子设备和存储介质

VBA之正则表达式(1)--基础篇

代码编辑的辅助方法、装置及电子设备

SHELL查字符串中包含字符的命令

String方法中replace和replaceAll的区别详解(源码分析)

双字节符号正则

标签列表

nginx map用法正则

shell 正则表达式最后一行