对小样本数据进行数据增强
一、前情介绍
在之前对yolov3的学习中,有时候发现小样本数据集容易出现过拟合或者泛化能力不强的问题,在对这一问题提出的不同解决方法进行了摸索和尝试,发现提高数据集样本容量是一个比较直接和简单粗暴的方法,以下记录这一实验方法。
二、环境
直接交代环境,都是相对较简单,在这里博主没遇到过坑
os
numpy
PIL
imgaug
三、代码
import os
import shutil
import xml.etree.ElementTree as ET

import imgaug as ia
import numpy as np
from imgaug import augmenters as iaa
from PIL import Image

ia.seed(1)
def read_xml_annotation(root, image_id):
    """Read all bounding boxes from a Pascal-VOC style XML annotation.

    Args:
        root: directory containing the annotation files.
        image_id: filename of the XML annotation (including extension).

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` integer lists, one entry
        per ``<object>`` element, in document order.
    """
    # Context manager closes the file handle even if parsing fails.
    with open(os.path.join(root, image_id)) as in_file:
        tree = ET.parse(in_file)
    xml_root = tree.getroot()

    bndboxlist = []
    # Walk every annotated object under the document root.
    for obj in xml_root.findall('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        xmax = int(bndbox.find('xmax').text)
        ymin = int(bndbox.find('ymin').text)
        ymax = int(bndbox.find('ymax').text)
        bndboxlist.append([xmin, ymin, xmax, ymax])
    return bndboxlist
# (506.0000, 330.0000, 528.0000, 348.0000) -> (520.4747, 381.5080, 540.5596, 398.6603)
def change_xml_annotation(root, image_id, new_target):
    """Overwrite the first bounding box of an annotation with new coordinates.

    Reads ``<image_id>.xml`` from *root*, replaces the coordinates of the
    first ``<object>``'s ``<bndbox>``, and writes the result back into
    *root* under a zero-padded six-digit filename (e.g. ``000007.xml``).

    Args:
        root: directory holding both the input and the output XML files.
        image_id: numeric id (or numeric string) of the annotation,
            without the ``.xml`` extension.
        new_target: sequence ``[xmin, ymin, xmax, ymax]`` of new coordinates.
    """
    new_xmin, new_ymin, new_xmax, new_ymax = new_target[:4]
    with open(os.path.join(root, str(image_id) + '.xml')) as in_file:
        tree = ET.parse(in_file)
    xmlroot = tree.getroot()
    obj = xmlroot.find('object')
    bndbox = obj.find('bndbox')
    # Element.text must be a string, so coerce the numbers explicitly.
    bndbox.find('xmin').text = str(new_xmin)
    bndbox.find('ymin').text = str(new_ymin)
    bndbox.find('xmax').text = str(new_xmax)
    bndbox.find('ymax').text = str(new_ymax)
    # Original snippet used the undefined name `id` here; the only id in
    # scope is image_id, so the padded output name is built from it.
    tree.write(os.path.join(root, str("%06d" % int(image_id)) + '.xml'))
def change_xml_list_annotation(root, image_id, new_target, saveroot, id):
    """Rewrite every bounding box of an annotation and save it under a new id.

    Args:
        root: directory containing the source XML files.
        image_id: base name of the source XML file, without ``.xml``.
        new_target: list of ``[xmin, ymin, xmax, ymax]`` entries, one per
            ``<object>`` element and in the same document order.
        saveroot: directory where the rewritten XML is written.
        id: numeric id used for the zero-padded output name (``%06d``).
            (Parameter name kept for caller compatibility although it
            shadows the builtin.)
    """
    with open(os.path.join(root, str(image_id) + '.xml')) as in_file:
        tree = ET.parse(in_file)
    # Point <filename> at the new image name so image/XML stay paired.
    elem = tree.find('filename')
    elem.text = (str("%06d" % int(id)) + '.jpg')

    xmlroot = tree.getroot()
    index = 0
    # Replace the coordinates of every annotated object.
    for obj in xmlroot.findall('object'):
        bndbox = obj.find('bndbox')
        new_xmin, new_ymin, new_xmax, new_ymax = new_target[index][:4]
        # Element.text must be a string, so coerce the numbers explicitly.
        bndbox.find('xmin').text = str(new_xmin)
        bndbox.find('ymin').text = str(new_ymin)
        bndbox.find('xmax').text = str(new_xmax)
        bndbox.find('ymax').text = str(new_ymax)
        index = index + 1

    tree.write(os.path.join(saveroot, str("%06d" % int(id)) + '.xml'))
def mkdir(path):
    """Create *path* (including parents) if it does not already exist.

    Leading/trailing whitespace and a trailing ``/`` are stripped first.

    Args:
        path: directory path to create.

    Returns:
        True if the directory was created, False if it already existed.
    """
    # Normalize the path: strip surrounding whitespace and a trailing slash.
    path = path.strip()
    path = path.rstrip("/")
    if not os.path.exists(path):
        # makedirs also creates any missing intermediate directories.
        os.makedirs(path)
        print(path + ' 创建成功')
        return True
    else:
        # Directory already exists; report and do nothing.
        print(path + ' 目录已存在')
        return False
if __name__ == "__main__":
    IMG_DIR = "⾃⼰的⽂件路径/image"        # source images (placeholder path)
    XML_DIR = "⾃⼰的⽂件路径/Anotations"   # source XML annotations (placeholder path)

    AUG_XML_DIR = "F:/crop/Annotations"  # output folder for augmented XML files
    # Start from a clean output folder; absence of the folder is fine.
    try:
        shutil.rmtree(AUG_XML_DIR)
    except FileNotFoundError:
        pass
    mkdir(AUG_XML_DIR)

    AUG_IMG_DIR = "F:/crop/images"  # output folder for augmented images
    try:
        shutil.rmtree(AUG_IMG_DIR)
    except FileNotFoundError:
        pass
    mkdir(AUG_IMG_DIR)

    AUGLOOP = 10  # number of augmented copies generated per source image

    boxes_img_aug_list = []
    new_bndbox = []
    new_bndbox_list = []

    # Augmentation pipeline; to_deterministic() below freezes the sampled
    # parameters so image and bounding boxes get the same transform.
    seq = iaa.Sequential([
        iaa.Flipud(0.5),                   # vertical flip with p=0.5
        iaa.Fliplr(0.5),                   # horizontal flip (mirror), p=0.5
        iaa.Multiply((1.2, 1.5)),          # brightness change; does not affect BBs
        iaa.GaussianBlur(sigma=(0, 2.0)),  # random blur strength
        iaa.Affine(
            translate_px={"x": 15, "y": 15},
            scale=(0.8, 0.95),
            rotate=(-30, 30)
        )  # translation / scaling / rotation; affects BBs
    ])

    for root, sub_folders, files in os.walk(XML_DIR):
        for name in files:
            bndbox = read_xml_annotation(XML_DIR, name)
            for epoch in range(AUGLOOP):
                # Freeze the random parameters for this round so the boxes
                # are transformed exactly like the image.
                seq_det = seq.to_deterministic()
                # Matching image is assumed to be a .jpg with the same stem.
                img = Image.open(os.path.join(IMG_DIR, name[:-4] + '.jpg'))
                img = np.asarray(img)
                # Augment every bounding box with the frozen transform.
                for i in range(len(bndbox)):
                    bbs = ia.BoundingBoxesOnImage([
                        ia.BoundingBox(x1=bndbox[i][0], y1=bndbox[i][1],
                                       x2=bndbox[i][2], y2=bndbox[i][3]),
                    ], shape=img.shape)
                    bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]
                    boxes_img_aug_list.append(bbs_aug)
                # NOTE(review): the published snippet ends here — the full
                # script presumably also augments/saves the image and writes
                # the new XML via change_xml_list_annotation; that part was
                # lost in extraction. TODO: restore before use.
这是增强了11倍的结果:
对增强后的数据集进行训练,没有任何问题:

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。