YOLOv5 dataset.py annotated: data loading for training and testing
import glob
import math
import os
import random
import shutil
import time
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.utils import xyxy2xywh, xywh2xyxy
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break
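# Note (added): in Pillow this loop resolves to the numeric EXIF tag id 274
# (0x0112), which is the key that img._getexif() is queried with below.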
# Return the image's width and height, corrected for its EXIF orientation
def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except:
        pass
    return s
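# A minimal usage sketch (added, not from the original source; 'photo.jpg' is a
# hypothetical file): for a photo whose EXIF orientation tag is 6 or 8, the
# stored (width, height) is swapped so the shape matches how the image is
# actually displayed.
#
#   img = Image.open('photo.jpg')
#   print(img.size)        # e.g. (4032, 3024) as stored on disk
#   print(exif_size(img))  # e.g. (3024, 4032) if the orientation tag is 6 or 8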
# Build a dataloader on top of LoadImagesAndLabels
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False):
    """
    Arguments:
        path: a txt file listing image paths, or a folder containing the images
        imgsz: network input image size
        batch_size: batch size
        stride: the network's maximum total downsampling stride
        opt: the options passed in from train.py; here mainly opt.single_cls, whether the dataset is single-class
        hyp: training hyperparameters (learning rate, etc.); here mainly the data-augmentation coefficients (rotation, translation, ...)
        augment: whether to apply data augmentation
        cache: whether to cache images in memory ahead of time to speed up training
        pad: padding applied when computing the batch shapes for rectangular training
        rect: whether to use rectangular training
    """
    dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                  augment=augment,  # augment images
                                  hyp=hyp,  # augmentation hyperparameters
                                  rect=rect,  # rectangular training
                                  cache_images=cache,
                                  single_cls=opt.single_cls,
                                  stride=int(stride),
                                  pad=pad)
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=LoadImagesAndLabels.collate_fn)
    return dataloader, dataset
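# A hedged usage sketch (added, not in the original file) of how train.py
# might call create_dataloader. The 'data/train.txt' path, the image size,
# and the opt/hyp values are illustrative assumptions, not values from the
# source.
#
#   from argparse import Namespace
#   opt = Namespace(single_cls=False)          # hypothetical options object
#   hyp = {'degrees': 0.0, 'translate': 0.0}   # hypothetical augmentation hyps
#   dataloader, dataset = create_dataloader('data/train.txt', imgsz=640,
#                                           batch_size=16, stride=32, opt=opt,
#                                           hyp=hyp, augment=True, rect=False)
#   for imgs, targets, paths, shapes in dataloader:  # one training batch
#       pass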
class LoadImagesAndLabels(Dataset): # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0):
        try:
            f = []
            for p in path if isinstance(path, list) else [path]:
                # path is either a txt file listing image paths or a folder containing the images
                # pathlib.Path builds an OS-agnostic path, since the separator differs across operating systems
                p = str(Path(p))  # os-agnostic
                # Parent directory of the dataset path; os.sep is the path separator, which adapts to the OS
                parent = str(Path(p).parent) + os.sep
                # Case 1: path is a txt file listing image paths
                if os.path.isfile(p):  # file
                    with open(p, 'r') as t:
                        # Read the image paths and turn relative paths into absolute ones
                        t = t.read().splitlines()
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                # Case 2: path is a folder containing the images
                elif os.path.isdir(p):  # folder
                    f += glob.iglob(p + os.sep + '*.*')
                else:
                    raise Exception('%s does not exist' % p)
            path = p  # *.npy dir
            # Normalize separators to os.sep; os.path.splitext(x) splits a filename from its extension
            self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats]
        except Exception as e:
            raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
        # Number of images in the dataset
        n = len(self.img_files)
        assert n > 0, 'No images found in %s. See %s' % (path, help_url)
        # Batch index of every image
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        # Number of batches per epoch
        nb = bi[-1] + 1  # number of batches
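        # Worked example (added for illustration): with n=10 images and
        # batch_size=4, np.arange(10) / 4 floors to
        #   bi = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2]
        # so nb = bi[-1] + 1 = 3 batches, the last one holding only 2 images.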
        self.n = n  # number of images
        self.batch = bi  # batch index of each image
        self.img_size = img_size  # input image resolution
        self.augment = augment  # data augmentation
        self.hyp = hyp  # hyperparameters
        self.image_weights = image_weights  # weighted image sampling
        self.rect = False if image_weights else rect  # rectangular training
        self.stride = stride  # the model's total downsampling stride
        # Derive the label paths from the image paths
        self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                            for x in self.img_files]
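        # Example of the path mapping above (illustrative paths, assuming the
        # usual YOLOv5 layout where labels/ mirrors images/):
        #   data/images/train/0001.jpg  ->  data/labels/train/0001.txt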
        # Path of the file that caches every image's shape
        sp = path.replace('.txt', '') + '.shapes'  # shapefile path
        try:
            # If the shapefile already exists, read it
            with open(sp, 'r') as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                assert len(s) == n, 'Shapefile out of sync'
        except:
            # Otherwise read each image's shape and save the result
            s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)
        self.shapes = np.array(s, dtype=np.float64)
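        # The .shapes cache written above holds one "width height" pair per
        # line, e.g. (illustrative values):
        #   640 480
        #   1280 720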
        # Rectangular training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # h/w
            # Indices that sort the aspect ratios in ascending order
            irect = ar.argsort()
            # Reorder image paths, label paths, shapes and h/w by those indices
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]
            # Set training image shapes
            # Initialize the per-batch shapes; nb is the number of batches per epoch
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                # If the largest h/w in the batch is below 1, the batch shape is (img_size * maxi, img_size)
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                # If the smallest h/w in the batch is above 1, the batch shape is (img_size, img_size / mini)
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]
            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
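            # Worked numeric example (added for illustration): with
            # img_size=640, stride=32, pad=0.0, a batch of wide images whose
            # largest h/w is maxi=0.5 gets shapes[i] = [0.5, 1], so
            #   batch_shape = ceil([0.5, 1] * 640 / 32) * 32 = [320, 640]
            # i.e. letterboxed to 320x640 instead of a full 640x640 square.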
        # Cache labels
        # Initialize the image and label caches
        self.imgs = [None] * n
        self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
        # Flags: create a data subset, extract detection boxes for a secondary classifier, labels already loaded
        create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
        # Counters: missing, found, empty and duplicate labels, plus the datasubset size
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        # Path of the numpy file that caches the labels
        np_labels_path = str(Path(self.label_files[0]).parent) + '.npy'  # saved labels in *.npy file
        # If labels.npy exists, load it directly and set labels_loaded=True
        if os.path.isfile(np_labels_path):
            s = np_labels_path  # print string
            x = np.load(np_labels_path, allow_pickle=True)
            if len(x) == n:
                self.labels = x
                labels_loaded = True
