python狗品种识别_狗品种识别基本思路
加载自定义数据集
微调ResNet18模型
训练模型
基于pytorch的代码
日常导入需要用到的 Python 库
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets, models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed the NumPy and PyTorch random number generators so runs are repeatable.
np.random.seed(0)
torch.manual_seed(0)
加载数据集
使用的是从比赛网站上下载的数据集，格式如下：
| Dog Breed Identification
| train
| | 000bec180eb18c7604dcecc8fe0dba07.jpg
| | …
| test
| | 00a3edd22dc7859c487a64777fc8d093.jpg
| | 00a6892e5c7f92c1f465e213fd904582.jpg
| | …
| labels.csv
| sample_submission.csv
我们要将它转换成 PyTorch 能识别的格式，如下：
| train_valid_test
| train
| | affenpinscher
| | | 00ca18751837cd6a22813f8e221f7819.jpg
| | | …
| | afghan_hound
| | | 0a4f1e17d720cdff35814651402b7cf4.jpg
| | | …
| | …
| valid
| | affenpinscher
| | | 56af8255b46eb1fa5722f37729525405.jpg
| | | …
| | afghan_hound
| | | 0df400016a7e7ab4abff824bf2743f02.jpg
| | | …
| | …
| train_valid
| | affenpinscher
| | | 00ca18751837cd6a22813f8e221f7819.jpg
| | | …
| | afghan_hound
| | | 0a4f1e17d720cdff35814651402b7cf4.jpg
| | | …
| | …
| test
| | unclass
| | | 00a3edd22dc7859c487a64777fc8d093.jpg
| | | …
先设置文件路径
# Root directory of the downloaded competition data set.
all_path = "/home/kesci/input/Kaggle_Dog6357/dog-breed-identification"
# Entry names used together with the root directory above.
test_path = "test"
train_path = "train"
train_label_path = "labels.csv"
valid_path = "valid"
根据上面的路径去调整文件的目录结构，
调整完后方便我们加载数据
# File and path operations
import os
# File copying
import shutil
def make_dir(path):
    """Create the directory described by ``path`` if it does not exist yet.

    Args:
        path: Sequence of path components; they are joined with
            ``os.path.join`` to form the directory to create.

    Raises:
        FileExistsError: If the joined path already exists but is not a
            directory.
    """
    # exist_ok=True folds the "does it already exist?" check into the call,
    # so repeated calls for the same directory are harmless.
    os.makedirs(os.path.join(*path), exist_ok=True)
def _copy_into_dir(src, *dst_parts):
    """Copy file ``src`` into the directory ``os.path.join(*dst_parts)``, creating it first."""
    dst_dir = os.path.join(*dst_parts)
    os.makedirs(dst_dir, exist_ok=True)
    shutil.copy(src, dst_dir)


def get_dog_data(root_path, train_path, label_path, test_path, valid_path, valid_alpha=.3):
    """Copy the raw images into one-folder-per-label directories.

    Everything is written below ``<root_path>/new_dir``:

    * ``valid/<label>/``            the first ``valid_alpha`` share of the
      shuffled training images
    * ``train/<label>/``            the remaining training images
    * ``train_and_valid/<label>/``  every training image
    * ``test/unclass/``             every test image

    Args:
        root_path: Directory that contains the raw data.
        train_path: Name of the training image directory under ``root_path``.
        label_path: Name of the label CSV file under ``root_path``.
        test_path: Name of the test image directory under ``root_path``.
        valid_path: Accepted for interface compatibility; not used, the
            validation directory is always named ``"valid"``.
        valid_alpha: Fraction of the training images copied to ``valid``.

    Raises:
        KeyError: If a training image has no entry in the label CSV.
    """
    new_dir = "new_dir"
    train_dir = os.path.join(root_path, train_path)

    # os.listdir order depends on the file system; sorting first means a
    # seeded shuffle yields the same split on every machine.
    train_names = sorted(os.listdir(train_dir))
    np.random.shuffle(train_names)

    # Build the {image id: label} lookup. dtype=str keeps ids textual even if
    # one happens to look numeric.
    # NOTE(review): assumes column 0 is the image id (file name without
    # extension) and column 1 is the label - confirm against labels.csv.
    labels_csv = pd.read_csv(os.path.join(root_path, label_path), dtype=str)
    labels = dict(zip(labels_csv.iloc[:, 0], labels_csv.iloc[:, 1]))

    valid_size = int(len(train_names) * valid_alpha)
    for i, name in enumerate(train_names):
        # The lookup is keyed by the file name without its extension.
        image_id = os.path.splitext(name)[0]
        label = labels[image_id]
        src = os.path.join(train_dir, name)

        subset = "valid" if i < valid_size else "train"
        _copy_into_dir(src, root_path, new_dir, subset, label)
        # Every image also goes into the full set (train + valid).
        _copy_into_dir(src, root_path, new_dir, "train_and_valid", label)

    # Test images carry no label, so they all share one placeholder folder.
    test_dir = os.path.join(root_path, test_path)
    unclass_dir = os.path.join(root_path, new_dir, "test", "unclass")
    os.makedirs(unclass_dir, exist_ok=True)
    for name in os.listdir(test_dir):
        shutil.copy(os.path.join(test_dir, name), unclass_dir)
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论