This is a classic binary image-classification dataset on Kaggle: the training set contains 25,000 labeled images of cats and dogs, and the test set contains 12,500 unlabeled images. The data can be downloaded from https://www.kaggle.com/c/dogs-vs-cats/data. That original competition is quite old and no longer accepts submissions, so predictions can instead be submitted to the newer (though not exactly new either) competition at https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/overview.
Organize the training data by class into the following directory structure:
```
train/
|-- cat/
|   |-- 1.jpg
|   |-- 2.jpg
|   |-- ...
|-- dog/
|   |-- 1.jpg
|   |-- 2.jpg
|   |-- ...
```
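The raw Kaggle archive dumps everything into a single folder with filenames like `cat.0.jpg` and `dog.0.jpg`, so a small script can sort the files into the two class subdirectories. This is a minimal sketch: the folder names `raw_train` and `train` are assumptions, and the filename pattern is the one used by the original competition download.

```python
import os
import shutil

raw_dir = './raw_train'   # assumed: unpacked Kaggle images (cat.0.jpg, dog.0.jpg, ...)
out_dir = './train'       # target folder matching the structure above

# create one subfolder per class
for cls in ('cat', 'dog'):
    os.makedirs(os.path.join(out_dir, cls), exist_ok=True)

for name in os.listdir(raw_dir):
    # the class name is the prefix before the first dot, e.g. "cat.123.jpg" -> "cat"
    cls = name.split('.')[0]
    if cls in ('cat', 'dog'):
        shutil.copy(os.path.join(raw_dir, name), os.path.join(out_dir, cls, name))
```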
Once the data is organized like this, we can read it directly with ImageFolder and split off a validation set with random_split():
```python
all_data = torchvision.datasets.ImageFolder(root=train_root, transform=train_transform)
n_train = int(0.8 * len(all_data))
train_data, valid_data = torch.utils.data.random_split(all_data, [n_train, len(all_data) - n_train])
```
For more complex cases, you can also subclass Dataset yourself. A simple example:
```python
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, root, size=229):
        """Initialize the data producer."""
        self._root = root
        self._size = size
        self._img_name = os.listdir(root)
        self._num_image = len(self._img_name)

    def __len__(self):
        return self._num_image

    def __getitem__(self, index):
        img = Image.open(os.path.join(self._root, self._img_name[index]))
        # PIL image: H x W x C
        # torch image: C x H x W
        img = np.array(img, dtype=np.float32).transpose((2, 0, 1))
        return img
```
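The sketch above only returns the image. For supervised training each sample also needs a label; one option (an assumption on my part, relying on the original Kaggle filenames such as `cat.0.jpg` / `dog.0.jpg`) is to parse it from the file name:

```python
class LabeledDataset(MyDataset):
    """Extends MyDataset to also return a label parsed from the filename."""
    def __getitem__(self, index):
        name = self._img_name[index]
        img = Image.open(os.path.join(self._root, name)).convert('RGB')
        img = np.array(img, dtype=np.float32).transpose((2, 0, 1))
        label = 0 if name.startswith('cat') else 1  # 0 = cat, 1 = dog
        return img, label
```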
To reduce overfitting, we can augment the data with random flips and brightness/contrast jitter:
```python
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0), ratio=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0),
    transforms.ColorJitter(brightness=0, contrast=0.5, saturation=0, hue=0),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
])
```
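The random crop and jitter are meant for training; validation images are usually preprocessed deterministically. The script below keeps a single transform for simplicity, but if you want a separate pipeline, a sketch might look like this (the Resize/CenterCrop sizes are just a common choice, not something tuned here):

```python
# deterministic preprocessing for validation images (no random augmentation)
valid_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
])
```

Note that random_split only wraps subsets of the same ImageFolder, so applying a different transform to the validation subset would require a second ImageFolder instance (or temporarily swapping all_data.transform before evaluation).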
Then wrap the datasets in DataLoaders so PyTorch can iterate over them in batches:
```python
train_set = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(valid_data, batch_size=BATCH_SIZE, shuffle=False)
```
For the model we use a pretrained ResNet50 and replace its final fully connected layer with a new linear layer plus softmax for the two classes:
```python
model = torchvision.models.resnet50(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048, 2),
    nn.Softmax(dim=1)  # note: nn.CrossEntropyLoss already applies softmax internally, so this layer mainly yields probabilities at inference
)
```
With a learning rate of 0.01, five epochs of training were already enough to reach over 90% accuracy. Fine-tuning can be pushed further by giving the new output layer a higher learning rate than the pretrained layers, as sketched below.
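One way to do this is with optimizer parameter groups. Splitting the parameters between the backbone and model.fc is standard PyTorch; the specific learning rates below (1e-4 for the pretrained layers, 1e-2 for the new head) are illustrative assumptions, not values tuned in this post.

```python
# separate the newly added head from the pretrained backbone
fc_params = list(map(id, model.fc.parameters()))
backbone_params = [p for p in model.parameters() if id(p) not in fc_params]

optimizer = optim.SGD([
    {'params': backbone_params, 'lr': 1e-4},        # small lr: gently adjust pretrained weights
    {'params': model.fc.parameters(), 'lr': 1e-2},  # larger lr: train the new output layer from scratch
], momentum=0.9)
```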
The full script:

```python
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# hyperparameters
DEVICE = torch.device('cuda')
LR = 0.001
EPOCH = 50
BATCH_SIZE = 32
train_root = './train'

# data loading and preprocessing
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0), ratio=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0),
    transforms.ColorJitter(brightness=0, contrast=0.5, saturation=0, hue=0),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
])

all_data = torchvision.datasets.ImageFolder(root=train_root, transform=train_transform)
n_train = int(0.8 * len(all_data))
train_data, valid_data = torch.utils.data.random_split(all_data, [n_train, len(all_data) - n_train])

train_set = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(valid_data, batch_size=BATCH_SIZE, shuffle=False)

# training and validation
criterion = nn.CrossEntropyLoss()
LOSS = []  # per-batch training loss history

def train(model, device, dataset, optimizer, epoch):
    model.train()
    correct = 0
    for i, (x, y) in tqdm(enumerate(dataset)):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output = model(x)
        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(y.view_as(pred)).sum().item()
        loss = criterion(output, y)
        LOSS.append(loss.item())
        loss.backward()
        optimizer.step()
    # accuracy is computed over the number of samples, not the number of batches
    print("Epoch {} Loss {:.4f} Accuracy {}/{} ({:.0f}%)".format(
        epoch, loss, correct, len(dataset.dataset), 100. * correct / len(dataset.dataset)))

def valid(model, device, dataset):
    model.eval()
    correct = 0
    with torch.no_grad():
        for i, (x, y) in tqdm(enumerate(dataset)):
            x, y = x.to(device), y.to(device)
            output = model(x)
            loss = criterion(output, y)
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(y.view_as(pred)).sum().item()
    print("Test Loss {:.4f} Accuracy {}/{} ({:.0f}%)".format(
        loss, correct, len(dataset.dataset), 100. * correct / len(dataset.dataset)))

model = torchvision.models.resnet50(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048, 2),
    nn.Softmax(dim=1)
)
model.to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.09)

for epoch in range(1, EPOCH + 1):
    train(model, DEVICE, train_set, optimizer, epoch)
    valid(model, DEVICE, test_set)
```
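To submit to the redux competition linked at the top, Kaggle expects a CSV with an `id` column and the predicted probability that each test image is a dog. Below is a rough, untested sketch of how such a file could be generated with the trained model: the `./test` path and the `<id>.jpg` naming follow the Kaggle download, and the details are assumptions rather than code from the experiment above.

```python
import csv
from PIL import Image

test_dir = './test'  # assumed location of the 12,500 unlabeled test images, named 1.jpg ... 12500.jpg

# deterministic preprocessing for inference
infer_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
])

model.eval()
with open('submission.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'label'])
    with torch.no_grad():
        for name in sorted(os.listdir(test_dir), key=lambda n: int(n.split('.')[0])):
            img = Image.open(os.path.join(test_dir, name)).convert('RGB')
            x = infer_transform(img).unsqueeze(0).to(DEVICE)
            # the model ends in Softmax, so the output is already a probability;
            # ImageFolder sorts class folders alphabetically, so index 1 is 'dog'
            prob_dog = model(x)[0, 1].item()
            writer.writerow([name.split('.')[0], prob_dog])
```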