Dataset download: Dogs vs. Cats | Kaggle
Accuracy of different methods

| Method | Epochs | Accuracy |
| ------ | ------ | -------- |
| cnn    | 5      | 67.64%   |
| cnn    | 10     | 74.92%   |
| cnn    | 15     | 73.42%   |
| cnn    | 20     | 79.28%   |
| cnn    | 25     | 78.28%   |
| vgg16  | 5      | 86.5%    |
| vgg16  | 10     | 86.98%   |
| vgg16  | 15     | 85.42%   |

Fine-tuning a pretrained VGG16 reaches about 87% test accuracy after just 5 epochs, while the small CNN trained from scratch stays below 80% even after 20 to 25 epochs.
cnn.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
num_epochs = 20
batch_size = 50
learning_rate = 0.001
train_size = 25000
indices = torch.randperm(train_size)
train_indices = indices[:20000]
test_indices = indices[20000:]
class DogsVsCatsDataset(Dataset):
    def __init__(self, root, train=True, transform=None):
        super().__init__()
        self.root = root
        self.transform = transform
        self.classes = ["dog", "cat"]
        self.files = []
        self.labels = []
        files = os.listdir(root)
        index = train_indices if train else test_indices
        for i in index:
            file = files[i]
            self.files.append(file)
            # Label from the file name: 0 = dog, 1 = cat
            if "dog" in file:
                self.labels.append(0)
            else:
                self.labels.append(1)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        path = os.path.join(self.root, self.files[index])
        image = Image.open(path).convert("RGB")
        label = self.labels[index]
        if self.transform:
            image = self.transform(image)
        return image, label
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=30),
        # transforms.RandomResizedCrop(
        #     size=224, scale=(0.08, 1.0), ratio=(0.75, 1.33333)
        # ),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)
train_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=True, transform=transform
)
test_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=False, transform=transform
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv2d(3, 24, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(24),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.cnn2 = nn.Sequential(
            nn.Conv2d(24, 48, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(48),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.cnn3 = nn.Sequential(
            nn.Conv2d(48, 96, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(96),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.cnn4 = nn.Sequential(
            nn.Conv2d(96, 48, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(48),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.dropout = nn.Dropout()
        # Four 2x2 max-pools shrink the 224x224 input to 14x14, with 48 channels.
        self.line1 = nn.Linear(14 * 14 * 48, 512)
        self.line2 = nn.Linear(512, 2)

    def forward(self, x):
        out = self.cnn1(x)
        out = self.cnn2(out)
        out = self.cnn3(out)
        out = self.cnn4(out)
        out = out.reshape(out.size(0), -1)  # flatten to (batch, 14*14*48)
        out = self.dropout(out)
        out = self.line1(out)
        out = self.line2(out)
        return out
model = CNNModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
model.train()
for epoch in range(num_epochs):
    for i, (image, label) in enumerate(train_loader):
        image = image.to(device)
        label = label.to(device)
        output = model(image)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % batch_size == 0:
            print(
                "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()
                )
            )
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for image, label in test_loader:
        image = image.to(device)
        label = label.to(device)
        output = model(image)
        _, predict = torch.max(output, 1)
        total += len(label)
        correct += (predict == label).sum().item()
    print(
        "Accuracy of test {} images: {} %".format(
            len(test_dataset), correct / total * 100
        )
    )
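
The script above only reports aggregate test accuracy. As a small follow-up sketch that is not part of the original, the trained weights can be saved and the model reused to classify a single image; the checkpoint name, the example path, and the helper `predict_image` are hypothetical, and a deterministic transform (no random augmentation) is assumed for inference.

# Follow-up sketch: persist the trained CNN and classify one image.
torch.save(model.state_dict(), "cnn_dogs_vs_cats.pth")  # hypothetical file name

# Deterministic preprocessing for inference (no random augmentation).
eval_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

def predict_image(path):
    """Classify a single image file as 'dog' or 'cat'."""
    model.eval()
    image = Image.open(path).convert("RGB")
    x = eval_transform(image).unsqueeze(0).to(device)  # add a batch dimension
    with torch.no_grad():
        _, pred = torch.max(model(x), 1)
    return ["dog", "cat"][pred.item()]  # same encoding as the dataset: 0 = dog, 1 = cat

# Example call with a hypothetical image path:
# print(predict_image(".\\some_image.jpg"))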
vgg16.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import os
num_epochs = 5
batch_size = 10
learning_rate = 0.001
train_size = 25000
indices = torch.randperm(train_size)
train_indices = indices[:20000]
test_indices = indices[20000:]
class DogsVsCatsDataset(Dataset):
    def __init__(self, root, train=True, transform=None):
        super().__init__()
        self.root = root
        self.transform = transform
        self.classes = ["dog", "cat"]
        self.files = []
        self.labels = []
        files = os.listdir(root)
        index = train_indices if train else test_indices
        for i in index:
            file = files[i]
            self.files.append(file)
            if "dog" in file:
                self.labels.append(0)
            else:
                self.labels.append(1)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        path = os.path.join(self.root, self.files[index])
        image = Image.open(path).convert("RGB")
        label = self.labels[index]
        if self.transform:
            image = self.transform(image)
        return image, label
transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=30),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)
train_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=True, transform=transform
)
test_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=False, transform=transform
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
# Freeze the convolutional feature extractor; only the classifier is trained.
for param in model.features.parameters():
    param.requires_grad = False
# Replace the final fully connected layer with a new 2-class head.
# (Merely setting `out_features = 2` would not resize the layer's weight matrix.)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
model.train()
for epoch in range(num_epochs):
    for i, (image, label) in enumerate(train_loader):
        image = image.to(device)
        label = label.to(device)
        output = model(image)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print(
                "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()
                )
            )
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for image, label in test_loader:
        image = image.to(device)
        label = label.to(device)
        output = model(image)
        _, predict = torch.max(output, 1)
        total += len(label)
        correct += (predict == label).sum().item()
    print(
        "Accuracy of test {} images: {} %".format(
            len(test_dataset), correct / total * 100
        )
    )
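
As with the CNN, a brief follow-up sketch that is not in the original script: the fine-tuned weights can be saved and restored later. Because the final layer was replaced, the same 2-class head has to be rebuilt before calling load_state_dict; the checkpoint file name is hypothetical.

# Follow-up sketch: save and later restore the fine-tuned VGG16.
torch.save(model.state_dict(), "vgg16_dogs_vs_cats.pth")  # hypothetical file name

# In a separate script: rebuild the same architecture, then load the weights.
restored = models.vgg16(weights=None)  # ImageNet weights not needed; they get overwritten
restored.classifier[6] = nn.Linear(restored.classifier[6].in_features, 2)
restored.load_state_dict(torch.load("vgg16_dogs_vs_cats.pth", map_location=device))
restored = restored.to(device)
restored.eval()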