Dog vs. Cat Classification with PyTorch

Created: 2025-02-06
Updated: 2025-02-13

Dataset download: Dogs vs. Cats | Kaggle
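
Both scripts assume the 25,000 training images from the competition have been extracted to .\data\Dogs Vs Cats\train, with filenames such as dog.0.jpg and cat.0.jpg; adjust the root paths below if your directory layout differs.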

Accuracy of the different approaches

Method   Epochs   Accuracy
cnn      5        67.64%
cnn      10       74.92%
cnn      15       73.42%
cnn      20       79.28%
cnn      25       78.28%
vgg16    5        86.5%
vgg16    10       86.98%
vgg16    15       85.42%

cnn.py
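
cnn.py trains a small four-block convolutional network from scratch on 20,000 of the images and evaluates it on the remaining 5,000.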

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

# Hyperparameters
num_epochs = 20
batch_size = 50
learning_rate = 0.001

# Randomly split the 25,000 training images: 20,000 for training, 5,000 for testing
train_size = 25000
indices = torch.randperm(train_size)
train_indices = indices[:20000]
test_indices = indices[20000:]

class DogsVsCatsDataset(Dataset):
    def __init__(self, root, train=True, transform=None):
        super().__init__()
        self.root = root
        self.transform = transform
        self.classes = ["dog", "cat"]

        self.files = []
        self.labels = []

        # Sort so the random indices always map to the same files
        files = sorted(os.listdir(root))

        index = train_indices if train else test_indices
        for i in index:
            file = files[i]
            self.files.append(file)
            # Filenames look like "dog.0.jpg" / "cat.0.jpg": dog -> 0, cat -> 1
            if "dog" in file:
                self.labels.append(0)
            else:
                self.labels.append(1)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        path = os.path.join(self.root, self.files[index])
        image = Image.open(path).convert("RGB")
        label = self.labels[index]
        if self.transform:
            image = self.transform(image)
        return image, label

transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=30),
        # transforms.RandomResizedCrop(
        #     size=224, scale=(0.08, 1.0), ratio=(0.75, 1.33333)
        # ),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

train_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=True, transform=transform
)
# Note: the test split reuses the augmented training transform; a deterministic
# Resize + ToTensor + Normalize pipeline would give a cleaner accuracy estimate.
test_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=False, transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class CNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv2d(3, 24, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(24),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.cnn2 = nn.Sequential(
            nn.Conv2d(24, 48, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(48),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.cnn3 = nn.Sequential(
            nn.Conv2d(48, 96, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(96),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.cnn4 = nn.Sequential(
            nn.Conv2d(96, 48, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(48),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.dropout = nn.Dropout()
        # After four 2x2 max-pools, a 224x224 input is reduced to 14x14 with 48 channels
        self.line1 = nn.Linear(14 * 14 * 48, 512)
        self.line2 = nn.Linear(512, 2)

    def forward(self, x):
        out = self.cnn1(x)
        out = self.cnn2(out)
        out = self.cnn3(out)
        out = self.cnn4(out)
        out = out.reshape(out.size(0), -1)
        out = self.dropout(out)
        out = self.line1(out)
        out = self.line2(out)
        return out

model = CNNModel().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

model.train()

for epoch in range(num_epochs):
    for i, (image, label) in enumerate(train_loader):
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 50 == 0:  # log every 50 mini-batches
            print(
                "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()
                )
            )

model.eval()

with torch.no_grad():
    total = 0
    correct = 0

    for image, label in test_loader:
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        _, predict = torch.max(output, 1)

        total += len(label)
        correct += (predict == label).sum().item()

    print(
        "Accuracy on {} test images: {:.2f} %".format(
            len(test_dataset), correct / total * 100
        )
    )
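
Neither script saves the trained weights. Below is a minimal sketch, continuing from cnn.py, of how the model could be persisted and then used to classify a single image; the file names cnn.pth and sample.jpg are placeholders, and eval_transform is an added deterministic preprocessing pipeline, not part of the original script.

# Save only the learned parameters (the usual PyTorch convention)
torch.save(model.state_dict(), "cnn.pth")

# Later: rebuild the model and reload the weights
model = CNNModel().to(device)
model.load_state_dict(torch.load("cnn.pth", map_location=device))
model.eval()

# Deterministic preprocessing for inference (no random augmentation)
eval_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

image = eval_transform(Image.open("sample.jpg").convert("RGB")).unsqueeze(0).to(device)
with torch.no_grad():
    predict = model(image).argmax(dim=1).item()
print("dog" if predict == 0 else "cat")  # these scripts map dog -> 0, cat -> 1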

vgg16.py
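
vgg16.py swaps the hand-built CNN for a VGG16 pretrained on ImageNet: the convolutional features are frozen and only the classifier head is trained. As the table above shows, this transfer-learning setup reaches higher accuracy in fewer epochs than the CNN trained from scratch.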

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import os

num_epochs = 5
batch_size = 10
learning_rate = 0.001

train_size = 25000
indices = torch.randperm(train_size)
train_indices = indices[:20000]
test_indices = indices[20000:]

class DogsVsCatsDataset(Dataset):
    def __init__(self, root, train=True, transform=None):
        super().__init__()
        self.root = root
        self.transform = transform
        self.classes = ["dog", "cat"]

        self.files = []
        self.labels = []

        # Sort so the random indices always map to the same files
        files = sorted(os.listdir(root))

        index = train_indices if train else test_indices
        for i in index:
            file = files[i]
            self.files.append(file)
            if "dog" in file:
                self.labels.append(0)
            else:
                self.labels.append(1)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        path = os.path.join(self.root, self.files[index])
        image = Image.open(path).convert("RGB")
        label = self.labels[index]
        if self.transform:
            image = self.transform(image)
        return image, label

transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=30),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

train_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=True, transform=transform
)
test_dataset = DogsVsCatsDataset(
    root=".\\data\\Dogs Vs Cats\\train", train=False, transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# VGG16 pretrained on ImageNet as the backbone
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Freeze the convolutional feature extractor; only the classifier is trained
for param in model.features.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a new 2-class head.
# (Assigning to .out_features alone does not resize the layer's weights.)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)

model = model.to(device)

criterion = nn.CrossEntropyLoss()
# Only parameters that still require gradients (the classifier) are updated
optimizer = optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=learning_rate
)

model.train()

for epoch in range(num_epochs):
    for i, (image, label) in enumerate(train_loader):
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(
                "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()
                )
            )

model.eval()

with torch.no_grad():
    total = 0
    correct = 0

    for image, label in test_loader:
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        _, predict = torch.max(output, 1)

        total += len(label)
        correct += (predict == label).sum().item()

    print(
        "Accuracy on {} test images: {:.2f} %".format(
            len(test_dataset), correct / total * 100
        )
    )
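
If more accuracy is needed, a common extension (not part of the script above) is to also unfreeze the last convolutional block of VGG16 and fine-tune it with a smaller learning rate than the new head; in torchvision's VGG16, features[24:] corresponds to the fifth conv block. A sketch under that assumption, with illustrative learning rates:

# Unfreeze the fifth convolutional block (conv5_1 .. pool5)
for param in model.features[24:].parameters():
    param.requires_grad = True

# Train the pretrained block more gently than the new 2-class head
optimizer = optim.Adam(
    [
        {"params": model.features[24:].parameters(), "lr": 1e-5},
        {"params": model.classifier.parameters(), "lr": 1e-4},
    ]
)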