Accuracy of Different Methods
The table below reports MNIST test accuracy after 5/10/15/20 training epochs; the convolutional network is strongest at every epoch budget, peaking at 99.28%.
| Method | Epochs | Accuracy |
| --- | --- | --- |
| Linear neural network | 5 | 96.69% |
| Linear neural network | 10 | 97.77% |
| Linear neural network | 15 | 97.81% |
| Linear neural network | 20 | 98.08% |
| Convolutional neural network | 5 | 98.85% |
| Convolutional neural network | 10 | 99.27% |
| Convolutional neural network | 15 | 99.16% |
| Convolutional neural network | 20 | 99.28% |
| Vision Transformer | 5 | 97.64% |
| Vision Transformer | 10 | 98.39% |
| Vision Transformer | 15 | 98.27% |
| Vision Transformer | 20 | 98.30% |
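All three scripts below share the same MNIST data pipeline and differ only in the model. The table could be produced by evaluating on the test set at the 5/10/15/20-epoch milestones of a single run; a minimal sketch, assuming hypothetical `train_one_epoch` and `evaluate` helpers that wrap the training and evaluation loops shown below:

# Hypothetical sketch: record test accuracy at epoch milestones of one run.
# train_one_epoch and evaluate are assumed wrappers around the loops in the
# scripts below; they are not part of the original code.
milestones = {5, 10, 15, 20}
results = {}
for epoch in range(1, 21):
    train_one_epoch(model, train_loader, optimizer, criterion)
    if epoch in milestones:
        results[epoch] = evaluate(model, test_loader)
print(results)  # e.g. {5: 96.69, 10: 97.77, 15: 97.81, 20: 98.08}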
Linear neural network (accuracy: 98.08%)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms
num_epochs = 20
batch_size = 100
learning_rate = 0.001
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)
train_dataset = datasets.MNIST(
    root="./data", train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root="./data", train=False, transform=transform, download=True
)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class LinearModel(nn.Module):
    # Two-layer MLP: 784 -> 1000 -> 10, with ReLU and dropout (p=0.5 default).
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 1000)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout()
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out
model = LinearModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        images = images.reshape(-1, 784)  # flatten 1x28x28 images to 784-dim vectors
        output = model(images)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print(
                "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()
                )
            )
model.eval()  # disable dropout for evaluation
with torch.no_grad():
    total = 0
    correct = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        images = images.reshape(-1, 784)
        output = model(images)
        _, predict = torch.max(output, 1)
        total += labels.size(0)
        correct += (predict == labels).sum().item()
print("Accuracy on the 10000 test images: {:.2f}%".format(correct / total * 100))
Convolutional neural network (accuracy: 99.28%)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms
num_epochs = 20
batch_size = 100
learning_rate = 0.001
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)
train_dataset = datasets.MNIST(
    root="./data", train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root="./data", train=False, transform=transform, download=True
)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CNNModel(nn.Module):
    # Two conv blocks (5x5 conv -> ReLU -> 2x2 max-pool) followed by an MLP head.
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.dropout = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)  # flatten to (batch, 7*7*64)
        out = self.dropout(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
model = CNNModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print(
                "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()
                )
            )
model.eval()  # disable dropout for evaluation
with torch.no_grad():
    total = 0
    correct = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)
        _, predict = torch.max(output, 1)
        total += labels.size(0)
        correct += (predict == labels).sum().item()
print("Accuracy on the 10000 test images: {:.2f}%".format(correct / total * 100))
Vision Transformer (accuracy: 98.30%)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms
num_epochs = 20
batch_size = 100
learning_rate = 0.001
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)
train_dataset = datasets.MNIST(
    root="./data", train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root="./data", train=False, transform=transform, download=True
)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class ViTModel(nn.Module):
    # Minimal ViT: conv patch embedding, learnable class token and position
    # embeddings, a stack of Transformer encoder layers, and a linear head.
    def __init__(
        self,
        image_size=28,
        patch_size=7,
        in_channels=1,
        d_model=128,
        n_head=4,
        num_layers=3,
        num_classes=10,
    ):
        super().__init__()
        self.num_patches = (image_size // patch_size) ** 2
        self.patch_embed = nn.Conv2d(
            in_channels, d_model, kernel_size=patch_size, stride=patch_size
        )
        self.cls_token = nn.Parameter(torch.randn(1, 1, d_model))
        self.pos_embed = nn.Parameter(torch.randn(1, self.num_patches + 1, d_model))
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_head,
            dim_feedforward=4 * d_model,
            activation="gelu",
            batch_first=True,
            dropout=0.1,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(d_model), nn.Linear(d_model, num_classes)
        )

    def forward(self, x):
        batch_size = x.shape[0]
        x = self.patch_embed(x)  # (B, d_model, 4, 4)
        x = x.flatten(2).permute(0, 2, 1)  # (B, 16, d_model)
        cls_tokens = self.cls_token.repeat(batch_size, 1, 1)
        x = torch.cat([cls_tokens, x], dim=1)  # prepend class token -> (B, 17, d_model)
        x = x + self.pos_embed  # out-of-place add is autograd-safe
        x = self.transformer(x)
        return self.mlp_head(x[:, 0])  # classify from the class token
model = ViTModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print(
                "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()
                )
            )
model.eval()  # disable dropout for evaluation
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # .data is unnecessary inside no_grad
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")