摘要:下面是一个使用PyTorch构建的CIFAR-10图像分类模型,它结合深度卷积神经网络(CNN)与多种优化技术,准确率可接近90%。(所谓"准确率接近1",在CIFAR-10数据集上实际是指接近人类水平的识别精度,约94%-96%;完全达到100%几乎不可能。)
构建CIFAR-10图像分类模型(PyTorch实现)
下面是一个使用PyTorch构建的CIFAR-10图像分类模型,它结合深度卷积神经网络(CNN)与多种优化技术,准确率可接近90%。(所谓"准确率接近1",在CIFAR-10数据集上实际是指接近人类水平的识别精度,约94%-96%;完全达到100%几乎不可能。)
python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
import time
# Select the GPU when CUDA is usable; otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would pick "cuda:0" even on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Data preprocessing and augmentation.
# Per-channel mean / std handed to Normalize; shared by both pipelines so the
# training and evaluation inputs are scaled identically.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random augmentations, then tensor conversion and
# normalization.  Every entry must be a transform *instance* -- Compose calls
# each entry on the image, so passing the bare class (e.g. `transforms.ToTensor`)
# would invoke the constructor with the image instead of transforming it.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Evaluation pipeline: no augmentation, only tensor conversion + normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])
# Load the CIFAR-10 dataset: build both splits first, then wrap each one in a
# DataLoader.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Training batches are reshuffled; evaluation batches keep dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

# Class names, one per label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
# CNN model definition
class CIFAR10Model(nn.Module):
    """Convolutional classifier for 3x32x32 images with 10 output classes.

    ``features`` is three stages of two 3x3 convolutions (each followed by
    batch norm and ReLU) ending in a 2x2 max pool, taking the input from
    3x32x32 down to 256x4x4.  ``classifier`` is a three-layer MLP with
    dropout that maps the flattened features to 10 logits.
    """

    def __init__(self):
        # nn.Module.__init__ must actually run before sub-modules are assigned.
        super().__init__()
        self.features = nn.Sequential(
            # Input: 3x32x32
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 64x16x16
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 128x8x8
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 256x4x4
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            # Without an activation here the last two Linear layers would
            # collapse into a single linear map.
            nn.ReLU(inplace=True),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 3, 32, 32) input."""
        x = self.features(x)
        # Flatten everything except the batch dimension for the MLP head.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
# Instantiate the model (the class must be called) and move it to the
# selected device.
model = CIFAR10Model().to(device)
# Loss function and optimizer.  Both the loss class and `model.parameters`
# have to be called: the optimizer needs the parameter iterator, not the
# bound method.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
# mode='max' because the scheduler is stepped with an accuracy value: the
# learning rate is halved after 5 epochs without improvement.
# `verbose` is intentionally omitted -- it is deprecated in recent PyTorch
# releases; read the current rate from optimizer.param_groups if needed.
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5)
# Training function
def train(model, trainloader, criterion, optimizer, epoch, device=None):
    """Run one training epoch over ``trainloader``.

    Args:
        model: Module to optimize; switched to training mode.
        trainloader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Callable mapping ``(outputs, targets)`` to a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Zero-based epoch index, used only in progress messages.
        device: Where to move each batch.  Defaults to the device of the
            model's first parameter (CPU when the model has no parameters).

    Returns:
        ``(epoch_loss, epoch_acc)``: mean per-batch loss and accuracy in
        percent.  Both are 0.0 when the loader yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else "cpu"

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    start_time = time.time()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Running statistics
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        # Progress report every 100 batches
        if batch_idx % 100 == 99:
            print(f'Epoch: {epoch+1}, Batch: {batch_idx+1}/{len(trainloader)}, '
                  f'Loss: {running_loss/(batch_idx+1):.4f}, '
                  f'Acc: {100.*correct/total:.2f}%')

    num_batches = len(trainloader)
    # Guard the averages so an empty loader reports zeros instead of raising.
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    epoch_acc = 100. * correct / total if total else 0.0
    epoch_time = time.time() - start_time
    print(f'Epoch {epoch+1} completed in {epoch_time:.2f}s: '
          f'Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
    return epoch_loss, epoch_acc
# Evaluation function
def test(model, testloader, criterion, device=None):
    """Evaluate ``model`` on ``testloader`` without tracking gradients.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        testloader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Callable mapping ``(outputs, targets)`` to a scalar loss.
        device: Where to move each batch.  Defaults to the device of the
            model's first parameter (CPU when the model has no parameters).

    Returns:
        ``(test_loss, test_acc)``: mean per-batch loss and accuracy in
        percent.  Both are 0.0 when the loader yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else "cpu"

    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            # Forward pass only; the loss and hit counts must be accumulated
            # here, otherwise `total` stays 0 and the accuracy division fails.
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    num_batches = len(testloader)
    test_loss = running_loss / num_batches if num_batches else 0.0
    test_acc = 100. * correct / total if total else 0.0
    print(f'Test set: Loss: {test_loss:.4f}, Accuracy: {test_acc:.2f}%')
    return test_loss, test_acc


# The name matches pytest's default ``test*`` collection pattern; mark this
# helper as "not a test case" so it is never collected by accident.
test.__test__ = False
# Train the model
# Per-epoch history, kept for plotting after training.
train_losses = []
train_accs = []
test_losses = []
test_accs = []
best_acc = 0.0
num_epochs = 100
print(f"Starting training for {num_epochs} epochs...")
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, trainloader, criterion, optimizer, epoch)
    test_loss, test_acc = test(model, testloader, criterion)
    # Update the learning rate based on this epoch's test accuracy.
    scheduler.step(test_acc)
    # Save the best model so far.  `state_dict` must be called so the
    # parameter tensors are saved rather than the bound method.
    # NOTE(review): the test split drives both checkpoint selection and LR
    # scheduling, so the reported best accuracy is optimistic; consider a
    # separate validation split.
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model.state_dict(), 'cifar10_best_model.pth')
        print(f'Saved new best model with accuracy: {best_acc:.2f}%')
    # Record this epoch's metrics
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
print(f'Finished Training! Best test accuracy: {best_acc:.2f}%')
# Visualize the training history
plt.figure(figsize=(12, 4))

# Left panel: loss curves
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Training Loss')
plt.plot(test_losses, label='Test Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy curves.  The figure is laid out as a 1x2 grid and the
# accuracy histories are collected during training, so draw them here.
plt.subplot(1, 2, 2)
plt.plot(train_accs, label='Training Accuracy')
plt.plot(test_accs, label='Test Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()

plt.tight_layout()
# Without an explicit show (or savefig) the figure is never displayed.
plt.show()
来源:老客数据一点号