[PyTorch]ZFNet vs AlexNet

ZFNetAlexNet进行了调整,使用更小的滤波器取得更大的特征提取能力。使用PyTorch进行测试

AlexNet Derivation

Reference: AlexNet

AlexNet consists of 5 convolutional layers + 3 pooling layers + 3 fully-connected layers. Its structure is as follows:

| Layer | Input size | Filter size | Stride | Zero padding | # Filters | Output size |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| CONV1 | 227x227x3 | 11x11x3 | 4 | 0 | 96 | 55x55x96 |
| POOL2 | 55x55x96 | 3x3 | 2 | / | 96 | 27x27x96 |
| CONV3 | 27x27x96 | 5x5x96 | 1 | 2 | 256 | 27x27x256 |
| POOL4 | 27x27x256 | 3x3 | 2 | / | 256 | 13x13x256 |
| CONV5 | 13x13x256 | 3x3x256 | 1 | 1 | 384 | 13x13x384 |
| CONV6 | 13x13x384 | 3x3x384 | 1 | 1 | 384 | 13x13x384 |
| CONV7 | 13x13x384 | 3x3x384 | 1 | 1 | 256 | 13x13x256 |
| POOL8 | 13x13x256 | 3x3 | 2 | / | 256 | 6x6x256 |
| FC9 | 1x1x9216 | 1x1 | / | / | 4096 | 1x1x4096 |
| FC10 | 1x1x4096 | 1x1 | / | / | 4096 | 1x1x4096 |
| FC11 | 1x1x4096 | 1x1 | / | / | 1000 | 1x1x1000 |

Note: the paper states an input size of \(224\times 224\); in practice it is \(227\times 227\).
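
The mismatch is easy to check with the standard output-size formula \(O = (W - F + 2P)/S + 1\): with a 224x224 input, CONV1 would produce a non-integer size. A minimal sketch (the helper is mine, not from the original post):

```python
# Conv/pool output size: O = (W - F + 2P) / S + 1
# (W: input size, F: filter size, P: zero padding, S: stride)
def conv_out(w, f, s, p):
    return (w - f + 2 * p) / s + 1

print(conv_out(224, 11, 4, 0))  # 54.25 -> not an integer, so 224 cannot be right
print(conv_out(227, 11, 4, 0))  # 55.0  -> matches CONV1's 55x55x96 output
```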

ZFNet Derivation

ZFNetAlexNet进行了如下调整:

  1. The filter size of the first convolutional layer is changed to 7x7
  2. The strides of the first and second convolutional layers are changed to 2

Note: the zero padding of the second convolutional layer is also changed to 1.

Its network structure is as follows:

| Layer | Input size | Filter size | Stride | Zero padding | # Filters | Output size |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| CONV1 | 227x227x3 | 7x7x3 | 2 | 0 | 96 | 111x111x96 |
| POOL2 | 111x111x96 | 3x3 | 2 | / | 96 | 55x55x96 |
| CONV3 | 55x55x96 | 5x5x96 | 2 | 1 | 256 | 27x27x256 |
| POOL4 | 27x27x256 | 3x3 | 2 | / | 256 | 13x13x256 |
| CONV5 | 13x13x256 | 3x3x256 | 1 | 1 | 384 | 13x13x384 |
| CONV6 | 13x13x384 | 3x3x384 | 1 | 1 | 384 | 13x13x384 |
| CONV7 | 13x13x384 | 3x3x384 | 1 | 1 | 256 | 13x13x256 |
| POOL8 | 13x13x256 | 3x3 | 2 | / | 256 | 6x6x256 |
| FC9 | 1x1x9216 | 1x1 | / | / | 4096 | 1x1x4096 |
| FC10 | 1x1x4096 | 1x1 | / | / | 4096 | 1x1x4096 |
| FC11 | 1x1x4096 | 1x1 | / | / | 1000 | 1x1x1000 |

Note: the paper states an input size of \(224\times 224\); in practice it is \(227\times 227\).
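
The same formula verifies the modified early layers (again my own sketch; the helper is restated so the snippet stands alone):

```python
# Verify ZFNet's first three layers against the table (integer division,
# since all of these sizes divide evenly).
def conv_out(w, f, s, p):
    return (w - f + 2 * p) // s + 1

print(conv_out(227, 7, 2, 0))  # 111 -> CONV1: 111x111x96
print(conv_out(111, 3, 2, 0))  # 55  -> POOL2: 55x55x96
print(conv_out(55, 5, 2, 1))   # 27  -> CONV3: 27x27x256
```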

PyTorch Implementation

PyTorch provides an AlexNet implementation with some modifications of its own:

  1. The number of filters is reduced in the first convolutional layer (96 -> 64), the second convolutional layer (256 -> 192), and the fourth convolutional layer (384 -> 256)
  2. Dropout is applied to the outputs of the last convolutional layer and the first fully-connected layer

The implementation code is as follows:

```python
import torch
import torch.nn as nn


class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),  # 64 filters vs. the paper's 96
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
```
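
A quick smoke test (mine, not part of the original post) confirms the class runs end to end:

```python
# Feed one dummy 227x227 RGB image through the network and check that the
# output has one logit per class.
import torch

model = AlexNet(num_classes=1000)  # assumes the AlexNet class defined above
x = torch.randn(1, 3, 227, 227)
print(model(x).shape)  # torch.Size([1, 1000])
```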

ZFNetPyTorch提供的AlexNet代码上进行修改,实现如下:

```python
import torch
import torch.nn as nn


class ZFNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(ZFNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=2),  # ZFNet change: 7x7 filters, stride 2
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, stride=2, padding=2),  # ZFNet change: stride 2
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
```
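
For a side-by-side sanity check (my own sketch, assuming both classes above are in scope): with a 227x227 input both feature extractors happen to end at 6x6x256, and the AdaptiveAvgPool2d((6, 6)) layer keeps the classifier input fixed for other input sizes as well.

```python
# Compare the feature-map shapes the two backbones hand to the classifier.
import torch

for net in (AlexNet(num_classes=20), ZFNet(num_classes=20)):
    x = torch.randn(1, 3, 227, 227)
    print(type(net).__name__, net.features(x).shape)  # both: [1, 256, 6, 6]
```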

VOC 2007

The Pascal VOC 2007 dataset is used for classification; see create_voc_train_val.py for how the train/val data is prepared.
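
create_voc_train_val.py is not reproduced in the post; roughly, it reorganizes VOC images into the per-class folder layout that ImageFolder expects. A hypothetical sketch of that idea (paths, names, and details are my assumptions, not the author's script):

```python
# Hypothetical sketch (assumed, not the author's code): VOC lists positive
# samples per class in ImageSets/Main, and ImageFolder wants the layout
# ../data/train_val/<phase>/<class>/<image>.jpg.
import os
import shutil

VOC_ROOT = 'VOCdevkit/VOC2007'   # assumed location of the extracted dataset
OUT_ROOT = '../data/train_val'

def build_split(cls, phase):
    list_file = os.path.join(VOC_ROOT, 'ImageSets', 'Main', f'{cls}_{phase}.txt')
    dst_dir = os.path.join(OUT_ROOT, phase, cls)
    os.makedirs(dst_dir, exist_ok=True)
    with open(list_file) as f:
        for line in f:
            name, label = line.split()
            if label == '1':  # keep only positive samples for this class
                src = os.path.join(VOC_ROOT, 'JPEGImages', name + '.jpg')
                shutil.copy(src, dst_dir)

VOC_CLASSES = ['aeroplane', 'bicycle', 'bird']  # ... the full 20-class list
for phase in ['train', 'val']:
    for cls in VOC_CLASSES:
        build_split(cls, phase)
```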

Training Parameters

  1. Optimizer: Adam with learning rate 1e-3
  2. Step-wise decay: the learning rate is multiplied by a factor of 0.1 every 15 epochs (see the sketch after this list); training runs for 50 epochs in total
  3. Loss function: log loss (cross-entropy)
  4. Data: resized to 227x227, horizontally flipped with 50% probability, normalized with mean 0.5 and standard deviation 0.5
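
The step decay in item 2 is easy to visualize with a standalone sketch (mine, separate from the training script below):

```python
# Print the learning rate at the start of each decay stage: StepLR multiplies
# the lr by gamma=0.1 every 15 epochs.
import torch
import torch.optim as optim

opt = optim.Adam([torch.zeros(1, requires_grad=True)], lr=1e-3)
sched = optim.lr_scheduler.StepLR(opt, step_size=15, gamma=0.1)
for epoch in range(50):
    if epoch in (0, 15, 30, 45):
        print(epoch, sched.get_last_lr())  # 1e-3, 1e-4, 1e-5, 1e-6
    opt.step()
    sched.step()
```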

Implementation Code

```python
# -*- coding: utf-8 -*-

import os
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import torchvision

import models.zfnet as zfnet
import utils.util as util

data_root_dir = '../data/train_val/'
model_dir = '../data/models/'


def load_data(root_dir):
    transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    data_loaders = {}
    dataset_sizes = {}
    for phase in ['train', 'val']:
        phase_dir = os.path.join(root_dir, phase)

        data_set = ImageFolder(phase_dir, transform=transform)
        data_loader = DataLoader(data_set, batch_size=128, shuffle=True, num_workers=8)

        data_loaders[phase] = data_loader
        dataset_sizes[phase] = len(data_set)

    return data_loaders, dataset_sizes


def train_model(model, criterion, optimizer, scheduler, dataset_sizes, dataloaders, num_epochs=25, device=None):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    loss_dict = {'train': [], 'val': []}
    acc_dict = {'train': [], 'val': []}
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            loss_dict[phase].append(epoch_loss)
            acc_dict[phase].append(epoch_acc)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, loss_dict, acc_dict


if __name__ == '__main__':
    data_loaders, data_sizes = load_data(data_root_dir)
    print(data_sizes)

    res_loss = dict()
    res_acc = dict()
    for name in ['alexnet', 'zfnet']:
        if name == 'alexnet':
            model = torchvision.models.AlexNet(num_classes=20)
        else:
            model = zfnet.ZFNet(num_classes=20)
        device = util.get_device()
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        # optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)

        best_model, loss_dict, acc_dict = train_model(model, criterion, optimizer, lr_scheduler, data_sizes,
                                                      data_loaders, num_epochs=50,
                                                      device=device)
        # save the best model weights
        util.check_dir(model_dir)
        torch.save(best_model.state_dict(), os.path.join(model_dir, '%s.pth' % name))

        res_loss[name] = loss_dict
        res_acc[name] = acc_dict

        print('train %s done' % name)
        print()

    util.save_png('loss', res_loss)
    util.save_png('acc', res_acc)
```
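
Once a checkpoint is saved, inference is straightforward. A hypothetical follow-up (not in the original post; the test image path is an assumption):

```python
# Reload the saved ZFNet weights and classify a single image.
import torch
from PIL import Image
import torchvision.transforms as transforms

import models.zfnet as zfnet

transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

model = zfnet.ZFNet(num_classes=20)
model.load_state_dict(torch.load('../data/models/zfnet.pth', map_location='cpu'))
model.eval()

img = transform(Image.open('test.jpg').convert('RGB')).unsqueeze(0)  # assumed test image
with torch.no_grad():
    print(model(img).argmax(dim=1).item())  # index of the predicted VOC class
```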

Training Results

The training set contains 6,301 images and the validation set 6,307. The training and validation results are as follows:

```
$ python classifier.py 
{'train': 6301, 'val': 6307}
Epoch 0/49
----------
train Loss: 2.9600 Acc: 0.2633
val Loss: 2.4553 Acc: 0.3697
...
Epoch 48/49
----------
train Loss: 1.3090 Acc: 0.6051
val Loss: 1.6987 Acc: 0.5156

Epoch 49/49
----------
train Loss: 1.3043 Acc: 0.6063
val Loss: 1.7111 Acc: 0.5175

Training complete in 7m 3s
Best val Acc: 0.519264
train alexnet done

Epoch 0/49
----------
train Loss: 2.6546 Acc: 0.3379
val Loss: 2.4400 Acc: 0.3697
...
Epoch 49/49
----------
train Loss: 0.9715 Acc: 0.6964
val Loss: 1.5378 Acc: 0.5698

Training complete in 8m 50s
Best val Acc: 0.575710
train zfnet done
```

Summary

After 50 epochs of training, AlexNet's best validation accuracy is 51.93%, while ZFNet's is 57.57%. This further verifies that smaller filters yield stronger feature-extraction ability.
