AlexNet-pytorch

Implement AlexNet in PyTorch, and train and test it on CIFAR-10.

Sequential

torch.nn.Sequential is the sequential container provided by PyTorch:

CLASS torch.nn.Sequential(*args)

Modules added in the constructor are executed in the order they were added.

There are two ways to add modules: pass them to the constructor in order, or build the container from an OrderedDict.

import torch
import torch.nn as nn
from collections import OrderedDict

if __name__ == '__main__':
    # Method 1: pass the modules to the constructor in order
    model = nn.Sequential(
        nn.Conv2d(1, 20, 3),
        nn.ReLU(),
        nn.Conv2d(20, 64, 5),
        nn.ReLU()
    )

    # Method 2: build the container from an OrderedDict of named modules
    model2 = nn.Sequential(
        OrderedDict([
            ('conv1', nn.Conv2d(1, 20, 3)),
            ('relu1', nn.ReLU()),
            ('conv2', nn.Conv2d(20, 64, 5)),
            ('relu2', nn.ReLU())
        ])
    )

    print(model)
    print(model2)

Printing the two models gives:

Sequential(
  (0): Conv2d(1, 20, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)
Sequential(
  (conv1): Conv2d(1, 20, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
)

Individual modules can be retrieved by index or by name:

a = model[1]        # equivalent to model.__getitem__(1)
c = model2.conv1    # equivalent to model2.__getattr__('conv1')
# output
ReLU()
Conv2d(1, 20, kernel_size=(3, 3), stride=(1, 1))
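
Calling the container itself runs the modules in registration order. A minimal check (the 32x32 input size here is an arbitrary choice that fits the layers above):

x = torch.randn(1, 1, 32, 32)
y = model(x)    # Conv2d -> ReLU -> Conv2d -> ReLU, applied in order
print(y.shape)  # torch.Size([1, 64, 26, 26]): 32 -> 30 after the 3x3 conv, 30 -> 26 after the 5x5 conv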

Adaptive average pooling layer

The adaptive average pooling layer (adaptive average pool layer) has 1-D/2-D/3-D implementations.

AdaptiveAvgPool operates on each input channel independently, leaving the batch size and channel count unchanged. Its distinguishing feature is that it derives the pooling windows automatically from the input spatial size and the requested output size, so inputs of any size are reduced to a fixed output size.

For a detailed explanation, see: What is AdaptiveAvgPool2d?

The window bookkeeping is somewhat involved, but the goal is still to average neighboring values.

Taking a 2-D activation map as an example:

>>> a = torch.arange(0, 25.).reshape(1, 1, 5, 5)
>>> a
tensor([[[[ 0.,  1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.,  9.],
          [10., 11., 12., 13., 14.],
          [15., 16., 17., 18., 19.],
          [20., 21., 22., 23., 24.]]]])
>>> m = nn.AdaptiveAvgPool2d((2, 2))
>>> m(a)
tensor([[[[ 6.,  8.],
          [16., 18.]]]])
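
For intuition, output cell (i, j) averages the input window whose row bounds are floor(i*H/out_h) to ceil((i+1)*H/out_h), and likewise for columns. A minimal reference sketch assuming this floor/ceil window rule (it reproduces the result above; `adaptive_avg_pool2d_ref` is a hypothetical helper name):

import math
import torch

def adaptive_avg_pool2d_ref(x, out_h, out_w):
    # Each output cell averages a window whose bounds are derived
    # from the ratio of the input size to the output size.
    n, c, h, w = x.shape
    out = torch.zeros(n, c, out_h, out_w)
    for i in range(out_h):
        r0, r1 = (i * h) // out_h, math.ceil((i + 1) * h / out_h)
        for j in range(out_w):
            c0, c1 = (j * w) // out_w, math.ceil((j + 1) * w / out_w)
            out[:, :, i, j] = x[:, :, r0:r1, c0:c1].mean(dim=(-2, -1))
    return out

a = torch.arange(0, 25.).reshape(1, 1, 5, 5)
print(adaptive_avg_pool2d_ref(a, 2, 2))  # matches nn.AdaptiveAvgPool2d((2, 2))(a)

Here the 2x2 output averages the two overlapping 3x3 windows at rows/columns 0-2 and 2-4, giving 6, 8, 16, 18.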

AlexNet implementation

PyTorch provides a modified AlexNet implementation: vision/torchvision/models/alexnet.py

class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
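
A quick sanity check of the output shape (a minimal sketch; the 224x224 input follows torchvision's convention, and thanks to the adaptive pooling other sufficiently large input sizes also work):

import torch

model = AlexNet(num_classes=10)
x = torch.randn(1, 3, 224, 224)  # dummy batch of one RGB image
print(model(x).shape)            # torch.Size([1, 10])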

The original AlexNet can be implemented as follows:

class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
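
Why 256 * 6 * 6: with a 227x227 input (the size used for CIFAR-10 below), the spatial size evolves as (227 - 11) / 4 + 1 = 55 after conv1, (55 - 3) / 2 + 1 = 27 after the first max pool, 27 after conv2 (5x5, padding 2), 13 after the second pool, 13 through conv3-conv5 (3x3, padding 1), and (13 - 3) / 2 + 1 = 6 after the final pool, leaving a 256 x 6 x 6 feature map. Without an adaptive pooling layer, this version therefore requires a fixed input size.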

Compared with the original implementation, the modified AlexNet reduces the number of filters in the first convolutional layer (96 -> 64) and in the fourth convolutional layer (384 -> 256), inserts an adaptive average pooling layer between the features and the classifier, and applies dropout to the output of the last convolutional layer and of the first fully connected layer.

Training

Based on the AlexNet model and the PyTorch implementation, with the parameters adjusted, the following 4 networks were trained:

  1. AlexNet (no dropout)
  2. AlexNet
  3. AlexNet (dropout on the last conv layer + the fully connected layers)
  4. pytorch_AlexNet

The training hyperparameters were as follows (the optimizer construction is shown in the snippet after this list):

  • batch size batch_size=256
  • number of epochs epochs=300
  • learning rate lr=1e-2
  • momentum momentum=0.9 + Nesterov acceleration
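
These map directly onto the SGD construction used in the full script below (a minimal sketch; the nn.Linear stand-in is only to make the snippet self-contained):

import torch.nn as nn
import torch.optim as optim

net = nn.Linear(10, 2)  # stand-in module; any of the AlexNet variants above works the same way
optimizer = optim.SGD(net.parameters(), lr=1e-2, momentum=0.9, nesterov=True)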

The full code:

# -*- coding: utf-8 -*-

# @Time : 19-6-8 1:57 PM
# @Author : zj

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import time

# batch size
batch_size = 256
# number of epochs
epochs = 300

# learning rate
lr = 1e-2

class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),

            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),

            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x


class AlexNet_v2(nn.Module):
    ...


def load_cifar_10_data(batch_size=128, shuffle=False):
    data_dir = '/home/lab305/Documents/data/cifar_10/'
    # data_dir = '/home/zj/zj/data/cifar_10/'

    # resize CIFAR-10 images to 227x227 and scale them to [-1, 1]
    transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])

    train_data_set = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transform)
    test_data_set = datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transform)

    train_loader = DataLoader(train_data_set, batch_size=batch_size, shuffle=shuffle, num_workers=8)
    test_loader = DataLoader(test_data_set, batch_size=batch_size, shuffle=shuffle, num_workers=8)

    return train_loader, test_loader


def compute_accuracy(loader, net, device):
    total_accuracy = 0
    num = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for item in loader:
            data, labels = item
            data = data.to(device)
            labels = labels.to(device)

            scores = net(data)
            predicted = torch.argmax(scores, dim=1)
            total_accuracy += torch.mean((predicted == labels).float()).item()
            num += 1
    return total_accuracy / num


if __name__ == '__main__':
    train_loader, test_loader = load_cifar_10_data(batch_size=batch_size, shuffle=True)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")

    # net = AlexNet(num_classes=10).to(device)
    net = AlexNet_v2(num_classes=10).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, nesterov=True)

    best_train_accuracy = 0.995
    best_test_accuracy = 0

    loss_list = []
    train_list = []
    for i in range(epochs):
        num = 0
        total_loss = 0
        start = time.time()
        net.train()
        for j, item in enumerate(train_loader, 0):
            data, labels = item
            data = data.to(device)
            labels = labels.to(device)

            scores = net(data)
            loss = criterion(scores, labels)
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num += 1
        end = time.time()

        avg_loss = total_loss / num
        loss_list.append(float('%.8f' % avg_loss))
        print('epoch: %d time: %.2f loss: %.8f' % (i + 1, end - start, avg_loss))

        if i % 20 == 19:
            # compute accuracy on the training set
            net.eval()
            train_accuracy = compute_accuracy(train_loader, net, device)
            train_list.append(float('%.4f' % train_accuracy))
            if best_train_accuracy < train_accuracy:
                best_train_accuracy = train_accuracy

            # compute accuracy on the test set
            test_accuracy = compute_accuracy(test_loader, net, device)
            if best_test_accuracy < test_accuracy:
                best_test_accuracy = test_accuracy

            print('best train accuracy: %.2f %% best test accuracy: %.2f %%' % (
                best_train_accuracy * 100, best_test_accuracy * 100))
            print(loss_list)
            print(train_list)

Testing

After 300 epochs of training, the test results are as follows:

Model                                              Training accuracy    Test accuracy
AlexNet (no dropout)                               100.00 %             80.24 %
AlexNet                                            100.00 %             84.48 %
AlexNet (dropout: last conv layer + FC layers)     100.00 %             87.00 %
pytorch_AlexNet                                    100.00 %             86.76 %

(Figures: training loss curves and training-set accuracy curves for the four networks; not reproduced here.)

From the test results, the AlexNet model provided by PyTorch improves generalization, while the custom AlexNet variant (dropout on the last conv layer + the fully connected layers) achieves the best test accuracy.