[PyTorch]TensorBoard in Practice

I previously covered TensorBoard usage in the PyTorch environment in [PyTorch]Tensorboard可视化实现. PyTorch also provides an official tutorial - Visualizing Models, Data, and Training with TensorBoard.

The following walks through a complete training run and visualizes it with TensorBoard.

Example

Train and test a LeNet-5 model on Fashion-MNIST with the following training parameters:

  • Batch size: 256
  • Learning rate: 1e-3
  • Momentum: 0.9
  • Epochs: 50

The procedure is as follows:

  1. Load the datasets, create the model, loss function, and optimizer, and move the data and model to the GPU
  2. Iterate over the dataset to train the network; after each epoch, compute the average loss, training-set accuracy, and test-set accuracy
  3. Plot the loss and accuracy curves

The complete code is as follows:

# -*- coding: utf-8 -*-

"""
@author: zj
@file: tensorboard-fashion-mnist.py
@time: 2019-12-11
"""

import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
import torchvision.utils

learning_rate = 1e-3
moment = 0.9
epoches = 50
bsize = 256

# constant for classes
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
           'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')


def load_data(bsize):
    # transforms
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5,), (0.5,))])

    # datasets
    trainset = torchvision.datasets.FashionMNIST('./data',
                                                 download=True,
                                                 train=True,
                                                 transform=transform)
    testset = torchvision.datasets.FashionMNIST('./data',
                                                download=True,
                                                train=False,
                                                transform=transform)

    # dataloaders
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=bsize,
                                              shuffle=True, num_workers=2)
    testloader = torch.utils.data.DataLoader(testset, batch_size=bsize,
                                             shuffle=False, num_workers=2)
    return trainloader, testloader


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def compute_accuracy(loader, net, device):
    total_accu = 0.0
    num = 0

    for i, data in enumerate(loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        outputs = net(inputs)
        predicted = torch.argmax(outputs, dim=1)
        total_accu += torch.mean((predicted == labels).float()).item()
        num += 1
    return total_accu / num


def draw(values, xlabel, ylabel, title, label):
    fig = plt.figure()
    plt.plot(list(range(len(values))), values, label=label)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)

    plt.legend()
    plt.show()


def train(trainloader, testloader, net, criterion, optimizer, device):
    train_accu_list = list()
    test_accu_list = list()
    loss_list = list()

    for epoch in range(epoches):  # loop over the dataset multiple times
        num = 0
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num += 1
        # after each epoch, record the average loss and compute the
        # training-set and test-set accuracy
        avg_loss = running_loss / num
        print('[%d] loss: %.4f' % (epoch + 1, avg_loss))
        loss_list.append(avg_loss)

        train_accu = compute_accuracy(trainloader, net, device)
        test_accu = compute_accuracy(testloader, net, device)
        print('train: %.4f, test: %.4f' % (train_accu, test_accu))
        train_accu_list.append(train_accu)
        test_accu_list.append(test_accu)

    print('Finished Training')
    return train_accu_list, test_accu_list, loss_list


if __name__ == '__main__':
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    net = Net().to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=moment)

    trainloader, testloader = load_data(bsize)

    train_accu_list, test_accu_list, loss_list = train(trainloader, testloader, net, criterion, optimizer, device)

    draw(train_accu_list, 'epoch', 'accuracy', 'train accuracy', 'fashion-mnist')
    draw(test_accu_list, 'epoch', 'accuracy', 'test accuracy', 'fashion-mnist')
    draw(loss_list, 'epoch', 'loss_value', 'loss', 'fashion-mnist')

TensorBoard in Practice

The implementation workflow is as follows (a combined sketch of the writer calls follows the list):

  1. Start TensorBoard
  2. Write sample images
  3. Write the model graph
  4. Project high-dimensional features
  5. Track the training process
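
Before walking through each step, here is a minimal sketch of the SummaryWriter workflow the steps share (the run directory matches the one used below; the add_* calls are the standard torch.utils.tensorboard methods):

from torch.utils.tensorboard import SummaryWriter

# one writer per experiment; every add_* call appends to the same run directory
writer = SummaryWriter('runs/fashion_mnist_experiment_1')

# steps 2-5 each reduce to a single call on the writer:
# writer.add_image(tag, img_tensor)           # step 2: sample images
# writer.add_graph(model, example_inputs)     # step 3: model graph
# writer.add_embedding(features, ...)         # step 4: feature projection
# writer.add_scalar(tag, value, global_step)  # step 5: training curves

writer.close()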

Starting TensorBoard

from torch.utils.tensorboard import SummaryWriter

# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter('runs/fashion_mnist_experiment_1')

Open a new terminal window and, from the same directory, run:

$ tensorboard --logdir=runs --host=192.168.0.112 --port=7878

Open a browser and navigate to 192.168.0.112:7878 (the host and port passed above) to access TensorBoard.

Writing Sample Images

Modify the data-loading function so that the transform is passed in as a parameter, which makes it possible to load the unnormalized dataset as well.

def load_data(bsize, tf=None):
    # datasets
    trainset = torchvision.datasets.FashionMNIST('./data',
                                                 download=True,
                                                 train=True,
                                                 transform=tf)
    testset = torchvision.datasets.FashionMNIST('./data',
                                                download=True,
                                                train=False,
                                                transform=tf)

    # dataloaders
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=bsize,
                                              shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=bsize,
                                             shuffle=False, num_workers=4)
    return trainloader, testloader

Load the dataset and write the images. torchvision provides the make_grid function, which converts a batch of tensors into a single image (e.g. [64, 1, 28, 28] -> [3, 242, 242]).

transform = transforms.Compose(
    [transforms.ToTensor()])

trainloader, testloader = load_data(64, tf=transform)
print(trainloader)

# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)
print(images.size())

# create grid of images
img_grid = torchvision.utils.make_grid(images)
print(img_grid.size())

# write to tensorboard
writer.add_image('fashion_mnist_images', img_grid)
writer.close()

Open the TensorBoard IMAGES page and select the image written under the fashion_mnist_images tag.
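
As a quick local sanity check (optional, not part of the original flow), the grid returned by make_grid can also be displayed with matplotlib; a minimal sketch reusing the img_grid tensor from above:

import matplotlib.pyplot as plt
import numpy as np

# make_grid returns a CHW tensor; matplotlib expects HWC
npimg = img_grid.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
plt.show()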

Writing the Model Graph

net = Net()

writer.add_graph(net, images)
writer.close()

Open the TensorBoard GRAPHS page and select the run that was just written from the Runs list.
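
One caveat worth noting: add_graph traces the model by running the example input through it, so if the network has already been moved to the GPU the input must live on the same device. A sketch, assuming the device variable from the training script:

# if `net` is on the GPU, trace it with an input on the same device
writer.add_graph(net.to(device), images.to(device))
writer.close()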

Projecting High-Dimensional Features

# constant for classes
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
           'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)
print(images.size())

# select random images and their target indices
# images, labels = select_n_random(trainset.data, trainset.targets)

# get the class labels for each image
class_labels = [classes[lab] for lab in labels]

# log embeddings
features = images.view(-1, 28 * 28)
print(features.size())
writer.add_embedding(features,
                     metadata=class_labels,
                     label_img=images)
writer.close()

Randomly take a batch of samples, flatten each image into a vector, and pass the result to the add_embedding function.
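
The commented-out call above refers to a select_n_random helper that is not defined in this post; a minimal sketch of what it would look like (modeled on the helper in the official tutorial, operating on index-aligned data and label tensors):

def select_n_random(data, labels, n=100):
    # select n random data points and their matching labels
    assert len(data) == len(labels)
    perm = torch.randperm(len(data))
    return data[perm][:n], labels[perm][:n]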

Open the TensorBoard PROJECTOR page and select the run that was just written from the Runs list; different projection methods can be chosen there (PCA by default).

Tracking the Training Process

After each epoch, compute the average loss along with the training-set and test-set accuracy, and pass them to the add_scalar / add_scalars functions.

for epoch in range(epoches):  # loop over the dataset multiple times
    num = 0
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num += 1
    # after each epoch, record the average loss and compute the
    # training-set and test-set accuracy
    avg_loss = running_loss / num
    print('[%d] loss: %.4f' % (epoch + 1, avg_loss))

    train_accu = compute_accuracy(trainloader, net, device)
    test_accu = compute_accuracy(testloader, net, device)
    print('train: %.4f, test: %.4f' % (train_accu, test_accu))

    # log the loss value
    writer.add_scalar("training loss", avg_loss, epoch)

    # log the loss together with the training-set and test-set accuracy
    writer.add_scalars("training accuracy", {'loss': avg_loss,
                                             'train_accu': train_accu,
                                             'test_accu': test_accu}, epoch)

print('Finished Training')

Open the TensorBoard SCALARS page and select the tags and runs to display.
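
Note that add_scalars writes each key of the dictionary as its own sub-run under the main tag. If separate, individually named curves are preferred, the same values can be logged with plain add_scalar calls inside the loop, for example:

# alternative sketch: one tag per metric instead of a grouped add_scalars call
writer.add_scalar("loss/train", avg_loss, epoch)
writer.add_scalar("accuracy/train", train_accu, epoch)
writer.add_scalar("accuracy/test", test_accu, epoch)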