3.4 softmax regression

1
2
3
4
5
#硬性类别:属于某一类别
#软性类别:属于某一类别的概率
#我们用one-hot encoding来分类,one-hot encoding是一个向量,维数等于类别数
#比如我们有猫狗鸡三类,向量为(1,0,0)或(0,1,0)或(0,0,1),当一个动物被分类为鸡时,可以看成(0,0,1)其他两个以此类推
#所以y属于{(1,0,0),(0,1,0),(0,0,1)}

3.4.2 network architecture

1
#下面是公式推导部分,详见现有网址,后续这里补充推导。

3.5 image classification dataset

1
#MNIST数据集是图像分类中使用最广泛的数据集之一,这里我们使用更复杂的Fashion-MNIST数据集
1
2
3
4
5
6
7
%matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
# Switch figure rendering to SVG, as the d2l helper's name indicates.
d2l.use_svg_display()

3.5.1 Loading the dataset

1
2
3
4
5
6
7
# Download Fashion-MNIST (download=True) and load it through torchvision's
# built-in dataset class.
# ToTensor converts each PIL image into a float32 tensor and divides by 255,
# so every pixel value lies between 0 and 1.
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data",
    train=True,
    transform=trans,
    download=True,
)
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data",
    train=False,
    transform=trans,
    download=True,
)
#测试集不会用于训练
1
2
# Fashion-MNIST has 10 classes; each class contributes 6000 images to the
# training set and 1000 images to the test set.
len(mnist_train), len(mnist_test)
(60000, 10000)
1
2
# Every image is 28 x 28 pixels and grayscale, so it has a single channel.
# These notes write an image's height and width as (h, w).
# mnist_train[0] selects the first sample (not a class); its element [0] is
# the image tensor, whose shape is inspected here.
# NOTE(review): element [1] is presumably the label - confirm.
mnist_train[0][0].shape
torch.Size([1, 28, 28])
1
2
3
4
5
6
#10个类别
def get_fashion_mnist_labels(labels):  #@save
    """Return the Fashion-MNIST text labels for the given class indices.

    Args:
        labels: Iterable of class indices in the range 0-9. Each element is
            passed through ``int()`` before it is used as an index.

    Returns:
        A list with one label name per element of ``labels``.
    """
    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    ]
    return [text_labels[int(i)] for i in labels]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#可视化样本
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):  #@save
    """Plot a list of images on a grid of subplots.

    Args:
        imgs: Images to draw. Tensors are converted with ``.numpy()``; any
            other object (e.g. a PIL image) is handed to ``imshow`` as is.
        num_rows: Number of subplot rows.
        num_cols: Number of subplot columns.
        titles: Optional sequence of titles, indexed like ``imgs``.
        scale: Multiplier applied to the row/column counts to get the
            figure size.

    Returns:
        The flattened array of subplot axes.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False keeps ``axes`` a 2-D array even for a 1 x 1 grid; without
    # it subplots() returns a bare Axes object, which has no flatten().
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize,
                               squeeze=False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # Tensor image: convert to a NumPy array before drawing.
            ax.imshow(img.numpy())
        else:
            # PIL image: draw it directly.
            ax.imshow(img)
        # Hide both coordinate axes so only the picture is visible.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
#下面画出训练集中前几个样本图像及其标签
1
2
3
# Take one batch of 18 samples from the training set: X holds the features
# (images) and y the labels.
sample_loader = data.DataLoader(mnist_train, batch_size=18)
X, y = next(iter(sample_loader))
# Draw the 18 images on a 2 x 9 grid, titled with their text labels.
show_images(X.reshape(18, 28, 28), 2, 9, titles=get_fashion_mnist_labels(y))
array([<AxesSubplot:title={'center':'ankle boot'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'dress'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'pullover'}>,
       <AxesSubplot:title={'center':'sneaker'}>,
       <AxesSubplot:title={'center':'pullover'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'ankle boot'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'sneaker'}>,
       <AxesSubplot:title={'center':'ankle boot'}>,
       <AxesSubplot:title={'center':'trouser'}>,
       <AxesSubplot:title={'center':'t-shirt'}>], dtype=object)

svg

1
2
3
4
5
6
7
8
9
10
11
12
# A small standalone demo of what the flatten() call in show_images does.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2)  # create a 2 x 2 grid of subplots
axes = axes.flatten()
for ax in axes:
    # Walk over the flattened axes and draw on each subplot in turn.
    ax.plot([1, 2, 3, 4], [1, 4, 2, 3])  # draw one curve on this subplot

plt.show()
# Without flatten(), axes is a 2 x 2 array holding the four subplot axes, so
# an individual axis has to be indexed before plotting on it, e.g. the first
# one: axes[0][0].plot([1, 2, 3, 4], [1, 2, 3, 4])

svg

1
2
3
4
5
6
7
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2)  # create a 2 x 2 grid of subplots

# Work on one subplot at a time by indexing the grid with (row, column).
top_left = axes[0, 0]
top_left.plot([1, 2, 3, 4], [1, 4, 2, 3])  # draw a curve on the first subplot
top_left.set_title("Subplot 1")  # set the first subplot's title
Text(0.5, 1.0, 'Subplot 1')

svg

3.5.2 读取小批量

1
2
3
4
5
6
# Minibatches are read with the built-in data iterator; this is the number of
# samples per minibatch.
batch_size = 256
def get_dataloader_workers():  #@save
    """Return the number of worker processes used to read the data (4)."""
    return 4
# Shuffled minibatch iterator over the training set.
train_iter = data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=get_dataloader_workers(),
)
1
2
3
4
# Measure how long one full pass over the training iterator takes.
timer = d2l.Timer()
for X, y in train_iter:  # keep going until every batch has been loaded
    continue
f'{timer.stop():.2f} sec'
'2.27 sec'

3.5.3 integrate all components

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Now we define a load_data_fashion_mnist function that downloads and reads the Fashion-MNIST dataset. It returns the data iterators for the training set and the validation set. In addition, the function accepts an optional
# parameter resize, which can be used to resize the images.
def load_data_fashion_mnist(batch_size, resize=None):  #@save
    """Download Fashion-MNIST and return data loaders for both splits.

    Args:
        batch_size: Number of samples per minibatch.
        resize: Optional size handed to ``transforms.Resize``; when falsy
            no resize step is added.

    Returns:
        A ``(train_loader, test_loader)`` tuple of ``DataLoader`` objects.
        The training loader shuffles its data; the test loader does not.
    """
    # ToTensor turns a PIL image into a tensor and rescales the pixel values
    # from 0-255 to 0-1.
    trans = [transforms.ToTensor()]
    if resize:
        # Put the resize step at the front of the list so it runs first.
        trans.insert(0, transforms.Resize(resize))
    # Compose chains the listed transforms and applies them in list order,
    # which is why ``trans`` is built as a list.
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True
    )
    return (
        data.DataLoader(mnist_train, batch_size, shuffle=True,
                        num_workers=get_dataloader_workers()),
        data.DataLoader(mnist_test, batch_size, shuffle=False,
                        num_workers=get_dataloader_workers()),
    )

1
2
3
4
5
# Pass the resize argument to check that load_data_fashion_mnist really
# resizes the images.
train_iter, test_iter = load_data_fashion_mnist(32, resize=64)
for X, y in train_iter:
    # Print the shape and dtype of one training batch, then stop.
    print(X.shape, X.dtype, y.shape, y.dtype)
    break  # each batch holds 32 single-channel images of size 64 x 64
torch.Size([32, 1, 64, 64]) torch.float32 torch.Size([32]) torch.int64
1
2
3
4
5
6
7
8
# Exercise: does the batch size affect data-loading speed (for example, what
# happens if it is reduced to 1)? As written, this cell times batch_size=256;
# change the value below to try other sizes.
train_iter = data.DataLoader(mnist_train, batch_size=256, shuffle=True,
                             num_workers=get_dataloader_workers())
timer = d2l.Timer()
for X, y in train_iter:  # one full pass over the training set
    continue
f'{timer.stop(): .2f} sec'
# Author's notes from the experiments: reducing the batch size to 1 hurt
# performance badly, while batch sizes of 1024 and 2056 performed almost the
# same.
' 1.92 sec'