3.4 softmax regression

#硬性类别：属于某一类别
#软性类别：属于某一类别的概率
#我们用one-hot encoding来分类，one-hot encoding是一个向量，维数等于类别数
#比如我们有猫狗鸡三类，向量为（1,0,0）或(0,1,0)或(0,0,1),当一个动物被分类为鸡时，可以看成(0,0,1)其他两个以此类推
#所以y属于{(1,0,0),(0,1,0),(0,0,1)}

3.4.2 network architecture

#下面是公式推导部分，详见现有网址，后续这里补充推导。

3.5 image classification dataset

#MNIST数据集是图像分类中使用最广泛的数据集之一，我们使用更复杂的Fashion-MNIST数据集

%matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
d2l.use_svg_display()

3.5.1 Loading the dataset

# Download Fashion-MNIST through torchvision's built-in dataset class and load
# it into memory.
# ToTensor converts each image from a PIL image to a float32 tensor and divides
# the pixel values by 255, so every pixel ends up in the range [0, 1].
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data",
    train=True,
    transform=trans,
    download=True,
)
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data",
    train=False,
    transform=trans,
    download=True,
)
# The test set is never used for training.

#Fashion-MNIST由10个类别的图像组成，每个类别由train dataset中的6000张图像和test dataset中的1000张图像组成
len(mnist_train), len(mnist_test)

(60000, 10000)

1
2

# Every image is 28 pixels high and 28 pixels wide. The dataset consists of
# grayscale images, so the number of channels is 1. These notes write the
# image size as (h, w).
# The first index picks a sample from the dataset and the second index picks
# the image out of that sample, so this shows the size of a single image.
# NOTE(review): the original note described the indexing as "first the class,
# then the image"; the first index appears to select a sample (image, label),
# not a class -- confirm against the torchvision dataset documentation.
mnist_train[0][0].shape

torch.Size([1, 28, 28])

#10个类别
def get_fashion_mnist_labels(labels):  #@save
    """Return the text labels of the Fashion-MNIST dataset.

    Each entry of ``labels`` is converted with ``int()`` and used as an index
    into the ten class names; the names are returned as a list in the same
    order as the input.
    """
    class_names = (
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    )
    return [class_names[int(label)] for label in labels]

#可视化样本
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):  #@save
    """Plot a list of images on a ``num_rows`` x ``num_cols`` grid.

    Args:
        imgs: Iterable of images. Torch tensors are converted to NumPy arrays
            before plotting; anything else (e.g. a PIL image) is handed to
            ``imshow`` unchanged.
        num_rows: Number of subplot rows.
        num_cols: Number of subplot columns.
        titles: Optional sequence of titles, one per image. Images without a
            matching title are left untitled.
        scale: Size of each subplot; the figure is
            ``(num_cols * scale, num_rows * scale)``.

    Returns:
        The flattened array of axes, one entry per subplot.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False makes subplots() return a 2-D array of axes even for a
    # 1x1 grid, so flatten() below works for every grid size.
    # (Assumes d2l.plt is matplotlib.pyplot.)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize,
                               squeeze=False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # Image tensor: detach and move to the CPU first so tensors that
            # require grad or live on another device can be converted too.
            ax.imshow(img.detach().cpu().numpy())
        else:
            # PIL image
            ax.imshow(img)
        # Hide both coordinate axes.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        # Compare against None explicitly: the truth value of an array or
        # tensor of titles is ambiguous.
        if titles is not None and i < len(titles):
            ax.set_title(titles[i])
    return axes
#下面画出训练集中前几个样本图像及其标签

1
2
3

# Fetch the first mini-batch of 18 training samples; the loader yields the
# features X and the labels y.
X, y = next(iter(data.DataLoader(mnist_train, batch_size=18)))
# Plot the 18 images on a 2 x 9 grid, titled with their text labels.
show_images(
    X.reshape(18, 28, 28),
    num_rows=2,
    num_cols=9,
    titles=get_fashion_mnist_labels(y),
)

array([<AxesSubplot:title={'center':'ankle boot'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'dress'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'pullover'}>,
       <AxesSubplot:title={'center':'sneaker'}>,
       <AxesSubplot:title={'center':'pullover'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'t-shirt'}>,
       <AxesSubplot:title={'center':'ankle boot'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'sandal'}>,
       <AxesSubplot:title={'center':'sneaker'}>,
       <AxesSubplot:title={'center':'ankle boot'}>,
       <AxesSubplot:title={'center':'trouser'}>,
       <AxesSubplot:title={'center':'t-shirt'}>], dtype=object)

svg

#解释一下上面的flatten()
import matplotlib.pyplot as plt

# Create a 2x2 grid of subplots.
fig, axes = plt.subplots(nrows=2, ncols=2)
# Flatten the grid of axes so a single loop can visit every subplot.
axes = axes.flatten()
for ax in axes:
    # Draw the same curve on each subplot.
    ax.plot([1, 2, 3, 4], [1, 4, 2, 3])

plt.show()
#如果不加flatten会是什么情况呢？此时axes是一个2*2的矩阵，保存着四个子图的轴，我们要先通过下标访问其中的某一个轴
#例如用axes[0][0]表示第一个轴，然后再画：axes[0][0].plot([1, 2, 3, 4], [1, 2, 3, 4])

svg

import matplotlib.pyplot as plt

# Create a 2x2 grid of subplots; without flatten() the axes stay in a 2x2 array.
fig, axes = plt.subplots(nrows=2, ncols=2)

# Address one subplot by its row and column, then operate on it directly.
axes[0][0].plot([1, 2, 3, 4], [1, 4, 2, 3])  # draw a curve on the first subplot
axes[0][0].set_title("Subplot 1")  # title of the first subplot

Text(0.5, 1.0, 'Subplot 1')

svg

3.5.2 读取小批量

# Read the data one mini-batch at a time using the built-in data iterator.
batch_size = 256
def get_dataloader_workers():  #@save
    """Return the number of worker processes used to read the data (4)."""
    num_workers = 4
    return num_workers
# Shuffled training-set loader that reads mini-batches of batch_size samples
# with the configured number of worker processes.
train_iter = data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=get_dataloader_workers(),
)

# Time one complete pass over the training data; the loop body does nothing,
# so only the reading itself is measured.
timer = d2l.Timer()
for X, y in train_iter:
    pass
f'{timer.stop():.2f} sec'

'2.27 sec'

3.5.3 integrate all components

# Now we define a load_data_fashion_mnist function that obtains and reads the Fashion-MNIST dataset. It returns the data iterators for the training set and the validation set (the test split). In addition, the function accepts an optional
# parameter resize, which can be used to resize the images.
def load_data_fashion_mnist(batch_size, resize=None):  #@save
    """Download the Fashion-MNIST dataset and load it into memory.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional target size. When given, a ``Resize`` transform is
            applied to every image before it is converted to a tensor.

    Returns:
        A tuple ``(train_loader, test_loader)``. The training loader shuffles
        its samples; the test loader does not.
    """
    # Transforms are applied front to back: the optional Resize comes first,
    # then ToTensor (PIL image -> tensor, pixel values 0-255 -> 0-1).
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # Build the training split first, then the test split.
    train_set, test_set = (
        torchvision.datasets.FashionMNIST(
            root="../data", train=is_train, transform=pipeline, download=True
        )
        for is_train in (True, False)
    )

    train_loader = data.DataLoader(
        train_set, batch_size, shuffle=True,
        num_workers=get_dataloader_workers(),
    )
    test_loader = data.DataLoader(
        test_set, batch_size, shuffle=False,
        num_workers=get_dataloader_workers(),
    )
    return (train_loader, test_loader)

# Exercise the resize option of load_data_fashion_mnist by asking for
# 64-pixel images in mini-batches of 32.
train_iter, test_iter = load_data_fashion_mnist(batch_size=32, resize=64)
for X, y in train_iter:
    # Show shape and dtype of the first training mini-batch only: 32 samples
    # per batch, single-channel images, 64 x 64 pixels.
    print(X.shape, X.dtype, y.shape, y.dtype)
    break

torch.Size([32, 1, 64, 64]) torch.float32 torch.Size([32]) torch.int64

# Exercise: does reducing batch_size to 1 affect the reading performance?
# The loader therefore uses batch_size=1 here, matching the question above and
# the observation recorded below.
train_iter = data.DataLoader(mnist_train, batch_size=1, shuffle=True,
                             num_workers=get_dataloader_workers())
# Time one complete pass over the training data.
timer = d2l.Timer()
for X, y in train_iter:
    continue
# Same format as the earlier timing cell (no padding space before the number).
f'{timer.stop():.2f} sec'
# Recorded observation: batch_size=1 severely hurts the reading performance.
# Increasing batch_size instead: 1024 and 2056 performed almost identically.