# To examine the details of softmax, we use the Fashion-MNIST dataset from
# Section 3.5 and set the batch size to 256.
import torch
from IPython import display
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Each 28x28 image can be flattened into a vector of length 784.
# Recall that y_j, the probability of class j, is a positive number, and the
# probabilities are normalized to sum to 1. With 10 classes, the output
# dimension is 10, so the weights form a 784x10 matrix and the bias a 1x10 row vector.
num_inputs = 784
num_outputs = 10

# That is, each input x_i is one image, a 1x784 row vector, so a batch of n
# images forms an nx784 input matrix. The weights are 784x10: each row of W
# corresponds to one pixel, and each column within that row holds that pixel's
# weight for a different class. Multiplying an image's 1x784 row vector by W
# yields a 1x10 row vector; adding the 1x10 bias and applying exp followed by
# normalization gives the probability of the image belonging to each class.
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
# We initialize the weights from a normal distribution with mean 0 and standard
# deviation 0.01 (torch.normal takes the standard deviation, not the variance),
# and the bias to 0.
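To make the shape bookkeeping above concrete, here is a quick sanity check. This is my own illustration, not from the book; X_dummy is a hypothetical all-zeros batch standing in for real images.

# Illustrative shape check with a dummy batch of 256 flattened images.
X_dummy = torch.zeros(256, num_inputs)
O = torch.matmul(X_dummy, W) + b  # (256, 784) @ (784, 10) + (10,) -> (256, 10)
print(O.shape)  # torch.Size([256, 10]): one row of 10 class scores per image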
3.6.2 Defining the softmax operation
# The sum function can reduce along a specific dimension.
X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
X.sum(0, keepdim=True), X.sum(1, keepdim=True)
# Summing along dim 0 yields a row vector; summing along dim 1 yields a column vector.
(tensor([[5., 7., 9.]]),
tensor([[ 6.],
[15.]]))
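Combined with broadcasting, these keepdim sums let us normalize each row in a single expression, which is exactly the trick the softmax below relies on. A minimal sketch using the X above:

X / X.sum(1, keepdim=True)  # each row divided by its own sum; every row now sums to 1
# tensor([[0.1667, 0.3333, 0.5000],
#         [0.2667, 0.3333, 0.4000]])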
# Recall the three steps of softmax:
# 1. Exponentiate each element.
# 2. Sum over each row (each row represents one sample).
# 3. Divide each row by its normalizing constant, ensuring the result sums to 1.
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # broadcasting: each row is divided by its row sum
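The cells further down call `net` and `cross_entropy` without these notes ever defining them. To keep the section runnable, here are the book's versions, along with a quick check that softmax behaves as claimed:

# Quick check: softmax outputs are non-negative and each row sums to 1.
X = torch.normal(0, 1, (2, 5))
X_prob = softmax(X)
print(X_prob, X_prob.sum(1))

# The model: flatten each image into a row vector, apply the affine map, then softmax.
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)

# Cross-entropy loss: for each sample, pick out the predicted probability of the
# true class and take its negative log.
def cross_entropy(y_hat, y):
    return -torch.log(y_hat[range(len(y_hat)), y])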
# Classification accuracy is the ratio of correct predictions to the total number
# of predictions. We use argmax to find the index of the largest value in each
# row of y_hat and take that index as the predicted class.
def accuracy(y_hat, y):  #@save
    """Compute the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:  # y_hat is a matrix of per-class scores
        y_hat = y_hat.argmax(axis=1)
    # Cast y_hat to y's dtype before comparing, so the == test is valid; cmp is a
    # tensor of booleans, True where the prediction matches the label.
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())  # count the correct predictions (the 1s)
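The training loop below also uses an `Accumulator` helper and, later, an `evaluate_accuracy` function, neither of which appears in these notes. These should match the book's implementations:

class Accumulator:  #@save
    """Accumulate sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

def evaluate_accuracy(net, data_iter):  #@save
    """Compute the accuracy of a model on a dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # set the model to evaluation mode
    metric = Accumulator(2)  # number of correct predictions, number of predictions
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]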
# Note: updater is a general function that updates the model parameters.
def train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train the model for one epoch."""
    # Set the model to training mode.
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of correct predictions, number of samples.
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Use PyTorch's built-in optimizer and loss function.
            updater.zero_grad()  # clear previously computed gradients
            l.mean().backward()  # compute gradients
            updater.step()       # update the parameters, e.g. by gradient descent
        else:
            # Use a custom-built optimizer and loss function, such as the sgd
            # function defined earlier, which takes batch_size (X.shape[0] below).
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return the training loss and the training accuracy.
    return metric[0] / metric[2], metric[1] / metric[2]
# We define a utility class, Animator, to plot data during training.
class Animator:  #@save
    """Plot data in animation."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g--', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Incrementally plot multiple lines.
        if legend is None:
            legend = []
        d2l.use_svg_display()
        # nrows and ncols determine how many subplots there are; figsize sets the figure size.
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda to capture the axis-configuration arguments.
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts  # fmts sets the line styles

    def add(self, x, y):
        # Add multiple data points to the figure.
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n  # repeat x so that each y value pairs with an x value
        if not self.X:
            self.X = [[] for _ in range(n)]  # one list of points per line
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)  # append each point to its line's list
                self.Y[i].append(b)
        self.axes[0].cla()  # clear the subplot
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)  # redraw each line from its points
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

# Next we implement a training function that trains a model `net` on the
# train_iter dataset. It runs for many epochs (set by num_epochs), and after
# each epoch it evaluates the model on the test_iter dataset. Finally, Animator
# is used to visualize the training progress.
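The notes describe this training function but leave out its code; the next cell calls it as `train_ch3`, whose book version is:

def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  #@save
    """Train a model (Chapter 3)."""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        # Train for one epoch, then measure accuracy on the test set.
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    # Sanity checks on the final metrics.
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc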
# Define the learning rate (the step-size coefficient in gradient descent) and
# the updater.
lr = 0.1

def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)
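For reference, `d2l.sgd` is the minibatch stochastic gradient descent routine defined earlier in the book (Section 3.2); its implementation should look like this:

def sgd(params, lr, batch_size):  #@save
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():  # do not record the update itself in the autograd graph
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()  # clear the gradient for the next step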
# Now we train the model for 10 epochs. Both the number of epochs and the
# learning rate are adjustable: they are hyperparameters.
num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)  # Oh my god it's beautiful!
3.6.7 Prediction
def predict_ch3(net, test_iter, n=6):  #@save
    """Predict labels."""
    for X, y in test_iter:
        break  # break immediately: we only take the first batch for prediction
    trues = d2l.get_fashion_mnist_labels(y)  # convert numeric labels to text labels
    # net returns a probability vector per sample, so argmax(axis=1) over each row
    # gives the index of the most likely class, i.e. the predicted label.
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]  # true and predicted label per image
    d2l.show_images(
        X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])  # show the first n=6 samples

predict_ch3(net, test_iter)
# Given a series of images, we compare their actual labels (first line of text)
# with the model's predictions (second line).