3.3 Concise Implementation of Linear Regression

import numpy as np
import torch
from torch.utils import data  # newly imported: the data module with dataset/dataloader utilities
from d2l import torch as d2l

3.3.1 Generating the Dataset

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)  # generate 1000 examples from the true weights and bias

3.3.2 Reading the Dataset

# We call the framework's existing API to read data, passing features and labels as parameters
# and specifying batch_size when constructing the data iterator.
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """Construct a PyTorch data iterator."""
    # is_train indicates whether the data should be shuffled on every pass
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)
batch_size = 10
data_iter = load_array((features, labels), batch_size)
# In other words, we no longer need to define data_iter ourselves; we simply call load_array to load the dataset.
# Then we use iter to construct a Python iterator and next to fetch the first minibatch.
next(iter(data_iter))
# So DataLoader essentially just packages the dataset into minibatches; there is nothing more to it.
[tensor([[ 0.7377,  0.3250],
         [ 1.1965, -1.4244],
         [ 0.9153, -0.2110],
         [-1.3975,  2.0740],
         [-2.0558,  0.2441],
         [ 0.1011, -0.1195],
         [ 0.8557, -0.6434],
         [ 1.0096,  1.4099],
         [-0.7929,  0.9482],
         [ 1.3263, -1.5974]]),
 tensor([[ 4.5658],
         [11.4182],
         [ 6.7555],
         [-5.6483],
         [-0.7439],
         [ 4.7911],
         [ 8.1009],
         [ 1.4290],
         [-0.6099],
         [12.2757]])]
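Since shuffling is only needed during training, is_train=False gives a deterministic iterator, e.g. for evaluation. A minimal sketch, reusing the same features and labels purely for illustration (eval_iter is just an illustrative name):

eval_iter = load_array((features, labels), batch_size, is_train=False)
# with shuffle disabled, the first minibatch is always the first 10 examples in their original order
next(iter(eval_iter))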

3.3.3 Defining the Model

from torch import nn  # nn is short for neural networks

net = nn.Sequential(nn.Linear(2, 1))
net
Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
)

3.3.4 Initializing Model Parameters

# First we need to initialize the model parameters, i.e., the weight and the bias. Here we sample each weight
# from a normal distribution with mean 0 and standard deviation 0.01, and set the bias to 0.
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
# parameters are now initialized
tensor([0.])
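Equivalently, the helpers in torch.nn.init can initialize the parameters in place. A minimal sketch of the same initialization as above:

nn.init.normal_(net[0].weight, mean=0.0, std=0.01)  # weight ~ N(0, 0.01^2), same as the in-place normal_ call
nn.init.zeros_(net[0].bias)                          # bias = 0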

3.3.5 Defining the Loss Function

# We use the MSELoss class to compute the mean squared error, also known as the squared L2 norm loss.
# By default it returns the loss averaged over all examples.
loss = nn.MSELoss()
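The averaging behavior is controlled by the reduction argument of MSELoss. A short sketch of the alternative (only for illustration, the default is kept in this section):

loss_sum = nn.MSELoss(reduction='sum')
# With the summed loss the gradient is batch_size times larger than with the default mean,
# so the learning rate would typically be divided by batch_size to compensate.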

3.3.6 Defining the Optimization Method

# The optim module of PyTorch implements many variants of minibatch stochastic gradient descent (SGD).
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
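For reference, since MSELoss already averages over the minibatch, each call to trainer.step() performs the minibatch SGD update, with $\eta$ the learning rate (lr = 0.03) and $\mathcal{B}$ the current minibatch:

$$(\mathbf{w}, b) \leftarrow (\mathbf{w}, b) - \eta \, \partial_{(\mathbf{w}, b)} \frac{1}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} l^{(i)}(\mathbf{w}, b)$$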

3.3.7 Training

num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)  # net(X) produces the predictions, from which we compute the loss l
        trainer.zero_grad()  # clear the gradients left over from the previous step
        l.backward()         # backpropagation: compute the gradients w.r.t. both parameters so the loss can be minimized
        trainer.step()       # update the parameters
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')  # report the loss over the full dataset after each epoch
epoch 1, loss 0.000159
epoch 2, loss 0.000099
epoch 3, loss 0.000101
# Below we compare the true parameters used to generate the dataset with the model parameters learned from
# finite data. To access the parameters, we first pick the desired layer from net, then read that layer's
# weight and bias.
w = net[0].weight.data
print('estimation error of w:', true_w - w.reshape(true_w.shape))  # true value minus the learned value
b = net[0].bias.data
print('estimation error of b:', true_b - b)
estimation error of w: tensor([-0.0011,  0.0004])
estimation error of b:  tensor([0.0007])
# To replace the original loss with the Huber loss, we can use the function built into torch.nn
loss1 = torch.nn.SmoothL1Loss()
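Assuming a PyTorch version recent enough to support it, SmoothL1Loss also exposes the transition point between the two regimes as a parameter, playing the same role as sigma in the hand-written version below:

loss1 = torch.nn.SmoothL1Loss(beta=1.0)  # beta is the threshold between the L1 and L2 regimes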
# We can also write it ourselves, as follows
import torch.nn as nn
import torch.nn.functional as F

class HuberLoss(nn.Module):
    def __init__(self, sigma):
        super(HuberLoss, self).__init__()
        self.sigma = sigma

    def forward(self, y, y_hat):
        # Note: the piecewise choice here is made on the mean absolute error of the whole batch,
        # rather than elementwise as in the textbook definition of the Huber loss.
        if F.l1_loss(y, y_hat) > self.sigma:
            loss = F.l1_loss(y, y_hat) - self.sigma / 2
        else:
            loss = (1 / (2 * self.sigma)) * F.mse_loss(y, y_hat)
        return loss
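A brief usage sketch of the class above, with sigma = 1.0 chosen arbitrarily for illustration:

huber = HuberLoss(sigma=1.0)
l = huber(net(features), labels)  # a scalar loss, usable with l.backward() exactly as before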

Let us briefly explain the mathematical formula of the Huber loss.
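Matching the implementation above, with $\sigma$ the threshold between the two regimes, the loss as a function of the error is

$$
l_\sigma(y, \hat{y}) =
\begin{cases}
|y - \hat{y}| - \dfrac{\sigma}{2}, & \text{if } |y - \hat{y}| > \sigma,\\[4pt]
\dfrac{1}{2\sigma}\,(y - \hat{y})^2, & \text{otherwise.}
\end{cases}
$$

For small errors it behaves like the squared loss (smooth near zero), while for large errors it grows only linearly, which makes it less sensitive to outliers; the two branches meet with matching value and slope at $|y - \hat{y}| = \sigma$.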

# How to access the gradients of the linear regression model
net[0].weight.grad
# There is only one layer here. For a multi-layer network, we can assign each layer to an attribute such as
# self.xxx = <layer> inside the module, and then read the gradients from outside via net.xxx.weight.grad
# and net.xxx.bias.grad.
tensor([[0.0014, 0.0027]])
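A minimal sketch of that multi-layer case (the layer names hidden and out are illustrative only, not from the original text):

class TwoLayerNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(2, 4)  # named layers so their gradients can be reached from outside
        self.out = nn.Linear(4, 1)

    def forward(self, X):
        return self.out(torch.relu(self.hidden(X)))

net2 = TwoLayerNet()
l = nn.MSELoss()(net2(features), labels)
l.backward()
net2.hidden.weight.grad, net2.out.bias.grad  # gradients are accessible through the attribute names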