3.3 Concise Implementation of Linear Regression

import numpy as np
import torch
from torch.utils import data  # newly imported: the data module with dataset/dataloader utilities
from d2l import torch as d2l

3.3.1 Generating the Dataset

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)  # generate 1000 examples from the true weights and bias

3.3.2 Reading the Dataset

# We call the framework's existing API to read data, passing features and labels as parameters
# and specifying batch_size when constructing the data iterator.
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """Construct a PyTorch data iterator."""
    # is_train indicates whether the data should be shuffled on every pass
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)
batch_size = 10
data_iter = load_array((features, labels), batch_size)
# In other words, we no longer need to define data_iter ourselves; we simply call load_array to load the dataset.
# Then we use iter to construct a Python iterator and next to fetch the first minibatch.
next(iter(data_iter))
# So DataLoader essentially just packages the dataset into minibatches; there is nothing more to it.
[tensor([[ 0.7377,  0.3250],
         [ 1.1965, -1.4244],
         [ 0.9153, -0.2110],
         [-1.3975,  2.0740],
         [-2.0558,  0.2441],
         [ 0.1011, -0.1195],
         [ 0.8557, -0.6434],
         [ 1.0096,  1.4099],
         [-0.7929,  0.9482],
         [ 1.3263, -1.5974]]),
 tensor([[ 4.5658],
         [11.4182],
         [ 6.7555],
         [-5.6483],
         [-0.7439],
         [ 4.7911],
         [ 8.1009],
         [ 1.4290],
         [-0.6099],
         [12.2757]])]
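Since shuffling is only needed during training, is_train=False gives a deterministic iterator, e.g. for evaluation. A minimal sketch, reusing the same features and labels purely for illustration (eval_iter is just an illustrative name):

eval_iter = load_array((features, labels), batch_size, is_train=False)
# with shuffle disabled, the first minibatch is always the first 10 examples in their original order
next(iter(eval_iter))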

3.3.3 Defining the Model

from torch import nn  # nn is short for neural networks

net = nn.Sequential(nn.Linear(2, 1))
net
Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
)

3.3.4 Initializing Model Parameters

# First we need to initialize the model parameters, i.e., the weight and the bias. Here we sample each weight
# from a normal distribution with mean 0 and standard deviation 0.01, and set the bias to 0.
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
# parameters are now initialized
tensor([0.])
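Equivalently, the helpers in torch.nn.init can initialize the parameters in place. A minimal sketch of the same initialization as above:

nn.init.normal_(net[0].weight, mean=0.0, std=0.01)  # weight ~ N(0, 0.01^2), same as the in-place normal_ call
nn.init.zeros_(net[0].bias)                          # bias = 0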

3.3.5 Defining the Loss Function

# We use the MSELoss class to compute the mean squared error, also known as the squared L2 norm loss.
# By default it returns the loss averaged over all examples.
loss = nn.MSELoss()
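The averaging behavior is controlled by the reduction argument of MSELoss. A short sketch of the alternative (only for illustration, the default is kept in this section):

loss_sum = nn.MSELoss(reduction='sum')
# With the summed loss the gradient is batch_size times larger than with the default mean,
# so the learning rate would typically be divided by batch_size to compensate.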

3.3.6 Defining the Optimization Method

# The optim module of PyTorch implements many variants of minibatch stochastic gradient descent (SGD).
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
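For reference, since MSELoss already averages over the minibatch, each call to trainer.step() performs the minibatch SGD update, with $\eta$ the learning rate (lr = 0.03) and $\mathcal{B}$ the current minibatch:

$$(\mathbf{w}, b) \leftarrow (\mathbf{w}, b) - \eta \, \partial_{(\mathbf{w}, b)} \frac{1}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} l^{(i)}(\mathbf{w}, b)$$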

3.3.7 Training

num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)  # net(X) produces the predictions, from which we compute the loss l
        trainer.zero_grad()  # clear the gradients left over from the previous step
        l.backward()         # backpropagation: compute the gradients w.r.t. both parameters so the loss can be minimized
        trainer.step()       # update the parameters
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')  # report the loss over the full dataset after each epoch
epoch 1, loss 0.000159
epoch 2, loss 0.000099
epoch 3, loss 0.000101
# Below we compare the true parameters used to generate the dataset with the model parameters learned from
# finite data. To access the parameters, we first pick the desired layer from net, then read that layer's
# weight and bias.
w = net[0].weight.data
print('estimation error of w:', true_w - w.reshape(true_w.shape))  # true value minus the learned value
b = net[0].bias.data
print('estimation error of b:', true_b - b)
estimation error of w: tensor([-0.0011,  0.0004])
estimation error of b:  tensor([0.0007])
# To replace the original loss with the Huber loss, we can use the function built into torch.nn
loss1 = torch.nn.SmoothL1Loss()
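Assuming a PyTorch version recent enough to support it, SmoothL1Loss also exposes the transition point between the two regimes as a parameter, playing the same role as sigma in the hand-written version below:

loss1 = torch.nn.SmoothL1Loss(beta=1.0)  # beta is the threshold between the L1 and L2 regimes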
# We can also write it ourselves, as follows
import torch.nn as nn
import torch.nn.functional as F

class HuberLoss(nn.Module):
    def __init__(self, sigma):
        super(HuberLoss, self).__init__()
        self.sigma = sigma

    def forward(self, y, y_hat):
        # Note: the piecewise choice here is made on the mean absolute error of the whole batch,
        # rather than elementwise as in the textbook definition of the Huber loss.
        if F.l1_loss(y, y_hat) > self.sigma:
            loss = F.l1_loss(y, y_hat) - self.sigma / 2
        else:
            loss = (1 / (2 * self.sigma)) * F.mse_loss(y, y_hat)
        return loss
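A brief usage sketch of the class above, with sigma = 1.0 chosen arbitrarily for illustration:

huber = HuberLoss(sigma=1.0)
l = huber(net(features), labels)  # a scalar loss, usable with l.backward() exactly as before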

Let us briefly explain the mathematical formula of the Huber loss.
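Matching the implementation above, with $\sigma$ the threshold between the two regimes, the loss as a function of the error is

$$
l_\sigma(y, \hat{y}) =
\begin{cases}
|y - \hat{y}| - \dfrac{\sigma}{2}, & \text{if } |y - \hat{y}| > \sigma,\\[4pt]
\dfrac{1}{2\sigma}\,(y - \hat{y})^2, & \text{otherwise.}
\end{cases}
$$

For small errors it behaves like the squared loss (smooth near zero), while for large errors it grows only linearly, which makes it less sensitive to outliers; the two branches meet with matching value and slope at $|y - \hat{y}| = \sigma$.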

# How to access the gradients of the linear regression model
net[0].weight.grad
# There is only one layer here. For a multi-layer network, we can assign each layer to an attribute such as
# self.xxx = <layer> inside the module, and then read the gradients from outside via net.xxx.weight.grad
# and net.xxx.bias.grad.
tensor([[0.0014, 0.0027]])
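A minimal sketch of that multi-layer case (the layer names hidden and out are illustrative only, not from the original text):

class TwoLayerNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(2, 4)  # named layers so their gradients can be reached from outside
        self.out = nn.Linear(4, 1)

    def forward(self, X):
        return self.out(torch.relu(self.hidden(X)))

net2 = TwoLayerNet()
l = nn.MSELoss()(net2(features), labels)
l.backward()
net2.hidden.weight.grad, net2.out.bias.grad  # gradients are accessible through the attribute names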