import math

# Draw one scalar input per sample from a normal distribution and store the
# samples as a column vector of shape (n_train + n_test, 1).
# NOTE(review): n_train, n_test and np are expected to be defined/imported
# earlier in the file -- they are not visible in this section.
features = np.random.normal(size=(n_train + n_test, 1))
# Randomly permute the samples in place (rows are reordered).
np.random.shuffle(features)
# Rescale every column of poly_features by the factorial of its column index.
# math.gamma(n) == (n - 1)!, hence math.gamma(i + 1) == i!, so column i is
# divided by i!.
# NOTE(review): the original note says there are 20 columns (max_degree == 20)
# and that column i holds x**i -- confirm against where poly_features is built.
for i in range(max_degree):
    poly_features[:, i] /= math.gamma(i + 1)

# labels has shape (n_train + n_test,): a 2-D array dotted with a 1-D array,
# i.e. every row of poly_features is dotted with true_w, which multiplies each
# polynomial term by its coefficient and sums the result.
labels = np.dot(poly_features, true_w)
import torch

# Convert the NumPy ndarrays to float32 tensors. The conversion is applied to
# each array as-is, so the number of dimensions does not change.
#   true_w        -- coefficient vector of the polynomial
#   features      -- 2-D column vector holding the sampled x values
#   poly_features -- powers of each x, one power per column (0 through 19)
#   labels        -- the polynomial evaluated at each x (a 1-D vector)
true_w, features, poly_features, labels = [
    torch.tensor(x, dtype=torch.float32)
    for x in [true_w, features, poly_features, labels]
]

# Notebook-style preview of the first two samples (a no-op when run as a script).
true_w, features[:2], poly_features[:2, :], labels[:2]
def evaluate_loss(net, data_iter, loss):  #@save
    """Evaluate the loss of a model on the given dataset.

    Args:
        net: Callable model; ``net(X)`` produces the predictions for a batch.
        data_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(out, y)`` whose result supports ``.sum()`` and
            ``.numel()`` (presumably an unreduced, per-element loss tensor --
            confirm against the caller).

    Returns:
        The accumulated loss sum divided by the accumulated number of loss
        elements over all batches.
    """
    # Two running totals: (sum of losses, number of loss elements).
    metric = d2l.Accumulator(2)
    for X, y in data_iter:
        out = net(X)
        # Align the label shape with the model output before comparing them.
        y = y.reshape(out.shape)
        l = loss(out, y)
        metric.add(l.sum(), l.numel())
    return metric[0] / metric[1]
# Fit using the first four polynomial feature columns: 1, x, x^2/2, x^3/6.
# Rows [:n_train] form the training split; the remaining rows form the test
# split.
train(poly_features[:n_train, :4], poly_features[n_train:, :4],
      labels[:n_train], labels[n_train:])
# Fit using only the first two feature columns, 1 and x, i.e. a linear
# function of x. Per the original note, fitting the training set with a
# first-degree function leaves a large error at test time.
train(poly_features[:n_train, :2], poly_features[n_train:, :2],
      labels[:n_train], labels[n_train:])
weight: [[3.8133729 3.794563 ]]
5. High-order polynomial fitting (overfitting)
1 2 3 4
# Fit using every polynomial feature column. Per the original note, the more
# complex model lowers the training loss while the test loss stays high,
# which indicates overfitting.
train(poly_features[:n_train, :], poly_features[n_train:, :],
      labels[:n_train], labels[n_train:], num_epochs=1500)