x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])
# Arithmetic operators on tensors of the same shape work elementwise: each
# entry of the result is computed from the entry of x and the entry of y at
# the same position.
x + y, x - y, x * y, x / y, x ** y
# Build a 3x1 column tensor and a 1x2 row tensor; their shapes deliberately
# do not match.
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
a, b
(tensor([[0],
[1],
[2]]),
tensor([[0, 1]]))
1 2 3 4
# The shapes differ, (3, 1) versus (1, 2), so both tensors are broadcast to a
# common (3, 2) shape before the elementwise addition: a is replicated along
# the columns and b is replicated along the rows.
a + b
before = id(Y)
Y = Y + X
# id() returns the identity of the object a name refers to (in CPython, its
# memory address). Y = Y + X builds a new tensor for the result and rebinds
# the name Y to it, so the identity differs from the one saved in `before`.
id(Y) == before
False
1 2 3 4 5 6
# Binding a name to a freshly allocated result on every update is wasteful;
# write the result into an existing tensor instead.
Z = torch.zeros_like(Y)  # same shape as Y, with every element set to 0
print('id(Z):', id(Z))
Z[:] = X + Y  # slice assignment stores the result in the existing Z
print('id(Z):', id(Z))
# Both prints show the same id: assigning through Z[:] keeps Z bound to the
# same object instead of rebinding the name to a new one.
id(Z): 4460493216
id(Z): 4460493216
1 2 3 4
before = id(X)
# The augmented assignment updates X in place, so X stays bound to the same
# object and no new tensor is bound to the name.
X += Y
id(X) == before
True
2.1.6 converting to other Python objects
1 2 3 4
# X.numpy() returns an ndarray that shares its underlying memory with X
# (assuming X is a CPU tensor; confirm where X is created).
A = X.numpy()
# NOTE: torch.tensor(A) copies the data, so B does NOT share memory with A.
# Use torch.from_numpy(A) instead if B should share memory with the array.
B = torch.tensor(A)
A, B, type(A), type(B)
a = torch.tensor([3.5])
# A one-element tensor converts to a Python scalar with item() or with the
# built-in float() / int(); int() drops the fractional part (3.5 -> 3).
a, a.item(), float(a), int(a)
(tensor([3.5000]), 3.5, 3.5, 3)
2.2 data preprocessing
2.2.1 reading datasets
1 2 3 4 5 6 7 8 9 10 11 12
import os
# exist_ok=True: create the directory when it is missing and do not raise
# when it already exists.
os.makedirs(os.path.join('.', 'data'), exist_ok=True)
data_file = os.path.join('.', 'data', 'house_tiny.csv')
# The with-block closes the file on exit, so no explicit f.close() is needed.
# Fields are written with no whitespace around the commas: with a space after
# each comma, pandas would keep ' NA' as a literal string rather than a
# missing value and the column names would start with a space.
with open(data_file, 'w') as f:
    f.write('''NumRooms,RoofType,Price
NA,NA,127500
2,NA,106000
4,Slate,178100
NA,NA,140000''')
1 2 3 4
import pandas as pd

# Load the CSV file at data_file into a DataFrame and display the table.
data = pd.read_csv(data_file)
print(data)
NumRooms RoofType Price
0 NA NA 127500
1 2 NA 106000
2 4 Slate 178100
3 NA NA 140000
2.2.2 data preparation
1 2 3 4 5 6 7
# iloc selects rows and columns by integer position. The slice 0:2 excludes
# its end, so the inputs are columns 0 and 1 and the target is column 2.
inputs, targets = data.iloc[:, 0:2], data.iloc[:, 2]
# get_dummies one-hot encodes the categorical columns. dummy_na=True adds an
# extra indicator column for missing values, so NaN is treated as a category
# of its own alongside the observed ones.
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)