1. Define the network net(X)
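The training code below calls the model through `net = linreg`, but `linreg` itself is not shown in this section. A minimal sketch of what it might look like, assuming `X` is a feature matrix, `w` a weight column vector, and `b` a scalar bias:
```python
import torch

def linreg(X, w, b):
    """Linear regression model: return the predictions Xw + b."""
    return torch.matmul(X, w) + b
```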
2. Define the loss function l = loss(net(X), Y) over the dataset (X, Y)
```python
def squared_loss(y_hat, y):  #@save
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
```
3. Define the optimization algorithm
```python
def sgd(params, lr, batch_size):  #@save
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
```
4. Training
- Initialize the parameters (see the sketch after this list)
- Repeat until done:
   - Compute the gradient
   - Update the parameters
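The training loop below also uses `batch_size`, `features`, `labels`, the parameters `w`, `b`, and the minibatch generator `data_iter`, none of which are defined in this section. A minimal sketch of what they might look like, assuming `features`/`labels` are the full dataset tensors and the model has two input features:
```python
import random
import torch

batch_size = 10  # assumed minibatch size

# Initialize the parameters: small random weights, zero bias, both tracking gradients
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

def data_iter(batch_size, features, labels):
    """Yield random minibatches of (features, labels) of size batch_size."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # visit the examples in random order
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(
            indices[i: min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
```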
```python
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # minibatch loss for X and y
        # Because `l` has shape (`batch_size`, 1) rather than being a scalar,
        # sum all of its elements and compute the gradients w.r.t. [`w`, `b`]
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # update the parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
```
## Concise Implementation
Define the network
```python
# `nn` is an abbreviation for neural networks
from torch import nn

net = nn.Sequential(nn.Linear(2, 1))
```
Initialize the parameters
```python
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
```
Define the loss function
```python
loss = nn.MSELoss()
```
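By default, `nn.MSELoss()` averages the squared errors over all elements, so the value printed in the training loop below is a mean loss.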
Define the optimization algorithm
```python
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
```
Training
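The loop below iterates directly over `data_iter`, which in this concise version is assumed to be a PyTorch `DataLoader` built from the same `features` and `labels` as before (the batch size of 10 is an assumption). A minimal sketch:
```python
from torch.utils import data

batch_size = 10  # assumed minibatch size
# Wrap the dataset tensors and shuffle them into minibatches each epoch
dataset = data.TensorDataset(features, labels)
data_iter = data.DataLoader(dataset, batch_size, shuffle=True)
```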
```python
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()  # clear the gradients
        l.backward()         # compute the gradients
        trainer.step()       # take one optimization step
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
```