[8.4 Recurrent Neural Networks - Figure 2]

Implementation

Load a novel (H. G. Wells' The Time Machine)

%matplotlib inline
import math
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

One-hot encoding. The vocabulary size is 28.
Return the one-hot encodings of indices 0 and 2:

F.one_hot(torch.tensor([0, 2]), len(vocab))

tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])
In every mini-batch, `Y` is `X` shifted left by one time step: the label at each position is the character that follows the corresponding input character.

for X, Y in train_iter:
    print('X: ', X, '\nY:', Y)
X: tensor([[13,  2,  1,  ...,  1,  3, 10],
        [ 1,  3,  9,  ...,  8,  2, 11],
        [ 1, 12,  2,  ...,  4, 25,  5],
        ...,
        [ 6, 26, 14,  ...,  3, 21,  2],
        [ 3,  4, 12,  ...,  1, 21,  2],
        [ 9,  4,  6,  ..., 11,  1,  8]])
Y: tensor([[ 2,  1, 13,  ...,  3, 10,  4],
        [ 3,  9,  2,  ...,  2, 11,  1],
        [12,  2,  4,  ..., 25,  5, 12],
        ...,
        [26, 14, 10,  ..., 21,  2, 16],
        [ 4, 12, 12,  ..., 21,  2,  1],
        [ 4,  6,  5,  ...,  1,  8,  4]])
X: tensor([[ 4, 22,  2,  ...,  5,  2,  6],
        [ 1,  5,  6,  ...,  1,  9,  5],
        [12, 19,  4,  ...,  9,  5,  8],
        ...,
        [16,  7, 10,  ...,  1,  3,  9],
        [ 1,  2, 24,  ...,  6, 12,  2],
        [ 4,  3,  1,  ...,  1,  3,  9]])
Y: tensor([[22,  2, 12,  ...,  2,  6,  3],
        [ 5,  6,  1,  ...,  9,  5,  8],
        [19,  4, 11,  ...,  5,  8,  1],
        ...,
        [ 7, 10,  2,  ...,  3,  9,  2],
        [ 2, 24, 20,  ..., 12,  2,  8],
        [ 3,  1, 11,  ...,  3,  9,  2]])
X: tensor([[ 3,  1,  3,  ...,  4,  1, 10],
        [ 8,  1, 20,  ...,  8,  1, 10],
        [ 1,  6,  2,  ...,  1,  9,  5],
        ...,
        [ 2,  1,  3,  ..., 12, 21, 19],
        [ 8,  8,  1,  ..., 20,  3,  2],
        [ 2,  3,  4,  ...,  9,  2,  1]])
Y: tensor([[ 1,  3,  7,  ...,  1, 10,  2],
        [ 1, 20,  4,  ...,  1, 10,  4],
        [ 6,  2, 17,  ...,  9,  5,  8],
        ...,
        [ 1,  3,  5,  ..., 21, 19,  1],
        [ 8,  1,  9,  ...,  3,  2, 11],
        [ 3,  4, 21,  ...,  2,  1, 21]])
X: tensor([[ 2, 15,  7,  ..., 19,  2,  8],
        [ 4,  3,  9,  ..., 20,  7,  6],
        [ 8,  1, 16,  ..., 10,  2, 16],
        ...,
        [ 1,  8,  1,  ...,  9,  2,  1],
        [11,  1,  5,  ...,  3,  9,  5],
        [21, 10,  5,  ...,  9,  2,  1]])
Y: tensor([[15,  7,  6,  ...,  2,  8,  1],
        [ 3,  9,  2,  ...,  7,  6,  1],
        [ 1, 16,  2,  ...,  2, 16, 14],
        ...,
        [ 8,  1,  4,  ...,  2,  1,  3],
        [ 1,  5,  8,  ...,  9,  5,  6],
        [10,  5, 18,  ...,  2,  1, 13]])
X: tensor([[ 1,  8,  9,  ..., 12, 19,  1],
        [ 1,  4,  6,  ...,  2, 10,  1],
        [14, 12, 12,  ...,  7,  6,  2],
        ...,
        [ 3,  5, 13,  ..., 17,  4,  8],
        [ 6, 18,  1,  ..., 18,  7,  6],
        [13,  7, 11,  ..., 11,  7, 25]])
Y: tensor([[ 8,  9,  7,  ..., 19,  1, 20],
        [ 4,  6, 11,  ..., 10,  1, 11],
        [12, 12, 19,  ...,  6,  2,  1],
        ...,
        [ 5, 13,  2,  ...,  4,  8,  1],
        [18,  1,  9,  ...,  7,  6,  4],
        [ 7, 11,  2,  ...,  7, 25,  2]])
X: tensor([[20,  4, 12,  ...,  2, 11,  1],
        [11,  5,  6,  ...,  7,  4, 13],
        [ 1,  7, 10,  ..., 22,  2, 10],
        ...,
        [ 1,  4,  1,  ...,  8, 15,  4],
        [ 4, 12,  1,  ..., 14,  3,  1],
        [ 2,  6,  1,  ..., 11, 12,  2]])
Y: tensor([[ 4, 12,  2,  ..., 11,  1,  3],
        [ 5,  6,  6,  ...,  4, 13,  8],
        [ 7, 10,  1,  ...,  2, 10,  8],
        ...,
        [ 4,  1, 18,  ..., 15,  4, 10],
        [12,  1,  3,  ...,  3,  1,  3],
        [ 6,  1, 15,  ..., 12,  2,  8]])
X: tensor([[ 3,  9,  2,  ...,  8,  7, 16],
        [ 8,  1, 18,  ...,  1,  7, 16],
        [ 8,  4, 12,  ...,  6,  8,  3],
        ...,
        [10, 15,  2,  ...,  4,  6, 11],
        [ 3,  9,  2,  ...,  3,  9,  2],
        [ 8,  3,  5,  ...,  4, 12,  1]])
Y: tensor([[ 9,  2, 16,  ...,  7, 16,  3],
        [ 1, 18, 10,  ...,  7, 16,  1],
        [ 4, 12, 12,  ...,  8,  3,  4],
        ...,
        [15,  2, 12,  ...,  6, 11,  1],
        [ 9,  2,  1,  ...,  9,  2,  1],
        [ 3,  5, 15,  ..., 12,  1,  5]])
X: tensor([[ 3,  1, 10,  ..., 18,  9,  3],
        [ 1, 20, 10,  ...,  6,  1,  3],
        [ 4,  6, 15,  ...,  8,  1, 16],
        ...,
        [ 1, 22,  2,  ..., 22,  7, 10],
        [ 1, 16,  5,  ..., 10, 14, 18],
        [ 5,  6,  1,  ..., 10,  5, 12]])
Y: tensor([[ 1, 10,  4,  ...,  9,  3,  8],
        [20, 10,  2,  ...,  1,  3,  9],
        [ 6, 15,  2,  ...,  1, 16,  7],
        ...,
        [22,  2, 10,  ...,  7, 10, 19],
        [16,  5, 10,  ..., 14, 18,  1],
        [ 6,  1,  8,  ...,  5, 12, 12]])
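The vocabulary's token-to-index mapping (index 0 is reserved for `<unk>`; the remaining characters are sorted by descending frequency):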
print(list(vocab.token_to_idx.items()))
[('<unk>', 0), (' ', 1), ('e', 2), ('t', 3), ('a', 4), ('i', 5), ('n', 6), ('o', 7), ('s', 8), ('h', 9), ('r', 10), ('d', 11), ('l', 12), ('m', 13), ('u', 14), ('c', 15), ('f', 16), ('w', 17), ('g', 18), ('y', 19), ('p', 20), ('b', 21), ('v', 22), ('k', 23), ('x', 24), ('z', 25), ('j', 26), ('q', 27)]
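Recall that each mini-batch we sample has shape (batch size, number of time steps). `one_hot` turns such a mini-batch into a three-dimensional tensor whose last dimension equals the vocabulary size. We transpose the input first, so the result has shape (number of time steps, batch size, vocabulary size); this lets us loop over the outermost dimension one time step at a time when updating the hidden state: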
X = torch.arange(10).reshape((2, 5))
F.one_hot(X.T, 28).shape
torch.Size([5, 2, 28])
F.one_hot(X.T, 28)
tensor([[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]]])

torch.randn(*size, out=None) → Tensor
Returns a tensor of random numbers drawn from the standard normal distribution (mean 0, variance 1, i.e. Gaussian white noise). The shape of the tensor is defined by the argument size.
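A minimal illustration of how `get_params` below scales these samples (the shape (2, 3) is an arbitrary choice):

x = torch.randn(2, 3)  # samples from N(0, 1)
w = x * 0.01           # mean 0, standard deviation 0.01 (variance 1e-4)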

def get_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        """Random values with mean 0 and standard deviation 0.01."""
        return torch.randn(size=shape, device=device) * 0.01

    # Hidden layer parameters
    W_xh = normal((num_inputs, num_hiddens))
    W_hh = normal((num_hiddens, num_hiddens))
    b_h = torch.zeros(num_hiddens, device=device)
    # Output layer parameters
    W_hq = normal((num_hiddens, num_outputs))
    b_q = torch.zeros(num_outputs, device=device)
    # Attach gradients
    params = [W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params
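A quick usage sketch (num_hiddens = 512 is the value used later; d2l.try_gpu() returns a GPU if one is available, otherwise the CPU):

num_hiddens = 512
params = get_params(len(vocab), num_hiddens, d2l.try_gpu())
[p.shape for p in params]
# [torch.Size([28, 512]), torch.Size([512, 512]), torch.Size([512]),
#  torch.Size([512, 28]), torch.Size([28])]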

[8.4 Recurrent Neural Networks - Figure 3: the recurrence implemented below, H_t = tanh(X_t W_xh + H_{t-1} W_hh + b_h), with output O_t = H_t W_hq + b_q]

def init_rnn_state(batch_size, num_hiddens, device):
    # The initial hidden state is all zeros; it is returned as a tuple so the
    # same interface also fits models with more state tensors (e.g. LSTMs)
    return (torch.zeros((batch_size, num_hiddens), device=device), )

def rnn(inputs, state, params):
    # Shape of `inputs`: (`num_steps`, `batch_size`, `vocab_size`)
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    # Shape of `X`: (`batch_size`, `vocab_size`)
    for X in inputs:
        H = torch.tanh(torch.mm(X, W_xh) + torch.mm(H, W_hh) + b_h)
        Y = torch.mm(H, W_hq) + b_q
        outputs.append(Y)
    return torch.cat(outputs, dim=0), (H,)
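Because `torch.cat(outputs, dim=0)` stacks the per-step outputs along the row dimension, the returned output has shape (number of time steps × batch size, vocabulary size), while the returned hidden state `H` keeps shape (batch size, number of hidden units).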

Having defined all the required functions, we next create a class to wrap them and to store the parameters of the recurrent neural network model implemented from scratch.

class RNNModelScratch:
    """A recurrent neural network model implemented from scratch."""
    def __init__(self, vocab_size, num_hiddens, device,
                 get_params, init_state, forward_fn):
        self.vocab_size, self.num_hiddens = vocab_size, num_hiddens
        self.params = get_params(vocab_size, num_hiddens, device)
        self.init_state, self.forward_fn = init_state, forward_fn

    def __call__(self, X, state):
        # Transpose to time-major order, one-hot encode, and cast to float
        X = F.one_hot(X.T, self.vocab_size).type(torch.float32)
        return self.forward_fn(X, state, self.params)

    def begin_state(self, batch_size, device):
        return self.init_state(batch_size, self.num_hiddens, device)
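To check that the shapes work out, a short smoke test (reusing the X of shape (2, 5) defined above; num_hiddens = 512 is an arbitrary choice). The output should have 10 rows (5 time steps × batch size 2) and 28 columns, and the hidden state should have shape (2, 512):

num_hiddens = 512
net = RNNModelScratch(len(vocab), num_hiddens, d2l.try_gpu(), get_params,
                      init_rnn_state, rnn)
state = net.begin_state(X.shape[0], d2l.try_gpu())
Y, new_state = net(X.to(d2l.try_gpu()), state)
Y.shape, len(new_state), new_state[0].shape
# (torch.Size([10, 28]), 1, torch.Size([2, 512]))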