The explanations are in the code comments below.

    # simple notes on learning pytorch
    import torch
    import numpy

    '''
    # create tensors
    x = torch.empty(5, 3)
    y = torch.rand(5, 3)
    z = torch.zeros(5, 3)
    print(y)
    print(x)
    print(z)
    print(x.dtype)
    print(z.dtype)
    a = torch.tensor([5, 5, 3])
    print(a)
    b = torch.tensor([6, 6, 8])
    print(b)

    # basic operations
    b = a + b
    print(b)
    b = torch.add(a, b)
    print(b)
    # special in-place version: the trailing underscore mutates b
    b.add_(a)
    print(b)

    # reshape with view
    test = torch.rand(4, 2)
    print(test)
    test = test.view(8)
    print(test)
    test = torch.rand(1)
    print(test)
    test2 = test.item()
    print(test2)

    # conversion between torch and numpy
    # they share the underlying data
    a = torch.ones(5)
    print(a)
    b = a.numpy()
    print(b)
    b[1] = 2
    print(a)
    print(b)
    a = numpy.ones(5)
    b = torch.from_numpy(a)
    print(a)
    print(b)
    # if you have a GPU you can move tensors onto it
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print("yes, I have a GPU")
        y = torch.ones_like(b, device=device)
        b = b.to(device)
    # warming up:
    # build a simple two-layer neural net by hand (using numpy)
    N, D_in, H, D_out = 64, 1000, 100, 10
    x = numpy.random.randn(N, D_in)
    y = numpy.random.randn(N, D_out)
    w1 = numpy.random.randn(D_in, H)
    w2 = numpy.random.randn(H, D_out)
    learning_rate = 1e-6
    for t in range(500):
        # forward pass
        h = x.dot(w1)
        h_relu = numpy.maximum(h, 0)
        y_pred = h_relu.dot(w2)
        # compute the loss (squared error)
        loss = numpy.square(y_pred - y).sum()
        print(t, loss)
        # backward pass: compute the gradients by hand
        grad_y_pred = 2.0 * (y_pred - y)
        grad_w2 = h_relu.T.dot(grad_y_pred)
        grad_h_relu = grad_y_pred.dot(w2.T)
        grad_h = grad_h_relu.copy()
        grad_h[h < 0] = 0
        grad_w1 = x.T.dot(grad_h)
        # update weights
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
    # now use torch tensors to do the same job
    N, D_in, H, D_out = 64, 1000, 100, 10
    x = torch.randn(N, D_in)
    y = torch.randn(N, D_out)
    w1 = torch.randn(D_in, H)
    w2 = torch.randn(H, D_out)
    learning_rate = 1e-6
    for t in range(500):
        # forward pass
        h = x.mm(w1)
        h_relu = h.clamp(min=0)
        y_pred = h_relu.mm(w2)
        # compute the loss (squared error)
        loss = (y_pred - y).pow(2).sum().item()
        print(t, loss)
        # backward pass: compute the gradients by hand
        grad_y_pred = 2.0 * (y_pred - y)
        grad_w2 = h_relu.t().mm(grad_y_pred)
        grad_h_relu = grad_y_pred.mm(w2.t())
        grad_h = grad_h_relu.clone()
        grad_h[h < 0] = 0
        grad_w1 = x.t().mm(grad_h)
        # update weights
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
    # final version, using autograd for the backward pass
    N, D_in, H, D_out = 64, 1000, 100, 10
    x = torch.randn(N, D_in, requires_grad=True)   # default is False (inputs/targets do not actually need gradients)
    y = torch.randn(N, D_out, requires_grad=True)
    w1 = torch.randn(D_in, H, requires_grad=True)
    w2 = torch.randn(H, D_out, requires_grad=True)
    learning_rate = 1e-6
    for t in range(500):
        # forward pass
        y_pred = x.mm(w1).clamp(min=0).mm(w2)
        # compute the loss (squared error)
        loss = (y_pred - y).pow(2).sum()
        loss_num = loss.item()
        print(t, loss_num)
        # backward pass: autograd computes the gradients
        loss.backward()
        # update weights without tracking the updates in the graph
        with torch.no_grad():
            w1 -= learning_rate * w1.grad
            w2 -= learning_rate * w2.grad
            w1.grad.zero_()
            w2.grad.zero_()
    '''
    import torch.nn as nn
    import time

    time_start = time.time()
    N, D_in, H, D_out = 64, 1000, 100, 10
    x = torch.randn(N, D_in, requires_grad=True)   # default is False (not actually needed for inputs/targets)
    y = torch.randn(N, D_out, requires_grad=True)
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )
    if torch.cuda.is_available():
        model = model.cuda()
        x = x.cuda()
        y = y.cuda()
    loss_func = nn.MSELoss(reduction='sum')
    learning_rate = 1e-6
    for t in range(20000):
        # forward pass
        y_pred = model(x)
        # compute the loss (squared error)
        loss = loss_func(y_pred, y)
        # loss_num = loss.item()
        print(t)
        # backward pass: compute the gradients
        loss.backward()
        # update weights
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad
            model.zero_grad()
    time_end = time.time()
    print('totally cost', time_end - time_start)
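
The loop above updates every parameter by hand inside torch.no_grad(). That update is usually delegated to torch.optim; below is a minimal sketch of the same training loop driven by torch.optim.SGD. The learning rate of 1e-4 is an assumed value that tends to suit the default nn.Linear initialization, not the 1e-6 used in the run above.

    # sketch: same model, but letting torch.optim handle the update step
    import torch
    import torch.nn as nn

    N, D_in, H, D_out = 64, 1000, 100, 10
    x = torch.randn(N, D_in)
    y = torch.randn(N, D_out)
    model = nn.Sequential(nn.Linear(D_in, H), nn.ReLU(), nn.Linear(H, D_out))
    loss_func = nn.MSELoss(reduction='sum')
    # assumed value: 1e-4 is a more usual step size here than 1e-6
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
    for t in range(500):
        y_pred = model(x)
        loss = loss_func(y_pred, y)
        optimizer.zero_grad()   # clear old gradients
        loss.backward()         # compute new gradients
        optimizer.step()        # update all parameters

Here optimizer.zero_grad(), loss.backward() and optimizer.step() replace the manual loop over model.parameters().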

Finally, I ran it on the GPU and on the CPU for comparison.
[screenshots: timing output of the two runs]

This raised a couple of questions:

1. Running on the CPU takes only about half as long as running on the GPU, so the CPU is actually faster (see the timing sketch after this list).
2. After switching to the unified nn.Sequential model, convergence is actually worse and the loss will not come down.
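
On the first question: a model this small spends much of its time on Python overhead, data transfer and kernel launches rather than on matrix math, so the GPU is not guaranteed to win. Also, CUDA operations run asynchronously, so it is worth calling torch.cuda.synchronize() before reading the clock. A minimal sketch of the timing, assuming the model, x, y, loss_func and learning_rate defined above have already been moved to a CUDA device:

    import time
    import torch

    torch.cuda.synchronize()   # make sure pending GPU work is finished before starting the clock
    time_start = time.time()
    for t in range(20000):
        y_pred = model(x)
        loss = loss_func(y_pred, y)
        loss.backward()
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad
            model.zero_grad()
    torch.cuda.synchronize()   # wait for the GPU to finish before stopping the clock
    time_end = time.time()
    print('totally cost', time_end - time_start)

On the second question, one thing worth checking is the step size: with the default nn.Linear initialization, updates of size 1e-6 times the gradient are tiny, so trying a larger learning rate (or an optimizer, as sketched earlier) is a reasonable first experiment.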