PyTorch 中 GELU 的实现:下面比较 nn.GELU(精确的 erf 实现)与两种常见近似。
近似计算 1(sigmoid 近似,第 2 个 print):快,但不精确
近似计算 2(tanh 近似,第 3 个 print):精确,但不快
# Demo: compare PyTorch's exact GELU against two well-known approximations.
# GELU(x) = x * Phi(x), where Phi is the standard normal CDF.
import math

import torch
from torch import nn

i = torch.randn(20)  # random sample input — printed values differ per run
c1 = nn.GELU()       # exact (erf-based) GELU, the reference implementation
c2 = nn.Sigmoid()
c3 = nn.Tanh()
print(c1(i))                     # reference: nn.GELU
print(i * c2(1.702 * i))         # sigmoid approximation: fast, least accurate
# tanh approximation (used by GPT-2 / BERT); 0.7978845608 ≈ sqrt(2/pi)
print(0.5 * i * (1 + c3(i * 0.7978845608 * (1 + 0.044715 * i * i))))
# erf formula written out explicitly — mathematically identical to c1(i)
print(i * 0.5 * (1.0 + torch.erf(i / math.sqrt(2.0))))
tensor([-0.1466, 0.2441, 1.7839, 1.9687, 0.4893, -0.1635, -0.1200, -0.0208,
-0.0149, -0.1698, -0.0576, -0.1675, 0.1781, 1.6642, 0.1036, -0.1044,
1.0159, 0.2654, 0.9210, 0.1539])
tensor([-0.1425, 0.2472, 1.7674, 1.9497, 0.4954, -0.1582, -0.1176, -0.0207,
-0.0342, -0.1635, -0.0745, -0.1615, 0.1801, 1.6499, 0.1045, -0.1116,
1.0172, 0.2689, 0.9241, 0.1555])
tensor([-0.1466, 0.2441, 1.7839, 1.9688, 0.4892, -0.1636, -0.1200, -0.0208,
-0.0144, -0.1699, -0.0576, -0.1676, 0.1781, 1.6641, 0.1036, -0.1046,
1.0157, 0.2654, 0.9208, 0.1539])
tensor([-0.1466, 0.2441, 1.7839, 1.9687, 0.4893, -0.1635, -0.1200, -0.0208,
-0.0149, -0.1698, -0.0576, -0.1675, 0.1781, 1.6642, 0.1036, -0.1044,
1.0159, 0.2654, 0.9210, 0.1539])