Building on yesterday's code, this version simplifies things further by using PyTorch's own functions and modules (nn.Module, nn.Linear, the optim package, and nn.MSELoss):
import torch
import torch.nn as nn
import time

time_start = time.time()

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in, requires_grad=True)  # default is False
y = torch.randn(N, D_out, requires_grad=True)

class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        # define model architecture
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H, bias=False)
        self.linear2 = torch.nn.Linear(H, D_out, bias=False)

    def forward(self, x):
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred

# Equivalent model with torch.nn.Sequential:
# model = torch.nn.Sequential(
#     torch.nn.Linear(D_in, H),
#     torch.nn.ReLU(),
#     torch.nn.Linear(H, D_out),
# )
model = TwoLayerNet(D_in, H, D_out)

# Move model and data to the GPU (if available) before building the optimizer
if torch.cuda.is_available():
    model = model.cuda()
    x = x.cuda()
    y = y.cuda()

learning_rate = 1e-4
# torch.nn.init.normal_(model[0].weight)
# torch.nn.init.normal_(model[2].weight)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_func = nn.MSELoss(reduction='sum')

for t in range(1000):
    # forward pass (output is already on the right device; no extra .cuda() needed)
    y_pred = model(x)
    # compute loss (sum of squared errors)
    loss = loss_func(y_pred, y)
    print(t, loss.item())
    # backward pass: compute gradients
    optimizer.zero_grad()
    loss.backward()
    # update weights
    optimizer.step()

time_end = time.time()
print('totally cost', time_end - time_start)
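For comparison, here is a minimal, self-contained sketch of roughly what yesterday's manual version looked like, i.e. the work that loss.backward() plus optimizer.step() now hides (assuming plain SGD updates; Adam's actual update rule is more involved):

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
learning_rate = 1e-4
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

for t in range(1000):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)  # forward: linear -> ReLU -> linear
    loss = (y_pred - y).pow(2).sum()       # same as nn.MSELoss(reduction='sum')
    loss.backward()                        # autograd fills w1.grad and w2.grad
    with torch.no_grad():
        w1 -= learning_rate * w1.grad      # what optimizer.step() does (plain SGD)
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()                    # what optimizer.zero_grad() does
        w2.grad.zero_()

The nn/optim version above performs exactly these steps, but with the update rule and gradient bookkeeping factored out into library code.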
I also set up a simple game (Fizz Buzz) and trained a network on the data without ever telling it the rules.
In my tests, training for 100 epochs gave 61% accuracy, 1000 epochs gave 94%, and 10000 epochs gave 98%.
import numpy as np
import torch

def fizz_buzz_encode(i):
    if i % 15 == 0:
        return 3
    elif i % 5 == 0:
        return 2
    elif i % 3 == 0:
        return 1
    else:
        return 0

def fizz_buzz_decode(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]

def helper(i):
    print(fizz_buzz_decode(i, fizz_buzz_encode(i)))

NUM_DIGITS = 10

# Represent each input by an array of its binary digits.
def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)][::-1])

# Train on 101..1023; hold out 1..100 for testing
trX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DIGITS)])
trY = torch.LongTensor([fizz_buzz_encode(i) for i in range(101, 2 ** NUM_DIGITS)])

# Define the model
NUM_HIDDEN = 100
model = torch.nn.Sequential(
    torch.nn.Linear(NUM_DIGITS, NUM_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(NUM_HIDDEN, 4)
)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

# Start training
BATCH_SIZE = 128
if torch.cuda.is_available():
    model = model.cuda()
    trX = trX.cuda()
    trY = trY.cuda()

for epoch in range(10000):
    for start in range(0, len(trX), BATCH_SIZE):
        end = start + BATCH_SIZE
        # batches are slices of trX/trY, so they are already on the right device
        batchX = trX[start:end]
        batchY = trY[start:end]
        y_pred = model(batchX)
        loss = loss_fn(y_pred, batchY)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Find loss on training data
    loss = loss_fn(model(trX), trY).item()
    print('Epoch:', epoch, 'Loss:', loss)

# Output now: predict on 1..100, which the model has never seen
testX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(1, 101)])
if torch.cuda.is_available():
    testX = testX.cuda()
with torch.no_grad():
    testY = model(testX)
predictions = zip(range(1, 101), testY.max(1)[1].tolist())
print([fizz_buzz_decode(i, x) for (i, x) in predictions])
# Count how many of the 100 predictions match the true Fizz Buzz labels
print(np.sum(testY.cpu().max(1)[1].numpy() == np.array([fizz_buzz_encode(i) for i in range(1, 101)])))
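As a quick sanity check of the helpers, and to turn the final matched count into the accuracy percentages quoted above, something like this can be run after the script (a sketch; the variable name correct is mine, not part of the original):

# Sanity-check the helpers (labels: 0 = the number itself, 1 = fizz, 2 = buzz, 3 = fizzbuzz)
print(fizz_buzz_encode(15))          # 3
print(fizz_buzz_decode(15, 3))       # 'fizzbuzz'
helper(15)                           # prints 'fizzbuzz'
print(binary_encode(6, NUM_DIGITS))  # [0 0 0 0 0 0 0 1 1 0]  (6 as 10 binary digits)

# Over the 100 test numbers the matched count is already a percentage (e.g. 98 -> 98%)
correct = np.sum(testY.cpu().max(1)[1].numpy()
                 == np.array([fizz_buzz_encode(i) for i in range(1, 101)]))
print('accuracy: %d%%' % correct)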
