《深度学习之Pytorch实战计算机视觉》阅读笔记 第11章:自动编码器
1. 自动编码器
一种可以进行无监督学习的神经网络模型
- 自动编码器由两部分组成:1)用于核心特征提取的编码部分;2)实现数据重构的解码部分
- 简化模型:
最左侧是用于数据输入的输入层。输入数据经过神经网络的层层传递之后,在中间层得到输入数据的核心特征;然后将这些核心特征传递到一个逆向的神经网络中,核心特征会被解压并重构,最后得到一个和输入数据相近的输出数据。
- 作用:实现输入数据的清洗(如去除噪声),并对数据的某些关键特征进行增强和放大
2. 去除图片马赛克
马赛克图片生成
# Add the "mosaic": standard-normal noise scaled by 0.5 is added to the clean
# batch, and the result is clipped to the [0, 1] range.
noisy_x_train = torch.clamp(
    x_train + 0.5 * torch.randn(x_train.size()),
    min=0.,
    max=1.,
)
模型定义
1)线性变换
# 2) Convolutional transform (list item from the notes; see AutoEncoder_Conv)


# Linear transform
class AutoEncoder_Linear(torch.nn.Module):
    """Fully connected autoencoder for flattened 28 * 28 inputs.

    The encoder compresses 784 features to a 32-dimensional code through
    Linear + ReLU stages (784 -> 128 -> 64 -> 32); the decoder mirrors it
    (32 -> 64 -> 128 -> 784) and ends with a plain Linear layer, so the
    reconstruction is not passed through an activation.
    """

    def __init__(self):
        super(AutoEncoder_Linear, self).__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(28 * 28, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 32),
            torch.nn.ReLU(),
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(32, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 28 * 28),
        )

    def forward(self, input):
        """Encode ``input`` and decode it again.

        Args:
            input: tensor whose last dimension has 28 * 28 features.

        Returns:
            The reconstruction, with the same shape as ``input``.
        """
        output = self.encoder(input)
        output = self.decoder(output)
        return output
# Convolutional transform
class AutoEncoder_Conv(torch.nn.Module):
    """Convolutional autoencoder for single-channel images.

    The encoder applies two Conv2d + ReLU + MaxPool2d stages (1 -> 64 -> 128
    channels); each 2x2 max-pool halves the spatial size. The decoder applies
    two nearest-neighbour Upsample + Conv2d stages (128 -> 64 -> 1 channels);
    each upsample doubles the spatial size, so inputs whose height and width
    are divisible by 4 are reconstructed at their original size.
    """

    def __init__(self):
        super(AutoEncoder_Conv, self).__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Upsample(scale_factor=2, mode="nearest"),
            torch.nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Upsample(scale_factor=2, mode="nearest"),
            torch.nn.Conv2d(64, 1, kernel_size=3, stride=1, padding=1),
        )

    def forward(self, input):
        """Encode ``input`` and decode it again.

        Args:
            input: image batch of shape (N, 1, H, W).

        Returns:
            The reconstructed single-channel image batch.
        """
        output = self.encoder(input)
        output = self.decoder(output)
        return output
加载数据
# Preprocessing: convert images to tensors, then normalise with mean 0.5 and
# std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# MNIST training split (downloaded into ../data when missing) and test split.
dataset_train = datasets.MNIST(
    root="../data", transform=transform, train=True, download=True
)
dataset_test = datasets.MNIST(
    root="../data", transform=transform, train=False
)

# Both loaders yield shuffled mini-batches of 4 samples.
train_load = torch.utils.data.DataLoader(
    dataset=dataset_train, batch_size=4, shuffle=True
)
test_load = torch.utils.data.DataLoader(
    dataset=dataset_test, batch_size=4, shuffle=True
)
训练
# Train the denoising autoencoder: the model receives a noisy batch and the
# loss compares its output with the clean batch.
epoch_n = 10
for epoch in range(epoch_n):
    running_loss = 0.0
    print("Epoch {}/{}".format(epoch, epoch_n))
    print("-" * 20)
    for data in train_load:
        x_train, _ = data  # labels are not needed for reconstruction

        # Generate the noisy input from the *current* batch; previously
        # noisy_x_train was read here without ever being derived from x_train.
        # NOTE(review): the loader normalises with mean 0.5 / std 0.5, so
        # clamping to [0, 1] also clips negative pixel values - kept as in
        # the notes; confirm this is intended.
        noisy_x_train = x_train + 0.5 * torch.randn(x_train.shape)
        noisy_x_train = torch.clamp(noisy_x_train, 0., 1.)

        # Linear model: flatten every image to a 28 * 28 vector.
        # (Plain tensors are used; the Variable wrapper is a no-op on the
        # PyTorch versions that provide loss.item().)
        x_train = x_train.view(-1, 28 * 28)
        noisy_x_train = noisy_x_train.view(-1, 28 * 28)
        # Conv model: skip the flattening above and use instead
        # x_train, noisy_x_train = x_train.cuda(), noisy_x_train.cuda()

        image_pre = model(noisy_x_train)
        loss = loss_fun(image_pre, x_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # NOTE(review): this divides the sum of per-batch losses by the number of
    # samples; if loss_fun already averages over the batch, len(train_load)
    # would be the matching denominator - confirm against loss_fun.
    print("Loss is {:.4f}".format(running_loss / len(dataset_train)))
# Test: show one noisy test batch next to the model's reconstruction of it.
data_loader_test = torch.utils.data.DataLoader(dataset=dataset_test,
                                               batch_size=4,
                                               shuffle=True)

x_test, _ = next(iter(data_loader_test))  # labels are not used
print(x_test)

# Build an image grid from the clean batch and undo the 0.5 / 0.5
# normalisation. The three-element std/mean lists presumably match the
# 3-channel grid returned by make_grid - TODO confirm.
img1 = torchvision.utils.make_grid(x_test)
img1 = img1.numpy().transpose(1, 2, 0)  # CHW -> HWC for matplotlib
std = [0.5, 0.5, 0.5]
mean = [0.5, 0.5, 0.5]
img1 = img1 * std + mean

# Noisy version of the grid, for display only. np.random.randn takes the
# dimensions as separate integers, hence the unpacked shape.
noisy_x_test = img1 + 0.5 * np.random.randn(*img1.shape)
noisy_x_test = np.clip(noisy_x_test, 0., 1.)

plt.figure()
plt.imshow(noisy_x_test)

# Noisy model input, generated the same way as during training.
img2 = x_test + 0.5 * torch.randn(*x_test.shape)
img2 = torch.clamp(img2, 0., 1.)

# Linear model: flatten every image to a 28 * 28 vector.
img2 = img2.view(-1, 28 * 28)

# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_pred = model(img2)

# Reshape the reconstruction back to images and display it.
img_test = test_pred.view(-1, 1, 28, 28)
img2 = torchvision.utils.make_grid(img_test)
img2 = img2.numpy().transpose(1, 2, 0)
img2 = img2 * std + mean
img2 = np.clip(img2, 0., 1.)
plt.figure()
plt.imshow(img2)
plt.show()