代码地址:https://github.com/Fanziapril/mvfnet

环境配置

  1. conda create -n mvfnet python=2.7
  2. 安装 pytorch

要求安装 pytorch 0.4.0。执行 conda install pytorch=0.4.0 cuda92 -c pytorch 后,在 python 中 import torch 会报错:ImportError: libcudart.so.9.0: cannot open shared object file: No such file or directory
改为安装 pytorch 1.0.0(conda install pytorch==1.0.0 torchvision==0.2.1 cuda100 -c pytorch)也不行:后面运行时会报错 cannot import name download_url_to_file,原因是 pytorch 版本太低。需要安装 pytorch 1.2.0 及之后的版本。

  3. conda install -c 1adrianb face_alignment
  4. Model_shape.mat and Model_Expression.mat from 3DDFA.
    1. Model_shape.mat要用代码生成。在3DDFA_Release\Matlab\ModelGeneration路径下,下载BFM模型中的01_MorphableModel.mat,运行ModelGenerate.m
  5. download the CNN model from here.

test

python test_img.py --image_path ./data/imgs --save_dir ./result

报错与解决

1. No module named parse

image.png
修改报错行代码为:

  1. try:
  2. from urllib.parse import urlparse
  3. except ImportError:
  4. from urlparse import urlparse

2. cannot import name download_url_to_file

image.png
The version of pytorch may be too low. Before 1.2.0 there is no download_url_to_file in torch.hub. Please upgrade pytorch.

输入输出

输入:正面、左侧、右侧三个视角的人脸图片各一张。
front.jpg、left.jpg、right.jpg
输出:一个shape.ply文件。展示:
image.png image.png

代码解读

读取输入的3张图片:

  1. imgA = Image.open(os.path.join(options.image_path, 'front.jpg')).convert('RGB')
  2. imgB = Image.open(os.path.join(options.image_path, 'left.jpg')).convert('RGB')
  3. imgC = Image.open(os.path.join(options.image_path, 'right.jpg')).convert('RGB')
  4. imgA = transforms.functional.to_tensor(imgA)
  5. imgB = transforms.functional.to_tensor(imgB)
  6. imgC = transforms.functional.to_tensor(imgC)
  7. input_tensor = torch.cat([imgA, imgB, imgC], 0).view(1, 9, 224, 224).cuda()

通过CNN模型,获得预测的人脸参数

  1. model = VggEncoder()
  2. model = torch.nn.DataParallel(model).cuda()
  3. ckpt = torch.load('data/net.pth')
  4. model.load_state_dict(ckpt)
  5. preds = model(input_tensor)

模型细节:

  1. class VggEncoder(nn.Module):
  2. def __init__(self):
  3. super(VggEncoder, self).__init__()
  4. self.featChannel = 512
  5. self.layer1 = tvmodel.vgg16_bn(pretrained=True).features
  6. self.layer1 = nn.Sequential(OrderedDict([
  7. ('conv1', nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1))),
  8. ('bn1', nn.BatchNorm2d(64)),
  9. ('relu1', nn.ReLU(True)),
  10. ('pool1', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
  11. ('conv2', nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1))),
  12. ('bn2', nn.BatchNorm2d(128)),
  13. ('relu2', nn.ReLU(True)),
  14. ('pool2', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
  15. ('conv3', nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1))),
  16. ('bn3', nn.BatchNorm2d(256)),
  17. ('relu3', nn.ReLU(True)),
  18. ('conv4', nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1))),
  19. ('bn4', nn.BatchNorm2d(256)),
  20. ('relu4', nn.ReLU(True)),
  21. ('pool3', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
  22. ('conv5', nn.Conv2d(256, 512, (3, 3), (1, 1), 1)),
  23. ('bn5', nn.BatchNorm2d(512)),
  24. ('relu5', nn.ReLU(True)),
  25. ('pool4', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
  26. ('conv6', nn.Conv2d(512, 512, (3, 3), stride=1, padding=1)),
  27. ('bn6', nn.BatchNorm2d(512)),
  28. ('relu6', nn.ReLU(True)),
  29. ('conv7', nn.Conv2d(512, 512, (3, 3), (1, 1), 1)),
  30. ('bn7', nn.BatchNorm2d(512)),
  31. ('relu7', nn.ReLU(True)),
  32. ('pool5', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
  33. ]))
  34. self.fc_3dmm = nn.Sequential(OrderedDict([
  35. ('fc1', nn.Linear(self.featChannel*3, 256*3)),
  36. ('relu1', nn.ReLU(True)),
  37. ('fc2', nn.Linear(256*3, 228))]))
  38. self.fc_pose = nn.Sequential(OrderedDict([
  39. ('fc3', nn.Linear(512, 256)),
  40. ('relu2', nn.ReLU(True)),
  41. ('fc4', nn.Linear(256, 7))]))
  42. reset_params(self.fc_3dmm)
  43. reset_params(self.fc_pose)
  44. def forward(self, x):
  45. imga = x[:, 0:3, :, :]
  46. feata = self.layer1(imga)
  47. feata = F.avg_pool2d(feata, feata.size()[2:]).view(feata.size(0), feata.size(1))
  48. posea = self.fc_pose(feata)
  49. imgb = x[:, 3:6, :, :]
  50. featb = self.layer1(imgb)
  51. featb = F.avg_pool2d(featb, featb.size()[2:]).view(featb.size(0), featb.size(1))
  52. poseb = self.fc_pose(featb)
  53. imgc = x[:, 6:9, :, :]
  54. featc = self.layer1(imgc)
  55. featc = F.avg_pool2d(featc, featc.size()[2:]).view(featc.size(0), featc.size(1))
  56. posec = self.fc_pose(featc)
  57. para = self.fc_3dmm(torch.cat([feata, featb, featc], dim=1))
  58. out = torch.cat([para, posea, poseb, posec], dim=1)
  59. return out
  60. def reset_params(net):
  61. for m in net.modules():
  62. if isinstance(m, nn.Conv2d):
  63. nn.init.normal(m.weight, 0.0, 0.02)
  64. if m.bias is not None:
  65. nn.init.constant(m.bias, 0)
  66. elif isinstance(m, nn.Linear):
  67. nn.init.normal(m.weight, 0.0, 0.0001)
  68. if m.bias is not None:
  69. nn.init.constant(m.bias, 0)
  70. elif isinstance(m, nn.BatchNorm2d):
  71. nn.init.constant(m.weight, 1)
  72. nn.init.normal(m.weight, 1.0, 0.02)
  73. nn.init.constant(m.bias, 0)

从预测的参数中恢复3D人脸:

  1. faces3d = tools.preds_to_shape(preds[0].detach().cpu().numpy())

提取出人脸参数 alpha、beta,以及每个视角的 R、t、s,计算 face_shape,并计算3个视角下68个关键点的二维投影 kptA、kptB、kptC:

  1. def preds_to_shape(preds):
  2. # paras = torch.mul(preds[:228, :], label_std[:199+29, :])
  3. alpha = np.reshape(preds[:199], [199,1]) * np.reshape(model_shape['sigma'], [199,1])
  4. beta = np.reshape(preds[199:228], [29, 1]) * 1.0/(1000.0 * np.reshape(data['sigma_exp'], [29, 1]))
  5. face_shape = np.matmul(model_shape['w'], alpha) + np.matmul(model_exp['w_exp'], beta) + model_shape['mu_shape']
  6. face_shape = face_shape.reshape(-1, 3)
  7. R, t, s = preds_to_pose(preds[228:228+7])
  8. kptA = np.matmul(face_shape[kpt_index], s*R[:2].transpose()) + np.repeat(np.reshape(t,[1,2]), 68, axis=0)
  9. kptA[:, 1] = 224 - kptA[:, 1]
  10. R, t, s = preds_to_pose(preds[228+7:228+14])
  11. kptB = np.matmul(face_shape[kpt_index], s*R[:2].transpose()) + np.repeat(np.reshape(t,[1,2]), 68, axis=0)
  12. kptB[:, 1] = 224 - kptB[:, 1]
  13. R, t, s = preds_to_pose(preds[228+14:])
  14. kptC = np.matmul(face_shape[kpt_index], s*R[:2].transpose()) + np.repeat(np.reshape(t,[1,2]), 68, axis=0)
  15. kptC[:, 1] = 224 - kptC[:, 1]
  16. return [face_shape, model_shape['tri'].astype(np.int64).transpose() - 1, kptA, kptB, kptC]
  17. def preds_to_pose(preds):
  18. pose = preds * pose_std + pose_mean
  19. R = angle_to_rotation(pose[:3])
  20. t2d = pose[3:5]
  21. s = pose[6]
  22. return R, t2d, s

最后,将人脸写入mesh文件。

  1. tools.write_ply(os.path.join(options.save_dir, 'shape.ply'), faces3d[0], faces3d[1])
  1. def write_ply(filename, points=None, mesh=None, colors=None, as_text=True):
  2. points = pd.DataFrame(points, columns=["x", "y", "z"])
  3. mesh = pd.DataFrame(mesh, columns=["v1", "v2", "v3"])
  4. if colors is not None:
  5. colors = pd.DataFrame(colors, columns=["red", "green", "blue"])
  6. points = pd.concat([points, colors], axis=1)
  7. if not filename.endswith('ply'):
  8. filename += '.ply'
  9. # open in text mode to write the header
  10. with open(filename, 'w') as ply:
  11. header = ['ply']
  12. if as_text:
  13. header.append('format ascii 1.0')
  14. else:
  15. header.append('format binary_' + sys.byteorder + '_endian 1.0')
  16. if points is not None:
  17. header.extend(describe_element('vertex', points))
  18. if mesh is not None:
  19. mesh = mesh.copy()
  20. mesh.insert(loc=0, column="n_points", value=3)
  21. mesh["n_points"] = mesh["n_points"].astype("u1")
  22. header.extend(describe_element('face', mesh))
  23. header.append('end_header')
  24. for line in header:
  25. ply.write("%s\n" % line)
  26. if as_text:
  27. if points is not None:
  28. points.to_csv(filename, sep=" ", index=False, header=False, mode='a',
  29. encoding='ascii')
  30. if mesh is not None:
  31. mesh.to_csv(filename, sep=" ", index=False, header=False, mode='a',
  32. encoding='ascii')
  33. else:
  34. # open in binary/append to use tofile
  35. with open(filename, 'ab') as ply:
  36. if points is not None:
  37. points.to_records(index=False).tofile(ply)
  38. if mesh is not None:
  39. mesh.to_records(index=False).tofile(ply)
  40. return True

ply文件预览:

  1. ply
  2. format ascii 1.0
  3. element vertex 53215
  4. property float x
  5. property float y
  6. property float z
  7. element face 105840
  8. property list uchar int vertex_indices
  9. end_header
  10. -57861.697444928475 41402.07863996125 80754.95994012126
  11. -57825.751646592085 41156.33884778988 80756.22177179152
  12. -57787.105240666366 40912.49501439184 80753.9681079146
  13. -57752.693245361734 40606.47652473882 80702.31823879934
  14. -57714.708572422285 40303.915314277816 80648.21141466901
  15. -57665.74191753105 39946.24824403958 80564.4580453337
  16. -57614.64365355126 39588.81458608739 80479.47785714144
  17. -57566.487798421236 39210.266665891104 80364.30959778109
  18. -57502.62117264804 38830.57059010113 80246.91493798506
  19. -57432.458661227414 38409.00673801282 80117.38173290923
  20. -57353.70860100195 37990.52765622997 79981.96981050582
  21. -57262.74744467564 37559.03210288704 79831.46206180744
  22. -57166.9358116317 37130.19943151452 79677.16034162804
  23. ...

用mesh方式打开:
image.png

多视角视频人脸重建

模型加载时间:3.69
图片加载、裁剪等图片处理的时间:8.69
使用模型预测的时间:0.65
19cb7f7b0984c2e44ff5fc3bc1c6bb1.jpg
0b6d3819b63950607dd71d881a34474.jpg

7bf3ade6eae7c0cc7141fc3fd3c6478.jpg
6cd96073bcc14765417e9ee83ed08ac.jpg