代码地址:https://github.com/Fanziapril/mvfnet
环境配置
conda create -n mvfnet python=2.7
- 安装 pytorch
要求安装 pytorch 0.4.0:conda install pytorch=0.4.0 cuda92 -c pytorch。安装后在 python 中 import torch,报错 ImportError: libcudart.so.9.0: cannot open shared object file: No such file or directory。
改成安装 pytorch 1.0.0:conda install pytorch==1.0.0 torchvision==0.2.1 cuda100 -c pytorch。还是不行,后面运行时会报错 cannot import name download_url_to_file,原因是版本太低,要安装 pytorch 1.2.0 之后的版本。
conda install -c 1adrianb face_alignment
Model_shape.mat and Model_Expression.mat from 3DDFA. 其中 Model_shape.mat 要用代码生成:在 3DDFA_Release\Matlab\ModelGeneration 路径下,下载 BFM 模型中的 01_MorphableModel.mat,然后运行 ModelGenerate.m。
- download the CNN model from here.
test
python test_img.py --image_path ./data/imgs --save_dir ./result
报错与解决
1. No module named parse
修改报错行代码为:
# Python 3 moved urlparse into urllib.parse; fall back to the Python 2
# module when that import is not available.
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse
2. cannot import name download_url_to_file
The version of pytorch may be too low. Before 1.2.0 there is no download_url_to_file in torch.hub. Please upgrade pytorch.
输入输出
输入:正视、左、右视角各一张。
输出:一个 shape.ply 文件。展示:
代码解读
读取输入的3张图片,
# Load the three views in the fixed order front, left, right: each file is
# opened, forced to RGB and converted to a tensor.
view_tensors = [
    transforms.functional.to_tensor(
        Image.open(os.path.join(options.image_path, view_file)).convert('RGB'))
    for view_file in ('front.jpg', 'left.jpg', 'right.jpg')
]
imgA, imgB, imgC = view_tensors
# Concatenate along dim 0 and reshape into a single 9-channel sample on the
# GPU; the view() call requires every image to already be 224x224.
input_tensor = torch.cat(view_tensors, 0).view(1, 9, 224, 224).cuda()
通过CNN模型,获得预测的人脸参数
# Build the encoder and wrap it in DataParallel on the GPU before loading
# the weights.
# NOTE(review): presumably the checkpoint keys carry the "module." prefix
# that DataParallel adds -- confirm against data/net.pth.
model = VggEncoder()
model = torch.nn.DataParallel(model).cuda()
# Read the checkpoint from disk and copy it into the wrapped model.
ckpt = torch.load('data/net.pth')
model.load_state_dict(ckpt)
# NOTE(review): model.eval() is not called in this excerpt, so the BatchNorm
# layers would run in training mode -- confirm against the full script.
# Forward pass on the stacked 9-channel input.
preds = model(input_tensor)
模型细节:
class VggEncoder(nn.Module):
    """Shared convolutional encoder that regresses parameters from three views.

    The input is a batch of 9-channel tensors: channels 0-2, 3-5 and 6-8 hold
    the three views. Every view goes through the same convolutional stack
    (``layer1``) and is globally average-pooled to a 512-dimensional feature.
    ``fc_pose`` maps each view's feature to 7 values, and ``fc_3dmm`` maps the
    three concatenated features to 228 values.

    The forward output has shape ``(N, 228 + 3 * 7)``, laid out as
    ``[fc_3dmm output, pose of view 1, pose of view 2, pose of view 3]``.
    """

    def __init__(self):
        super(VggEncoder, self).__init__()
        self.featChannel = 512
        # The previous version first assigned torchvision's pretrained
        # vgg16_bn features to layer1 and overwrote them on the very next
        # statement; that assignment only triggered a weight download and is
        # dropped. Layer names are kept so checkpoint keys are unchanged.
        self.layer1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1))),
            ('bn1', nn.BatchNorm2d(64)),
            ('relu1', nn.ReLU(True)),
            ('pool1', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
            ('conv2', nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1))),
            ('bn2', nn.BatchNorm2d(128)),
            ('relu2', nn.ReLU(True)),
            ('pool2', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
            ('conv3', nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1))),
            ('bn3', nn.BatchNorm2d(256)),
            ('relu3', nn.ReLU(True)),
            ('conv4', nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1))),
            ('bn4', nn.BatchNorm2d(256)),
            ('relu4', nn.ReLU(True)),
            ('pool3', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
            ('conv5', nn.Conv2d(256, 512, (3, 3), (1, 1), 1)),
            ('bn5', nn.BatchNorm2d(512)),
            ('relu5', nn.ReLU(True)),
            ('pool4', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
            ('conv6', nn.Conv2d(512, 512, (3, 3), stride=1, padding=1)),
            ('bn6', nn.BatchNorm2d(512)),
            ('relu6', nn.ReLU(True)),
            ('conv7', nn.Conv2d(512, 512, (3, 3), (1, 1), 1)),
            ('bn7', nn.BatchNorm2d(512)),
            ('relu7', nn.ReLU(True)),
            ('pool5', nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True)),
        ]))
        # Head fed with the concatenated features of all three views.
        self.fc_3dmm = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(self.featChannel * 3, 256 * 3)),
            ('relu1', nn.ReLU(True)),
            ('fc2', nn.Linear(256 * 3, 228)),
        ]))
        # Head applied to each view's feature separately.
        self.fc_pose = nn.Sequential(OrderedDict([
            ('fc3', nn.Linear(512, 256)),
            ('relu2', nn.ReLU(True)),
            ('fc4', nn.Linear(256, 7)),
        ]))
        reset_params(self.fc_3dmm)
        reset_params(self.fc_pose)

    def forward(self, x):
        """Run the encoder on a ``(N, 9, H, W)`` batch.

        Returns a ``(N, 249)`` tensor: the 228 outputs of ``fc_3dmm``
        followed by the 7 ``fc_pose`` outputs of each of the three views.
        """
        view_feats = []
        view_poses = []
        # Views are processed one after another with the same weights, in
        # channel order.
        for first_channel in (0, 3, 6):
            view = x[:, first_channel:first_channel + 3, :, :]
            feat = self.layer1(view)
            # Global average pooling over the whole remaining spatial extent.
            feat = F.avg_pool2d(feat, feat.size()[2:]).view(feat.size(0), feat.size(1))
            view_feats.append(feat)
            view_poses.append(self.fc_pose(feat))
        para = self.fc_3dmm(torch.cat(view_feats, dim=1))
        return torch.cat([para] + view_poses, dim=1)


def reset_params(net):
    """Re-initialise the Conv2d, Linear and BatchNorm2d layers inside *net*.

    Conv2d weights are drawn from N(0, 0.02), Linear weights from
    N(0, 0.0001) and BatchNorm2d weights from N(1, 0.02); every existing bias
    is set to 0. Uses the in-place ``normal_``/``constant_`` initialisers
    instead of the deprecated ``normal``/``constant`` aliases.
    """
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.normal_(m.weight, 0.0, 0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0.0, 0.0001)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            # The earlier constant(weight, 1) call was overwritten by this
            # normal initialisation right away, so only the latter is kept.
            nn.init.normal_(m.weight, 1.0, 0.02)
            nn.init.constant_(m.bias, 0)
从预测的参数中恢复3D人脸,
# Take the first sample of the batch, detach it from the autograd graph and
# hand it over as a NumPy array on the CPU.
faces3d = tools.preds_to_shape(preds[0].detach().cpu().numpy())
提取出人脸参数 alpha, beta, R, t, s,计算 face_shape,以及3个视角各自的68个关键点(kptA、kptB、kptC),
def preds_to_shape(preds):
    """Turn a flat prediction vector into a face mesh plus per-view keypoints.

    ``preds[:199]`` and ``preds[199:228]`` are scaled into the two coefficient
    vectors that are multiplied with ``model_shape['w']`` and
    ``model_exp['w_exp']``. The remaining entries are split into three pose
    chunks (``[228:235]``, ``[235:242]``, ``[242:]``), one per view.

    Returns ``[face_shape, triangles, kptA, kptB, kptC]``: the vertices
    reshaped to ``(-1, 3)``, the transposed ``model_shape['tri']`` indices
    shifted by -1, and one keypoint array per view.
    """
    shape_sigma = np.reshape(model_shape['sigma'], [199, 1])
    exp_sigma = np.reshape(data['sigma_exp'], [29, 1])
    alpha = np.reshape(preds[:199], [199, 1]) * shape_sigma
    beta = np.reshape(preds[199:228], [29, 1]) * 1.0 / (1000.0 * exp_sigma)

    shape_term = np.matmul(model_shape['w'], alpha)
    exp_term = np.matmul(model_exp['w_exp'], beta)
    face_shape = (shape_term + exp_term + model_shape['mu_shape']).reshape(-1, 3)

    keypoints = []
    for pose_chunk in (preds[228:228 + 7], preds[228 + 7:228 + 14], preds[228 + 14:]):
        R, t, s = preds_to_pose(pose_chunk)
        # Project the selected vertices with the first two rows of the
        # scaled rotation, then add the same 2D offset to all 68 rows.
        projected = np.matmul(face_shape[kpt_index], s * R[:2].transpose())
        projected = projected + np.repeat(np.reshape(t, [1, 2]), 68, axis=0)
        # Mirror the second coordinate around 224
        # (presumably the image height -- confirm).
        projected[:, 1] = 224 - projected[:, 1]
        keypoints.append(projected)

    triangles = model_shape['tri'].astype(np.int64).transpose() - 1
    return [face_shape, triangles] + keypoints
def preds_to_pose(preds):
    """De-normalise one pose vector and split it into rotation, 2D offset, scale.

    The vector is rescaled with the module-level ``pose_std`` / ``pose_mean``.
    Entries 0-2 go through ``angle_to_rotation``, entries 3-4 are returned as
    the 2D translation and entry 6 as the scale; entry 5 is not used.
    """
    denormalised = preds * pose_std + pose_mean
    rotation = angle_to_rotation(denormalised[:3])
    return rotation, denormalised[3:5], denormalised[6]
最后,将人脸写入mesh文件。
# faces3d[0] is passed as the points argument and faces3d[1] as the mesh
# argument of write_ply.
tools.write_ply(os.path.join(options.save_dir, 'shape.ply'), faces3d[0], faces3d[1])
def write_ply(filename, points=None, mesh=None, colors=None, as_text=True):
    """Write vertices and/or triangular faces to a PLY file.

    Parameters
    ----------
    filename : str
        Output path; ``.ply`` is appended when the name does not end in
        ``ply``.
    points : array-like with 3 columns, optional
        Vertex coordinates, written as the ``x``, ``y``, ``z`` properties.
    mesh : array-like with 3 columns, optional
        Vertex indices of each triangle.
    colors : array-like with 3 columns, optional
        Per-vertex ``red``, ``green``, ``blue`` values appended to *points*.
    as_text : bool
        Write an ASCII body when true, otherwise a binary body in the
        machine's native byte order.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If *colors* is given without *points*.

    Previously *points* and *mesh* were wrapped in DataFrames even when they
    were ``None``, which made every ``is not None`` check below always true;
    omitted inputs are now really skipped.
    """
    if colors is not None and points is None:
        raise ValueError("colors were given without points")

    if points is not None:
        points = pd.DataFrame(points, columns=["x", "y", "z"])
        if colors is not None:
            colors = pd.DataFrame(colors, columns=["red", "green", "blue"])
            points = pd.concat([points, colors], axis=1)

    if mesh is not None:
        mesh = pd.DataFrame(mesh, columns=["v1", "v2", "v3"]).copy()
        # Each face row is prefixed with its vertex count (always 3), stored
        # as an unsigned byte.
        mesh.insert(loc=0, column="n_points", value=3)
        mesh["n_points"] = mesh["n_points"].astype("u1")

    if not filename.endswith('ply'):
        filename += '.ply'

    # Build the whole header before touching the file so that a failure here
    # does not leave a truncated file behind.
    header = ['ply']
    if as_text:
        header.append('format ascii 1.0')
    else:
        header.append('format binary_' + sys.byteorder + '_endian 1.0')
    if points is not None:
        header.extend(describe_element('vertex', points))
    if mesh is not None:
        header.extend(describe_element('face', mesh))
    header.append('end_header')

    # open in text mode to write the header
    with open(filename, 'w') as ply:
        for line in header:
            ply.write("%s\n" % line)

    if as_text:
        # The body is appended to the header that was just written.
        if points is not None:
            points.to_csv(filename, sep=" ", index=False, header=False, mode='a',
                          encoding='ascii')
        if mesh is not None:
            mesh.to_csv(filename, sep=" ", index=False, header=False, mode='a',
                        encoding='ascii')
    else:
        # open in binary/append to use tofile
        with open(filename, 'ab') as ply:
            if points is not None:
                points.to_records(index=False).tofile(ply)
            if mesh is not None:
                mesh.to_records(index=False).tofile(ply)

    return True
ply文件预览:
ply
format ascii 1.0
element vertex 53215
property float x
property float y
property float z
element face 105840
property list uchar int vertex_indices
end_header
-57861.697444928475 41402.07863996125 80754.95994012126
-57825.751646592085 41156.33884778988 80756.22177179152
-57787.105240666366 40912.49501439184 80753.9681079146
-57752.693245361734 40606.47652473882 80702.31823879934
-57714.708572422285 40303.915314277816 80648.21141466901
-57665.74191753105 39946.24824403958 80564.4580453337
-57614.64365355126 39588.81458608739 80479.47785714144
-57566.487798421236 39210.266665891104 80364.30959778109
-57502.62117264804 38830.57059010113 80246.91493798506
-57432.458661227414 38409.00673801282 80117.38173290923
-57353.70860100195 37990.52765622997 79981.96981050582
-57262.74744467564 37559.03210288704 79831.46206180744
-57166.9358116317 37130.19943151452 79677.16034162804
...
用mesh方式打开:
多视角视频人脸重建
模型加载时间:3.69
图片加载、裁剪等图片处理的时间:8.69
使用模型预测的时间:0.65