简单的黑白边界检测
*
有如下一张50×50的黑白图片:
问题:如何找出图片中明暗分界线所处位置?
解题思路:使用如下卷积核对图片进行卷积运算
卷积核大小为1×3,取值为[1, 0, -1],让其在图像中滑动:
这样,通过卷积运算后,只有黑白边界处的像素值不等于0,程序实现如下:
import matplotlib.pyplot as plt
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.initializer import NumpyArrayInitializer

# `%matplotlib inline` is IPython-only syntax. Calling the magic through
# get_ipython() keeps the notebook behaviour while letting the cell also run
# as a plain Python script, where get_ipython is undefined.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')  # noqa: F821
except NameError:
    pass

with fluid.dygraph.guard():
    # Initial weights of the kernel: w = [1, 0, -1].
    w = np.array([1, 0, -1], dtype='float32')
    # Reshape the weights into a 4-D tensor laid out as [cout, cin, kh, kw].
    w = w.reshape([1, 1, 1, 3])
    # Build the convolution operator with one output channel.
    # filter_size=[1, 3] means kh = 1 and kw = 3.
    # param_attr selects how the parameter is initialised; here the kernel
    # is initialised directly from the numpy.ndarray above.
    conv = Conv2D(
        num_channels=1,
        num_filters=1,
        filter_size=[1, 3],
        param_attr=fluid.ParamAttr(initializer=NumpyArrayInitializer(value=w)),
    )

    # Input image: pixels on the left are 1, pixels from column 30 on are 0.
    img = np.ones([50, 50], dtype='float32')
    img[:, 30:] = 0.
    # Reshape the image to [N, C, H, W].
    x = img.reshape([1, 1, 50, 50])
    # Convert the numpy.ndarray into a Paddle tensor.
    x = fluid.dygraph.to_variable(x)
    # Apply the convolution operator to the input image.
    y = conv(x)
    # Convert the output tensor back into a numpy.ndarray.
    out = y.numpy()

f = plt.subplot(121)
f.set_title('input image', fontsize=15)
plt.imshow(img, cmap='gray')

f = plt.subplot(122)
f.set_title('output featuremap', fontsize=15)
# Conv2D returns data shaped [N, C, H, W]; with N = C = 1 that is a 4-D
# array of shape [1, 1, H, W]. plt.imshow needs a 2-D array to draw a
# grayscale image, so numpy.squeeze drops the size-1 dimensions.
plt.imshow(out.squeeze(), cmap='gray')
plt.show()
输出:
# Inspect the parameters of the convolution layer.
# Relies on `fluid` and `conv` created by the edge-detection cell above.
with fluid.dygraph.guard():
    # conv.parameters() returns a list with two elements.
    params = conv.parameters()
    print(params)
    # Name and value of the layer's weight parameter.
    print(params[0].name, params[0].numpy())
    # Name and value of the layer's bias parameter.
    print(params[1].name, params[1].numpy())
输出:
[name conv2d_0.w_0, dtype: VarType.FP32 shape: [1, 1, 1, 3] lod: {}dim: 1, 1, 1, 3layout: NCHWdtype: floatdata: [1 0 -1], name conv2d_0.b_0, dtype: VarType.FP32 shape: [1] lod: {}dim: 1layout: NCHWdtype: floatdata: [0]]conv2d_0.w_0 [[[[ 1. 0. -1.]]]]conv2d_0.b_0 [0.]
图像中物体边缘检测
**
上面展示的是一个人为构造出来的简单图片,使用卷积网络检测图片明暗分界处的示例。对于真实的图片,也可以使用合适的卷积核对其进行操作,用来检测物体的外形轮廓,观察输出特征图跟原图之间的对应关系。
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.initializer import NumpyArrayInitializer

# The code below assumes an [H, W, 3] array. A PNG may carry an alpha
# channel, a palette or a single gray channel, so force RGB on load; for an
# image that is already RGB this is a no-op.
img = Image.open('./work/001.png').convert('RGB')

with fluid.dygraph.guard():
    # Kernel values: 8 at the centre, -1 around it, all scaled by 1/8.
    w = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype='float32') / 8
    w = w.reshape([1, 1, 3, 3])
    # The input has 3 channels, so expand the kernel from [1, 1, 3, 3]
    # to [1, 3, 3, 3] by repeating it along the input-channel axis.
    w = np.repeat(w, 3, axis=1)
    # Build the convolution operator: 1 output channel, 3x3 kernel,
    # initialised with the weights prepared above.
    conv = Conv2D(
        num_channels=3,
        num_filters=1,
        filter_size=[3, 3],
        param_attr=fluid.ParamAttr(initializer=NumpyArrayInitializer(value=w)),
    )

    # Convert the loaded picture into a float32 numpy.ndarray.
    x = np.array(img).astype('float32')
    # The array is [H, W, 3]; move the channel axis to the front.
    x = np.transpose(x, (2, 0, 1))
    # Reshape to the [N, C, H, W] layout.
    x = x.reshape(1, 3, img.height, img.width)
    x = fluid.dygraph.to_variable(x)
    y = conv(x)
    out = y.numpy()

plt.figure(figsize=(20, 10))
f = plt.subplot(121)
f.set_title('input image', fontsize=15)
plt.imshow(img)

f = plt.subplot(122)
f.set_title('output feature map', fontsize=15)
# squeeze() drops the size-1 batch and channel axes so imshow gets [H, W].
plt.imshow(out.squeeze(), cmap='gray')
plt.show()
输出:
图像均值模糊
另外一种比较常见的卷积核是用当前像素跟它邻域内的像素取平均,这样可以使图像上噪声比较大的点变得更平滑。
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.initializer import NumpyArrayInitializer

# Load the picture, convert it to grayscale ('L' mode), then turn it into
# a numpy.ndarray.
img = Image.open('./work/002.png').convert('L')
img = np.array(img)

with fluid.dygraph.guard():
    # 5x5 kernel with every weight equal to 1/25, i.e. each output pixel is
    # the mean of the 5x5 window it covers.
    w = np.ones([1, 1, 5, 5], dtype='float32') / 25
    conv = Conv2D(
        num_channels=1,
        num_filters=1,
        filter_size=[5, 5],
        param_attr=fluid.ParamAttr(initializer=NumpyArrayInitializer(value=w)),
    )

    x = img.astype('float32')
    # Reshape the 2-D image to the [N, C, H, W] layout with N = C = 1.
    x = x.reshape(1, 1, img.shape[0], img.shape[1])
    x = fluid.dygraph.to_variable(x)
    y = conv(x)
    out = y.numpy()

plt.figure(figsize=(20, 12))
f = plt.subplot(121)
f.set_title('input image')
plt.imshow(img, cmap='gray')

f = plt.subplot(122)
f.set_title('output feature map')
# Drop the size-1 batch and channel axes before plotting.
out = out.squeeze()
plt.imshow(out, cmap='gray')
plt.show()
输出:
