学习笔记 - weight-only quantization测试用例 - 《人工智能》

权重量化是将conv2d的权重进行int8量化，需要将量化的结果进行比较。
tvm中的用于比较2个tensor是否近似相同的函数是：
**tvm.testing.assert_allclose**,
该函数等效于 **numpy.testing.assert_allclose**
from warnings import showwarning
import numpy as np
import tensorflow as tf
import tvm
import vacc.quantize_weight
from tvm import relay
from tvm.relay import testing
from tvm.relay.frontend.common import infer_value
from tvm.relay import op as _op
from tvm.relay import analysis as _analysis
from tvm.relay import transform
from tvm.contrib import graph_runtime
def test_weight_quantization_result():
    """Compare weight-only quantization of one conv2d against reference results.

    Four outputs are produced from the same random activation and weight:

    * r1 -- the float32 conv2d built and run through Relay on the CPU.
    * r2 -- the same module, with weights that were quantized and dequantized
      by hand in NumPy (divide by scale, round, clip to +/-127, multiply back).
    * r3 -- the module returned by ``vacc.quantize_weight.quantize``.
    * r4 -- the float32 conv2d computed by TensorFlow, transposed to NCHW.

    The test asserts r2 ~= r3 and r1 ~= r4 (rtol=1e-7, atol=1e-3).
    r1 is not compared against r2/r3.
    """
    target = "llvm"
    device = tvm.cpu(0)
    float_type = "float32"
    data_shape = (1, 4, 8, 8)
    kernel_shape = (4, 4, 3, 3)

    # Relay function: a single conv2d over the free variables "data" and
    # "weight", wrapped into a module.
    data_var = relay.var("data", dtype=float_type, shape=data_shape)
    weight_var = relay.var("weight", dtype=float_type, shape=kernel_shape)
    conv_expr = relay.nn.conv2d(data_var, weight_var)
    mod = relay.Module.from_expr(relay.Function([data_var, weight_var], conv_expr))

    # Random inputs; the activation is drawn before the weight.
    act = np.random.uniform(-1, 1, data_shape).astype(float_type)
    wei = np.random.uniform(-0.5, 0.5, kernel_shape).astype(float_type)
    input_nd = tvm.nd.array(act, ctx=device)
    params = {"weight": tvm.nd.array(wei, ctx=device)}

    separator = "---------------------------------------------"

    def run_module(module, weights):
        """Build ``module`` with ``weights`` bound, run it, return output 0 as ndarray."""
        print("\n" + separator)
        print("original module and params:")
        print(module.astext(show_meta_data=False))
        print(weights)
        print(separator)

        # Third positional argument of relay.build is passed as "llvm" as well.
        with relay.build_config(opt_level=2):
            graph_json, lib, built_params = relay.build(
                module, target, "llvm", params=weights)

        executor = graph_runtime.create(graph_json, lib, device)
        executor.set_input("data", input_nd)
        # Feed whatever parameters relay.build returned.
        executor.set_input(**built_params)
        executor.run()

        result = executor.get_output(0)
        print("out:\n", result)
        return result.asnumpy()

    def run_with_fake_quantized_weights():
        """Run the float module with weights quantized/dequantized in NumPy."""
        limit = 127
        # Symmetric per-tensor scale: largest absolute weight maps to `limit`.
        step = np.amax(np.abs(wei)) / limit
        levels = np.clip(np.round(wei / step), -limit, limit)
        dequantized = levels * step
        return run_module(mod, {"weight": tvm.nd.array(dequantized, ctx=device)})

    def run_weight_only_quantized():
        """Quantize the module via vacc.quantize_weight and run the result."""
        with vacc.quantize_weight.qconfig():
            quantized_mod = vacc.quantize_weight.quantize(mod, params)
        print("\n" + separator)
        print("module after quantization:")
        print(quantized_mod.astext(show_meta_data=False))
        print(separator)
        # The original (unquantized) params dict is passed to the build step.
        return run_module(quantized_mod, params)

    def run_tensorflow_reference():
        """Compute the same convolution with TensorFlow; result is NHWC."""
        # TensorFlow wants NHWC activations and HWIO kernels.
        nhwc_input = act.transpose(0, 2, 3, 1)
        hwio_kernel = wei.transpose(2, 3, 1, 0)
        conv = tf.nn.conv2d(
            tf.constant(nhwc_input, dtype=tf.float32),
            tf.constant(hwio_kernel, dtype=tf.float32),
            strides=[1, 1, 1, 1],
            padding='VALID')
        with tf.Session() as sess:
            tf_out = sess.run(conv)
        print(tf_out)
        print("input shape:", nhwc_input.shape)
        print("kernel shape:", hwio_kernel.shape)
        print("output shape:", tf_out.shape)
        print("out:\n", tf_out)
        return tf_out

    r1 = run_module(mod, params)
    r2 = run_with_fake_quantized_weights()
    r3 = run_weight_only_quantized()
    # TensorFlow returns NHWC; bring it back to NCHW before comparing.
    r4 = run_tensorflow_reference().transpose(0, 3, 1, 2)

    tvm.testing.assert_allclose(r2, r3, rtol=1e-7, atol=1e-3)
    tvm.testing.assert_allclose(r1, r4, rtol=1e-7, atol=1e-3)