Weight quantization here means quantizing the conv2d weights to int8; the quantized result then needs to be compared against the unquantized reference.
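For reference, a minimal numpy sketch of the per-tensor symmetric int8 scheme that the `data_processed()` helper in the test below applies to the weights (compute a scale from the maximum absolute value, divide, round, clip to ±127, multiply back); the weight values here are illustrative only:

```python
import numpy as np

w = np.array([-0.48, 0.03, 0.25, 0.50], dtype="float32")  # toy weights

k_scale = np.abs(w).max() / 127.0                    # per-tensor scale: max|w| maps to 127
w_int8 = np.clip(np.round(w / k_scale), -127, 127)   # quantized integer levels
w_fake = w_int8 * k_scale                            # de-quantized ("fake quantized") weights

print(w_int8)   # [-122.    8.   64.  127.]
print(w_fake)   # close to w; per-element error is bounded by roughly k_scale / 2
```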

The TVM function for checking that two tensors are approximately equal is **tvm.testing.assert_allclose**, which is equivalent to **numpy.testing.assert_allclose**.
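A minimal standalone usage sketch of that tolerance check:

```python
import numpy as np
import tvm.testing

a = np.array([1.0000, 2.0000, 3.0000], dtype="float32")
b = np.array([1.0004, 1.9998, 3.0001], dtype="float32")

# Passes when |a - b| <= atol + rtol * |b| element-wise, the same
# criterion numpy.testing.assert_allclose uses.
tvm.testing.assert_allclose(a, b, rtol=1e-7, atol=1e-3)
```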

The test below builds a single conv2d and compares four outputs: the normal float32 result, the result with manually processed weights (divide / round / clip / multiply), the result of the weight-only quantization pass in `vacc.quantize_weight`, and a TensorFlow reference.

```python
from warnings import showwarning
import numpy as np
import tensorflow as tf
import tvm
import vacc.quantize_weight
from tvm import relay
from tvm.relay import testing
from tvm.relay.frontend.common import infer_value
from tvm.relay import op as _op
from tvm.relay import analysis as _analysis
from tvm.relay import transform
from tvm.contrib import graph_runtime


def test_weight_quantization_result():
    # tgt = "llvm"
    # ctx = tvm.cpu()
    # targets = {
    #     tvm.expr.IntImm("int32", ctx.device_type): tgt
    # }
    targets = "llvm"
    ctx = tvm.cpu(0)
    dtype = "float32"
    dshape = (1, 4, 8, 8)
    wshape = (4, 4, 3, 3)

    # function
    data = relay.var("data", dtype=dtype, shape=dshape)
    w0 = relay.var("weight", dtype=dtype, shape=wshape)
    z = relay.nn.conv2d(data, w0)
    func = relay.Function([data, w0], z)

    # module
    mod = relay.Module.from_expr(func)

    # params
    act = np.random.uniform(-1, 1, dshape).astype(dtype)
    wei = np.random.uniform(-0.5, 0.5, wshape).astype(dtype)
    DATA = tvm.nd.array(act, ctx=ctx)
    WEIGHT = tvm.nd.array(wei, ctx=ctx)
    params = {"weight": WEIGHT}

    def get_output(mod, params):
        print("\n---------------------------------------------")
        print("original module and params:")
        print(mod.astext(show_meta_data=False))
        print(params)
        print("---------------------------------------------")
        # build
        with relay.build_config(opt_level=2):
            g_json, mmod, params = relay.build(
                mod, targets, "llvm", params=params)
        # create module
        rt = graph_runtime.create(g_json, mmod, ctx)
        # set input and parameters
        rt.set_input("data", DATA)
        # rt.load_params(relay.save_param_dict(params))
        rt.set_input(**params)
        # run
        rt.run()
        # get output
        out = rt.get_output(0)
        print("out:\n", out)
        return out.asnumpy()

    def normal_compute():
        """
        normal compute
        """
        return get_output(mod, params)

    def data_processed():
        """
        data are processed (divide, round, clip, multiply)
        """
        max_scale = np.amax(np.abs(wei))
        valid_range = 127
        k_scale = max_scale / valid_range
        clip_min = -valid_range
        clip_max = valid_range
        wei_scaled = wei / k_scale
        wei_scaled = np.round(wei_scaled)
        wei_scaled = np.clip(wei_scaled, clip_min, clip_max)
        wei_scaled = wei_scaled * k_scale
        WEIGHT2 = tvm.nd.array(wei_scaled, ctx=ctx)
        params2 = {"weight": WEIGHT2}
        return get_output(mod, params2)

    def weightonly_quantization():
        """
        weight-only quantization
        """
        # quantize
        with vacc.quantize_weight.qconfig():
            qmod = vacc.quantize_weight.quantize(mod, params)
        print("\n---------------------------------------------")
        print("module after quantization:")
        print(qmod.astext(show_meta_data=False))
        print("---------------------------------------------")
        return get_output(qmod, params)

    def compute_by_tf():
        """
        compute by tensorflow
        """
        x_in = act.transpose(0, 2, 3, 1)       # NCHW -> NHWC
        kernel_in = wei.transpose(2, 3, 1, 0)  # OIHW -> HWIO
        x = tf.constant(x_in, dtype=tf.float32)
        kernel = tf.constant(kernel_in, dtype=tf.float32)
        res = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
        with tf.Session() as sess:
            out = sess.run(res)
            print(out)
        print("input shape:", x_in.shape)
        print("kernel shape:", kernel_in.shape)
        print("output shape:", out.shape)
        print("out:\n", out)
        return out

    r1 = normal_compute()
    r2 = data_processed()
    r3 = weightonly_quantization()
    # note: TensorFlow output layout is NHWC, transpose back to NCHW
    r4 = compute_by_tf().transpose(0, 3, 1, 2)

    # np.testing.assert_allclose(r1, r2, rtol=1e-3, atol=1e-2)
    tvm.testing.assert_allclose(r2, r3, rtol=1e-7, atol=1e-3)
    tvm.testing.assert_allclose(r1, r4, rtol=1e-7, atol=1e-3)
```
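One way to run it (assuming the listing above is saved as a standalone script; since the function name starts with `test_`, it can also be collected by pytest):

```python
# Hypothetical entry point for running the test as a plain script.
if __name__ == "__main__":
    test_weight_quantization_result()
```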