Weight quantization here means quantizing the conv2d weights to int8; the quantized result then needs to be compared against the unquantized computation.
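For reference, a minimal NumPy sketch of the symmetric per-tensor scheme exercised by `data_processed()` in the test below (variable names here are illustrative, not part of any API):

```python
import numpy as np

# Symmetric per-tensor int8 quantize/dequantize of a weight tensor:
# the same divide / round / clip / multiply steps as data_processed() below.
wei = np.random.uniform(-0.5, 0.5, (4, 4, 3, 3)).astype("float32")

scale = np.amax(np.abs(wei)) / 127.0                               # map the largest magnitude to 127
wei_q = np.clip(np.round(wei / scale), -127, 127).astype("int8")   # int8 weights
wei_dq = wei_q.astype("float32") * scale                           # dequantize back to float32

print("max abs quantization error:", np.max(np.abs(wei - wei_dq)))
```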
The TVM function for checking whether two tensors are approximately equal is **tvm.testing.assert_allclose**, which is equivalent to **numpy.testing.assert_allclose**.
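A minimal usage sketch (the array contents are arbitrary):

```python
import numpy as np
import tvm.testing

a = np.random.uniform(size=(2, 3)).astype("float32")
b = a + 1e-4   # perturbation within the absolute tolerance

# Passes when |a - b| <= atol + rtol * |b| element-wise.
tvm.testing.assert_allclose(a, b, rtol=1e-7, atol=1e-3)

# A larger mismatch would raise an AssertionError with a per-element report:
# tvm.testing.assert_allclose(a, a + 1.0, rtol=1e-7, atol=1e-3)
```

The complete test below compares four computation paths -- the unmodified Relay module, manually scaled/rounded/clipped/rescaled weights, the `vacc.quantize_weight` pass, and a TensorFlow reference -- and checks them against each other with `assert_allclose`.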
```python
from warnings import showwarning
import numpy as np
import tensorflow as tf
import tvm
import vacc.quantize_weight
from tvm import relay
from tvm.relay import testing
from tvm.relay.frontend.common import infer_value
from tvm.relay import op as _op
from tvm.relay import analysis as _analysis
from tvm.relay import transform
from tvm.contrib import graph_runtime


def test_weight_quantization_result():
    # tgt = "llvm"
    # ctx = tvm.cpu()
    # targets = {
    #     tvm.expr.IntImm("int32", ctx.device_type): tgt
    # }
    targets = "llvm"
    ctx = tvm.cpu(0)

    dtype = "float32"
    dshape = (1, 4, 8, 8)
    wshape = (4, 4, 3, 3)

    # function
    data = relay.var("data", dtype=dtype, shape=dshape)
    w0 = relay.var("weight", dtype=dtype, shape=wshape)
    z = relay.nn.conv2d(data, w0)
    func = relay.Function([data, w0], z)

    # module
    mod = relay.Module.from_expr(func)

    # params
    act = np.random.uniform(-1, 1, dshape).astype(dtype)
    wei = np.random.uniform(-0.5, 0.5, wshape).astype(dtype)
    DATA = tvm.nd.array(act, ctx=ctx)
    WEIGHT = tvm.nd.array(wei, ctx=ctx)
    params = {"weight": WEIGHT}

    def get_output(mod, params):
        print("\n---------------------------------------------")
        print("original module and params:")
        print(mod.astext(show_meta_data=False))
        print(params)
        print("---------------------------------------------")
        # build
        with relay.build_config(opt_level=2):
            g_json, mmod, params = relay.build(mod, targets, "llvm", params=params)
        # create module
        rt = graph_runtime.create(g_json, mmod, ctx)
        # set input and parameters
        rt.set_input("data", DATA)
        # rt.load_params(relay.save_param_dict(params))
        rt.set_input(**params)
        # run
        rt.run()
        # get output
        out = rt.get_output(0)
        print("out:\n", out)
        return out.asnumpy()

    def normal_compute():
        """normal compute"""
        return get_output(mod, params)

    def data_processed():
        """data are processed (divide, round, clip, multiply)"""
        max_scale = np.amax(np.abs(wei))
        valid_range = 127
        k_scale = max_scale / valid_range
        clip_min = -valid_range
        clip_max = valid_range
        wei_scaled = wei / k_scale
        wei_scaled = np.round(wei_scaled)
        wei_scaled = np.clip(wei_scaled, clip_min, clip_max)
        wei_scaled = wei_scaled * k_scale
        WEIGHT2 = tvm.nd.array(wei_scaled, ctx=ctx)
        params2 = {"weight": WEIGHT2}
        return get_output(mod, params2)

    def weightonly_quantization():
        """weight-only quantization"""
        # quantize
        with vacc.quantize_weight.qconfig():
            qmod = vacc.quantize_weight.quantize(mod, params)
        print("\n---------------------------------------------")
        print("module after quantization:")
        print(qmod.astext(show_meta_data=False))
        print("---------------------------------------------")
        return get_output(qmod, params)

    def compute_by_tf():
        """compute by tensorflow"""
        x_in = act.transpose(0, 2, 3, 1)
        kernel_in = wei.transpose(2, 3, 1, 0)
        x = tf.constant(x_in, dtype=tf.float32)
        kernel = tf.constant(kernel_in, dtype=tf.float32)
        res = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
        with tf.Session() as sess:
            out = sess.run(res)
        print(out)
        print("input shape:", x_in.shape)
        print("kernel shape:", kernel_in.shape)
        print("output shape:", out.shape)
        print("out:\n", out)
        return out

    r1 = normal_compute()
    r2 = data_processed()
    r3 = weightonly_quantization()
    # note : layout of tensorflow is NHWC, transpose to NCHW
    r4 = compute_by_tf().transpose(0, 3, 1, 2)
    # np.testing.assert_allclose(r1, r2, rtol=1e-3, atol=1e-2)
    tvm.testing.assert_allclose(r2, r3, rtol=1e-7, atol=1e-3)
    tvm.testing.assert_allclose(r1, r4, rtol=1e-7, atol=1e-3)
```
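Note: the script is written against the Relay API of TVM 0.6-era releases (`relay.Module.from_expr`, `relay.build_config`, `tvm.contrib.graph_runtime`) and TensorFlow 1.x (`tf.Session`); `vacc.quantize_weight` is a separate module providing the weight-only quantization pass under test, not part of upstream TVM. The function is a pytest-style test and can be run via pytest or by calling `test_weight_quantization_result()` directly.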
