权重量化是将conv2d的权重进行int8量化,需要将量化的结果进行比较。
tvm中用于比较两个tensor是否近似相等的函数是:
**tvm.testing.assert_allclose**,
该函数等效于 **numpy.testing.assert_allclose**。
from warnings import showwarning
import numpy as np
import tensorflow as tf
import tvm
import vacc.quantize_weight
from tvm import relay
from tvm.relay import testing
from tvm.relay.frontend.common import infer_value
from tvm.relay import op as _op
from tvm.relay import analysis as _analysis
from tvm.relay import transform
from tvm.contrib import graph_runtime
def test_weight_quantization_result():
    """Compare weight-only quantization of one conv2d against references.

    A single ``relay.nn.conv2d`` is evaluated four ways on the same random
    activation and weight tensors:

    * the float32 module, built and run unchanged;
    * the float32 module fed weights that were divided by a scale, rounded,
      clipped to [-127, 127] and multiplied back by the scale in NumPy;
    * the module returned by ``vacc.quantize_weight.quantize``;
    * the same convolution computed with TensorFlow.

    The NumPy-processed result must match the quantized module's result, and
    the plain float result must match TensorFlow's, both checked with
    ``tvm.testing.assert_allclose`` (rtol=1e-7, atol=1e-3).
    """
    # Everything is built for and executed on the local CPU.
    target = "llvm"
    ctx = tvm.cpu(0)
    dtype = "float32"
    data_shape = (1, 4, 8, 8)
    weight_shape = (4, 4, 3, 3)

    # Relay function: conv2d(data, weight) with conv2d's default attributes.
    data = relay.var("data", dtype=dtype, shape=data_shape)
    weight = relay.var("weight", dtype=dtype, shape=weight_shape)
    func = relay.Function([data, weight], relay.nn.conv2d(data, weight))
    mod = relay.Module.from_expr(func)

    # Random inputs, kept as NumPy arrays (for the NumPy/TensorFlow paths)
    # and wrapped as TVM NDArrays (for the graph runtime).
    act = np.random.uniform(-1, 1, data_shape).astype(dtype)
    wei = np.random.uniform(-0.5, 0.5, weight_shape).astype(dtype)
    data_nd = tvm.nd.array(act, ctx=ctx)
    params = {"weight": tvm.nd.array(wei, ctx=ctx)}

    def get_output(module, param_dict):
        """Build ``module`` with ``param_dict``, run it, return output 0.

        The module text and the parameters are printed before building. The
        activation tensor created by the enclosing test is always used as the
        "data" input. The result is returned as a NumPy array.
        """
        print("\n---------------------------------------------")
        print("original module and params:")
        print(module.astext(show_meta_data=False))
        print(param_dict)
        print("---------------------------------------------")

        with relay.build_config(opt_level=2):
            graph_json, lib, built_params = relay.build(
                module, target, "llvm", params=param_dict)

        runtime = graph_runtime.create(graph_json, lib, ctx)
        runtime.set_input("data", data_nd)
        # Feed the parameters returned by relay.build, not the ones passed in.
        runtime.set_input(**built_params)
        runtime.run()

        result = runtime.get_output(0)
        print("out:\n", result)
        return result.asnumpy()

    def normal_compute():
        """Run the unmodified float32 module with the original weights."""
        return get_output(mod, params)

    def data_processed():
        """Run the float32 module with weights processed in NumPy.

        The weights are divided by ``max(|w|) / 127``, rounded, clipped to
        [-127, 127] and multiplied by the same scale again.
        """
        valid_range = 127
        k_scale = np.abs(wei).max() / valid_range
        levels = np.clip(np.round(wei / k_scale), -valid_range, valid_range)
        processed = {"weight": tvm.nd.array(levels * k_scale, ctx=ctx)}
        return get_output(mod, processed)

    def weightonly_quantization():
        """Run the module produced by ``vacc.quantize_weight.quantize``.

        NOTE(review): presumably this pass rewrites the module using the
        given params; its exact semantics are not visible here -- confirm
        against the vacc.quantize_weight implementation.
        """
        with vacc.quantize_weight.qconfig():
            quantized_mod = vacc.quantize_weight.quantize(mod, params)
        print("\n---------------------------------------------")
        print("module after quantization:")
        print(quantized_mod.astext(show_meta_data=False))
        print("---------------------------------------------")
        return get_output(quantized_mod, params)

    def compute_by_tf():
        """Compute the convolution with TensorFlow (stride 1, VALID padding).

        The activation is transposed with (0, 2, 3, 1) and the kernel with
        (2, 3, 1, 0) before being handed to ``tf.nn.conv2d``. The result is
        returned in TensorFlow's own layout; the caller transposes it back.
        """
        x_in = act.transpose(0, 2, 3, 1)
        kernel_in = wei.transpose(2, 3, 1, 0)
        conv = tf.nn.conv2d(
            tf.constant(x_in, dtype=tf.float32),
            tf.constant(kernel_in, dtype=tf.float32),
            strides=[1, 1, 1, 1],
            padding='VALID')
        with tf.Session() as sess:
            out = sess.run(conv)
            print(out)
        print("input shape:", x_in.shape)
        print("kernel shape:", kernel_in.shape)
        print("output shape:", out.shape)
        print("out:\n", out)
        return out

    float_out = normal_compute()
    processed_out = data_processed()
    quantized_out = weightonly_quantization()
    # TensorFlow's layout is NHWC; transpose its result to NCHW.
    tf_out = compute_by_tf().transpose(0, 3, 1, 2)

    # The float result is deliberately not compared with the NumPy-processed
    # one; a looser check (rtol=1e-3, atol=1e-2) for that pair was left
    # disabled by the original author.
    tvm.testing.assert_allclose(processed_out, quantized_out, rtol=1e-7, atol=1e-3)
    tvm.testing.assert_allclose(float_out, tf_out, rtol=1e-7, atol=1e-3)