#include <cstring>
#include <fstream>
#include <memory>
#include <sstream>
#include "flatbuffers/flatbuffers.h"
#include "MNN_generated.h"
// tool-local headers from MNN's quantization sources (DLOG and the Calibration class)
#include "logkit.h"
#include "calibration.hpp"

int main(int argc, const char* argv[]) {
    if (argc < 4) {
        DLOG(INFO) << "Usage: ./quantized.out src.mnn dst.mnn preTreatConfig.json\n";
        return 0;
    }
    const char* modelFile      = argv[1];
    const char* preTreatConfig = argv[3];
    const char* dstFile        = argv[2];
    DLOG(INFO) << ">>> modelFile: " << modelFile;
    DLOG(INFO) << ">>> preTreatConfig: " << preTreatConfig;
    DLOG(INFO) << ">>> dstFile: " << dstFile;
    std::unique_ptr<MNN::NetT> netT;
    // read the MNN model from disk into a mutable NetT object
    {
        std::ifstream input(modelFile, std::ios::binary); // binary mode: the model is a flatbuffer, not text
        std::ostringstream outputOs;
        outputOs << input.rdbuf();
        netT = MNN::UnPackNet(outputOs.str().c_str());
    }
    // temp build net for inference
    flatbuffers::FlatBufferBuilder builder(1024);
    auto offset = MNN::Net::Pack(builder, netT.get());
    builder.Finish(offset);
    int size      = builder.GetSize();
    auto ocontent = builder.GetBufferPointer();
    // model buffer for creating mnn Interpreter
    // build two copies of the buffer: one drives inference to collect activation statistics,
    // the other is unpacked again and rewritten as the quantized network
    std::unique_ptr<uint8_t[]> modelForInference(new uint8_t[size]); // uint8_t[] so the array form of delete is used
    memcpy(modelForInference.get(), ocontent, size);
    std::unique_ptr<uint8_t[]> modelOriginal(new uint8_t[size]);
    memcpy(modelOriginal.get(), ocontent, size);
    netT.reset();
    netT = MNN::UnPackNet(modelOriginal.get());
    // quantize model's weight
    DLOG(INFO) << "Calibrate the feature and quantize model...";
    // build the Calibration object, which runs calibration and performs the quantization
    std::shared_ptr<Calibration> calibration(
        new Calibration(netT.get(), modelForInference.get(), size, preTreatConfig));
    calibration->runQuantizeModel();
    DLOG(INFO) << "Quantize model done!";
    // serialize the quantized NetT and write it to dstFile
    flatbuffers::FlatBufferBuilder builderOutput(1024);
    builderOutput.ForceDefaults(true);
    auto len = MNN::Net::Pack(builderOutput, netT.get());
    builderOutput.Finish(len);
    {
        std::ofstream output(dstFile, std::ios::binary); // binary mode for the flatbuffer output
        output.write((const char*)builderOutput.GetBufferPointer(), builderOutput.GetSize());
    }
}
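The third argument, preTreatConfig, is a JSON file that tells the Calibration class where the calibration images live and how to preprocess them. A minimal sketch of such a file, with field names as documented for MNN's quantization tool and all values (image path, input size, image count) as placeholders to adapt:

{
    "format": "RGB",
    "mean": [127.5, 127.5, 127.5],
    "normal": [0.00784314, 0.00784314, 0.00784314],
    "width": 224,
    "height": 224,
    "path": "path/to/calibration/images/",
    "used_image_num": 500,
    "feature_quantize_method": "KL",
    "weight_quantize_method": "MAX_ABS"
}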
The quantized.cpp file implements the quantization; the flow is as follows:
- Read the model content through flatbuffers and build two model buffers: one is run for inference to collect activation statistics, the other is used to generate the quantized network.
- Invoke the Calibration class to perform the quantization and produce the quantized network.
- Save the quantized model (a minimal load-and-run check of the result is sketched below).
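After the tool finishes, the file written to dstFile is an ordinary MNN model and can be loaded with the standard Interpreter API. A minimal sketch, assuming the quantized model was saved as dst.mnn (the path is a placeholder) and default session settings:

#include <MNN/Interpreter.hpp>
#include <memory>

int main() {
    // load the quantized model produced by quantized.out
    std::shared_ptr<MNN::Interpreter> net(MNN::Interpreter::createFromFile("dst.mnn"));
    MNN::ScheduleConfig config;                // default backend and thread settings
    auto session = net->createSession(config);
    auto input   = net->getSessionInput(session, nullptr);
    // ... fill `input` with preprocessed data, then run the network
    net->runSession(session);
    auto output = net->getSessionOutput(session, nullptr);
    // read the results from `output`
    return 0;
}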