```cpp
int main(int argc, const char* argv[]) {
    if (argc < 4) {
        DLOG(INFO) << "Usage: ./quantized.out src.mnn dst.mnn preTreatConfig.json\n";
        return 0;
    }
    const char* modelFile      = argv[1];
    const char* preTreatConfig = argv[3];
    const char* dstFile        = argv[2];
    DLOG(INFO) << ">>> modelFile: " << modelFile;
    DLOG(INFO) << ">>> preTreatConfig: " << preTreatConfig;
    DLOG(INFO) << ">>> dstFile: " << dstFile;

    std::unique_ptr<MNN::NetT> netT;
    // read the MNN model and unpack it into the mutable NetT representation
    {
        std::ifstream input(modelFile);
        std::ostringstream outputOs;
        outputOs << input.rdbuf();
        netT = MNN::UnPackNet(outputOs.str().c_str());
    }

    // temp build net for inference
    flatbuffers::FlatBufferBuilder builder(1024);
    auto offset = MNN::Net::Pack(builder, netT.get());
    builder.Finish(offset);
    int size      = builder.GetSize();
    auto ocontent = builder.GetBufferPointer();

    // model buffer for creating the MNN Interpreter
    // make two copies of the serialized model: one runs inference to collect
    // activation statistics, the other is used to generate the quantized network
    std::unique_ptr<uint8_t[]> modelForInference(new uint8_t[size]);
    memcpy(modelForInference.get(), ocontent, size);

    std::unique_ptr<uint8_t[]> modelOriginal(new uint8_t[size]);
    memcpy(modelOriginal.get(), ocontent, size);

    netT.reset();
    netT = MNN::UnPackNet(modelOriginal.get());

    // quantize the model's weights
    DLOG(INFO) << "Calibrate the feature and quantize model...";
    // construct the Calibration object, which performs the actual quantization
    std::shared_ptr<Calibration> calibration(
        new Calibration(netT.get(), modelForInference.get(), size, preTreatConfig));
    calibration->runQuantizeModel();
    DLOG(INFO) << "Quantize model done!";

    // serialize the quantized NetT and write it to dstFile
    flatbuffers::FlatBufferBuilder builderOutput(1024);
    builderOutput.ForceDefaults(true);
    auto len = MNN::Net::Pack(builderOutput, netT.get());
    builderOutput.Finish(len);
    {
        std::ofstream output(dstFile);
        output.write((const char*)builderOutput.GetBufferPointer(), builderOutput.GetSize());
    }
    return 0;
}
```
The quantized.cpp file drives the quantization process; the overall flow is as follows:

- Read the model with FlatBuffers and make two copies of the serialized model buffer: one is used to run inference and collect activation (feature) statistics, the other is used to generate the quantized network.
- Invoke the Calibration class to calibrate and quantize the model, producing the quantized network; this step is driven by the preTreatConfig.json passed on the command line (a sample config is sketched after this list).
- Save the quantized model to the destination file.
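
For orientation, preTreatConfig.json describes the calibration image set and its preprocessing. The sketch below follows the field names used in MNN's quantization tool documentation (format, mean, normal, width, height, path, used_image_num, feature_quantize_method, weight_quantize_method); treat it as an illustrative example, since the exact fields can vary between MNN versions.

```json
{
    "format": "RGB",
    "mean": [127.5, 127.5, 127.5],
    "normal": [0.00784314, 0.00784314, 0.00784314],
    "width": 224,
    "height": 224,
    "path": "path/to/calibration/images/",
    "used_image_num": 500,
    "feature_quantize_method": "KL",
    "weight_quantize_method": "MAX_ABS"
}
```

With such a config, the tool is invoked as `./quantized.out src.mnn dst.mnn preTreatConfig.json`, matching the usage string printed by main().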
