
// Quantize convolution weights per output channel.
int QuantizeConvPerChannel(const float* weight, const int size, const float* bias, int8_t* quantizedWeight,
                           int32_t* quantizedBias, float* scale, const std::vector<float>& inputScale,
                           const std::vector<float>& outputScale, const std::string& method, bool mergeChannel) {
    const int inputChannels  = inputScale.size();
    const int outputChannels = outputScale.size();
    const int icXoc          = inputChannels * outputChannels;
    DCHECK(size % icXoc == 0) << "Input Data Size Error!";
    std::vector<float> quantizedWeightScale(outputChannels);
    // Stays 1.0 on the non-merged path, where the input scales are folded into the weights instead.
    float inputScalexWeight = 1.0f;
    // mergeChannel defaults to true.
    if (mergeChannel) {
        if (method == "MAX_ABS") {
            SymmetricQuantizeWeight(weight, size, quantizedWeight, quantizedWeightScale.data(), outputChannels);
        } else if (method == "ADMM") {
            QuantizeWeightADMM(weight, size, quantizedWeight, quantizedWeightScale.data(), outputChannels);
        }
        // With merged channels, a single input scale is shared across all input channels.
        inputScalexWeight = inputScale[0];
    } else {
        const int kernelSize = size / icXoc;
        const int ocStride   = size / outputChannels;
        // Fold each input channel's scale into the weights so a single
        // symmetric quantization pass handles per-input-channel scaling.
        std::vector<float> weightMultiByInputScale(size);
        for (int oc = 0; oc < outputChannels; ++oc) {
            for (int ic = 0; ic < inputChannels; ++ic) {
                for (int i = 0; i < kernelSize; ++i) {
                    const int index                = oc * ocStride + ic * kernelSize + i;
                    weightMultiByInputScale[index] = inputScale[ic] * weight[index];
                }
            }
        }
        if (method == "MAX_ABS") {
            SymmetricQuantizeWeight(weightMultiByInputScale.data(), size, quantizedWeight, quantizedWeightScale.data(), outputChannels);
        } else if (method == "ADMM") {
            QuantizeWeightADMM(weightMultiByInputScale.data(), size, quantizedWeight, quantizedWeightScale.data(), outputChannels);
        }
    }
    for (int i = 0; i < outputChannels; ++i) {
        if (outputScale[i] == 0) {
            scale[i] = 0.0f;
        } else {
            // Final requantization scale: inputScale * weightScale / outputScale, per output channel.
            scale[i] = inputScalexWeight * quantizedWeightScale[i] / outputScale[i];
        }
    }
    // Quantize the bias to int32, in units of inputScale * weightScale
    // (the units of the int8 convolution's int32 accumulator).
    if (bias) {
        for (int i = 0; i < outputChannels; ++i) {
            if (inputScalexWeight == 0 || quantizedWeightScale[i] == 0) {
                quantizedBias[i] = 0;
            } else {
                quantizedBias[i] = static_cast<int32_t>(bias[i] / (inputScalexWeight * quantizedWeightScale[i]));
            }
        }
    }
    return 0;
}
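
// Illustrative sketch (a hypothetical helper, not MNN's actual inference
// kernel) of how the `scale` and `quantizedBias` produced above are consumed
// at run time. The int32 accumulator of an int8 convolution holds values in
// units of inputScale * weightScale; adding the pre-divided bias and then
// multiplying by scale (= inputScale * weightScale / outputScale) converts
// the result into the output tensor's int8 quantization units.
static int8_t RequantizeExample(int32_t accumulator, int32_t quantizedBias, float scale) {
    const float requantized = (accumulator + quantizedBias) * scale;
    const int32_t rounded   = static_cast<int32_t>(roundf(requantized));
    // Clamp to the symmetric int8 range used by SymmetricQuantizeWeight below.
    return static_cast<int8_t>(std::min(127, std::max(-127, rounded)));
}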
// Quantize depthwise convolution weights; analogous to QuantizeConvPerChannel above.
int QuantizeDepthwiseConv(const float* weight, const int size, const float* bias, int8_t* quantizedWeight,
                          int32_t* quantizedBias, float* scale, const std::vector<float>& inputScale,
                          const std::vector<float>& outputScale, const std::string& method) {
    const int inputChannels  = inputScale.size();
    const int outputChannels = outputScale.size();
    DCHECK(inputChannels == outputChannels) << "Input Data Size Error!";
    std::vector<float> quantizedWeightScale(inputChannels);
    if (method == "MAX_ABS") {
        SymmetricQuantizeWeight(weight, size, quantizedWeight, quantizedWeightScale.data(), inputChannels);
    } else if (method == "ADMM") {
        QuantizeWeightADMM(weight, size, quantizedWeight, quantizedWeightScale.data(), inputChannels);
    }
    for (int c = 0; c < inputChannels; ++c) {
        if (outputScale[c] == 0) {
            scale[c] = 0.0f;
        } else {
            scale[c] = inputScale[c] * quantizedWeightScale[c] / outputScale[c];
        }
    }
    if (bias) {
        for (int i = 0; i < outputChannels; ++i) {
            if (inputScale[i] == 0 || quantizedWeightScale[i] == 0) {
                quantizedBias[i] = 0;
            } else {
                quantizedBias[i] = static_cast<int32_t>(bias[i] / (inputScale[i] * quantizedWeightScale[i]));
            }
        }
    }
    return 0;
}
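
// Sketch of why no channel merging is needed in the depthwise case, assuming
// the weight layout [channels, 1, kh, kw]: output channel c only ever reads
// input channel c, so scale[c] = inputScale[c] * weightScale[c] / outputScale[c]
// is exact per channel. The helper below is a hypothetical illustration of
// the accumulator's units, not MNN code.
static float DepthwiseDequantExample(int32_t accumulator, float inputScale, float weightScale) {
    // An int8 x int8 accumulation approximates the float accumulation divided
    // by inputScale * weightScale; multiplying restores real-valued units.
    return accumulator * inputScale * weightScale;
}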
// weight format is [co, ci, kh, kw]
int SymmetricQuantizeWeight(const float* weight, const int size, int8_t* quantizedWeight, float* scale,
                            const int channels) {
    DCHECK((size % channels) == 0) << "weight size error!";
    const int channelStride     = size / channels;
    const int quantizedMaxValue = 127;
    for (int c = 0; c < channels; ++c) {
        const auto weightChannelStart    = weight + c * channelStride;
        auto quantizedWeightChannelStart = quantizedWeight + c * channelStride;
        auto minmaxValue                 = std::minmax_element(weightChannelStart, weightChannelStart + channelStride);
        // Per-channel absolute maximum of the weights.
        const float dataAbsMax           = std::max(std::abs(*minmaxValue.first), std::abs(*minmaxValue.second));
        float scaleDataToInt8 = 1.0f;
        if (dataAbsMax == 0) {
            scale[c] = 0.0f;
        } else {
            // scale maps int8 back to float; scaleDataToInt8 maps float to int8.
            scale[c]        = dataAbsMax / quantizedMaxValue;
            scaleDataToInt8 = quantizedMaxValue / dataAbsMax;
        }
        for (int i = 0; i < channelStride; ++i) {
            // Round to the nearest integer and clamp to the symmetric range [-127, 127].
            const int32_t quantizedInt8Value = static_cast<int32_t>(roundf(weightChannelStart[i] * scaleDataToInt8));
            quantizedWeightChannelStart[i] =
                std::min(quantizedMaxValue, std::max(-quantizedMaxValue, quantizedInt8Value));
        }
    }
    return 0;
}
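
// Hedged usage sketch for SymmetricQuantizeWeight: quantize a single
// 4-element "channel" and dequantize it again. With scale = absmax / 127,
// per-element reconstruction error is bounded by scale / 2 (half a
// quantization step). The weight values are arbitrary, and the helper name
// is hypothetical.
static void SymmetricQuantizeRoundTripExample() {
    const float weight[4] = {0.5f, -1.27f, 0.02f, 1.0f};
    int8_t quantizedWeight[4];
    float scale[1]; // one channel
    SymmetricQuantizeWeight(weight, 4, quantizedWeight, scale, 1);
    for (int i = 0; i < 4; ++i) {
        // Here scale[0] = 1.27 / 127 = 0.01, so e.g. 0.5f maps to 50.
        const float dequantized = quantizedWeight[i] * scale[0];
        // |weight[i] - dequantized| <= scale[0] / 2 holds for every element.
        (void)dequantized;
    }
}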