Backend is MNN's abstraction over compute devices. MNN already ships with CPU, Vulkan, OpenCL, Metal, and other backends; adding an external NPU backend follows a slightly different path, described below.

1. Architecture Diagram

(Figure: MNN backend architecture; image.png not reproduced here.)

2. Adding a Backend

2.1 Implementation

Inherit from the abstract Backend class and implement all of its pure virtual functions; a minimal onAcquire sketch follows the class declaration.

```cpp
// Call sequence: constructor -> onCreate -> onResizeBegin -> onResizeEnd -> onAcquire
//   -> onCopyBuffer -> onExecuteBegin -> onExecuteEnd -> onCopyBuffer -> onClearBuffer
class NPUBackend : public Backend {
public:
    // NPUBackend constructor; NPURuntime holds cache parameters and extra compile options
    NPUBackend(const NPURuntime* runtime);
    virtual ~NPUBackend();
    // onCreate: create the matching NPU operator for the type of the given op
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, const MNN::Op* op) override;
    // Preparation before inference
    virtual void onExecuteBegin() const override;
    // Post-processing after inference
    virtual void onExecuteEnd() const override;
    // Unified memory-allocation entry point
    virtual Backend::MemObj* onAcquire(const Tensor* tensor, StorageType storageType) override;
    // Release cached buffers
    virtual bool onClearBuffer() override;
    // Copy tensor data (usually also handles data-format conversion)
    virtual void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const override;
    // Preparation before shape inference / memory allocation
    virtual void onResizeBegin() override;
    // Post-processing after shape inference / memory allocation
    virtual void onResizeEnd() override;
};
```
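For graph-style NPUs the backend compiles the whole model itself, so onAcquire rarely needs real device allocations. A minimal sketch under that assumption (the real hiai backend additionally records model input/output tensors here):

```cpp
// Sketch only: assumes the compiled NPU model owns all intermediate buffers.
Backend::MemObj* NPUBackend::onAcquire(const Tensor* tensor, StorageType storageType) {
    // Nothing to allocate per tensor; returning an empty MemObj tells MNN
    // there is no per-tensor resource to release later.
    return new Backend::MemObj;
}
```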

2.2 onCreate

In onCreate the backend creates an Execution for the given op; an Execution usually represents one operator instance. The creator registry it consults is sketched after the code:

```cpp
Execution* NPUBackend::onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, const MNN::Op* op) {
    // Look up the op type in the registry of NPU operator creators
    auto map  = getCreatorMap();
    auto iter = map->find(op->type());
    if (iter == map->end()) {
        // No creator registered for this op type
        if (op->name() != nullptr) {
            MNN_PRINT("[NPU] Don't support type %d, %s\n", op->type(), op->name()->c_str());
        }
        return nullptr;
    }
    // A creator exists for this op type: ask it to build the Execution
    auto exe = iter->second->onCreate(inputs, outputs, op, this);
    if (nullptr == exe) {
        if (op->name() != nullptr) {
            MNN_PRINT("[NPU] The Creator Don't support type %d, %s\n", op->type(), op->name()->c_str());
        }
        return nullptr;
    }
    return exe;
}
```
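getCreatorMap() above returns the operator registry that section 3.2 fills through NPUBackend::addCreator. A minimal sketch of that registry, assuming a static std::map keyed by OpType:

```cpp
static std::map<OpType, NPUBackend::Creator*>* getCreatorMap() {
    // Function-local static: safe to use from static op-registration initializers
    static std::map<OpType, NPUBackend::Creator*>* gMap = new std::map<OpType, NPUBackend::Creator*>;
    return gMap;
}

bool NPUBackend::addCreator(OpType type, Creator* creator) {
    auto map = getCreatorMap();
    if (map->find(type) != map->end()) {
        MNN_PRINT("[NPU] type %d creator already added\n", type);
        return false;
    }
    map->insert(std::make_pair(type, creator));
    return true;
}
```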

2.3 onCopyBuffer

A copy may happen inside the backend itself, or between the NPU backend and the CPU backend. The copy must handle layout conversion between tensors: with identical layouts the data can be copied directly, while different layouts (e.g. NHWC vs. NC4HW4) generally need a dedicated conversion. This work is implemented in onCopyBuffer; see https://github.com/alibaba/MNN/blob/master/source/backend/hiai/backend/NPUBackend.cpp for a full example, and the sketch below for the core idea.
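A minimal sketch, assuming host-visible tensor data and reusing MNN's CPU-side converter for the layout change (the real hiai implementation also handles NPU input/output tensors and quantized formats):

```cpp
void NPUBackend::onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const {
    auto srcFormat = TensorUtils::getDescribe(srcTensor)->dimensionFormat;
    auto dstFormat = TensorUtils::getDescribe(dstTensor)->dimensionFormat;
    if (srcFormat == dstFormat) {
        // Identical layouts: a raw copy is enough
        ::memcpy(dstTensor->host<void>(), srcTensor->host<void>(), srcTensor->size());
    } else {
        // Different layouts (e.g. NHWC -> NC4HW4): convert element order while copying
        CPUTensorConverter::convert(srcTensor, dstTensor);
    }
}
```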

2.4 onResizeEnd

Compiles the graph assembled during resize and generates an NPU-executable model (a sketch of the build step follows the snippet):

```cpp
void NPUBackend::onResizeEnd() {
    bulidIRModelAndLoad();
}
```
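bulidIRModelAndLoad is backend-specific. As a rough sketch of what it involves in the hiai case, assuming the HiAI DDK IR-build flow and hypothetical members mInputOps/mOutputOps collected during onResize (exact APIs vary across DDK versions):

```cpp
void NPUBackend::bulidIRModelAndLoad() {
    // 1. Assemble a ge::Graph from the IR ops recorded by each op's onResize
    ge::Graph graph("npu_graph");
    graph.SetInputs(mInputOps).SetOutputs(mOutputOps);
    ge::Model model("npu_model", "v1");
    model.SetGraph(graph);

    // 2. Compile the IR model into an NPU-executable buffer
    domi::HiaiIrBuild irBuild;
    domi::ModelBufferData modelBuffer;
    if (!irBuild.CreateModelBuff(model, modelBuffer)) {
        MNN_ERROR("[NPU] CreateModelBuff failed\n");
        return;
    }
    if (!irBuild.BuildIRModel(model, modelBuffer)) {
        MNN_ERROR("[NPU] BuildIRModel failed\n");
        irBuild.ReleaseModelBuff(modelBuffer);
        return;
    }

    // 3. Load the compiled buffer through hiai::AiModelMngerClient::Load
    //    (model-description setup omitted here)
}
```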

2.5 onExecuteEnd

Model inference runs here via the inference API provided by the NPU SDK (a sketch follows the snippet):

```cpp
void NPUBackend::onExecuteEnd() const {
    process(0);
}
```
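For HiAI, process wraps the synchronous Process call of AiModelMngerClient. A minimal sketch, where mMgrClient, mInputTensors, mOutputTensors, and mModelName are assumed members populated during model load:

```cpp
int NPUBackend::process(int modelIndex) const {
    hiai::AiContext context;
    context.AddPara("model_name", mModelName[modelIndex]);
    int istamp = 0;
    // Synchronous inference with a 1000 ms timeout
    auto ret = mMgrClient->Process(context, mInputTensors, mOutputTensors, 1000, istamp);
    return (ret == hiai::AI_SUCCESS) ? 0 : -1;
}
```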

2.6 Registration

Define a runtime creator; calling MNNInsertExtraRuntimeCreator in the registration routine completes the backend registration. The registration routine must be declared and invoked in BackendRegister.cpp (see the note after the code):

```cpp
struct NPUBackendCreator : RuntimeCreator {
    // Initialize the NPU environment and check device compatibility;
    // return nullptr to fall back when the device is not supported
    virtual Runtime* onCreate(const Backend::Info& info) const override {
        std::shared_ptr<hiai::AiModelMngerClient> mgrClient = std::make_shared<hiai::AiModelMngerClient>();
        if (mgrClient.get() == nullptr) {
            MNN_ERROR("mgrClient.get() == NULL");
            return nullptr;
        }
        auto ret = mgrClient->Init(nullptr);
        if (ret != hiai::AI_SUCCESS) {
            MNN_ERROR("[NPU] AiModelMngerClient Init Failed!\n");
            return nullptr;
        }
        const char* currentversion = mgrClient->GetVersion();
        if (currentversion != nullptr) {
            MNN_PRINT("[NPU] ddk currentversion : %s \n", currentversion);
        } else {
            MNN_ERROR("[NPU] current version don't support, return nullptr\n");
            return nullptr;
        }
        if (std::string(currentversion).compare("100.330.000.000") <= 0) {
            MNN_PRINT("[NPU] current version don't support, version=%s \n", currentversion);
            return nullptr;
        }
        return new NPURuntime(info);
    }
};

static const auto __npu_global_initializer = []() {
    MNNInsertExtraRuntimeCreator(MNN_FORWARD_USER_0, new NPUBackendCreator, true);
    return true;
}();
```
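The static initializer above runs when the NPU library is loaded. If the backend is linked into the core library instead, MNN's convention is an explicit registration function invoked from BackendRegister.cpp; a sketch, where registerNPURuntimeCreator is a hypothetical name:

```cpp
void registerNPURuntimeCreator() {
    MNNInsertExtraRuntimeCreator(MNN_FORWARD_USER_0, new NPUBackendCreator, true);
}
// In BackendRegister.cpp: declare registerNPURuntimeCreator() and call it from
// the central registration routine alongside the other backends.
```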

3. Adding an Operator

3.1 Implementation

Each new operator inherits from Execution and overrides two functions: onResize and onExecute. onResize converts MNN op parameters to their NPU counterparts and rebuilds the graph; onExecute is not used on NPU and simply returns NO_ERROR (see the sketch after the code below).

```cpp
class NPUCommonExecution : public Execution {
public:
    NPUCommonExecution(Backend *backend, const Op *op);
    virtual ~NPUCommonExecution() = default;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
};

// Example: NPUActivation maps an MNN activation op onto the HiAI IR
ErrorCode NPUActivation::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto opName = mOp->name()->str();
    // Fetch the IR ops that produce this op's inputs
    auto xOp = mNpuBackend->getInputOps(mOp);
    std::shared_ptr<ge::op::Activation> relu(new ge::op::Activation(opName + "_relu"));
    (*relu)
        .set_input_x(*xOp.get())
        .set_attr_coef(.000000)
        .set_attr_mode(mType);
    // Record the produced IR op so downstream ops can connect to it
    mNpuBackend->setOutputOps(mOp, {relu}, outputs);
    return NO_ERROR;
}
```
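As noted above, onExecute has nothing to do on NPU, since the whole graph is compiled in onResizeEnd and run in onExecuteEnd; it just reports success:

```cpp
ErrorCode NPUCommonExecution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    return NO_ERROR;
}
```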

3.2 Registration

With the operator's Execution in place, register it so the backend can look it up by op type (a one-line registration example follows the code):

```cpp
template <class T>
class NPUCreatorRegister {
public:
    NPUCreatorRegister(OpType type) {
        T *t = new T;
        NPUBackend::addCreator(type, t);
    }
    ~NPUCreatorRegister() = default;
};

// Generic creator for Executions constructible from (backend, op, inputs, outputs)
template <typename T>
class TypedCreator : public NPUBackend::Creator {
public:
    virtual ~TypedCreator() = default;
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs, const MNN::Op *op,
                                Backend *backend) const override {
        auto newOp = new T(backend, op, inputs, outputs);
        return newOp;
    }
};

// Dedicated creator when the Execution needs extra constructor arguments
class ActivationCreator : public NPUBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        return new NPUActivation(backend, op, inputs, outputs, 1);
    }
};

NPUCreatorRegister<ActivationCreator> __relu_op(OpType_ReLU);
```
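For ops whose Execution constructor matches the (backend, op, inputs, outputs) signature, the generic TypedCreator saves writing a dedicated creator class, for example (NPUSoftmax is illustrative):

```cpp
static NPUCreatorRegister<TypedCreator<NPUSoftmax>> __softmax_op(OpType_Softmax);
```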

4. Build Integration

When building with CMake, the code changes must be accompanied by matching CMakeLists.txt changes:

  1. In the backend directory (e.g. hiai or tensorrt), add a CMakeLists.txt that imports the dependent libraries and lists the sources to build:

```cmake
file(GLOB_RECURSE MNN_NPU_SRCS ${CMAKE_CURRENT_LIST_DIR}/*.cpp)
add_library(MNN_NPU SHARED ${MNN_NPU_SRCS})
add_library(hiai SHARED IMPORTED)
set_target_properties(hiai PROPERTIES
    IMPORTED_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/${ANDROID_ABI}/libhiai.so")
target_include_directories(MNN_NPU PRIVATE ${CMAKE_CURRENT_LIST_DIR}/backend/)
target_include_directories(MNN_NPU PRIVATE ${CMAKE_CURRENT_LIST_DIR}/3rdParty/include/)
```
  2. In the top-level CMakeLists.txt, add an MNN_NPU switch and the backend dependencies (the switch declaration itself is sketched after the block below):

```cmake
# NPU
IF(MNN_NPU)
    add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/hiai/)
    IF(MNN_SEP_BUILD)
        list(APPEND MNN_DEPS MNN_NPU)
    ELSE()
        list(APPEND MNN_TARGETS MNN_NPU)
        list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_NPU>)
        list(APPEND MNN_EXTRA_DEPENDS ${CMAKE_CURRENT_LIST_DIR}/source/backend/hiai/3rdParty/${ANDROID_ABI}/libhiai.so)
        list(APPEND MNN_EXTRA_DEPENDS ${CMAKE_CURRENT_LIST_DIR}/source/backend/hiai/3rdParty/${ANDROID_ABI}/libhiai_ir_build.so)
        list(APPEND MNN_EXTRA_DEPENDS ${CMAKE_CURRENT_LIST_DIR}/source/backend/hiai/3rdParty/${ANDROID_ABI}/libhiai_ir.so)
    ENDIF()
ENDIF()
```
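The MNN_NPU switch is declared alongside the other feature options; assuming the standard option pattern:

```cmake
option(MNN_NPU "Enable MNN NPU backend (HiAI)" OFF)
```

Enable it at configure time with `cmake .. -DMNN_NPU=ON`.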

5. Reference Code

5.1 Huawei HiAI

https://github.com/alibaba/MNN/blob/master/source/backend/hiai/backend/NPUBackend.cpp