trt_int8.py, trt_run.py, tf_muti_run.py, tf_one_run.py

quick start
https://docs.nvidia.com/deeplearning/tensorrt/quick-start-guide/index.html#precision

See the official documentation; for examples, refer to the TensorRT samples on GitHub.

My work here covers:
1 Converting the TensorFlow model from ckpt format to frozen_graph format
2 Converting the frozen graph to UFF format
3.1 Loading the UFF model in TensorRT and running inference
3.2 On top of 3.1, switching the precision to int8
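
A rough sketch of how these four steps chain together, using the Classification methods from trt_sample.py further below (the model directory and model name are placeholders, and this is not meant to run as-is):

# Sketch only; Classification and its helpers are defined in trt_sample.py below.
cls = Classification()
cls._all_pre_load("/path/to/model_dir", "resnet_v2_50")  # labels, network_fn, image_size
cls._pre_load_ckpt("/path/to/model_dir")                 # locate the latest checkpoint
pb_path = cls.ckpt_to_freeze()              # step 1: ckpt -> frozen_model.pb
uff_path = cls.freeze_to_uff(pb_path)       # step 2: frozen graph -> .uff via convert-to-uff
engine = cls.build_engine_by_uff(uff_path)  # step 3.1: parse the UFF file, build a float32 engine
# step 3.2: pass an EntropyCalibrator as the second argument so the builder
# enables trt.BuilderFlag.INT8 and calibrates on the supplied images.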

https://github.com/NVIDIA/TensorRT/issues/301

pycuda tutorial
TensorRT basics (engine/context/buffer, etc.)
https://zhuanlan.zhihu.com/p/336256668
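
For orientation, here is a condensed sketch of the engine/context/buffer pattern those references cover; trt_common.py below implements the same flow in full. The implicit-batch API used here follows the TensorRT 7.x Python bindings assumed throughout this note, and binding 0 is assumed to be the input and binding 1 the output:

import numpy as np
import pycuda.autoinit  # creates and cleans up the CUDA context
import pycuda.driver as cuda
import tensorrt as trt


def run_once(engine, input_array):
    stream = cuda.Stream()
    # One page-locked host buffer and one device buffer per binding.
    host_in = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)),
                                    trt.nptype(engine.get_binding_dtype(0)))
    host_out = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)),
                                     trt.nptype(engine.get_binding_dtype(1)))
    dev_in = cuda.mem_alloc(host_in.nbytes)
    dev_out = cuda.mem_alloc(host_out.nbytes)
    np.copyto(host_in, input_array.ravel())
    # The execution context holds the per-inference state for an engine.
    with engine.create_execution_context() as context:
        cuda.memcpy_htod_async(dev_in, host_in, stream)    # host -> device
        context.execute_async(batch_size=1,
                              bindings=[int(dev_in), int(dev_out)],
                              stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(host_out, dev_out, stream)  # device -> host
        stream.synchronize()
    return host_out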

trt_sample.py


# default_model_name: "resnet_v2_50"
import os
import sys

import pycuda.driver as cuda
# This import causes pycuda to automatically manage CUDA context creation and cleanup.
import pycuda.autoinit
import tensorflow as tf
import tensorrt as trt
from tensorflow.python.tools import freeze_graph

from trt_common import tf_image_to_trt_images, EntropyCalibrator
from trt_common import (convert_prob_to_dict, deserialize_cuda_engine,
                        do_inference, load_labels, serialize_cuda_engine,
                        to_calculate_value_softmax)

# below is private_module.
parentpath = '/yyy/xxx/'
path_list = [
    parentpath, parentpath + 'lib', parentpath + 'engine',
    parentpath + 'inference', parentpath + 'applications',
    os.path.join(parentpath, "applications/kkk")
]
sys.path += path_list
import letrain
from nets import nets_factory
from preprocessing import image_preprocessing, preprocessing_factory
# above is private_module.

tf.app.flags.DEFINE_string('model_name', '', '')
tf.app.flags.DEFINE_string('model_path', '', '')
tf.app.flags.DEFINE_string('output_path', '.', '')
tf.app.flags.DEFINE_string('trt_mode', 'float32', '')
tf.app.flags.DEFINE_string('image_path', '', '')
tf.app.flags.DEFINE_string('build_or_infer', "infer", '')
FLAGS = tf.app.flags.FLAGS

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
INT8_ENGINE = "trt_int8.engine"
FLOAT32_ENGINE = "trt_float32.engine"
# CALIBRATION_IMAGE = "/home/daiyi1/for_autoML/trt/trt_im/engine_dir/train_62.jpg"  # single image
CALIBRATION_IMAGE = "/home/daiyi1/for_autoML/trt/trt_im/images_list_dir/"


class Classification(object):
    def _all_pre_load(self, model_path, model_name):
        self.labels_dict = load_labels(model_path)
        num_classes = len(self.labels_dict)
        self.model_name = model_name
        self.network_fn = nets_factory.get_network_fn(model_name,
                                                      num_classes,
                                                      is_training=False)
        self.image_size = self.network_fn.default_image_size

    def _pre_load_ckpt(self, model_path):
        self.input_node_name = "input_node_1"
        self.output_node_name = "output_node_1"
        # Location of the model checkpoint files.
        # model_name = "resnet_v2_50"
        if os.path.isdir(model_path):
            self.ckpt_path = tf.train.latest_checkpoint(model_path)
        self.model_path = model_path

    def preprocess_file(self, filename, height, width):
        """Preprocess data from a single image file.
        Args:
            filename: image file name (full path)
            height: expected image height
            width: expected image width
        Return:
            image with the expected height/width, preprocessed for the corresponding nets model.
        """
        class Record(object):
            pass

        # decode img
        _data = Record()
        _data.name = filename
        file_contents = tf.read_file(_data.name)
        record_bytes = tf.image.decode_image(file_contents,
                                             channels=3,
                                             name='image_decode')
        _data.uint8image = record_bytes
        _name = self.model_name
        _fn = preprocessing_factory.get_preprocessing(_name, is_training=False)
        image = get_image(_data, height, width, _fn)
        return tf.reshape(image, [1, height, width, 3])

    def preprocess_filelist(self, filenamelist, height, width, num_batch):
        """Preprocess data from an image file list.
        Args:
            filenamelist: image file name list (full paths)
            height: expected image height
            width: expected image width
            num_batch: number of files in filenamelist
        Return:
            images with the expected height/width, preprocessed for the corresponding nets model.
        """
        class Record(object):
            pass

        _data_list = []
        for i in range(num_batch):
            filename = filenamelist[i]
            result = Record()
            result.name = filename
            file_contents = tf.read_file(result.name)
            record_bytes = tf.image.decode_image(file_contents,
                                                 channels=3,
                                                 name='image_decode')
            result.uint8image = record_bytes
            _data_list.append(result)
        _name = self.model_name
        _reshaped_img_list = []
        _fn = preprocessing_factory.get_preprocessing(_name, is_training=False)
        for _data in _data_list:
            image = get_image(_data, height, width, _fn)
            image = tf.reshape(image, [height, width, 3])
            _reshaped_img_list.append(image)
        return tf.stack(_reshaped_img_list)

    def ckpt_to_freeze(self):
        file_path = os.path.join(self.model_path, 'pb_model')
        if not os.path.exists(file_path):
            os.makedirs(file_path)
        tf.reset_default_graph()
        # image_place is the input placeholder of the graph.
        image_place = tf.compat.v1.placeholder(
            tf.float32, [1, self.image_size, self.image_size, 3],
            name=self.input_node_name)
        # network_fn is your own model definition.
        logits, _ = self.network_fn(image_place)
        tf.identity(logits, name=self.output_node_name)
        freeze_pb_path = os.path.join(self.model_path, 'frozen_model.pb')
        with tf.compat.v1.Session() as sess:
            # Save the graph: this writes model.pb into the pb_model directory.
            # model.pb is then passed as input_graph to the freeze_graph call below.
            tf.io.write_graph(sess.graph_def, file_path, 'model.pb')
            # Merge the graph definition and the checkpoint weights into one frozen graph.
            freeze_graph.freeze_graph(
                input_graph=os.path.join(file_path, 'model.pb'),
                input_saver='',
                input_binary=False,
                input_checkpoint=self.ckpt_path,
                output_node_names=self.output_node_name,
                restore_op_name='save/restore_all',
                filename_tensor_name='save/Const:0',
                output_graph=freeze_pb_path,
                clear_devices=True,
                initializer_nodes='')
        return freeze_pb_path

    def freeze_to_uff(self, freeze_pb_path):
        os.system("convert-to-uff %s" % freeze_pb_path)
        full_file_path = freeze_pb_path.split(".")
        file_path = full_file_path[0:-1]
        file_path = file_path + ["uff"]
        return ".".join(file_path)

    def build_engine_by_uff(self, model_file, calib=None):
        # For more information on TRT basics,
        # refer to the introductory samples.
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network() as network, \
                builder.create_builder_config() as config, \
                trt.UffParser() as parser:
            config.max_workspace_size = GiB(1)
            if calib:
                config.set_flag(trt.BuilderFlag.INT8)
                config.int8_calibrator = calib
            # Parse the UFF network.
            parser.register_input(self.input_node_name,
                                  (3, self.image_size, self.image_size))
            parser.register_output(self.output_node_name)
            parser.parse(model_file, network)
            # Build and return an engine.
            engine = builder.build_engine(network, config)
            return engine

    def ckpt_to_plan_file(self, mode, output_path):
        uff_file_path = self.freeze_to_uff(self.ckpt_to_freeze())
        if mode == "float32":
            engine = self.build_engine_by_uff(uff_file_path)
            serialize_cuda_engine(os.path.join(output_path, FLOAT32_ENGINE),
                                  engine)
        elif mode == "int8":
            self._process_images(CALIBRATION_IMAGE)
            calib_file_sample = [i['pre_value'] for i in self.image_dict_list]
            calibration_cache = "calibration.cache"
            calib = EntropyCalibrator(calib_file_sample, calibration_cache)
            engine = self.build_engine_by_uff(uff_file_path, calib)
            serialize_cuda_engine(os.path.join(output_path, INT8_ENGINE),
                                  engine)

    def _process_images(self, image_file):
        self.image_dict_list = []
        if os.path.isfile(image_file):
            image = self.preprocess_file(image_file, self.image_size,
                                         self.image_size)
            tf_image_list = [{'file_path': image_file, 'pre_value': image}]
            trt_image_list = tf_image_to_trt_images(tf_image_list,
                                                    is_single=True)
        elif os.path.isdir(image_file):
            tf_image_list = [{
                'file_path': os.path.join(image_file, file)
            } for file in os.listdir(image_file)]
            images_list_value = self.preprocess_filelist(
                [file['file_path'] for file in tf_image_list], self.image_size,
                self.image_size, len(tf_image_list))
            for idx in range(len(tf_image_list)):
                tf_image_list[idx]['pre_value'] = images_list_value[idx]
            trt_image_list = tf_image_to_trt_images(tf_image_list,
                                                    is_single=False)
        else:
            raise ValueError("image_file must be an existing file or directory")
        self.image_dict_list = trt_image_list

    def inference(self, plan_file):
        self.engine = deserialize_cuda_engine(plan_file)
        image_dict_list = do_inference(self.image_dict_list, self.engine)
        all_prob = to_calculate_value_softmax(image_dict_list)
        res = convert_prob_to_dict(image_dict_list, all_prob, self.labels_dict)
        print("############ res is : ", res)
        return res


def from_ckpt_to_plan_all(obj, model_path, trt_mode):
    output_path = FLAGS.output_path.strip()
    obj._pre_load_ckpt(model_path)
    obj.ckpt_to_plan_file(trt_mode, output_path)


def run_plan_all(obj, model_path, trt_mode):
    image_path = FLAGS.image_path.strip()
    if trt_mode.lower() == "float32":
        plan_file = os.path.join(model_path, FLOAT32_ENGINE)
    elif trt_mode.lower() == "int8":
        plan_file = os.path.join(model_path, INT8_ENGINE)
    obj._process_images(image_file=image_path)
    obj.inference(plan_file=plan_file)


def get_image(_data, height, width, _fn):
    # FLAGS.data_format and FLAGS.task are expected to be defined by the
    # private modules imported above.
    if FLAGS.data_format == 'raw_image' and FLAGS.task == 'custom':
        resize_side = int(height * 1.15)
        image = image_preprocessing.preprocess_image(_data.uint8image,
                                                     height,
                                                     width,
                                                     resize_side=resize_side)
    elif FLAGS.data_format == 'tfrecord' and FLAGS.task == 'custom':
        raise Exception("ERROR: custom data does not support tfrecord input")
    else:
        image = _fn(_data.uint8image, height, width)
    return image


def GiB(val):
    return val * 1 << 30


if __name__ == "__main__":
    model_name = FLAGS.model_name.strip()
    model_path = FLAGS.model_path.strip()
    trt_mode = FLAGS.trt_mode.strip()
    if trt_mode.lower() not in ["float32", "int8"]:
        raise ValueError("ERROR, please confirm your 'trt_mode'. "
                         "It can only be 'int8' or 'float32'.")
    obj = Classification()
    obj._all_pre_load(model_path, model_name)
    is_infer = FLAGS.build_or_infer.strip()
    if is_infer.lower().startswith("build"):
        from_ckpt_to_plan_all(obj, model_path, trt_mode)
    elif is_infer.lower().startswith("infer"):
        run_plan_all(obj, model_path, trt_mode)
    else:
        raise ValueError("please input which action you want. "
                         "The options are to build an engine or to run "
                         "inference on an existing engine, so input 'build' "
                         "or 'infer'.")

trt_common.py

import os
import sys

import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import six
import tensorflow as tf

parentpath = '/opt/letrain/'
path_list = [
    parentpath, parentpath + 'lib', parentpath + 'engine',
    parentpath + 'inference', parentpath + 'applications',
    os.path.join(parentpath, "applications/frcnn")
]
sys.path += path_list

from nets import nets_factory
from preprocessing import image_preprocessing

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.WARN)
FLAGS = tf.app.flags.FLAGS
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


def convert_prob_to_dict(img_list, all_val, labels_dict):
    new_prob_val = []
    for val in all_val:
        if type(val) == list:
            new_prob_val.append(val)
        else:
            new_prob_val.append(val.tolist())
    prob_val = np.array(new_prob_val)
    classification_data = []
    if isinstance(prob_val, np.ndarray):
        probs = []
        labels = []
        index = np.argmax(prob_val, axis=1)
        for i, j in enumerate(index):
            probs.append(prob_val[i][j])
            labels.append(labels_dict[str(j)])
            result = {
                'label': labels_dict[str(j)],
                'probability': round(float(prob_val[i][j]), 4)
            }
            classification_data.append(result)

    _true, _false = [], []
    for id, _ in enumerate(classification_data):
        if img_list[id]['file_path'].split("/")[1].split(
                "_")[0] == classification_data[id]['label']:
            _true.append(classification_data[id]['probability'])
        else:
            _false.append(classification_data[id]['probability'])
    result = {}
    result['true_count'] = len(_true)
    result['true_ave_value'] = sum(_true) / len(_true) if len(_true) else 0
    result['false_count'] = len(_false)
    result['false_ave_value'] = sum(_false) / len(_false) if len(_false) else 0
    return ("## inference_result: ", result)


def do_inference(image_dict_list, engine):
    inputs, outputs, bindings, stream = local_allocate_buffers(engine)
    import copy
    for image_file in image_dict_list:
        context2 = engine.create_execution_context()
        np.copyto(inputs[0].host, image_file['pre_value'])
        [
            cuda.memcpy_htod_async(inp.device, inp.host, stream)
            for inp in inputs
        ]
        # Run inference.
        context2.execute_async(batch_size=1,
                               bindings=bindings,
                               stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        [
            cuda.memcpy_dtoh_async(out.host, out.device, stream)
            for out in outputs
        ]
        # Synchronize the stream
        stream.synchronize()
        # Return only the host outputs.
        [logits_result] = [out.host for out in outputs]
        image_file['post_value'] = copy.deepcopy(logits_result)
        del context2

    return image_dict_list


def load_labels(model_path):
    LABELS_FILE = os.path.join(model_path, 'labels.txt')
    labels_dict = {}
    with open(LABELS_FILE) as f:
        for line in f:
            key, value = line.rstrip('\n').split(':')
            labels_dict[key] = value
    return labels_dict


def tf_image_to_trt_images(img_list, is_single=True):
    all_tr_images = []
    for image_path in img_list:
        before_value = image_path['pre_value']
        if is_single:
            after_value = tf.transpose(before_value, [0, 3, 1, 2])
        else:
            after_value = tf.transpose(before_value, [2, 0, 1])

        print("##### after image_path['pre_value']: ", after_value)
        all_tr_images.append(after_value)

    all_tr_images = tf_init_run_value(all_tr_images)

    for key, val in enumerate(img_list):
        val['pre_value'] = np.expand_dims([all_tr_images[key]], axis=0).ravel()

    return img_list


def tf_init_run_value(tf_placeholders):
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.3)
    sess = tf.compat.v1.Session(
        config=tf.compat.v1.ConfigProto(gpu_options=gpu_options,
                                        allow_soft_placement=True,
                                        log_device_placement=False))
    tf_values = sess.run(tf_placeholders)
    return tf_values


def to_calculate_value_softmax(img_list):
    with tf.Graph().as_default():
        with tf.device('/device:GPU:0'):
            all_prob = []
            for image_file in img_list:
                # Softmax simply normalizes an N*1 vector into values in the range (0, 1).
                prob = tf.nn.softmax(image_file['post_value'])
                all_prob.append(prob)
            return tf_init_run_value(all_prob)


def local_allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * 1  # * batch size (1 here)
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers.
        host_mem = cuda.pagelocked_empty(size, dtype)
        # Allocate the corresponding device memory.
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream


# Simple helper data class that's a little nicer to use than a 2-tuple.
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


class EntropyCalibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, training_data, cache_file, batch_size=1):
        # Whenever you specify a custom constructor for a TensorRT class,
        # you MUST call the constructor of the parent explicitly.
        trt.IInt8EntropyCalibrator2.__init__(self)

        self.cache_file = cache_file

        # Every time get_batch is called, the next batch of size batch_size will be copied to the device and returned.
        self.data = training_data
        self.batch_size = batch_size
        self.current_index = 0

        # Allocate enough memory for a whole batch.
        self.device_input = cuda.mem_alloc(self.data[0].nbytes * self.batch_size)

    def get_batch_size(self):
        return self.batch_size

    # TensorRT passes along the names of the engine bindings to the get_batch function.
    # You don't necessarily have to use them, but they can be useful to understand the order of
    # the inputs. The bindings list is expected to have the same ordering as 'names'.
    def get_batch(self, names):
        # if self.current_index + self.batch_size > self.data.shape[0]:
        if self.current_index + self.batch_size > len(self.data):
            # Returning None is the signal to terminate calibration: this
            # function is called repeatedly, and if it never returns None,
            # calibration never stops.
            return None

        current_batch = int(self.current_index / self.batch_size)
        if current_batch % 10 == 0:
            print("Calibrating batch {:}, containing {:} images".format(
                current_batch, self.batch_size))

        # self.data is a list of preprocessed numpy arrays, so the slice is
        # converted to one contiguous array before copying it to the device.
        batch = np.ascontiguousarray(
            self.data[self.current_index:self.current_index + self.batch_size])
        cuda.memcpy_htod(self.device_input, batch)
        self.current_index += self.batch_size
        return [self.device_input]

    def read_calibration_cache(self):
        # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)


def serialize_cuda_engine(plan_file, engine):
    with open(plan_file, "wb") as f:
        f.write(engine.serialize())
    return plan_file


def deserialize_cuda_engine(plan_file):
    trt_runtime = trt.Runtime(TRT_LOGGER)
    with open(plan_file, "rb") as f:
        engine = trt_runtime.deserialize_cuda_engine(f.read())
    return engine

Launching the app

deploy_app_service.py

import tensorflow as tf
from factory import create_app

if __name__ == 'deploy_app_service':
    gun_args = ['workers', 'bind', 'pythonpath', 'access-logfile', 'error-logfile', 'env', 'timeout']
    for param in gun_args:
        tf.app.flags.DEFINE_string(param, '', '')
    inf_app = create_app()


if __name__ == '__main__':
    tf.app.flags.DEFINE_enum('scenario', 'classification', ['classification', 'objectdetection', 'segmentation'], "Scenario for this service")
    tf.app.flags.DEFINE_string('url', '/api/service', 'URL used to access the deployed service')
    tf.app.flags.DEFINE_string('deploy_path', '', "deploy service path")
    tf.app.flags.DEFINE_string('lico_model_path', '', "model path, for inference job")
    tf.app.flags.DEFINE_string('lico_image_file', '', "image file to deal, for inference job")
    tf.app.flags.DEFINE_string('lico_output_dir', '', "output dir, for inference job")
    tf.app.flags.DEFINE_string('model_name', '', "model_name of the deployed service")
    tf.app.flags.DEFINE_string('trt_mode', 'float32', "trt_mode of the deployed service")
    tf.app.flags.DEFINE_integer('port', 61234, 'HTTP server port for the deployed service')
    tf.app.flags.DEFINE_string('service_uuid', '', 'Deployed service which would be add to URI')
    FLAGS = tf.app.flags.FLAGS

    from os.path import dirname
    from subprocess import check_call

    cmd = [
        'gunicorn',
        '--workers',
        '1',
        '--bind',
        '0.0.0.0:{0}'.format(FLAGS.port),
        '--pythonpath',
        dirname(__file__),
        '--access-logfile',
        '-',
        '--error-logfile',
        '-',
        '--timeout',
        '600',
        '--env',
        'TEST_SCENARIO={0}'.format(FLAGS.scenario),
        '--env',
        'TEST_MODEL_PATH={0}'.format(FLAGS.deploy_path),
        '--env',
        'TEST_MODEL_NAME={0}'.format(FLAGS.model_name),
        '--env',
        'TEST_TRT_MODE={0}'.format(FLAGS.trt_mode),
        '--env',
        'SERVICE_UUID={0}'.format(FLAGS.service_uuid),
        '--env',
        'SERVICE_URL={0}'.format(FLAGS.url),
        'deploy_app_service:inf_app'  # <module>:<app variable>, matching this file's module name
    ]
    check_call(cmd)

factory.py

from resource import DeployServiceRunner


def create_app():
    import os
    import falcon

    api = falcon.API()
    api.req_options.strip_url_path_trailing_slash = True
    scenario = os.environ.get('TEST_SCENARIO', '')
    model_path = os.environ.get('TEST_MODEL_PATH', '')
    model_name = os.environ.get('TEST_MODEL_NAME', '')
    trt_mode = os.environ.get('TEST_TRT_MODE', '')
    service_uuid = os.environ.get('SERVICE_UUID', '')
    service_url = os.environ.get('SERVICE_URL', '')
    if service_uuid:
        service_url = '/api/service/{0}'.format(service_uuid)
    api.add_route(
        service_url,
        DeployServiceRunner(scenario, model_path, model_name, trt_mode)
    )

    return api

resource.py


import base64
import falcon
import os
import sys
import traceback

from falcon.media.validators import jsonschema
from tempfile import NamedTemporaryFile


def get_infe_serv(scenario):
    print('Init Load Model')
    if scenario == "classification":
        from tensorrt_sample import Classification
        inference_service_mgt = Classification()
    else:
        # Only the classification scenario is implemented here; fail fast otherwise.
        raise ValueError("Unsupported scenario: %s" % scenario)
    return inference_service_mgt

class DeployServiceRunner(object):
    def __init__(self, scenario, deploy_path, model_name, trt_mode):
        self.deploy_path = deploy_path
        self.model_path = os.path.join(deploy_path, 'model')
        self.trt_mode = trt_mode
        self.InferenceServiceMgt = get_infe_serv(scenario)
        self.InferenceServiceMgt.load_model(self.model_path, model_name)
        print('Load Model Success')
        sys.stdout.flush()

    @jsonschema.validate(
        {
            "type": "object",
            "properties": {
                "image": {"type": "string"},
                "image_type": {
                    "type": "string",
                    "enum": ["BASE64"]
                },
            },
            "required": ["image", "image_type"]
        }
    )
    def on_post(self, req, resp):
        try:
            content = base64.b64decode(req.media.get('image'))
            with NamedTemporaryFile(mode='wb') as buf:
                buf.write(content)
                buf.flush()
                inf_result = self.InferenceServiceMgt.inference(buf.name)
        except ValueError:
            traceback.print_exc()
            raise falcon.HTTPBadRequest(
                description='The format of image is unsupported'
            )
        except TypeError:
            traceback.print_exc()
            raise falcon.HTTPBadRequest(
                description='It is not an image'
            )
        except Exception:
            traceback.print_exc()
            raise falcon.HTTPInternalServerError(
                description='Internal error of inference server'
            )
        resp.media = inf_result

end