[翻译]交叉编译和RPC — tvm 0.7.dev1文档



# **交叉编译和RPC**



git clone --recursive https://github.com/apache/incubator-tvm tvm
cd tvm
make runtime -j2

`~/.bashrc` `~/.bashrc` `vi ~/.bashrc` `~/tvm`

export PYTHONPATH=$PYTHONPATH:~/tvm/python

source ~/.bashrc

python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090

INFO:root:RPCServer: bind to 0.0.0.0:9090


import numpy as np

import tvm
from tvm import te
from tvm import rpc
from tvm.contrib import util

# Declare the "add one" computation: B[i] = A[i] + 1.0 over a vector of
# length 1024, and create a default schedule for it.
n = tvm.runtime.convert(1024)
A = te.placeholder((n,), name='A')
B = te.compute((n,), lambda i: A[i] + 1.0, name='B')
s = te.create_schedule(B.op)

# When True, build with the plain 'llvm' target; when False, build with
# the armv7l-linux-gnueabihf target string below.
local_demo = True

if local_demo:
    target = 'llvm'
else:
    target = 'llvm -target=armv7l-linux-gnueabihf'

func = tvm.build(s, [A, B], target=target, name='add_one')

# save the lib at a local temp folder
temp = util.tempdir()
path = temp.relpath('lib.tar')
func.export_library(path)
`local_demo` `target` `build` `'llvm -target=armv7l-linux-gnueabihf'` `'llvm -target=aarch64-linux-gnu'`
`gcc -v` `Target:`
`-target`

  • `llc -mtriple=<your device target triple> -mattr=help`

# Pick the RPC session: an in-process LocalSession for the local demo,
# otherwise a connection to the RPC server at host:port.
if local_demo:
    remote = rpc.LocalSession()
else:
    # The following is my environment, change this to the IP address of your target device
    host = '10.77.1.162'
    port = 9090
    remote = rpc.connect(host, port)

# Send the exported library through the session, then load it by the
# file name 'lib.tar'.
remote.upload(path)
func = remote.load_module('lib.tar')

# create arrays on the remote device
ctx = remote.cpu()
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)

# the function will run on the remote device
func(a, b)
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

# time_evaluator: wrap the module's entry function in a timer (number=10)
# and report the mean cost taken from its result.
time_f = func.time_evaluator(func.entry_name, ctx, number=10)
cost = time_f(a, b).mean
print('%g secs/op' % cost)

1.292e-07 secs/op



cp cmake/config.cmake .
sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake
make runtime -j4

def run_opencl():
    """Build the "add one" kernel for OpenCL, run it over RPC and verify it.

    Uses the module-level ``A``, ``B`` and ``temp`` objects defined earlier
    in this tutorial. The kernel is built for the "opencl" target with the
    host target given below, exported to ``lib_cl.tar``, uploaded to the
    RPC server at ``opencl_device_host:opencl_device_port``, executed on
    the remote ``cl()`` context, and checked against ``a + 1``.

    Raises:
        AssertionError: if the remote result differs from ``a + 1``
            (raised by ``np.testing.assert_equal``).
    """
    # NOTE: This is the setting for my rk3399 board. You need to modify
    # them according to your environment.
    target_host = "llvm -target=aarch64-linux-gnu"
    opencl_device_host = '10.77.1.145'
    opencl_device_port = 9090

    # create schedule for the above "add one" compute declaration
    s = te.create_schedule(B.op)
    # Split the single axis by a factor of 32 and bind the outer/inner
    # parts to the blockIdx.x / threadIdx.x thread axes.
    xo, xi = s[B].split(B.op.axis[0], factor=32)
    s[B].bind(xo, te.thread_axis("blockIdx.x"))
    s[B].bind(xi, te.thread_axis("threadIdx.x"))
    func = tvm.build(s, [A, B], "opencl", target_host=target_host)

    remote = rpc.connect(opencl_device_host, opencl_device_port)

    # export and upload
    path = temp.relpath('lib_cl.tar')
    func.export_library(path)
    remote.upload(path)
    func = remote.load_module('lib_cl.tar')

    # run
    ctx = remote.cl()
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    func(a, b)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    print("OpenCL test passed!")