day_1

import cv2
import numpy as np
from numba import cuda
import time
import math
# gpu function
@cuda.jit
def process_gpu(img, channels):
    """CUDA kernel: brighten one pixel of ``img`` in place.

    Each thread handles the pixel at ``(tx, ty)`` of a 2-D launch grid and
    replaces every channel value ``v`` with ``v * 2.0 + 30`` clamped to
    the range [0, 255].

    Args:
        img: device array indexed as ``img[row, col, channel]``; it is
            modified in place.
        channels: number of channels to process per pixel.
    """
    # Absolute thread position in the 2-D grid; equivalent to
    # blockIdx * blockDim + threadIdx on each axis.
    tx, ty = cuda.grid(2)

    # The launch grid is rounded up to whole blocks, so threads in the last
    # row/column of blocks can land outside the image. They must not touch
    # memory.
    if tx >= img.shape[0] or ty >= img.shape[1]:
        return

    for c in range(channels):
        color = img[tx, ty, c] * 2.0 + 30
        # Clamp to [0, 255] before storing back.
        if color > 255:
            img[tx, ty, c] = 255
        elif color < 0:
            img[tx, ty, c] = 0
        else:
            img[tx, ty, c] = color
def process_cpu(img, dst):
    """Brighten ``img`` on the CPU, writing the result into ``dst``.

    Every channel value ``v`` is mapped to ``v * 2.0 + 30`` and clamped to
    the range [0, 255]. This is the per-pixel loop that the script times
    against ``process_gpu``.

    Args:
        img: 3-D array indexed as ``img[row, col, channel]``; only read.
        dst: array of the same shape as ``img``; written in place.
    """
    rows, cols, channels = img.shape
    for i in range(rows):
        for j in range(cols):
            # Use the image's real channel count rather than a fixed 3, so
            # this agrees with process_gpu and works for any channel count.
            for c in range(channels):
                color = img[i, j, c] * 2.0 + 30
                # Clamp to [0, 255] before storing.
                if color > 255:
                    dst[i, j, c] = 255
                elif color < 0:
                    dst[i, j, c] = 0
                else:
                    dst[i, j, c] = color
if __name__ == "__main__":
    # Load the test image. cv2.imread returns None (it does not raise) when
    # the file is missing or cannot be decoded, so fail early with a clear
    # message instead of crashing later on `img.shape`.
    image_path = "dog_test_101.jpg"
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError("could not read image: " + image_path)
    rows, cols, channels = img.shape

    # --- CPU reference run -------------------------------------------------
    dst_cpu = img.copy()
    start_cpu = time.perf_counter()
    process_cpu(img, dst_cpu)
    end_cpu = time.perf_counter()
    print("cpu process time: ", end_cpu - start_cpu)

    # --- GPU run -------------------------------------------------------------
    # One thread per pixel; round the grid up so the whole image is covered.
    threadsprblock = (16, 16)
    blockspergrid_x = int(math.ceil(rows / threadsprblock[0]))
    blockspergrid_y = int(math.ceil(cols / threadsprblock[1]))
    blockspergrid = (blockspergrid_x, blockspergrid_y)

    # Warm-up launch on a scratch copy: the first call of a @cuda.jit kernel
    # triggers compilation, which would otherwise be counted in the timing.
    # A scratch copy is used because the kernel modifies its input in place.
    process_gpu[blockspergrid, threadsprblock](cuda.to_device(img), channels)

    dImg = cuda.to_device(img)
    # Synchronize so pending device work is finished before the timer starts.
    cuda.synchronize()
    start_gpu = time.perf_counter()
    process_gpu[blockspergrid, threadsprblock](dImg, channels)
    # Kernel launches are asynchronous; wait for completion before stopping
    # the timer.
    cuda.synchronize()
    end_gpu = time.perf_counter()
    dst_gpu = dImg.copy_to_host()
    print("gpu process time: ", end_gpu - start_gpu)

    # Save both results.
    cv2.imwrite("result_cpu.jpg", dst_cpu)
    cv2.imwrite("result_gpu.jpg", dst_gpu)
    print("####   done")

what_is_cuda

异构计算

SM:

每16 核共享一个解码器/译码器
每32 核共享一个 context, memory.

只读：constant memory、texture memory
其余存储器均可读写

IT_programmer

cuda B站视频

day_1

what_is_cuda

异构计算

cuda python

install cuda

end