fhog.py

  1. import numpy as np
  2. import cv2
  3. from numba import jit
  4. # 梯度直方图中的区域数
  5. NUM_SECTOR = 9
  6. FLT_EPSILON = 1e-07
  7. # FLT_EPSILON用于float类型
  8. # 目的:计算每个像素点的梯度(包括大小和方向)
  9. # 参数:x方向滤波后的梯度分量,y方向滤波后的梯度分量,cos数组,sin数组,高,宽,通道数
  10. @jit
  11. def func1(dx, dy, boundary_x, boundary_y, height, width, numChannels):
  12. # ------------------------返回值初始化------------------------------- #
  13. # r为一个height行,width列的float32型二维数组
  14. r = np.zeros((height, width), np.float32)
  15. # alfa为一个height行,width列的int型三维数组
  16. alfa = np.zeros((height, width, 2), np.int)
  17. # ----------------------------------------------------------------- #
  18. # -----------------------计算梯度大小-------------------------------- #
  19. for j in range(1, height - 1):
  20. # 遍历该行每一个元素
  21. for i in range(1, width - 1):
  22. # 第一颜色通道
  23. c = 0
  24. # x为该点第一颜色通道处x方向的梯度值
  25. x = dx[j, i, c]
  26. # y为该点第一颜色通道处y方向的梯度值
  27. y = dy[j, i, c]
  28. # 计算该点处的梯度幅值
  29. r[j, i] = np.sqrt(x * x + y * y)
  30. # 获取该点其它颜色通道的梯度值
  31. for ch in range(1, numChannels):
  32. tx = dx[j, i, ch]
  33. ty = dy[j, i, ch]
  34. # 计算该点处在其它颜色通道的梯度幅值
  35. magnitude = np.sqrt(tx * tx + ty * ty)
  36. # 使用梯度幅值最大的通道替代储存值
  37. if magnitude > r[j, i]:
  38. r[j, i] = magnitude
  39. # c = ch # 由于后续计算用不到c,因此可以注释掉
  40. x = tx
  41. y = ty
  42. # ----------------------------------------------------------------- #
  43. # -----------------------计算梯度方向-------------------------------- #
  44. # 此时计算的是该点处的梯度方向属于9个方向中的哪个
  45. # mmax为一个中间值,当mmax最大时,此时数组元素的位置即为第几个方向
  46. # maxi为梯度的方向
  47. # -------- mmax和maxi的初始化----------------- #
  48. mmax = boundary_x[0] * x + boundary_y[0] * y
  49. maxi = 0
  50. # ------------------------------------------ #
  51. # -------求取mmax的最大绝对值和此时的maxi-------- #
  52. for kk in range(0, NUM_SECTOR):
  53. dotProd = boundary_x[kk] * x + boundary_y[kk] * y
  54. if dotProd > mmax:
  55. mmax = dotProd
  56. maxi = kk
  57. elif -dotProd > mmax:
  58. mmax = -dotProd
  59. maxi = kk + NUM_SECTOR
  60. alfa[j, i, 0] = maxi % NUM_SECTOR
  61. alfa[j, i, 1] = maxi
  62. # ------------------------------------------------------------------ #
  63. # 返回每个像素点的梯度大小、方向
  64. return r, alfa
  65. # 目的:为每个cell进行特征mapping
  66. # 参数:每个像素点的梯度大小、方向、方向向量nearest、加权w矩阵、高、宽、x方向上的cell数、y方向上的cell数、每个cell的特征个数、3*每个cell的特征个数
  67. @jit
  68. def func2(r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize):
  69. # mapp:sizeX * sizeY * p个0元素的一维数组
  70. mapp = np.zeros((sizeX * sizeY * p), np.float32)
  71. for i in range(sizeY):
  72. for j in range(sizeX):
  73. for ii in range(k):
  74. for jj in range(k):
  75. if (i * k + ii > 0) and (i * k + ii < height - 1) and (j * k + jj > 0) and (j * k + jj < width - 1):
  76. mapp[i * stringSize + j * p + alfa[k * i + ii, j * k + jj, 0]] += \
  77. r[k * i + ii, j * k + jj] * w[ii, 0] * w[jj, 0]
  78. mapp[i * stringSize + j * p + alfa[k * i + ii, j * k + jj, 1] + NUM_SECTOR] += \
  79. r[k * i + ii, j * k + jj] * w[ii, 0] * w[jj, 0]
  80. if (i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1):
  81. mapp[(i + nearest[ii]) * stringSize + j * p + alfa[k * i + ii, j * k + jj, 0]] += \
  82. r[k * i + ii, j * k + jj] *w[ii, 1] *w[jj, 0]
  83. mapp[(i + nearest[ii]) * stringSize + j * p + alfa[k * i + ii, j * k + jj, 1] + NUM_SECTOR] += \
  84. r[k * i + ii, j * k + jj] * w[ii, 1] * w[jj, 0]
  85. if (j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1):
  86. mapp[i * stringSize + (j + nearest[jj]) * p + alfa[k * i + ii, j * k + jj, 0]] += \
  87. r[k * i + ii, j * k + jj] *w[ii, 0] *w[jj, 1]
  88. mapp[i * stringSize + (j + nearest[jj]) * p + alfa[k * i + ii, j * k + jj, 1] + NUM_SECTOR] += \
  89. r[k * i + ii, j * k + jj] * w[ii, 0] * w[jj, 1]
  90. if (i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1) and (j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1):
  91. mapp[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * p + alfa[k * i + ii, j * k + jj, 0]] += \
  92. r[k * i + ii, j * k + jj] * w[ii, 1] * w[jj, 1]
  93. mapp[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * p + alfa[k * i + ii, j * k + jj, 1] + NUM_SECTOR] += \
  94. r[k * i + ii, j * k + jj] * w[ii, 1] * w[jj, 1]
  95. # 返回mapping后的特征信息
  96. return mapp
  97. @jit
  98. def func3(partOfNorm, mappmap, sizeX, sizeY, p, xp, pp):
  99. newData = np.zeros((sizeY * sizeX * pp), np.float32)
  100. for i in range(1, sizeY + 1):
  101. for j in range(1, sizeX + 1):
  102. pos1 = i * (sizeX + 2) * xp + j * xp
  103. pos2 = (i - 1) * sizeX * pp + (j - 1) * pp
  104. valOfNorm = np.sqrt(partOfNorm[i * (sizeX + 2) + j] +
  105. partOfNorm[i * (sizeX + 2) + (j + 1)] +
  106. partOfNorm[(i + 1) * (sizeX + 2) + j] +
  107. partOfNorm[(i + 1) * (sizeX + 2) + (j + 1)]) + FLT_EPSILON
  108. newData[pos2:pos2 + p] = mappmap[pos1:pos1 + p] / valOfNorm
  109. newData[pos2 + 4 * p:pos2 + 6 * p] = mappmap[pos1 + p:pos1 + 3 * p] / valOfNorm
  110. valOfNorm = np.sqrt(partOfNorm[i * (sizeX + 2) + j] +
  111. partOfNorm[i * (sizeX + 2) + (j + 1)] +
  112. partOfNorm[(i - 1) * (sizeX + 2) + j] +
  113. partOfNorm[(i - 1) * (sizeX + 2) + (j + 1)]) + FLT_EPSILON
  114. newData[pos2 + p:pos2 + 2 * p] = mappmap[pos1:pos1 + p] / valOfNorm
  115. newData[pos2 + 6 * p:pos2 + 8 * p] = mappmap[pos1 + p:pos1 + 3 * p] / valOfNorm
  116. valOfNorm = np.sqrt(partOfNorm[i * (sizeX + 2) + j] +
  117. partOfNorm[i * (sizeX + 2) + (j - 1)] +
  118. partOfNorm[(i + 1) * (sizeX + 2) + j] +
  119. partOfNorm[(i + 1) * (sizeX + 2) + (j - 1)]) + FLT_EPSILON
  120. newData[pos2 + 2 * p:pos2 + 3 * p] = mappmap[pos1:pos1 + p] / valOfNorm
  121. newData[pos2 + 8 * p:pos2 + 10 * p] = mappmap[pos1 + p:pos1 + 3 * p] / valOfNorm
  122. valOfNorm = np.sqrt(partOfNorm[i * (sizeX + 2) + j] +
  123. partOfNorm[i * (sizeX + 2) + (j - 1)] +
  124. partOfNorm[(i - 1) * (sizeX + 2) + j] +
  125. partOfNorm[(i - 1) * (sizeX + 2) + (j - 1)]) + FLT_EPSILON
  126. newData[pos2 + 3 * p:pos2 + 4 * p] = mappmap[pos1:pos1 + p] / valOfNorm
  127. newData[pos2 + 10 * p:pos2 + 12 * p] = mappmap[pos1 + p:pos1 + 3 * p] / valOfNorm
  128. return newData
  129. @jit
  130. def func4(mappmap, p, sizeX, sizeY, pp, yp, xp, nx, ny):
  131. newData = np.zeros((sizeX * sizeY * pp), np.float32)
  132. for i in range(sizeY):
  133. for j in range(sizeX):
  134. pos1 = (i * sizeX + j) * p
  135. pos2 = (i * sizeX + j) * pp
  136. for jj in range(2 * xp): # 2*9
  137. newData[pos2 + jj] = np.sum(mappmap[pos1 + yp * xp + jj: pos1 + 3 * yp * xp + jj: 2 * xp]) * ny
  138. for jj in range(xp): # 9
  139. newData[pos2 + 2 * xp + jj] = np.sum(mappmap[pos1 + jj: pos1 + jj + yp * xp: xp]) * ny
  140. for ii in range(yp): # 4
  141. newData[pos2 + 3 * xp + ii] = np.sum(
  142. mappmap[pos1 + yp * xp + ii * xp * 2: pos1 + yp * xp + ii * xp * 2 + 2 * xp]) * nx
  143. return newData
  144. # 函数功能:计算image的hog特征,结果在map结构中的map大小为sizeX*sizeY*NUM_SECTOR*3
  145. # 函数参数:选中的子图,cell的大小,返回的特征图
  146. # 函数返回:提取到的特征信息
  147. def getFeatureMaps(image, k, mapp):
  148. # kernel为一个一维数组/向量,kernel=[[-1. 0. 1.]](卷积核)
  149. kernel = np.array([[-1., 0., 1.]], np.float32)
  150. # 获得子图的高和宽
  151. height = image.shape[0]
  152. width = image.shape[1]
  153. # 断言子图是一个三通道图片,否则报错
  154. assert (image.ndim == 3 and image.shape[2])
  155. # 子图的通道数
  156. numChannels = 3
  157. # 将图像分割成多个元胞(cell),计算x和y方向上cell的个数,cell=k
  158. sizeX = width // k
  159. sizeY = height // k
  160. # 每个cell的特征个数
  161. px = 3 * NUM_SECTOR
  162. p = px
  163. stringSize = sizeX * p
  164. # mapp为一个字典类型
  165. # mapp中相关数据项赋值
  166. mapp['sizeX'] = sizeX
  167. mapp['sizeY'] = sizeY
  168. mapp['numFeatures'] = p
  169. # Mapping后的特征大小为:sizeX*sizeY*numFeatures
  170. mapp['map'] = np.zeros((mapp['sizeX'] * mapp['sizeY'] * mapp['numFeatures']), np.float32)
  171. # 通过x-轴和y-轴 的cv2.filter2D操作对图像进行线性滤波,获取x-轴和y-轴的梯度,即dx、dy为子图在x、y轴上的梯度分量
  172. dx = cv2.filter2D(np.float32(image), -1, kernel) # 按x方向滤波
  173. dy = cv2.filter2D(np.float32(image), -1, kernel.T) # 按y方向滤波
  174. # 数组,为[0、1/9、2/9......1]*pi
  175. arg_vector = np.arange(NUM_SECTOR + 1).astype(np.float32) * np.pi / NUM_SECTOR
  176. # 数组,为arg_vector数组中各角度对应的cos值
  177. boundary_x = np.cos(arg_vector)
  178. # 数组,为arg_vector数组中各角度对应的sin值
  179. boundary_y = np.sin(arg_vector)
  180. # 获得子图上每个像素点的梯度梯度(包括大小和方向)
  181. r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels)
  182. # r:子图每个像素点的梯度大小;alfa:子图每个像素点的梯度方向
  183. # 根据HOG的cell大小设置权重向量w,设置“最近”标识向量nearest。原则是离cell越近,加权w越大,越远加权w越小。
  184. # “向量nearest”用于计算跟上下左右近邻的4个“半个cell”做加权。
  185. # nearest:k列的行向量,元素都为1
  186. nearest = np.ones(k, np.int)
  187. nearest[0:k // 2] = -1
  188. # w:k行2列的0矩阵
  189. # 以下过程为计算加权w矩阵的过程
  190. w = np.zeros((k, 2), np.float32)
  191. a_x = np.concatenate((k / 2 - np.arange(k / 2) - 0.5, np.arange(k / 2, k) - k / 2 + 0.5)).astype(np.float32)
  192. b_x = np.concatenate((k / 2 + np.arange(k / 2) + 0.5, -np.arange(k / 2, k) + k / 2 - 0.5 + k)).astype(np.float32)
  193. w[:, 0] = 1.0 / a_x * ((a_x * b_x) / (a_x + b_x))
  194. w[:, 1] = 1.0 / b_x * ((a_x * b_x) / (a_x + b_x))
  195. mapp['map'] = func2(r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize)
  196. return mapp
  197. # 函数功能:对梯度进行归一化和截断
  198. # 函数参数:提取到的特征信息、截断阈值
  199. # 函数返回:处理后的特征信息
  200. def normalizeAndTruncate(mapp, alfa):
  201. # 初始赋值
  202. sizeX = mapp['sizeX']
  203. sizeY = mapp['sizeY']
  204. p = NUM_SECTOR
  205. xp = NUM_SECTOR * 3
  206. pp = NUM_SECTOR * 12
  207. idx = np.arange(0, sizeX * sizeY * mapp['numFeatures'], mapp['numFeatures']).reshape((sizeX * sizeY, 1)) + np.arange(p)
  208. partOfNorm = np.sum(mapp['map'][idx] ** 2, axis=1)
  209. sizeX, sizeY = sizeX - 2, sizeY - 2
  210. newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp)
  211. # 截断
  212. newData[newData > alfa] = alfa
  213. mapp['numFeatures'] = pp
  214. mapp['sizeX'] = sizeX
  215. mapp['sizeY'] = sizeY
  216. mapp['map'] = newData
  217. return mapp
  218. # 函数功能:对特征信息进行PCA降维
  219. # 函数参数:归一化和截断后的特征信息
  220. # 函数返回:PCA降维处理后的特征信息
  221. def PCAFeatureMaps(mapp):
  222. sizeX = mapp['sizeX']
  223. sizeY = mapp['sizeY']
  224. p = mapp['numFeatures']
  225. pp = NUM_SECTOR * 3 + 4
  226. yp = 4
  227. xp = NUM_SECTOR
  228. nx = 1.0 / np.sqrt(xp * 2)
  229. ny = 1.0 / np.sqrt(yp)
  230. newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny)
  231. mapp['numFeatures'] = pp
  232. mapp['map'] = newData
  233. return mapp

kcftracker.py

import numpy as np
import cv2
import fhog


# --------------------------------
# FFT tool functions (fast Fourier transform helpers)
# --------------------------------

# Compute the forward or inverse DFT of an image.
# The input shape may be (m, n), (m, n, 1) or (m, n, 2); cv2.dft was measured
# faster here than the numpy/scipy FFTs.
def fftd(img, backwards=False):
    # backwards=True: inverse transform, scaled by 1/N (DFT_INVERSE|DFT_SCALE)
    # backwards=False: forward transform with packed complex output
    flags = (cv2.DFT_INVERSE | cv2.DFT_SCALE) if backwards else cv2.DFT_COMPLEX_OUTPUT
    return cv2.dft(np.float32(img), flags=flags)


# Return the real part (channel 0) of a complex-packed (m, n, 2) image.
def real(img):
    return img[..., 0]


# Return the imaginary part (channel 1) of a complex-packed (m, n, 2) image.
def imag(img):
    return img[..., 1]


# Element-wise complex product of two (m, n, 2) real/imag-packed arrays.
def complexMultiplication(a, b):
    ar, ai = a[..., 0], a[..., 1]
    br, bi = b[..., 0], b[..., 1]
    # (ar + ai*j) * (br + bi*j) = (ar*br - ai*bi) + (ar*bi + ai*br)*j
    res = np.zeros(a.shape, a.dtype)
    res[..., 0] = ar * br - ai * bi
    res[..., 1] = ar * bi + ai * br
    return res


# Element-wise complex division a / b of two (m, n, 2) real/imag-packed arrays.
def complexDivision(a, b):
    res = np.zeros(a.shape, a.dtype)
    br, bi = b[..., 0], b[..., 1]
    # 1 / |b|^2, the shared denominator of the complex quotient
    inv_norm = 1. / (br ** 2 + bi ** 2)
    res[..., 0] = (a[..., 0] * br + a[..., 1] * bi) * inv_norm
    res[..., 1] = (a[..., 1] * br - a[..., 0] * bi) * inv_norm
    return res


# Swap the quadrants of a 2-D array diagonally so the low-frequency terms
# end up at the center (even dimensions assumed, as in the original).
def rearrange(img):
    assert (img.ndim == 2)
    out = np.zeros(img.shape, img.dtype)
    h, w = img.shape
    yh, xh = h // 2, w // 2
    # top-left <-> bottom-right
    out[0:yh, 0:xh] = img[yh:h, xh:w]
    out[yh:h, xh:w] = img[0:yh, 0:xh]
    # top-right <-> bottom-left
    out[0:yh, xh:w] = img[yh:h, 0:xh]
    out[yh:h, 0:xh] = img[0:yh, xh:w]
    return out


# --------------------------------
# Rectangle (RECT) helper functions
# --------------------------------

# Right edge (x + width) of a [x, y, w, h] rectangle.
def x2(rect):
    return rect[0] + rect[2]


# Bottom edge (y + height) of a [x, y, w, h] rectangle.
def y2(rect):
    return rect[1] + rect[3]


# Clip rect = [x, y, w, h] in place so that it lies inside `limit`
# (also [x, y, w, h]); returns the same list.  A fully clipped rect
# degenerates to zero width/height.  This is likely why the KCF window
# never grows past the frame.
def limit(rect, limit):
    right = limit[0] + limit[2]
    bottom = limit[1] + limit[3]
    # shrink the size where the rect spills past the right/bottom edge
    if rect[0] + rect[2] > right:
        rect[2] = right - rect[0]
    if rect[1] + rect[3] > bottom:
        rect[3] = bottom - rect[1]
    # pull the origin inside, shrinking the size by the clipped amount
    if rect[0] < limit[0]:
        rect[2] -= (limit[0] - rect[0])
        rect[0] = limit[0]
    if rect[1] < limit[1]:
        rect[3] -= (limit[1] - rect[1])
        rect[1] = limit[1]
    # never report a negative size
    rect[2] = max(rect[2], 0)
    rect[3] = max(rect[3], 0)
    return rect


# Given the requested window `original` and its in-image clipped version
# `limited` (both [x, y, w, h]), return the margins [left, top, right, bottom]
# that were cut away on each side.
def getBorder(original, limited):
    res = [limited[0] - original[0],
           limited[1] - original[1],
           (original[0] + original[2]) - (limited[0] + limited[2]),
           (original[1] + original[3]) - (limited[1] + limited[3])]
    # clipping can only shrink the window, so every margin is non-negative
    assert (np.all(np.array(res) >= 0))
    return res


# Extract the patch `window` = [x, y, w, h] from `img` (the current frame).
# Parts of the window falling outside the image are padded back using
# `borderType`, so the returned patch always has the requested size.
def subwindow(img, window, borderType=cv2.BORDER_CONSTANT):
    # work on a copy so the caller's window is untouched
    cutWindow = list(window)
    # clip the copy to the image bounds [0, 0, width, height] (in place)
    limit(cutWindow, [0, 0, img.shape[1], img.shape[0]])
    assert (cutWindow[2] > 0 and cutWindow[3] > 0)
    # margins by which the requested window exceeds the clipped one
    border = getBorder(window, cutWindow)
    x0, y0, ww, hh = cutWindow
    res = img[y0:y0 + hh, x0:x0 + ww]

    if border != [0, 0, 0, 0]:
        # pad the clipped patch back up to the requested window size
        res = cv2.copyMakeBorder(res, border[1], border[3], border[0], border[2], borderType)
    return res


# KCF tracker
class KCFTracker:
    """Kernelized Correlation Filter tracker.

    hog          -- use HOG features; otherwise raw grayscale (the CSK tracker).
    fixed_window -- keep the template window at a fixed size.
    multiscale   -- additionally search over scale on every update.
    """

    def __init__(self, hog=False, fixed_window=True, multiscale=False):
        # ridge-regression regularization term
        self.lambdar = 0.0001
        # the search window is padding x the previous target size
        self.padding = 2.5
        # bandwidth of the Gaussian regression target
        self.output_sigma_factor = 0.125

        if hog:  # HOG features
            # VOT parameter set
            self.interp_factor = 0.012  # adaptive linear-interpolation factor
            self.sigma = 0.6  # Gaussian kernel bandwidth
            # TPAMI   #interp_factor = 0.02   #sigma = 0.5
            self.cell_size = 4  # HOG cell size
            self._hogfeatures = True
        else:  # raw grayscale  # aka CSK tracker
            self.interp_factor = 0.075
            self.sigma = 0.2
            self.cell_size = 1
            self._hogfeatures = False

        if multiscale:
            self.template_size = 96  # template size
            self.scale_step = 1.05  # scale step of the multi-scale search
            self.scale_weight = 0.96  # down-weight other scales for stability
        elif fixed_window:
            self.template_size = 96
            self.scale_step = 1
        else:
            self.template_size = 1
            self.scale_step = 1

        self._tmpl_sz = [0, 0]
        # cv::Size, [width,height]  #[int,int]
        self._roi = [0., 0., 0., 0.]
        # cv::Rect2f, [x,y,width,height]  #[float,float,float,float]
        self.size_patch = [0, 0, 0]
        # feature patch layout [rows, cols, channels]  #[int,int,int]
        self._scale = 1.
        # current scale factor, float
        self._alphaf = None
        # dual coefficients, numpy.ndarray (size_patch[0], size_patch[1], 2)
        self._prob = None
        # Gaussian regression target, numpy.ndarray (size_patch[0], size_patch[1], 2)
        self._tmpl = None
        # template features: raw (size_patch[0], size_patch[1]); hog (size_patch[2], size_patch[0]*size_patch[1])
        self.hann = None
        # Hanning window, same layout as self._tmpl

    # One-dimensional sub-pixel peak estimation.
    # Interpolates the fractional peak offset from the response values
    # left/center/right of the integer maximum.
    def subPixelPeak(self, left, center, right):
        divisor = 2 * center - right - left  # float
        # guard against a (near-)flat response to avoid dividing by ~0
        return 0 if abs(divisor) < 1e-3 else 0.5 * (right - left) / divisor

    # Build the Hanning window; executed once, on the first frame.
    def createHanningMats(self):
        hann2t, hann1t = np.ogrid[0:self.size_patch[0], 0:self.size_patch[1]]

        hann1t = 0.5 * (1 - np.cos(2 * np.pi * hann1t / (self.size_patch[1] - 1)))
        hann2t = 0.5 * (1 - np.cos(2 * np.pi * hann2t / (self.size_patch[0] - 1)))
        hann2d = hann2t * hann1t

        if self._hogfeatures:
            # replicate the flattened 2-D window across every feature channel
            hann1d = hann2d.reshape(self.size_patch[0] * self.size_patch[1])
            self.hann = np.zeros((self.size_patch[2], 1), np.float32) + hann1d
        else:
            self.hann = hann2d
        self.hann = self.hann.astype(np.float32)

    # Build the Gaussian regression target (returned in the Fourier domain);
    # executed only on the first frame.
    def createGaussianPeak(self, sizey, sizex):
        syh, sxh = sizey / 2, sizex / 2
        output_sigma = np.sqrt(sizex * sizey) / self.padding * self.output_sigma_factor
        mult = -0.5 / (output_sigma * output_sigma)
        y, x = np.ogrid[0:sizey, 0:sizex]
        y, x = (y - syh) ** 2, (x - sxh) ** 2
        res = np.exp(mult * (y + x))
        return fftd(res)

    # Gaussian-kernel correlation between x1 and x2 over all relative shifts,
    # with bandwidth self.sigma.  Both inputs must be MxN and periodic
    # (i.e. pre-multiplied by a cosine window).
    def gaussianCorrelation(self, x1, x2):
        if self._hogfeatures:
            # accumulate the cross-correlation channel by channel
            c = np.zeros((self.size_patch[0], self.size_patch[1]), np.float32)
            for i in range(self.size_patch[2]):
                x1aux = x1[i, :].reshape((self.size_patch[0], self.size_patch[1]))
                x2aux = x2[i, :].reshape((self.size_patch[0], self.size_patch[1]))
                caux = cv2.mulSpectrums(fftd(x1aux), fftd(x2aux), 0, conjB=True)
                caux = real(fftd(caux, True))
                c += caux
            c = rearrange(c)
        else:
            c = cv2.mulSpectrums(fftd(x1), fftd(x2), 0, conjB=True)  # 'conjB=' is necessary!
            c = fftd(c, True)
            c = real(c)
            c = rearrange(c)

        if x1.ndim == 3 and x2.ndim == 3:
            d = (np.sum(x1[:, :, 0] * x1[:, :, 0]) + np.sum(x2[:, :, 0] * x2[:, :, 0]) - 2.0 * c) / (
                    self.size_patch[0] * self.size_patch[1] * self.size_patch[2])
        elif x1.ndim == 2 and x2.ndim == 2:
            d = (np.sum(x1 * x1) + np.sum(x2 * x2) - 2.0 * c) / (
                    self.size_patch[0] * self.size_patch[1] * self.size_patch[2])

        d = d * (d >= 0)  # clamp tiny negative distances caused by rounding
        d = np.exp(-d / (self.sigma * self.sigma))

        return d

    # Extract the (padded, scaled) sub-window around the target and compute
    # its features; with inithann=1 the template geometry and the Hanning
    # window are (re)initialized as well.
    def getFeatures(self, image, inithann, scale_adjust=1.0):
        extracted_roi = [0, 0, 0, 0]  # [int,int,int,int]
        cx = self._roi[0] + self._roi[2] / 2  # float
        cy = self._roi[1] + self._roi[3] / 2  # float

        if inithann:
            padded_w = self._roi[2] * self.padding
            padded_h = self._roi[3] * self.padding

            if self.template_size > 1:
                # shrink the larger padded side to template_size, keep aspect
                if padded_w >= padded_h:
                    self._scale = padded_w / float(self.template_size)
                else:
                    self._scale = padded_h / float(self.template_size)
                self._tmpl_sz[0] = int(padded_w / self._scale)
                self._tmpl_sz[1] = int(padded_h / self._scale)
            else:
                self._tmpl_sz[0] = int(padded_w)
                self._tmpl_sz[1] = int(padded_h)
                self._scale = 1.

            if self._hogfeatures:
                # round to an even number of cells plus a 2-cell border
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // (
                        2 * self.cell_size) * 2 * self.cell_size + 2 * self.cell_size
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // (
                        2 * self.cell_size) * 2 * self.cell_size + 2 * self.cell_size
            else:
                # just make both sides even
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // 2 * 2
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // 2 * 2

        extracted_roi[2] = int(scale_adjust * self._scale * self._tmpl_sz[0])
        extracted_roi[3] = int(scale_adjust * self._scale * self._tmpl_sz[1])
        extracted_roi[0] = int(cx - extracted_roi[2] / 2)
        extracted_roi[1] = int(cy - extracted_roi[3] / 2)

        z = subwindow(image, extracted_roi, cv2.BORDER_REPLICATE)
        if z.shape[1] != self._tmpl_sz[0] or z.shape[0] != self._tmpl_sz[1]:
            z = cv2.resize(z, tuple(self._tmpl_sz))

        if self._hogfeatures:
            mapp = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
            mapp = fhog.getFeatureMaps(z, self.cell_size, mapp)
            mapp = fhog.normalizeAndTruncate(mapp, 0.2)
            mapp = fhog.PCAFeatureMaps(mapp)
            self.size_patch = list(map(int, [mapp['sizeY'], mapp['sizeX'], mapp['numFeatures']]))
            FeaturesMap = mapp['map'].reshape((self.size_patch[0] * self.size_patch[1],
                                               self.size_patch[2])).T  # (size_patch[2], size_patch[0]*size_patch[1])
        else:
            if z.ndim == 3 and z.shape[2] == 3:
                FeaturesMap = cv2.cvtColor(z, cv2.COLOR_BGR2GRAY)
                # z:(size_patch[0], size_patch[1], 3)  FeaturesMap:(size_patch[0], size_patch[1])   #0~255
            elif z.ndim == 2:
                FeaturesMap = z  # (size_patch[0], size_patch[1])  #0~255
            # normalize gray values to [-0.5, 0.5]
            FeaturesMap = FeaturesMap.astype(np.float32) / 255.0 - 0.5
            self.size_patch = [z.shape[0], z.shape[1], 1]

        if inithann:
            self.createHanningMats()  # createHanningMats needs size_patch

        FeaturesMap = self.hann * FeaturesMap
        return FeaturesMap

    # Detect the target in the current frame.
    # z: template features (previous frames); x: current-frame features.
    # Returns the sub-pixel peak offset from the window center and the peak value.
    def detect(self, z, x):
        k = self.gaussianCorrelation(x, z)
        res = real(fftd(complexMultiplication(self._alphaf, fftd(k)), True))

        # value and location of the maximum response
        _, pv, _, pi = cv2.minMaxLoc(res)  # pv:float  pi:tuple of int
        p = [float(pi[0]), float(pi[1])]  # cv::Point2f, [x,y]  #[float,float]

        # refine the peak to sub-pixel accuracy where both neighbors exist
        if 0 < pi[0] < res.shape[1] - 1:
            p[0] += self.subPixelPeak(res[pi[1], pi[0] - 1], pv, res[pi[1], pi[0] + 1])
        if 0 < pi[1] < res.shape[0] - 1:
            p[1] += self.subPixelPeak(res[pi[1] - 1, pi[0]], pv, res[pi[1] + 1, pi[0]])

        # express the peak as an offset from the window center
        p[0] -= res.shape[1] / 2.
        p[1] -= res.shape[0] / 2.

        print('坐标:')
        print(p)
        print('响应:')
        print(pv)
        # return the peak offset and the maximum response value
        return p, pv

    # Positive model update on features x with interpolation factor
    # train_interp_factor: blends both the template and the coefficients.
    def train_positive(self, x, train_interp_factor):
        k = self.gaussianCorrelation(x, x)
        alphaf = complexDivision(self._prob, fftd(k) + self.lambdar)

        self._tmpl = (1 - train_interp_factor) * self._tmpl + train_interp_factor * x
        self._alphaf = (1 - train_interp_factor) * self._alphaf + train_interp_factor * alphaf

    # Negative update: subtracts the sample's coefficients from the model;
    # the template itself is left untouched.
    def train_negative(self, x, train_interp_factor):
        k = self.gaussianCorrelation(x, x)
        alphaf = complexDivision(self._prob, fftd(k) + self.lambdar)

        self._alphaf = (1 - train_interp_factor) * self._alphaf - train_interp_factor * alphaf

    # Initialize the tracker from the first frame and the selected roi.
    def init(self, roi, image):
        self._roi = list(map(float, roi))
        assert (roi[2] > 0 and roi[3] > 0)
        self._tmpl = self.getFeatures(image, 1)
        self._prob = self.createGaussianPeak(self.size_patch[0], self.size_patch[1])
        self._alphaf = np.zeros((self.size_patch[0], self.size_patch[1], 2), np.float32)
        # a full-weight (factor 1.0) update seeds the model from the first frame
        self.train_positive(self._tmpl, 1.0)

    # Track the target in a new frame; returns the updated roi.
    def update(self, image):
        # clamp the roi so at least one pixel stays inside the image
        if self._roi[0] + self._roi[2] <= 0:
            self._roi[0] = -self._roi[2] + 1
        if self._roi[1] + self._roi[3] <= 0:
            # BUGFIX: the vertical clamp must use the height (_roi[3]),
            # not the width (_roi[2]) as before.
            self._roi[1] = -self._roi[3] + 1
        if self._roi[0] >= image.shape[1] - 1:
            self._roi[0] = image.shape[1] - 2
        if self._roi[1] >= image.shape[0] - 1:
            self._roi[1] = image.shape[0] - 2

        # center of the current roi
        cx = self._roi[0] + self._roi[2] / 2.
        cy = self._roi[1] + self._roi[3] / 2.

        # loc: peak offset; peak_value: maximum response at the current scale
        loc, peak_value = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0))

        if (self.scale_step != 1):
            # Test at a smaller _scale
            new_loc1, new_peak_value1 = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0 / self.scale_step))
            # Test at a bigger _scale
            new_loc2, new_peak_value2 = self.detect(self._tmpl, self.getFeatures(image, 0, self.scale_step))

            if (self.scale_weight * new_peak_value1 > peak_value and new_peak_value1 > new_peak_value2):
                loc = new_loc1
                peak_value = new_peak_value1
                self._scale /= self.scale_step
                self._roi[2] /= self.scale_step
                self._roi[3] /= self.scale_step
            elif (self.scale_weight * new_peak_value2 > peak_value):
                loc = new_loc2
                peak_value = new_peak_value2
                self._scale *= self.scale_step
                self._roi[2] *= self.scale_step
                self._roi[3] *= self.scale_step

        # shift the roi by the detected offset, converted to image pixels
        self._roi[0] = cx - self._roi[2] / 2.0 + loc[0] * self.cell_size * self._scale
        self._roi[1] = cy - self._roi[3] / 2.0 + loc[1] * self.cell_size * self._scale

        if (self._roi[0] >= image.shape[1] - 1):  self._roi[0] = image.shape[1] - 1
        if (self._roi[1] >= image.shape[0] - 1):  self._roi[1] = image.shape[0] - 1
        if (self._roi[0] + self._roi[2] <= 0):  self._roi[0] = -self._roi[2] + 2
        if (self._roi[1] + self._roi[3] <= 0):  self._roi[1] = -self._roi[3] + 2
        assert (self._roi[2] > 0 and self._roi[3] > 0)

        # BUGFIX: the class defines train_positive/train_negative but no
        # train(); the regular per-frame model update is the positive one.
        x = self.getFeatures(image, 0, 1.0)
        self.train_positive(x, self.interp_factor)

        return self._roi

run.py

import sys
from time import time
import cv2
import kcftracker

# ------------- tracker state flags ------------------ #
# True while the user is dragging out a target box
selectingObject = False
# True once a box is ready and the tracker must be (re)initialized
initTracking = False
# True while the tracker is actively following the target
onTracking = False

# -------------- bounding-box parameters ------------------- #
# ix, iy: top-left corner of the box being drawn
# cx, cy: current (bottom-right) corner while dragging
ix, iy, cx, cy = -1, -1, -1, -1
# w, h: width and height of the selected box
w, h = 0, 0

# wait time between frames for cv2.waitKey, in ms
interval = 1
# smoothed per-frame processing time, used for the FPS display
duration = 0.01


# Mouse callback: lets the user drag a bounding box on the video window.
# The OpenCV callback signature requires five parameters even though only
# event, x and y are used here.
def draw_boundingBox(event, x, y, flags, param):
    global selectingObject, initTracking, onTracking, ix, iy, cx, cy, w, h

    # left button pressed: start a new selection
    if event == cv2.EVENT_LBUTTONDOWN:
        selectingObject = True
        onTracking = False
        # (ix, iy): anchor corner; (x, y): current mouse position
        ix, iy = x, y
        # (cx, cy): moving corner, starts at the anchor
        cx, cy = x, y
    # mouse moved: keep updating the moving corner while dragging
    elif event == cv2.EVENT_MOUSEMOVE:
        cx, cy = x, y
    # left button released: finish the selection
    elif event == cv2.EVENT_LBUTTONUP:
        selectingObject = False
        # only accept boxes larger than 10x10 pixels
        if abs(x - ix) > 10 and abs(y - iy) > 10:
            w, h = abs(x - ix), abs(y - iy)
            # normalize so (ix, iy) is the top-left corner
            ix, iy = min(x, ix), min(y, iy)
            initTracking = True
        else:
            # selection too small -- do not start tracking
            onTracking = False
    # right button pressed: re-center the existing box at the click position
    elif event == cv2.EVENT_RBUTTONDOWN:
        onTracking = False
        # only if a box was already selected before
        if w > 0:
            # BUGFIX: use integer division -- `/` yields floats, which later
            # break cv2.rectangle (it requires integer pixel coordinates).
            ix, iy = x - w // 2, y - h // 2
            initTracking = True


# Mouse-event summary:
# Left-button down: selectingObject=True, onTracking=False
# Left-button up: selectingObject=False; if the box is big enough, initTracking=True, else onTracking=False
# Right-button down: cancel tracking (onTracking=False) and re-initialize the tracker (initTracking=True)
# -------------------------------------------------------------------------------------------------------

if __name__ == '__main__':

    # Choose the video source: default file, a camera index, or a video path.
    if len(sys.argv) == 1:
        cap = cv2.VideoCapture("s.mp4")
    elif len(sys.argv) == 2:
        if sys.argv[1].isdigit():
            cap = cv2.VideoCapture(int(sys.argv[1]))
        else:
            cap = cv2.VideoCapture(sys.argv[1])
            # slow playback down for file input
            interval = 30
    else:
        assert 0, "too many arguments"

    tracker = kcftracker.KCFTracker(True, False, True)  # HOG features, no fixed window, multi-scale
    # With HOG features there is a short pause before the first box is drawn,
    # because numba's jit compiles the feature code on first use.

    cv2.namedWindow('tracking')
    cv2.setMouseCallback('tracking', draw_boundingBox)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # the user is dragging a selection box
        if selectingObject:
            # draw the in-progress selection
            cv2.rectangle(frame, (ix, iy), (cx, cy), (0, 255, 255), 1)
        # a box was selected (or reset via right-click): (re)initialize
        elif initTracking:
            # draw the selected box once more
            cv2.rectangle(frame, (ix, iy), (ix + w, iy + h), (0, 255, 255), 2)
            # (re)initialize the tracker with the selected roi
            tracker.init([ix, iy, w, h], frame)
            # switch state to active tracking
            initTracking = False
            onTracking = True
        # actively tracking the selected target
        elif onTracking:
            # time the update to estimate FPS
            t0 = time()
            # track the target and get the updated bounding box
            boundingBox = tracker.update(frame)
            t1 = time()
            # ----------------------------------------------------------------------------------------------------
            # draw the updated bounding box
            boundingBox = list(map(int, boundingBox))
            cv2.rectangle(frame, (boundingBox[0], boundingBox[1]),
                          (boundingBox[0] + boundingBox[2], boundingBox[1] + boundingBox[3]), (0, 255, 255), 1)

            # ----------------------------------------------------------------------------------------------------
            # exponential moving average of the frame time, shown as FPS
            duration = 0.8 * duration + 0.2 * (t1 - t0)
            cv2.putText(frame, 'FPS: ' + str(1 / duration)[:4].strip('.'), (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 2)
        # ---------------------------------------------------------------------
        # show the frame; ESC or 'q' quits
        cv2.imshow('tracking', frame)
        c = cv2.waitKey(interval) & 0xFF
        if c == 27 or c == ord('q'):
            break
    # release the capture device and close the window
    cap.release()
    cv2.destroyAllWindows()