State-of-the-art and curated Computer Vision algorithms for AI.
Face Detection

# !/usr/bin python
# -*- encoding: utf-8 -*-
# @author: xhades
# datetime: 2022-01-07
from PIL import Image
import kornia.geometry as K
import argparse
import cv2
import numpy as np
import torch
import kornia as K
from kornia.contrib import FaceDetector, FaceDetectorResult, FaceKeypoint
def draw_keypoint(img: np.ndarray, det: FaceDetectorResult, kpt_type: FaceKeypoint) -> np.ndarray:
    kpt = det.get_keypoint(kpt_type).int().tolist()
    return cv2.circle(img, kpt, 2, (255, 0, 0), 2)
def scale_image(img: np.ndarray, size: int) -> np.ndarray:
    h, w = img.shape[:2]
    scale = 1. * size / w
    return cv2.resize(img, (int(w * scale), int(h * scale)))
def apply_blur_face(img: torch.Tensor, img_vis: np.ndarray, det: FaceDetectorResult):
    # crop the face
    x1, y1 = det.xmin.int(), det.ymin.int()
    x2, y2 = det.xmax.int(), det.ymax.int()
    roi = img[..., y1:y2, x1:x2]
    # apply blurring and put back to the visualisation image
    roi = K.filters.gaussian_blur2d(roi, (21, 21), (35., 35.))
    roi = K.color.rgb_to_bgr(roi)
    img_vis[y1:y2, x1:x2] = K.tensor_to_image(roi)
def my_app(args):
    # select the device
    device = torch.device('cpu')
    if args.cuda and torch.cuda.is_available():
        device = torch.device('cuda:0')
    # load the image and scale
    img_raw = cv2.imread(args.image_file, cv2.IMREAD_COLOR)
    img_raw = scale_image(img_raw, args.image_size)
    # preprocess
    img = K.image_to_tensor(img_raw, keepdim=False).to(device)
    img = K.color.bgr_to_rgb(img.float())
    # create the detector and find the faces !
    face_detection = FaceDetector().to(device)
    with torch.no_grad():
        dets = face_detection(img)
    dets = [FaceDetectorResult(o) for o in dets]
    # show image
    img_vis = img_raw.copy()
    for b in dets:
        if b.score < args.vis_threshold:
            continue
        # draw face bounding box
        img_vis = cv2.rectangle(
            img_vis, b.top_left.int().tolist(), b.bottom_right.int().tolist(), (0, 255, 0), 4)
        if args.blur_faces:
            apply_blur_face(img, img_vis, b)
        if args.vis_keypoints:
            # draw facial keypoints
            img_vis = draw_keypoint(img_vis, b, FaceKeypoint.EYE_LEFT)
            img_vis = draw_keypoint(img_vis, b, FaceKeypoint.EYE_RIGHT)
            img_vis = draw_keypoint(img_vis, b, FaceKeypoint.NOSE)
            img_vis = draw_keypoint(img_vis, b, FaceKeypoint.MOUTH_LEFT)
            img_vis = draw_keypoint(img_vis, b, FaceKeypoint.MOUTH_RIGHT)
            # draw the text score
            cx = int(b.xmin)
            cy = int(b.ymin + 12)
            img_vis = cv2.putText(
                img_vis, f"{b.score:.2f}", (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
    # save and show image
    cv2.imwrite(args.image_out, img_vis)
    cv2.namedWindow('face_detection', cv2.WINDOW_NORMAL)
    cv2.imshow('face_detection', img_vis)
    cv2.waitKey(0)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Face and Landmark Detection')
    parser.add_argument('--image_file', required=True, type=str, help='the image file to be detected.')
    parser.add_argument('--image_out', required=True, type=str, help='the file path to write the output.')
    parser.add_argument('--image_size', default=320, type=int, help='the image size to process.')
    parser.add_argument('--vis_threshold', default=0.8, type=float, help='visualization_threshold')
    parser.add_argument('--vis_keypoints', dest='vis_keypoints', action='store_true')
    parser.add_argument('--cuda', dest='cuda', action='store_true')
    parser.add_argument('--blur_faces', dest='blur_faces', action='store_true')
    args = parser.parse_args()
    my_app(args)
计算机视觉

Step by Step

Face Detection

Data Augmentation