目标检测

目标检测是计算机视觉的核心任务之一，目标是在图像中定位并识别特定对象。本章节介绍 OpenCV 中常用的目标检测方法。

模板匹配

模板匹配是最简单的目标检测方法，在图像中滑动模板寻找最匹配的位置。

基本使用

import cv2
import numpy as np

# Load the scene image and the template to search for, both as BGR colour.
image = cv2.imread('scene.jpg', cv2.IMREAD_COLOR)
template = cv2.imread('template.jpg', cv2.IMREAD_COLOR)

# cv2.imread reports failure by returning None rather than raising, so check
# explicitly; otherwise the first symptom is an opaque AttributeError below.
if image is None:
    raise FileNotFoundError("cannot read image 'scene.jpg'")
if template is None:
    raise FileNotFoundError("cannot read image 'template.jpg'")

# The first two shape entries are the template's height and width in pixels.
h, w = template.shape[:2]

# Slide the template over the scene and compute a score for every position.
result = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

# Locate the extreme scores; with TM_CCOEFF_NORMED the maximum is the best match.
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

# The reported location is the top-left corner of the match as (x, y);
# add the template size to get the opposite corner.
top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

cv2.imshow('模板匹配', image)
cv2.waitKey(0)

匹配方法

OpenCV 提供了多种模板匹配方法：

import cv2

image = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)
template = cv2.imread('template.jpg', cv2.IMREAD_GRAYSCALE)

methods = [
    cv2.TM_CCOEFF,
    cv2.TM_CCOEFF_NORMED,
    cv2.TM_CCORR,
    cv2.TM_CCORR_NORMED,
    cv2.TM_SQDIFF,
    cv2.TM_SQDIFF_NORMED,
]

# Squared-difference methods measure dissimilarity, so their best match is the
# minimum of the score map; every other method uses the maximum.
min_is_best = (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED)

for method in methods:
    result = cv2.matchTemplate(image, template, method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

    # Pick the best score and its location together so the two cannot drift apart.
    if method in min_is_best:
        best_val, top_left = min_val, min_loc
    else:
        best_val, top_left = max_val, max_loc

    print(f"方法 {method}: 匹配值 {best_val}")

常用方法说明：

方法	说明	特点
`TM_CCOEFF_NORMED`	归一化相关系数	结果在 -1 到 1 之间，推荐使用
`TM_CCORR_NORMED`	归一化相关	结果在 0 到 1 之间
`TM_SQDIFF_NORMED`	归一化平方差	结果越小越匹配

多目标检测

检测图像中的多个目标：

import cv2
import numpy as np

# Load scene and template in colour (the scene is drawn on later) and derive
# grayscale copies that are used for the actual matching.
image = cv2.imread('scene.jpg', cv2.IMREAD_COLOR)
template = cv2.imread('template.jpg', cv2.IMREAD_COLOR)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

h, w = template.shape[:2]

# Score map of the template against every position in the scene.
result = cv2.matchTemplate(gray, gray_template, cv2.TM_CCOEFF_NORMED)

# Minimum score for a position to count as a match.
threshold = 0.8

# np.where returns (row indices, column indices), i.e. (ys, xs); pair them up
# as (x, y) points.
ys, xs = np.where(result >= threshold)
locations = list(zip(xs, ys))

# Merge overlapping hits. Every rectangle is listed twice because, with a group
# threshold of 1, groupRectangles needs at least two rectangles per group;
# duplicating lets an isolated hit survive the grouping.
rectangles = []
for x, y in locations:
    rect = [int(x), int(y), w, h]
    rectangles += [rect, rect]

rectangles, weights = cv2.groupRectangles(rectangles, 1, 0.2)

# Draw one box per merged detection.
for (x, y, w, h) in rectangles:
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

cv2.imshow('多目标检测', image)
cv2.waitKey(0)

模板匹配的局限性：

只能检测与模板大小相同的目标
对旋转和缩放敏感
计算量较大

Haar 级联分类器

Haar 级联分类器是一种基于机器学习的目标检测方法，常用于人脸检测。

人脸检测

import cv2

# Load the pre-trained frontal-face cascade that ships with OpenCV.
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# Read the image and convert it to grayscale for detection.
image = cv2.imread('people.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Detection settings:
#   scaleFactor  - image scaling ratio between detection passes
#   minNeighbors - number of neighbours a candidate box needs to be kept
#   minSize      - smallest object size to report
detect_params = dict(scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
faces = face_cascade.detectMultiScale(gray, **detect_params)

# Outline every detected face on the colour image.
for (x, y, w, h) in faces:
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

cv2.imshow('人脸检测', image)
cv2.waitKey(0)

人脸和眼睛检测

import cv2

# Load the face and eye cascades from OpenCV's bundled data directory.
haar_dir = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(haar_dir + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(haar_dir + 'haarcascade_eye.xml')

image = cv2.imread('people.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Detect faces first; eyes are only searched for inside each face.
faces = face_cascade.detectMultiScale(gray, 1.3, 5)

for (x, y, w, h) in faces:
    # Outline the face.
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Cut the face region out of both images. The colour ROI is a slice of
    # `image`, so rectangles drawn on it show up in the final picture.
    face_rows = slice(y, y + h)
    face_cols = slice(x, x + w)
    roi_gray = gray[face_rows, face_cols]
    roi_color = image[face_rows, face_cols]

    # Eye coordinates are relative to the face region, hence drawn on the ROI.
    for (ex, ey, ew, eh) in eye_cascade.detectMultiScale(roi_gray):
        cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (255, 0, 0), 2)

cv2.imshow('人脸和眼睛检测', image)
cv2.waitKey(0)

实时人脸检测

import cv2

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Open the default camera (device index 0) and fail loudly if it is unavailable
# instead of silently exiting without ever showing a window.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("cannot open camera 0")

try:
    while True:
        # ret is False when no frame could be read (e.g. the stream ended).
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow('实时人脸检测', frame)

        # Poll the keyboard for 1 ms per frame; pressing 'q' quits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

可用的预训练分类器

OpenCV 提供了多种预训练的 Haar 分类器：

分类器文件	用途
`haarcascade_frontalface_default.xml`	正面人脸检测
`haarcascade_frontalface_alt.xml`	正面人脸检测（备选）
`haarcascade_eye.xml`	眼睛检测
`haarcascade_smile.xml`	微笑检测
`haarcascade_fullbody.xml`	全身检测
`haarcascade_upperbody.xml`	上半身检测
`haarcascade_profileface.xml`	侧脸检测

HOG 行人检测

HOG（Histogram of Oriented Gradients）是一种基于梯度方向直方图的特征描述符，常用于行人检测。

import cv2

# Create the HOG descriptor and attach OpenCV's default people-detector SVM.
hog = cv2.HOGDescriptor()
people_detector = cv2.HOGDescriptor_getDefaultPeopleDetector()
hog.setSVMDetector(people_detector)

# Read the image to search.
image = cv2.imread('pedestrians.jpg')

# Detection settings:
#   winStride - step of the sliding window
#   padding   - padding
#   scale     - scale factor
detect_params = {'winStride': (8, 8), 'padding': (8, 8), 'scale': 1.05}
boxes, weights = hog.detectMultiScale(image, **detect_params)

# Draw one rectangle per detection box.
for (x, y, w, h) in boxes:
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

cv2.imshow('行人检测', image)
cv2.waitKey(0)

非极大值抑制

使用非极大值抑制（NMS）去除重叠的检测框：

import cv2
import numpy as np

def nms(boxes, scores, threshold=0.5):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``threshold``.

    Args:
        boxes: Sequence of ``(x, y, w, h)`` boxes (top-left corner plus size).
        scores: One score per box. Any shape that flattens to ``len(boxes)``
            values is accepted, e.g. ``(N,)`` or ``(N, 1)``.
        threshold: IoU above which a lower-scoring box is suppressed.

    Returns:
        A list of plain ``int`` indices into ``boxes`` for the kept boxes,
        ordered by descending score. Empty when ``boxes`` is empty.

    Raises:
        ValueError: If the number of scores differs from the number of boxes.
    """
    if len(boxes) == 0:
        return []

    boxes = np.asarray(boxes, dtype=np.float32)
    # Flatten so column-vector scores of shape (N, 1) order correctly.
    scores = np.asarray(scores, dtype=np.float32).ravel()
    if scores.shape[0] != boxes.shape[0]:
        raise ValueError(
            f"got {scores.shape[0]} scores for {boxes.shape[0]} boxes"
        )

    # Convert (x, y, w, h) into corner coordinates.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = x1 + boxes[:, 2]
    y2 = y1 + boxes[:, 3]

    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # indices sorted by descending score

    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        keep.append(int(i))

        # Intersection of box i with every remaining box; a negative extent
        # means no overlap and is clamped to zero.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)

        union = areas[i] + areas[rest] - inter

        # A zero union only occurs when both boxes have zero area. Leave those
        # entries at +inf so they are always suppressed, which is what the
        # NaN from 0/0 used to cause, but without the RuntimeWarning.
        iou = np.full_like(inter, np.inf)
        np.divide(inter, union, out=iou, where=union > 0)

        order = rest[iou <= threshold]

    return keep

# Usage example
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

image = cv2.imread('pedestrians.jpg')

boxes, weights = hog.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)

# Apply NMS. When nothing is detected the OpenCV Python bindings may return
# empty tuples instead of arrays, and a tuple has no .flatten(); converting
# with np.asarray first keeps the no-detection case from crashing.
keep = nms(boxes, np.asarray(weights).flatten())

# Draw only the boxes that survived suppression.
for i in keep:
    x, y, w, h = boxes[i]
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imshow('行人检测（NMS）', image)
cv2.waitKey(0)

轮廓检测

轮廓检测可以用于检测图像中的物体边界。

基本轮廓检测

import cv2
import numpy as np

image = cv2.imread('objects.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Binarize: pixels above 127 become 255, the rest 0.
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Find contours, keeping only the outer ones and storing them in compressed form.
retrieval_mode = cv2.RETR_EXTERNAL
approx_method = cv2.CHAIN_APPROX_SIMPLE
contours, hierarchy = cv2.findContours(binary, retrieval_mode, approx_method)

# Draw all contours (index -1) in green on the original image.
cv2.drawContours(image, contours, -1, (0, 255, 0), 2)

cv2.imshow('轮廓检测', image)
cv2.waitKey(0)

轮廓特征

import cv2
import numpy as np

image = cv2.imread('objects.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    # Contour area.
    area = cv2.contourArea(contour)

    # Skip small contours.
    if area < 500:
        continue

    # Contour perimeter (True = treat the contour as closed).
    perimeter = cv2.arcLength(contour, True)

    # Axis-aligned bounding rectangle, drawn in green.
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Minimum-area (rotated) bounding rectangle, drawn in red. Its corner
    # points are converted to integers before drawing. np.intp replaces the
    # np.int0 alias, which was removed in NumPy 2.0.
    rect = cv2.minAreaRect(contour)
    box = cv2.boxPoints(rect)
    box = np.intp(box)
    cv2.drawContours(image, [box], 0, (0, 0, 255), 2)

    # Minimum enclosing circle, drawn in blue.
    (cx, cy), radius = cv2.minEnclosingCircle(contour)
    cv2.circle(image, (int(cx), int(cy)), int(radius), (255, 0, 0), 2)

    print(f"面积: {area:.2f}, 周长: {perimeter:.2f}")

cv2.imshow('轮廓特征', image)
cv2.waitKey(0)

轮廓近似

import cv2

image = cv2.imread('shapes.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Labels are ASCII on purpose: cv2.putText's built-in Hershey fonts cannot
# render Chinese characters and draw question marks instead.
shape_names = {3: "Triangle", 4: "Quadrilateral", 5: "Pentagon"}

for contour in contours:
    # Approximate the contour by a polygon; the tolerance is 2% of the perimeter.
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)

    # Classify by vertex count. Any count other than 3, 4 or 5 is labelled a
    # circle, which is a deliberate simplification.
    vertices = len(approx)
    shape = shape_names.get(vertices, "Circle")

    # Draw the approximated polygon.
    cv2.drawContours(image, [approx], 0, (0, 255, 0), 2)

    # Put the label near the centroid, computed from the image moments.
    # m00 is zero for degenerate contours, which would divide by zero.
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        cv2.putText(image, shape, (cx - 20, cy),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

cv2.imshow('形状识别', image)
cv2.waitKey(0)

颜色检测

基于颜色进行目标检测：

import cv2
import numpy as np

image = cv2.imread('objects.jpg')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# Red is covered by two HSV ranges: one at the low end of the hue axis and one
# at the high end.
red_ranges = [
    (np.array([0, 100, 100]), np.array([10, 255, 255])),
    (np.array([160, 100, 100]), np.array([180, 255, 255])),
]

# Build one mask per range and combine them.
mask1, mask2 = (cv2.inRange(hsv, lower, upper) for lower, upper in red_ranges)
mask = cv2.bitwise_or(mask1, mask2)

# Morphological clean-up with a 5x5 kernel: opening first, then closing.
kernel = np.ones((5, 5), np.uint8)
for operation in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
    mask = cv2.morphologyEx(mask, operation, kernel)

# Find the outer contours of the masked regions.
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    # Ignore regions whose area is 500 or less.
    if cv2.contourArea(contour) <= 500:
        continue

    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(image, 'Red Object', (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

cv2.imshow('颜色检测', image)
cv2.waitKey(0)

常用颜色的 HSV 范围（OpenCV 中 8 位图像的 H 取值范围为 0–179，S 和 V 为 0–255；红色位于色相环的两端，因此需要两个区间）：

颜色	下界 (H, S, V)	上界 (H, S, V)
红色	(0, 100, 100) / (160, 100, 100)	(10, 255, 255) / (180, 255, 255)
绿色	(35, 100, 100)	(85, 255, 255)
蓝色	(100, 100, 100)	(130, 255, 255)
黄色	(20, 100, 100)	(35, 255, 255)

下一步

掌握了目标检测后，下一章节我们将学习相机标定与 3D 重建，了解如何进行相机标定和立体视觉应用。

模板匹配

基本使用

匹配方法

多目标检测

Haar 级联分类器

人脸检测

人脸和眼睛检测

实时人脸检测

可用的预训练分类器

HOG 行人检测

非极大值抑制

轮廓检测

基本轮廓检测

轮廓特征

轮廓近似

颜色检测

下一步