目标检测
目标检测是计算机视觉的核心任务之一,目标是在图像中定位并识别特定对象。本章节介绍 OpenCV 中常用的目标检测方法。
模板匹配
模板匹配是最简单的目标检测方法,在图像中滑动模板寻找最匹配的位置。
基本使用
import cv2
import numpy as np
# Load the scene to search and the patch to look for.
image = cv2.imread('scene.jpg', cv2.IMREAD_COLOR)
template = cv2.imread('template.jpg', cv2.IMREAD_COLOR)
tmpl_h, tmpl_w = template.shape[:2]

# Score every placement of the template over the scene.
scores = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

# Only the location of the maximum score is needed here.
_, _, _, best_xy = cv2.minMaxLoc(scores)

# best_xy is the (x, y) top-left corner of the match; the opposite corner
# follows from the template size.
match_x, match_y = best_xy
cv2.rectangle(
    image,
    (match_x, match_y),
    (match_x + tmpl_w, match_y + tmpl_h),
    (0, 255, 0),
    2,
)
cv2.imshow('模板匹配', image)
cv2.waitKey(0)
匹配方法
OpenCV 提供了多种模板匹配方法:
import cv2
image = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)
template = cv2.imread('template.jpg', cv2.IMREAD_GRAYSCALE)

# Every comparison mode exercised by the loop below.
methods = (
    cv2.TM_CCOEFF,
    cv2.TM_CCOEFF_NORMED,
    cv2.TM_CCORR,
    cv2.TM_CCORR_NORMED,
    cv2.TM_SQDIFF,
    cv2.TM_SQDIFF_NORMED,
)

for method in methods:
    result = cv2.matchTemplate(image, template, method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
    # The squared-difference modes treat the minimum as the best match;
    # all other modes use the maximum.
    top_left = min_loc if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED] else max_loc
    print(f"方法 {method}: 匹配值 {max_val if method not in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED] else min_val}")
常用方法说明:
| 方法 | 说明 | 特点 |
|---|---|---|
| `TM_CCOEFF_NORMED` | 归一化相关系数 | 结果在 -1 到 1 之间,推荐使用 |
| `TM_CCORR_NORMED` | 归一化相关 | 结果在 0 到 1 之间 |
| `TM_SQDIFF_NORMED` | 归一化平方差 | 结果越小越匹配 |
多目标检测
检测图像中的多个目标:
import cv2
import numpy as np
image = cv2.imread('scene.jpg', cv2.IMREAD_COLOR)
template = cv2.imread('template.jpg', cv2.IMREAD_COLOR)
h, w = template.shape[:2]

# Matching is done on grayscale copies; drawing happens on the colour image.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
result = cv2.matchTemplate(gray, gray_template, cv2.TM_CCOEFF_NORMED)

# Keep every placement whose score reaches the threshold. np.where yields
# (rows, cols), i.e. (ys, xs).
threshold = 0.8
ys, xs = np.where(result >= threshold)

# Merge overlapping hits. Each rectangle is inserted twice so that an
# isolated hit still has the two members groupRectangles needs to keep it.
rectangles = []
for x, y in zip(xs, ys):
    rectangles.extend([[int(x), int(y), w, h]] * 2)
rectangles, weights = cv2.groupRectangles(rectangles, 1, 0.2)

# Draw the merged boxes.
for bx, by, bw, bh in rectangles:
    cv2.rectangle(image, (bx, by), (bx + bw, by + bh), (0, 255, 0), 2)
cv2.imshow('多目标检测', image)
cv2.waitKey(0)
模板匹配的局限性:
- 只能检测与模板大小相同的目标
- 对旋转和缩放敏感
- 计算量较大
Haar 级联分类器
Haar 级联分类器是一种基于机器学习的目标检测方法,常用于人脸检测。
人脸检测
import cv2
# Load the pre-trained frontal-face cascade from OpenCV's data directory.
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# The detector is run on a grayscale copy of the input image.
image = cv2.imread('people.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

detect_params = dict(
    scaleFactor=1.1,   # image scale step between pyramid levels
    minNeighbors=5,    # neighbouring candidates required to accept a box
    minSize=(30, 30),  # smallest object size considered
)
faces = face_cascade.detectMultiScale(gray, **detect_params)

# Outline each detected face on the colour image.
for x, y, w, h in faces:
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)
cv2.imshow('人脸检测', image)
cv2.waitKey(0)
人脸和眼睛检测
import cv2
# Load the face and eye cascades from OpenCV's data directory.
cascade_dir = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_eye.xml')

image = cv2.imread('people.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Find faces first, then search for eyes only inside each face region.
faces = face_cascade.detectMultiScale(gray, 1.3, 5)
for fx, fy, fw, fh in faces:
    cv2.rectangle(image, (fx, fy), (fx + fw, fy + fh), (0, 255, 0), 2)

    face_rows = slice(fy, fy + fh)
    face_cols = slice(fx, fx + fw)
    roi_gray = gray[face_rows, face_cols]
    roi_color = image[face_rows, face_cols]

    # Eye coordinates are relative to the face region, so they are drawn on
    # the matching slice of the colour image.
    for ex, ey, ew, eh in eye_cascade.detectMultiScale(roi_gray):
        cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (255, 0, 0), 2)
cv2.imshow('人脸和眼睛检测', image)
cv2.waitKey(0)
实时人脸检测
import cv2
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Open capture device 0.
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was returned: stop the loop.
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imshow('实时人脸检测', frame)
        # Press "q" to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the windows even if the loop is left
    # through an exception or Ctrl+C; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()
可用的预训练分类器
OpenCV 提供了多种预训练的 Haar 分类器:
| 分类器文件 | 用途 |
|---|---|
| `haarcascade_frontalface_default.xml` | 正面人脸检测 |
| `haarcascade_frontalface_alt.xml` | 正面人脸检测(备选) |
| `haarcascade_eye.xml` | 眼睛检测 |
| `haarcascade_smile.xml` | 微笑检测 |
| `haarcascade_fullbody.xml` | 全身检测 |
| `haarcascade_upperbody.xml` | 上半身检测 |
| `haarcascade_profileface.xml` | 侧脸检测 |
HOG 行人检测
HOG(Histogram of Oriented Gradients)是一种基于梯度方向直方图的特征描述符,常用于行人检测。
import cv2
# Build a HOG descriptor and attach OpenCV's default people-detector SVM.
hog = cv2.HOGDescriptor()
people_detector = cv2.HOGDescriptor_getDefaultPeopleDetector()
hog.setSVMDetector(people_detector)

image = cv2.imread('pedestrians.jpg')

# winStride: sliding-window step; padding: padding around each window;
# scale: scale factor between detection passes.
boxes, weights = hog.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)

# Outline every detection.
for box in boxes:
    x, y, w, h = box
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow('行人检测', image)
cv2.waitKey(0)
非极大值抑制
使用非极大值抑制(NMS)去除重叠的检测框:
import cv2
import numpy as np
def nms(boxes, scores, threshold=0.5):
    """Greedy non-maximum suppression over ``(x, y, w, h)`` boxes.

    Boxes are visited from highest to lowest score. Each visited box is kept,
    and every remaining box whose IoU with it exceeds ``threshold`` is
    discarded.

    Args:
        boxes: Array-like of ``(x, y, w, h)`` rectangles, one row per box.
        scores: One score per box. Any shape is accepted (for example
            ``(N,)`` or ``(N, 1)``); it is flattened before use.
        threshold: Largest IoU a remaining box may have with a kept box.

    Returns:
        A list of plain ``int`` indices into ``boxes``, ordered by descending
        score. Empty when ``boxes`` is empty.
    """
    if len(boxes) == 0:
        return []

    boxes = np.asarray(boxes, dtype=np.float32)
    # Flatten so column-vector scores sort the same way as 1-D scores.
    scores = np.asarray(scores, dtype=np.float32).reshape(-1)

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 0] + boxes[:, 2]
    y2 = boxes[:, 1] + boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)

    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        keep.append(int(i))

        # Intersection of the current box with every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)

        # Two zero-area boxes have a zero union; treat that as "no overlap"
        # instead of dividing 0 by 0, which yields NaN and drops the box.
        union = areas[i] + areas[rest] - inter
        iou = np.divide(inter, union, out=np.zeros_like(inter), where=union > 0)

        order = rest[iou <= threshold]
    return keep
# Usage example: detect pedestrians with HOG, then prune overlapping boxes.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
image = cv2.imread('pedestrians.jpg')
boxes, weights = hog.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)

# Apply NMS. detectMultiScale returns empty tuples when nothing is detected,
# and a tuple has no .flatten(), so convert through np.asarray first.
scores = np.asarray(weights, dtype=np.float32).reshape(-1)
keep = nms(boxes, scores)
for i in keep:
    x, y, w, h = (int(v) for v in boxes[i])
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow('行人检测(NMS)', image)
cv2.waitKey(0)
轮廓检测
轮廓检测可以用于检测图像中的物体边界。
基本轮廓检测
import cv2
import numpy as np
image = cv2.imread('objects.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Binarise: pixels above the cut-off become 255, the rest become 0.
cutoff, max_value = 127, 255
_, binary = cv2.threshold(gray, cutoff, max_value, cv2.THRESH_BINARY)

# RETR_EXTERNAL keeps only the outer contours; CHAIN_APPROX_SIMPLE stores
# them in compressed form.
retrieval_mode = cv2.RETR_EXTERNAL
approx_mode = cv2.CHAIN_APPROX_SIMPLE
contours, hierarchy = cv2.findContours(binary, retrieval_mode, approx_mode)

# Index -1 draws every contour in the list.
ALL_CONTOURS = -1
cv2.drawContours(image, contours, ALL_CONTOURS, (0, 255, 0), 2)
cv2.imshow('轮廓检测', image)
cv2.waitKey(0)
轮廓特征
import cv2
import numpy as np  # needed for the integer conversion of the box corners

image = cv2.imread('objects.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    # Contour area.
    area = cv2.contourArea(contour)
    # Skip small contours.
    if area < 500:
        continue

    # Perimeter of the closed contour.
    perimeter = cv2.arcLength(contour, True)

    # Axis-aligned bounding rectangle (green).
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Minimum-area rectangle (red). boxPoints returns float corners, so
    # convert them to integers before drawing. np.int0 was removed in
    # NumPy 2.0, hence the explicit astype.
    rect = cv2.minAreaRect(contour)
    box = cv2.boxPoints(rect).astype(np.int32)
    cv2.drawContours(image, [box], 0, (0, 0, 255), 2)

    # Minimum enclosing circle (blue).
    (cx, cy), radius = cv2.minEnclosingCircle(contour)
    cv2.circle(image, (int(cx), int(cy)), int(radius), (255, 0, 0), 2)

    print(f"面积: {area:.2f}, 周长: {perimeter:.2f}")
cv2.imshow('轮廓特征', image)
cv2.waitKey(0)
轮廓近似
import cv2
image = cv2.imread('shapes.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# cv2.putText draws with Hershey fonts, which cannot render Chinese text
# (it comes out as "?"), so the on-image labels are ASCII.
SHAPE_NAMES = {3: "Triangle", 4: "Quadrilateral", 5: "Pentagon"}

for contour in contours:
    # Approximate the contour with a polygon; the tolerance is 2% of the
    # contour perimeter.
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)

    # Classify by vertex count; anything outside 3-5 vertices is labelled
    # as a circle.
    vertices = len(approx)
    shape = SHAPE_NAMES.get(vertices, "Circle")

    # Draw the approximated outline.
    cv2.drawContours(image, [approx], 0, (0, 255, 0), 2)

    # Put the label near the centroid; skip contours whose zeroth moment
    # is 0 to avoid dividing by zero.
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        cv2.putText(image, shape, (cx - 20, cy),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
cv2.imshow('形状识别', image)
cv2.waitKey(0)
颜色检测
基于颜色进行目标检测:
import cv2
import numpy as np
image = cv2.imread('objects.jpg')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# Red is covered by two hue bands, one at each end of the hue range.
red_ranges = (
    (np.array([0, 100, 100]), np.array([10, 255, 255])),
    (np.array([160, 100, 100]), np.array([180, 255, 255])),
)

# Build one mask per band and merge them.
low_mask, high_mask = (cv2.inRange(hsv, lo, hi) for lo, hi in red_ranges)
mask = cv2.bitwise_or(low_mask, high_mask)

# Morphological opening followed by closing, both with a 5x5 kernel.
kernel = np.ones((5, 5), np.uint8)
for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
    mask = cv2.morphologyEx(mask, morph_op, kernel)

# Box and label every sufficiently large region of the mask.
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
    if cv2.contourArea(contour) <= 500:
        continue
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(image, 'Red Object', (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
cv2.imshow('颜色检测', image)
cv2.waitKey(0)
常用颜色的 HSV 范围:
| 颜色 | 下界 (H, S, V) | 上界 (H, S, V) |
|---|---|---|
| 红色 | (0, 100, 100) / (160, 100, 100) | (10, 255, 255) / (180, 255, 255) |
| 绿色 | (35, 100, 100) | (85, 255, 255) |
| 蓝色 | (100, 100, 100) | (130, 255, 255) |
| 黄色 | (20, 100, 100) | (35, 255, 255) |
下一步
掌握了目标检测后,下一章节我们将学习相机标定与 3D 重建,了解如何进行相机标定和立体视觉应用。