跳到主要内容

深度学习模块

OpenCV 的 DNN 模块可以加载和运行预训练的深度学习模型,支持 TensorFlow、PyTorch、Caffe、ONNX 等主流框架。

DNN 模块概述

OpenCV DNN 模块的特点:

  • 轻量级推理引擎:不需要安装完整的深度学习框架
  • 跨平台支持:支持 CPU、CUDA、OpenCL 等后端
  • 多框架兼容:支持多种模型格式
  • 易于集成:与 OpenCV 图像处理无缝结合

支持的模型格式

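| 框架 | 模型格式 | 配置文件 |
| --- | --- | --- |
| Caffe | .caffemodel | .prototxt |
| TensorFlow | .pb | .pbtxt |
| PyTorch/Torch | .pt, .pth(需先导出为 ONNX 后加载);Torch7:.t7, .net | - |
| ONNX | .onnx | - |
| Darknet | .weights | .cfg |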

图像分类

加载模型

import cv2
import numpy as np

# Load a network. The three loader calls below are alternatives for different
# model formats; each assignment replaces the previous one, so a real script
# keeps only the loader that matches its files.

# Caffe: model configuration file first, then model weights
model = cv2.dnn.readNetFromCaffe('deploy.prototxt', 'model.caffemodel')

# Or load an ONNX model
model = cv2.dnn.readNetFromONNX('model.onnx')

# Or load a TensorFlow model
model = cv2.dnn.readNetFromTensorflow('model.pb', 'model.pbtxt')

# Choose the compute backend and the target device
backend = cv2.dnn.DNN_BACKEND_OPENCV
target = cv2.dnn.DNN_TARGET_CPU
model.setPreferableBackend(backend)
model.setPreferableTarget(target)

# To use CUDA acceleration instead:
# model.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# model.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

图像预处理

import cv2
import numpy as np

image = cv2.imread('image.jpg')

# Build the input blob for a single image.
# cv2.imread yields BGR pixels and the mean below is given in BGR order, so
# the channels must not be swapped: with swapRB=True blobFromImage expects
# the mean in RGB order, and each mean value would be subtracted from the
# wrong channel.
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1.0,        # multiplier applied to pixel values
    size=(224, 224),        # spatial input size
    mean=(104, 177, 123),   # per-channel mean to subtract (BGR)
    swapRB=False,           # keep BGR so the mean lines up with the channels
    crop=False,             # do not crop after resizing
)

# Batch version: a single blob holding four images, same preprocessing
images = [cv2.imread(f'image{i}.jpg') for i in range(4)]
blob = cv2.dnn.blobFromImages(images, 1.0, (224, 224), (104, 177, 123))

执行推理

import cv2
import numpy as np

# Load the model
net = cv2.dnn.readNetFromCaffe('deploy.prototxt', 'model.caffemodel')

# Read the image
image = cv2.imread('image.jpg')

# Create the input blob
blob = cv2.dnn.blobFromImage(image, 1.0, (224, 224), (104, 177, 123))

# Feed the blob to the network and run the forward pass
net.setInput(blob)
output = net.forward()

# Flatten the single-image output to a 1-D score vector. np.argmax returns an
# index into the flattened array, so indexing the flattened scores is correct
# both for (1, N) outputs and for outputs with trailing singleton dimensions
# such as (1, N, 1, 1), where output[0, class_id] would not be a scalar.
scores = output.flatten()
class_id = int(np.argmax(scores))
confidence = float(scores[class_id])

print(f"预测类别: {class_id}, 置信度: {confidence:.4f}")

使用 ImageNet 预训练模型

import cv2
import numpy as np

# Load the class labels, one label per line
with open('imagenet_classes.txt', 'r', encoding='utf-8') as f:
    classes = [line.strip() for line in f]

# Load the model (SqueezeNet as an example)
net = cv2.dnn.readNetFromCaffe(
    'squeezenet_v1.1.prototxt',
    'squeezenet_v1.1.caffemodel',
)

image = cv2.imread('image.jpg')
blob = cv2.dnn.blobFromImage(image, 1.0, (227, 227), (104, 177, 123))

net.setInput(blob)
output = net.forward()

# Flatten to a 1-D score vector before ranking. Classification outputs may
# carry trailing singleton dimensions, e.g. (1, N, 1, 1); np.argsort on
# output[0] would then sort along a size-1 axis and return only zeros.
scores = output.flatten()

# Indices of the five highest scores, best first
top5 = np.argsort(scores)[::-1][:5]

for i in top5:
    print(f"{classes[i]}: {scores[i]:.4f}")

目标检测

YOLO 目标检测

import cv2
import numpy as np

CONF_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4   # overlap threshold passed to non-maximum suppression

# Load the class labels, one label per line
with open('coco.names', 'r', encoding='utf-8') as f:
    classes = [line.strip() for line in f]

# Load the YOLO model
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')

# Names of the output layers. Asking the network for the names directly avoids
# indexing getLayerNames() with getUnconnectedOutLayers(), whose return shape
# (1-D vs. N x 1) differs between OpenCV versions.
output_layers = net.getUnconnectedOutLayersNames()

# Read the image
image = cv2.imread('image.jpg')
height, width = image.shape[:2]

# Create the input blob: scale pixels to [0, 1] and swap BGR -> RGB
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)

net.setInput(blob)

# Run detection on every output layer
outputs = net.forward(output_layers)

# Collect candidate boxes
boxes = []
confidences = []
class_ids = []

for output in outputs:
    for detection in output:
        # Entries from index 5 onward are the per-class scores
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = float(scores[class_id])

        if confidence > CONF_THRESHOLD:
            # The first four entries are the box center and size, relative
            # to the image dimensions; convert them to pixels
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Top-left corner from center and size
            x = center_x - w // 2
            y = center_y - h // 2

            boxes.append([x, y, w, h])
            confidences.append(confidence)
            class_ids.append(class_id)

# Non-maximum suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)

# Draw the detections, one random color per class
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Flatten so the loop works whether NMSBoxes returned an empty result,
# a 1-D array, or an N x 1 array (older OpenCV versions)
for i in np.array(indices).flatten():
    i = int(i)
    x, y, w, h = boxes[i]

    label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"
    color = colors[class_ids[i]]

    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
    cv2.putText(image, label, (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

cv2.imshow('YOLO 检测', image)
cv2.waitKey(0)

SSD 目标检测

import cv2
import numpy as np

# Load the class labels, one label per line
with open('coco.names', 'r', encoding='utf-8') as f:
    classes = [line.strip() for line in f]

# Load the SSD model. The files are a TensorFlow frozen graph (.pb) and its
# text graph (.pbtxt), so they must be read with the TensorFlow loader, which
# takes the model file first and the config file second.
net = cv2.dnn.readNetFromTensorflow(
    'frozen_inference_graph.pb',
    'ssd_mobilenet_v1_coco.pbtxt',
)

image = cv2.imread('image.jpg')
height, width = image.shape[:2]

# Create the input blob
# NOTE(review): confirm the mean/scale values against the preprocessing this
# particular model expects.
blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (127.5, 127.5, 127.5), swapRB=True)

net.setInput(blob)

# Run detection
detections = net.forward()

# Factors that turn relative box coordinates into pixels
box_scale = np.array([width, height, width, height])

# Each detection row is read as: [.., class id, confidence, x1, y1, x2, y2]
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]

    if confidence > 0.5:
        class_id = int(detections[0, 0, i, 1])

        # Box corners in pixel coordinates
        box = detections[0, 0, i, 3:7] * box_scale
        x1, y1, x2, y2 = box.astype(int)

        # Fall back to the numeric id when the label file has no entry for it.
        # NOTE(review): verify that the label file matches the class ids this
        # model emits.
        if 0 <= class_id < len(classes):
            name = classes[class_id]
        else:
            name = str(class_id)
        label = f"{name}: {confidence:.2f}"

        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

cv2.imshow('SSD 检测', image)
cv2.waitKey(0)

语义分割

使用 DeepLab 模型

import cv2
import numpy as np

# Load the DeepLab model
net = cv2.dnn.readNetFromTensorflow(
    'frozen_inference_graph.pb',
    'graph.pbtxt',
)

image = cv2.imread('image.jpg')
height, width = image.shape[:2]

# Create the input blob
blob = cv2.dnn.blobFromImage(image, 1.0, (513, 513), (127.5, 127.5, 127.5), swapRB=True)

net.setInput(blob)

# Run segmentation
output = net.forward()

# Per-pixel class id: argmax over the class axis of the single output image
seg_map = np.argmax(output[0], axis=0)

# Resize the label map back to the image size. Nearest-neighbor interpolation
# is required: the default (bilinear) would blend neighboring class ids into
# values that correspond to no real class.
seg_map = cv2.resize(seg_map.astype(np.uint8), (width, height),
                     interpolation=cv2.INTER_NEAREST)

# Colors for the classes. uint8 is required so the colored map has the same
# type as the image when the two are blended below.
colors = np.array([
    [0, 0, 0],      # background
    [128, 0, 0],    # airplane
    [0, 128, 0],    # bicycle
    # ... add one row per remaining class
], dtype=np.uint8)

# Build a palette with one row per network class so that class ids without a
# listed color map to black instead of raising IndexError.
num_classes = output.shape[1]
palette = np.zeros((num_classes, 3), dtype=np.uint8)
known = min(len(colors), num_classes)
palette[:known] = colors[:known]

seg_colored = palette[seg_map]

# Overlay the colored segmentation on the original image
result = cv2.addWeighted(image, 0.5, seg_colored, 0.5, 0)

cv2.imshow('语义分割', result)
cv2.waitKey(0)

人脸检测与识别

使用 DNN 人脸检测器

import cv2
import numpy as np

# Face detection model: network definition plus trained weights
PROTOTXT = 'deploy.prototxt'
WEIGHTS = 'res10_300x300_ssd_iter_140000.caffemodel'
net = cv2.dnn.readNetFromCaffe(PROTOTXT, WEIGHTS)

image = cv2.imread('people.jpg')
height, width = image.shape[:2]

# Preprocess the image into an input blob and run the detector
blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104, 177, 123))
net.setInput(blob)
detections = net.forward()

green = (0, 255, 0)

# Walk over every detection; slot 2 is read as the confidence and
# slots 3..6 as the relative box corners
for idx in range(detections.shape[2]):
    confidence = detections[0, 0, idx, 2]
    if not confidence > 0.5:
        continue

    # Scale the relative corners to pixel coordinates
    corners = detections[0, 0, idx, 3:7] * np.array([width, height, width, height])
    x1, y1, x2, y2 = corners.astype(int)

    cv2.rectangle(image, (x1, y1), (x2, y2), green, 2)

    # Confidence label slightly above the box
    cv2.putText(image, f"Face: {confidence:.2f}", (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

cv2.imshow('人脸检测', image)
cv2.waitKey(0)

实时人脸检测

import cv2
import numpy as np

# Load the model
net = cv2.dnn.readNetFromCaffe(
    'deploy.prototxt',
    'res10_300x300_ssd_iter_140000.caffemodel',
)

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail with a clear message instead of silently exiting the loop below
    raise RuntimeError("Could not open camera 0")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        height, width = frame.shape[:2]
        box_scale = np.array([width, height, width, height])

        # Create the input blob and run the detector on this frame
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104, 177, 123))

        net.setInput(blob)
        detections = net.forward()

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]

            if confidence > 0.5:
                # Scale the relative box corners to pixel coordinates
                box = detections[0, 0, i, 3:7] * box_scale
                x1, y1, x2, y2 = box.astype(int)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.imshow('实时人脸检测', frame)

        # Quit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

姿态估计

使用 OpenPose 模型

import cv2
import numpy as np

# Body part name -> channel index in the network output
BODY_PARTS = {
    name: index
    for index, name in enumerate([
        "Nose", "Neck", "RShoulder", "RElbow", "RWrist",
        "LShoulder", "LElbow", "LWrist", "RHip", "RKnee",
        "RAnkle", "LHip", "LKnee", "LAnkle", "REye",
        "LEye", "REar", "LEar", "Background",
    ])
}

# Pairs of body parts that are joined by a line when drawing the skeleton
POSE_PAIRS = [
    ["Neck", "RShoulder"], ["Neck", "LShoulder"],
    ["RShoulder", "RElbow"], ["RElbow", "RWrist"],
    ["LShoulder", "LElbow"], ["LElbow", "LWrist"],
    ["Neck", "RHip"], ["RHip", "RKnee"], ["RKnee", "RAnkle"],
    ["Neck", "LHip"], ["LHip", "LKnee"], ["LKnee", "LAnkle"],
    ["Neck", "Nose"], ["Nose", "REye"], ["REye", "REar"],
    ["Nose", "LEye"], ["LEye", "LEar"],
]

# Load the model
net = cv2.dnn.readNetFromCaffe('pose.prototxt', 'pose.caffemodel')

image = cv2.imread('person.jpg')
height, width = image.shape[:2]

# Preprocess, feed the network and run inference
blob = cv2.dnn.blobFromImage(image, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=True, crop=False)
net.setInput(blob)
output = net.forward()

# Extract one keypoint per body part: the location of the heat-map maximum,
# rescaled from heat-map coordinates to image coordinates. Parts whose peak
# does not exceed the threshold are recorded as None.
threshold = 0.1
map_h, map_w = output.shape[2], output.shape[3]
points = []

for part_index in BODY_PARTS.values():
    _, peak, _, (peak_x, peak_y) = cv2.minMaxLoc(output[0, part_index, :, :])
    if peak > threshold:
        points.append((int(width * peak_x / map_w), int(height * peak_y / map_h)))
    else:
        points.append(None)

# Draw the skeleton: a line per pair plus a dot on both end points,
# skipping pairs where either keypoint is missing
for name_from, name_to in POSE_PAIRS:
    start = points[BODY_PARTS[name_from]]
    end = points[BODY_PARTS[name_to]]
    if not start or not end:
        continue

    cv2.line(image, start, end, (0, 255, 0), 2)
    cv2.circle(image, start, 3, (0, 0, 255), -1)
    cv2.circle(image, end, 3, (0, 0, 255), -1)

cv2.imshow('姿态估计', image)
cv2.waitKey(0)

性能优化

使用不同的后端

import cv2

def _use(network, backend, target):
    """Select the given backend and target device on *network*."""
    network.setPreferableBackend(backend)
    network.setPreferableTarget(target)


net = cv2.dnn.readNetFromONNX('model.onnx')

# OpenCV backend on the CPU (the default)
_use(net, cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_TARGET_CPU)

# OpenCV backend with OpenCL
_use(net, cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_TARGET_OPENCL)

# CUDA backend on the GPU
_use(net, cv2.dnn.DNN_BACKEND_CUDA, cv2.dnn.DNN_TARGET_CUDA)

批量处理

import cv2
import numpy as np

net = cv2.dnn.readNetFromONNX('model.onnx')

# Read the eight images of the batch
images = [cv2.imread(f'image{index}.jpg') for index in range(8)]

# Pack all images into one batch blob and feed it to the network
blob = cv2.dnn.blobFromImages(images, 1.0, (224, 224), (104, 177, 123))
net.setInput(blob)

# A single forward pass handles the whole batch
outputs = net.forward()

# One output row per image: report the index of its highest score
for image_index, scores in enumerate(outputs):
    best = np.argmax(scores)
    print(f"图像 {image_index}: 类别 {best}")

模型优化

import cv2
import numpy as np
import time

NUM_RUNS = 100  # number of timed forward passes

# Load the model
net = cv2.dnn.readNetFromONNX('model.onnx')

# Create a dummy test input (an all-zero image)
blob = cv2.dnn.blobFromImage(np.zeros((224, 224, 3), np.uint8), 1.0, (224, 224))

# Warm-up run so the timed loop below does not include the first forward pass
net.setInput(blob)
net.forward()

# Time the runs with perf_counter: it is monotonic and has the highest
# available resolution, whereas time.time() can jump when the system clock
# is adjusted.
start = time.perf_counter()
for _ in range(NUM_RUNS):
    net.setInput(blob)
    net.forward()
elapsed = time.perf_counter() - start

print(f"平均推理时间: {elapsed / NUM_RUNS * 1000:.2f} ms")

常用预训练模型

| 任务 | 模型 | 框架 | 说明 |
| --- | --- | --- | --- |
| 图像分类 | ResNet, VGG, MobileNet | Caffe, TensorFlow | ImageNet 预训练 |
| 目标检测 | YOLO, SSD, Faster R-CNN | Darknet, TensorFlow | COCO 预训练 |
| 语义分割 | DeepLab, FCN | TensorFlow | COCO/VOC 预训练 |
| 人脸检测 | SSD, RetinaFace | Caffe, ONNX | 专用人脸检测 |
| 姿态估计 | OpenPose, HRNet | Caffe | 身体关键点检测 |

下一步

恭喜你完成了 OpenCV 教程的学习!建议你通过实际项目来巩固所学知识,例如:

  • 人脸识别系统
  • 车牌识别系统
  • 文档扫描应用
  • 运动检测系统
  • AR 应用

更多参考资料请查看 OpenCV 知识速查表。