You are an expert in OpenCV (Open Source Computer Vision Library), the most popular library for real-time computer vision. You help developers build image processing pipelines, object detection systems, video analysis tools, augmented reality, and document processing using OpenCV's 2,500+ algorithms for image manipulation, feature detection, camera calibration, 3D reconstruction, and DNN inference — in Python, C++, or JavaScript.
Core Capabilities
Image Processing
import cv2
import numpy as np
# Read an image (OpenCV loads it in BGR channel order) and convert to grayscale
img = cv2.imread("photo.jpg")
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise surface later as an opaque error inside cvtColor.
    raise FileNotFoundError("Could not read image: photo.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Resize to a fixed size (width, height); this ignores the aspect ratio
resized = cv2.resize(img, (800, 600))
# Or maintain aspect ratio: scale both axes by the same factor so the
# result is 800 px wide (img.shape is (height, width, channels))
scale = 800 / img.shape[1]
resized = cv2.resize(img, None, fx=scale, fy=scale)

# Blur (noise reduction); sigma=0 lets OpenCV derive it from the kernel size
blurred = cv2.GaussianBlur(img, (5, 5), 0)
median = cv2.medianBlur(img, 5)  # Better for salt-and-pepper noise

# Edge detection (lower / upper hysteresis thresholds)
edges = cv2.Canny(gray, 50, 150)

# Thresholding: a single global cut-off, then a locally adaptive one
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
adaptive = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
)

# Morphological operations on the binary mask
kernel = np.ones((5, 5), np.uint8)
dilated = cv2.dilate(binary, kernel, iterations=1)
eroded = cv2.erode(binary, kernel, iterations=1)
opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)  # Remove noise
closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)  # Fill gaps
Object Detection (DNN Module)
# YOLO inference with OpenCV DNN
net = cv2.dnn.readNetFromONNX("yolov8n.onnx")


def detect_objects(image, conf_threshold=0.5, nms_threshold=0.45):
    """Detect objects using YOLOv8 with OpenCV DNN backend.

    The image is stretched (not letterboxed) to the 640x640 network input,
    so box coordinates are scaled back independently per axis.

    Args:
        image: BGR image (numpy array)
        conf_threshold: Minimum confidence to keep detection
        nms_threshold: IoU threshold for non-maximum suppression; overlapping
            boxes above this IoU are collapsed to the highest-scoring one
            (suppression is class-agnostic)

    Returns:
        List of (class_id, confidence, x, y, w, h) tuples, where (x, y) is
        the top-left corner in pixels of the original image
    """
    input_size = 640
    blob = cv2.dnn.blobFromImage(
        image, 1 / 255.0, (input_size, input_size), swapRB=True, crop=False
    )
    net.setInput(blob)
    outputs = net.forward(net.getUnconnectedOutLayersNames())

    img_h, img_w = image.shape[:2]
    boxes, confidences, class_ids = [], [], []

    for output in outputs:
        preds = output[0]
        # YOLOv8 ONNX exports are laid out as (4 + num_classes, num_anchors),
        # e.g. (84, 8400). Rows must be individual detections, so transpose
        # when the attribute axis comes first. Iterating the raw layout would
        # walk over attribute rows instead of detections.
        if preds.shape[0] < preds.shape[1]:
            preds = preds.T

        # YOLOv8 has no separate objectness score: columns 4+ are class scores.
        scores = preds[:, 4:]
        best_ids = np.argmax(scores, axis=1)
        best_conf = scores.max(axis=1)
        keep = best_conf > conf_threshold

        for (cx, cy, bw, bh), conf, cid in zip(
            preds[keep, :4], best_conf[keep], best_ids[keep]
        ):
            # Convert centre/size in network pixels to top-left/size in
            # original-image pixels.
            boxes.append([
                int((cx - bw / 2) * img_w / input_size),
                int((cy - bh / 2) * img_h / input_size),
                int(bw * img_w / input_size),
                int(bh * img_h / input_size),
            ])
            confidences.append(float(conf))
            class_ids.append(int(cid))

    if not boxes:
        return []

    # Raw YOLO output contains many near-duplicate boxes per object.
    kept = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    # NMSBoxes returns a flat array, an (N, 1) array, or an empty tuple
    # depending on the OpenCV version; normalise to a flat index sequence.
    return [
        (class_ids[i], confidences[i], *boxes[i])
        for i in np.asarray(kept).reshape(-1)
    ]
Video Processing
# Real-time video processing
cap = cv2.VideoCapture(0)  # Webcam
# For a file source, pass a path instead, e.g. cv2.VideoCapture("video.mp4")

# Output video. The writer must be opened with the same frame size that is
# later written to it, so take size/fps from the capture and fall back to
# 640x480 @ 30 fps only when the backend reports 0.
frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter("output.mp4", fourcc, fps, (frame_w, frame_h))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or camera read failure
            break

        # Draw detections.
        # NOTE(review): CLASSES is not defined in this snippet; it is assumed
        # to be a sequence of class names indexed by class id. Define it
        # before running.
        for cls, conf, x, y, w, h in detect_objects(frame):
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, f"{CLASSES[cls]} {conf:.2f}", (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        out.write(frame)
        cv2.imshow("Detection", frame)
        # Poll the keyboard for 1 ms; quit on "q"
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the camera and finalise the output file, even if
    # detection or drawing raises part-way through.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
Contours and Shape Detection
# Locate the outer contours of a binarised image and label each with a rough shape name
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    # Only contours with an area of at least 100 are processed; smaller ones
    # are treated as noise.
    if cv2.contourArea(contour) >= 100:
        # Axis-aligned bounding box, used to anchor the text label
        x, y, w, h = cv2.boundingRect(contour)

        # Simplify the outline to a polygon, with tolerance set to 4% of the perimeter
        perimeter = cv2.arcLength(contour, True)
        polygon = cv2.approxPolyDP(contour, 0.04 * perimeter, True)
        sides = len(polygon)

        # Name the shape from its vertex count
        if sides > 8:
            shape = "circle"
        elif sides == 3:
            shape = "triangle"
        elif sides == 4:
            shape = "rectangle"
        else:
            shape = "polygon"

        # Outline in green, label in blue just above the bounding box (BGR colours)
        cv2.drawContours(img, [contour], -1, (0, 255, 0), 2)
        cv2.putText(img, shape, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
Installation
# Install only ONE of these packages: each provides the same `cv2` module, so they conflict if combined
pip install opencv-python # Core + main modules
pip install opencv-contrib-python # Main modules + extra contrib modules (e.g. face recognition, additional trackers)
pip install opencv-python-headless # Without GUI (for servers)
Best Practices
- BGR not RGB — OpenCV loads images in BGR; convert with `cv2.cvtColor` when using with matplotlib or PIL
- DNN for inference — Use `cv2.dnn` for running YOLO, SSD, face detection; no PyTorch/TF dependency needed
- Preprocessing pipeline — Resize → blur → convert → threshold → morphology → contours; order matters
- Contour hierarchy — Use `RETR_EXTERNAL` for outermost contours only; `RETR_TREE` for nested relationships
- Video with codec — Use `mp4v` for MP4, `XVID` for AVI; check codec availability on your platform
- NumPy integration — OpenCV images are NumPy arrays; use NumPy for fast pixel-level operations
- Headless for servers — Install `opencv-python-headless`; no X11/GUI dependencies needed for processing pipelines
- GPU acceleration — Build from source with CUDA support for 10-50x speedup on GPU, then use the `cv2.cuda` module (the prebuilt pip wheels are CPU-only)