Code that actually works!: OpenCV: Multi-Scale Template Matching, Raspberry Pi 2, Camera Module, Real-time detection

#7 DEC 2015
#This program uses multi-scale template matching to find an object in a video stream.
#The object is the "template" which is an image file (JPG, PNG, etc.)
#The video stream is from the raspberry pi camera module.
#This program works on the Raspberry PI 2, Jessie, OpenCV 3.0.0

import cv2
import picamera
import picamera.array
import numpy as np #for template matching
import imutils #for template matching, "image processing convenience functions" THIS IS A FUNCITON CALL!

with picamera.PiCamera() as camera:
with picamera.array.PiRGBArray(camera) as stream:
camera.resolution = (1944, 1944) #I made it a square, you can make it whatever you want

template = cv2.imread('glasses_mod.png') #I used a photo of sunglasses, cropped down to just the sunglasses and nothing else
(template_height, template_width) = template.shape[:2]
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY) #gray it
template = cv2.GaussianBlur (template, (7,7), 0) #blur it
template = cv2.Canny(template, 50, 150) #edge it
cv2.imshow("template", template) #show it (not necessary, but I like to see what I'm working with

while True:
camera.capture(stream, 'bgr', use_video_port=True)
image_color = stream.array #take an image from the video stream
roi_image = image_color[255:765, 510:1020] #select a region of interest
image_gray = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY) #change the image to grayscale
(h, w) = image_gray.shape[:2]
center = (w/2,h/2)
M = cv2.getRotationMatrix2D(center, 90, 1.0)
image_gray_rotated = cv2.warpAffine(image_gray, M, (w, h)) #my camera is sideways so I have to rotate it (not required if your camera is upright)
im_gblurred = cv2.GaussianBlur(image_gray_rotated, (7,7), 0) #blur the image

### MULTI-SCALE TEMPLATE MATCHING
ms_image = im_gblurred #this line is not required, but I was playing around with different multiscale image transformations
found = None #flag to keep track of the matched region

#scan each scale of the image
#ending value (20%), starting value (100%), number of slices in between (20)
for scale in np.linspace(0.2, 1.0, 20)[::-1]:
#resize the image according to the scale and keep track of the ratio of the resizing
resized = imutils.resize(ms_image, width = int(ms_image.shape[1] * scale))
r = ms_image.shape[1] / float(resized.shape[1])

#if the resized image is smaller than the template then break from the loop

if resized.shape[0] < template_height or resized.shape[1] < template_width:

break

#detect the edges in the resized grayscale image and apply template matching to find the template in the image

edged = cv2.Canny(resized, 50, 150) #must use the SAME parameters here as you did for the tempate ABOVE for best results

#input image: must be 8bit or 32bit-floating point

#tempate image: must not be larger than the image to search, and same data type

#method: parameter specifying the comparison methods (SQDIFF, SQDIFFNORM, CCORR, CCORRNORM, CCOEFF, CCOEFFNORM)

#mask: mask of searched template. must be same data type and size as template. It not set by default.

#result: map of comparison results, must be single channel 32bit float

result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF) #this is where the magic happens!

#The cv2.minMaxLoc function takes the correlation result and returns a 4-tuple

#that includes the minimum correlation value, the maximum correlation value,

#the (x, y)-coordinate of the minimum value, and the (x, y)-coordinate of the

#maximum value, respectively. We are only interested in the maximum value and

#(x, y)-coordinate so we keep the maximums and discard the minimums.

(_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)

#if we found a new maximum correlation value, then update the bookkeeping variable

if found is None or maxVal > found[0]:

found = (maxVal, maxLoc, r)

#unpack the bookkeeping variable and compute the (x, y) coordinates of the bounding box based on the resized ratio

(_, maxLoc, r) = found

(startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))

(endX, endY) = (int((maxLoc[0] + template_width) * r), int((maxLoc[1] + template_height) * r))

#draw a box around the detected result and display the image

cv2.rectangle(ms_image, (startX, startY), (endX, endY), (0, 0, 255), 2)

cv2.imshow("Image", ms_image)

stream.truncate(0) #Must use this to eliminate the error: "Incorrect buffer length"

if cv2.waitKey(1) & 0xFF == ord('q'): #press CTRL and Q to stop the program from running

break

# When everything is done, release the capture

video_capture.release()

cv2.destroyAllWindows()

### This code below should be in a separate file called imutils.py

# Import the necessary packages

import numpy as np

import cv2

def translate(image, x, y):

# Define the translation matrix and perform the translation

M = np.float32([[1, 0, x], [0, 1, y]])

shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))

# Return the translated image

return shifted

def rotate(image, angle, center = None, scale = 1.0):

# Grab the dimensions of the image

(h, w) = image.shape[:2]

# If the center is None, initialize it as the center of the image

if center is None:

center = (w / 2, h / 2)

# Perform the rotation

M = cv2.getRotationMatrix2D(center, angle, scale)

rotated = cv2.warpAffine(image, M, (w, h))

# Return the rotated image

return rotated

def resize(image, width = None, height = None, inter = cv2.INTER_AREA):

# initialize the dimensions of the image to be resized and grab the image size

dim = None

(h, w) = image.shape[:2]

# if both the width and height are None, then return the original image

if width is None and height is None:

return image

# check to see if the width is None

if width is None:

# calculate the ratio of the height and construct the dimensions

r = height / float(h)

dim = (int(w * r), height)

# otherwise, the height is None

else:

# calculate the ratio of the width and construct the dimensions

r = width / float(w)

dim = (width, int(h * r))

# resize the image

resized = cv2.resize(image, dim, interpolation = inter)

# return the resized image

return resized

4 comments:

UnknownJune 25, 2017 at 7:13 PM
You are Sir the real MVP!
Stephen DevaneJuly 11, 2019 at 7:38 AM
The "multi-scale template matching to find an object in a video stream" program does not work. I get "Illegal Instruction" seems the code is imcomplete. Does anyone have a working version please?

Be nice!

Monday, December 7, 2015

OpenCV: Multi-Scale Template Matching, Raspberry Pi 2, Camera Module, Real-time detection

4 comments: