Generating a saliency map with the spectral residual approach
@wojteklu · Created March 15, 2018 21:07
//
// SaliencyDetector.h
// Copyright © 2018 wojteklu. All rights reserved.
//

#import <UIKit/UIKit.h>

@interface SaliencyDetector : NSObject

- (CGRect)findMostProminentPartOfImage:(UIImage *)image;
- (UIImage *)selectMostProminentPartOfImage:(UIImage *)image;
- (UIImage *)generateSaliencyMapOfImage:(UIImage *)image;

@end
//
// SaliencyDetector implementation (Objective-C++)
// Copyright © 2018 wojteklu. All rights reserved.
//

#import "SaliencyDetector.h"
#import "UIImage+Mat.h"

#include <algorithm>   // for std::sort

using namespace std;
using namespace cv;

@implementation SaliencyDetector

#pragma mark - public methods

- (CGRect)findMostProminentPartOfImage:(UIImage *)image {
    Mat input = [UIImage matFromImage:image];
    Mat saliencyMap = [self calculateSaliencyOfImage:input];
    cv::Rect rect = [self findBiggestContourRectFromMat:saliencyMap];
    return CGRectMake(rect.x, rect.y, rect.width, rect.height);
}

- (UIImage *)selectMostProminentPartOfImage:(UIImage *)image {
    Mat input = [UIImage matFromImage:image];
    Mat saliencyMap = [self calculateSaliencyOfImage:input];
    cv::Rect rect = [self findBiggestContourRectFromMat:saliencyMap];

    // outline the most salient region on the input image
    rectangle(input, rect, CV_RGB(0, 0, 255), 5);
    return [UIImage imageFromMat:input withOrientation:image.imageOrientation];
}

- (UIImage *)generateSaliencyMapOfImage:(UIImage *)image {
    Mat input = [UIImage matFromImage:image];
    Mat saliencyMap = [self calculateSaliencyOfImage:input];
    return [UIImage imageFromMat:saliencyMap withOrientation:image.imageOrientation];
}
#pragma mark - spectral residual approach
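
// Background (Hou & Zhang, "Saliency Detection: A Spectral Residual Approach",
// CVPR 2007): for an image channel with Fourier amplitude A(f) and phase P(f),
// the log spectrum L(f) = log A(f) is compared with its local average
// h_n(f) * L(f). The spectral residual R(f) = L(f) - h_n(f) * L(f) keeps the
// "unexpected" part of the spectrum, and the saliency map is the squared
// magnitude of the inverse transform of exp(R(f) + i P(f)), smoothed with a
// Gaussian. The two methods below largely follow this recipe: the residual is
// computed per channel and the per-channel maps are then averaged.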
- (Mat)calculateSaliencyOfImage:(Mat)image {
    Mat magnitude;
    cvtColor(image, image, COLOR_RGB2BGR);

    // calculate the saliency magnitude of each channel independently
    Mat channels[3];
    split(image, channels);

    Mat magnitudes[3];
    for (int i = 0; i < 3; i++) {
        magnitudes[i] = [self calculateMagnitudeOfChannel:channels[i]];
    }

    // the overall salience of a multi-channel image is the average over all
    // channels; calcCovarMatrix is used here only for its mean output
    Mat output, average;
    calcCovarMatrix(magnitudes, 3, output, average, CV_COVAR_NORMAL);

    // blur to make the result appear smoother
    GaussianBlur(average, magnitude, cv::Size(5, 5), 8);

    // square the matrix to highlight the regions of high salience
    magnitude = magnitude.mul(magnitude);

    // normalize values, so that the largest value is 1
    double minVal, maxVal;
    minMaxLoc(magnitude, &minVal, &maxVal);
    magnitude = magnitude / maxVal;
    magnitude.convertTo(magnitude, CV_32F);

    // scale the map back up to the original resolution
    resize(magnitude, magnitude, image.size());

    // threshold the saliency map
    magnitude = magnitude * 255;
    magnitude.convertTo(magnitude, CV_8U);

    Mat saliencyMap;
    threshold(magnitude, saliencyMap, 0, 255, THRESH_BINARY + THRESH_OTSU);
    return saliencyMap;
}
- (Mat)calculateMagnitudeOfChannel:(Mat)channel {
    // work on a small fixed-size copy; 64x64 is enough for a coarse saliency map
    cv::Size smallFrame(64, 64);
    resize(channel, channel, smallFrame);

    Mat mergedImage(smallFrame, CV_64FC2);
    Mat imageDFT;
    std::vector<Mat> planes;

    Mat realImage(smallFrame, CV_64F);
    channel.convertTo(realImage, CV_64F);
    Mat imaginaryImage(smallFrame, CV_64F);
    imaginaryImage.setTo(0);

    planes.push_back(realImage);
    planes.push_back(imaginaryImage);

    // calculate the magnitude and phase of the Fourier spectrum
    merge(planes, mergedImage);
    dft(mergedImage, imageDFT, DFT_COMPLEX_OUTPUT);
    split(imageDFT, planes);

    Mat angle(smallFrame, CV_64F);
    Mat magnitude(smallFrame, CV_64F);
    cartToPolar(planes.at(0), planes.at(1), magnitude, angle, false);

    // calculate the log magnitude of the Fourier spectrum
    Mat logAmplitude;
    log(magnitude, logAmplitude);

    // approximate the averaged spectrum of a typical natural image
    // by convolving the log spectrum with a local averaging filter
    Mat logAmplitudeBlur;
    blur(logAmplitude, logAmplitudeBlur, cv::Size(3, 3));

    // calculate the spectral residual, which primarily
    // contains the nontrivial parts of the scene
    exp(logAmplitude - logAmplitudeBlur, magnitude);

    // calculate the saliency map by taking the inverse Fourier transform
    polarToCart(magnitude, angle, planes.at(0), planes.at(1), false);
    merge(planes, imageDFT);
    idft(imageDFT, mergedImage);
    split(mergedImage, planes);
    cartToPolar(planes.at(0), planes.at(1), magnitude, angle, false);

    return magnitude;
}
#pragma mark - finding biggest contour

// comparator used to sort contours by area, smallest first;
// defined before its use in std::sort below
static bool compareContourAreas(const std::vector<cv::Point> &contour1, const std::vector<cv::Point> &contour2) {
    return fabs(contourArea(cv::Mat(contour1))) < fabs(contourArea(cv::Mat(contour2)));
}

- (cv::Rect)findBiggestContourRectFromMat:(Mat)mat {
    std::vector<std::vector<cv::Point> > contours;
    std::vector<cv::Vec4i> hierarchy;
    cv::findContours(mat, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0));

    if (contours.size() == 0) {
        return cv::Rect(0, 0, 0, 0);
    }

    // sort contours by area and take the bounding box of the largest one
    std::sort(contours.begin(), contours.end(), compareContourAreas);
    std::vector<cv::Point> biggestContour = contours[contours.size() - 1];
    return boundingRect(biggestContour);
}
@end
//
// UIImage+Mat.h
// Copyright © 2018 wojteklu. All rights reserved.
//

#ifdef __cplusplus
// undefine the Objective-C YES/NO macros, which clash with enum values in the OpenCV headers
#undef NO
#undef YES
#import <opencv2/opencv.hpp>
#import <opencv2/imgcodecs/ios.h>
#endif

#import <UIKit/UIKit.h>

@interface UIImage (Mat)

+ (cv::Mat)matFromImage:(UIImage *)image;
+ (UIImage *)imageFromMat:(cv::Mat)mat withOrientation:(UIImageOrientation)orientation;

@end
//
// UIImage+Mat category implementation (Objective-C++)
// Copyright © 2018 wojteklu. All rights reserved.
//

#import "UIImage+Mat.h"

using namespace std;
using namespace cv;

@implementation UIImage (Mat)

+ (Mat)matFromImage:(UIImage *)image {
    CGColorSpaceRef colorSpace = CGImageGetColorSpace(image.CGImage);

    CGFloat cols = image.size.width;
    CGFloat rows = image.size.height;
    if (image.imageOrientation == UIImageOrientationLeft || image.imageOrientation == UIImageOrientationRight) {
        cols = image.size.height;
        rows = image.size.width;
    }

    // 8 bits per component, 4 channels (color channels + alpha)
    cv::Mat mat(rows, cols, CV_8UC4);

    CGContextRef contextRef = CGBitmapContextCreate(mat.data,       // pointer to data
                                                    cols,           // width of bitmap
                                                    rows,           // height of bitmap
                                                    8,              // bits per component
                                                    mat.step[0],    // bytes per row
                                                    colorSpace,
                                                    kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault);
    CGContextDrawImage(contextRef, CGRectMake(0, 0, cols, rows), image.CGImage);
    CGContextRelease(contextRef);
    // colorSpace comes from a Get function, so it is not released here

    // swap the R and B channels so the Mat is in OpenCV's usual BGR(A) order
    std::vector<Mat> ch;
    cv::split(mat, ch);
    std::swap(ch[0], ch[2]);
    cv::merge(ch, mat);

    return mat;
}
+ (UIImage *)imageFromMat:(cv::Mat)mat withOrientation:(UIImageOrientation)orientation {
    NSData *data = [NSData dataWithBytes:mat.data length:mat.elemSize() * mat.total()];

    CGColorSpaceRef colorSpace;
    CGBitmapInfo bitmapInfo;
    if (mat.elemSize() == 1) {
        colorSpace = CGColorSpaceCreateDeviceGray();
        bitmapInfo = kCGImageAlphaNone | kCGBitmapByteOrderDefault;
    } else {
        colorSpace = CGColorSpaceCreateDeviceRGB();
        bitmapInfo = kCGBitmapByteOrder32Little |
            (mat.elemSize() == 3 ? kCGImageAlphaNone : kCGImageAlphaNoneSkipFirst);
    }

    CGDataProviderRef provider = CGDataProviderCreateWithCFData((__bridge CFDataRef)data);
    CGImageRef imageRef = CGImageCreate(mat.cols,            // width
                                        mat.rows,            // height
                                        8,                   // bits per component
                                        8 * mat.elemSize(),  // bits per pixel
                                        mat.step[0],         // bytes per row
                                        colorSpace,
                                        bitmapInfo,
                                        provider,
                                        NULL,
                                        false,
                                        kCGRenderingIntentDefault);

    UIImage *image = [UIImage imageWithCGImage:imageRef scale:1 orientation:orientation];

    CGImageRelease(imageRef);
    CGDataProviderRelease(provider);
    CGColorSpaceRelease(colorSpace);

    return image;
}
@end
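
For context, here is a minimal usage sketch that is not part of the original gist. It assumes the call site is compiled as Objective-C++ (an .mm file) so the headers above can be imported; the helper name AnnotateMostSalientRegion and the way the UIImage is obtained are purely illustrative assumptions.

// Hypothetical call site (not from the gist); compile as Objective-C++.
#import <UIKit/UIKit.h>
#import "SaliencyDetector.h"

static UIImage *AnnotateMostSalientRegion(UIImage *photo) {
    SaliencyDetector *detector = [[SaliencyDetector alloc] init];

    // bounding box of the most salient region, in image coordinates
    CGRect salientRect = [detector findMostProminentPartOfImage:photo];
    NSLog(@"most prominent region: %@", NSStringFromCGRect(salientRect));

    // the raw saliency map can be inspected separately, e.g. in a UIImageView
    UIImage *saliencyMap = [detector generateSaliencyMapOfImage:photo];
    (void)saliencyMap;

    // same input with the most salient region outlined
    return [detector selectMostProminentPartOfImage:photo];
}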