pull/1252/merge
Masterqsx 2 months ago committed by GitHub
commit 00a93dbcaa
  1. modules/cnn_3dobj/testdata/cv/deploy.prototxt (149)
  2. modules/datasets/include/opencv2/datasets/saliency_mit1003.hpp (47)
  3. modules/datasets/samples/saliency_mit1003Sample.cpp (29)
  4. modules/datasets/samples/track_vot.cpp (2)
  5. modules/datasets/src/saliency_mit1003.cpp (83)
  6. modules/saliency/CMakeLists.txt (2)
  7. modules/saliency/doc/saliency.bib (26)
  8. modules/saliency/include/opencv2/saliency/saliencySpecializedClasses.hpp (322)
  9. modules/saliency/samples/BackgroundContrastSample.cpp (52)
  10. modules/saliency/samples/DeepGaze1Sample.cpp (89)
  11. modules/saliency/samples/DiscriminantSaliencySample.cpp (64)
  12. modules/saliency/samples/deploy.prototxt (149)
  13. modules/saliency/src/BackgroundContrast.cpp (376)
  14. modules/saliency/src/DeepGaze1.cpp (339)
  15. modules/saliency/src/DiscriminantSaliency.cpp (340)
  16. modules/saliency/src/staticSaliency.cpp (4)
  17. modules/ximgproc/src/selectivesearchsegmentation.cpp (1)

@@ -0,0 +1,149 @@
name: "CaffeNet"
input: "data"
input_dim: 10
input_dim: 3
input_dim: 227
input_dim: 227
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "norm1"
top: "conv2"
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}

@@ -0,0 +1,47 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DATASETS_SALIENCY_MIT1003_HPP
#define OPENCV_DATASETS_SALIENCY_MIT1003_HPP
#include <string>
#include <vector>
#include "opencv2/datasets/dataset.hpp"
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
namespace cv
{
namespace datasets
{
//! @addtogroup datasets_saliency
//! @{
struct SALIENCY_mit1003obj : public Object
{
int id; //!< index of the image within the dataset
std::string name; //!< image file name without extension
Mat img; //!< original image
Mat fixMap; //!< human fixation map (loaded as grayscale)
Mat fixPts; //!< fixation points (loaded as grayscale)
};
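/** @brief Dataset connector for the MIT1003 saliency dataset.
* The loader below assumes the published layout of the dataset: `path` contains an ALLSTIMULI
* folder with the .jpeg stimuli and an ALLFIXATIONMAPS folder with the <name>_fixMap.jpg and
* <name>_fixPts.jpg ground-truth images.
*/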
class CV_EXPORTS SALIENCY_mit1003 : public Dataset
{
public:
virtual void load(const std::string &path) = 0;
virtual std::vector<std::vector<Mat> > getDataset() = 0;
static Ptr<SALIENCY_mit1003> create();
};
//! @}
}
}
#endif

@@ -0,0 +1,29 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/highgui.hpp>
#include "opencv2/datasets/saliency_mit1003.hpp"
#include <vector>
#include <string>
#include <iostream>
using namespace std;
using namespace cv;
using namespace cv::datasets;
int main(int argc, char** argv)
{
if (argc < 2)
{
cout << "usage: " << argv[0] << " <path to MIT1003 dataset folder>" << endl;
return 0;
}
Ptr<SALIENCY_mit1003> datasetConnector = SALIENCY_mit1003::create();
datasetConnector->load(argv[1]);
vector<vector<Mat> > dataset(datasetConnector->getDataset()); //dataset[0] is original img, dataset[1] is fixMap, dataset[2] is fixPts
//You can use the mit1003 dataset however you want
for ( unsigned i = 0; i < dataset[0].size(); i++)
{
imshow("img", dataset[0][i]);
waitKey(0);
}
return 0;
}
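// Example invocation (hypothetical binary name and path):
//   ./saliency_mit1003Sample /path/to/MIT1003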

@@ -96,4 +96,4 @@ int main(int argc, char *argv[])
getchar();
return 0;
}
}

@@ -0,0 +1,83 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "opencv2/datasets/saliency_mit1003.hpp"
#include "opencv2/datasets/util.hpp"
namespace cv
{
namespace datasets
{
using namespace std;
class SALIENCY_mit1003Imp : public SALIENCY_mit1003
{
public:
SALIENCY_mit1003Imp() {}
virtual ~SALIENCY_mit1003Imp() {}
virtual void load(const string &path);
virtual vector<vector<Mat> > getDataset();
private:
void loadDataset(const string &path);
};
void SALIENCY_mit1003Imp::load(const string &path)
{
loadDataset(path);
}
void SALIENCY_mit1003Imp::loadDataset(const string &path)
{
train.push_back( vector< Ptr<Object> >() );
test.push_back( vector< Ptr<Object> >() );
validation.push_back( vector< Ptr<Object> >() );
string imgPath( path + "/ALLSTIMULI/" );
string fixPath( path + "/ALLFIXATIONMAPS/" );
vector<string> imgNames;
getDirList( imgPath, imgNames );
for ( unsigned i = 0; i < imgNames.size(); i++ )
{
Ptr<SALIENCY_mit1003obj> curr( new SALIENCY_mit1003obj );
curr->name = imgNames[i].substr( 0, imgNames[i].find_first_of('.') );
curr->id = i;
curr->img = imread( imgPath + curr->name + ".jpeg" );
curr->fixMap = imread( fixPath + curr->name + "_fixMap.jpg", 0 );
curr->fixPts = imread( fixPath + curr->name + "_fixPts.jpg", 0 );
if ( curr->img.empty() || curr->fixMap.empty() || curr->fixPts.empty() ) continue;
train.back().push_back(curr);
test.back().push_back(curr);
validation.back().push_back(curr);
}
}
Ptr<SALIENCY_mit1003> SALIENCY_mit1003::create()
{
return Ptr<SALIENCY_mit1003Imp>(new SALIENCY_mit1003Imp);
}
vector<vector<Mat> > SALIENCY_mit1003Imp::getDataset()
{
vector<vector<Mat> > res = vector<vector<Mat> >( 3, vector<Mat>() );
for ( unsigned i = 0; i < train.size() ;i++ )
{
for ( unsigned j = 0; j < train[i].size() ;j++ )
{
Ptr<SALIENCY_mit1003obj> curr(static_cast<SALIENCY_mit1003obj *>(train[i][j].get()));
res[0].push_back( curr->img );
res[1].push_back( curr->fixMap );
res[2].push_back( curr->fixPts );
}
}
return res;
}
}
}

@@ -4,6 +4,6 @@ endif()
set(the_description "Saliency API")
ocv_define_module(saliency opencv_imgproc opencv_features2d WRAP python)
ocv_define_module(saliency opencv_imgproc opencv_datasets opencv_features2d opencv_dnn opencv_ximgproc WRAP python)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Woverloaded-virtual)

@@ -31,3 +31,29 @@
year={2010},
organization={Elsevier}
}
@article{kummerer2014deep,
title={Deep gaze i: Boosting saliency prediction with feature maps trained on imagenet},
author={K{\"u}mmerer, Matthias and Theis, Lucas and Bethge, Matthias},
journal={arXiv preprint arXiv:1411.1045},
year={2014}
}
@article{mahadevan2010spatiotemporal,
title={Spatiotemporal saliency in dynamic scenes},
author={Mahadevan, Vijay and Vasconcelos, Nuno},
journal={IEEE transactions on pattern analysis and machine intelligence},
volume={32},
number={1},
pages={171--177},
year={2010},
publisher={IEEE}
}
@inproceedings{zhu2014saliency,
title={Saliency optimization from robust background detection},
author={Zhu, Wangjiang and Liang, Shuang and Wei, Yichen and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={2814--2821},
year={2014}
}

@@ -46,8 +46,10 @@
#include <string>
#include <iostream>
#include <stdint.h>
#include <vector>
#include "saliencyBaseClasses.hpp"
#include "opencv2/core.hpp"
#include "opencv2/dnn.hpp"
namespace cv
{
@@ -152,6 +154,248 @@ private:
void getIntensity(Mat srcArg, Mat dstArg, Mat dstOnArg, Mat dstOffArg, bool generateOnOff);
};
/** @brief the Deep Gaze 1 Saliency approach from @cite kummerer2014deep
This method uses a linear combination of the feature maps of the fifth convolutional layer of a pretrained AlexNet, a center bias, and a softmax to generate the saliency map.
You can pass your own DNN layers into the DeepGaze1 object and retrain it with the "training" method to take advantage of cutting-edge DNNs in the future.
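A minimal usage sketch (assumes "deploy.prototxt" and "bvlc_alexnet.caffemodel" are available in the working directory, as described for the constructors below):
@code
cv::saliency::DeepGaze1 g;
cv::Mat img = cv::imread("image.jpg"); // hypothetical input image
cv::Mat saliencyMap;
if ( g.computeSaliency( img, saliencyMap ) )
    g.saliencyMapVisualize( saliencyMap );
@endcode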
*/
class CV_EXPORTS_W DeepGaze1 : public StaticSaliency
{
private:
/** @brief This is the field to store the input DNN
* @sa dnn::Net
*/
dnn::Net net;
/** @brief This is the field to store the names of the selected layers
*/
std::vector<std::string> layers_names;
/** @brief This is the field to store the weights of the selected layers
*/
std::vector<double> weights;
public:
/** @brief This is the simplest constructor. It works with the AlexNet Caffe implementation.
* You can put the prototxt and caffemodel files in the project's default path or pass the paths of the required files into the constructor.
* The weights for AlexNet have been tuned and validated.
* @param net_proto The path of AlexNet prototxt file
* @param net_caffemodel The path of AlexNet caffemodel file
* @sa dnn::Net, dnn::readNetFromCaffe
*/
DeepGaze1( std::string net_proto = "deploy.prototxt", std::string net_caffemodel = "bvlc_alexnet.caffemodel" );
/** @brief This constructor works with your own DNN Caffe implementation.
* You can put the prototxt and caffemodel files in the project's default path or pass the paths of the required files into the constructor.
* The weights of the selected layers are randomly initialized; call the training method to tune them.
* To download the caffemodel of AlexNet: http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel
* The prototxt we use is slightly different from the one in the BVLC GitHub repository: https://github.com/BVLC/caffe/blob/master/models/bvlc_alexnet/deploy.prototxt
* To find the modified "deploy.prototxt" specific for DeepGaze1 module: https://github.com/opencv/opencv_contrib/tree/master/modules/cnn_3dobj/testdata/cv/deploy.prototxt
* @param net_proto The path of DNN prototxt file. The default is "deploy.prototxt"
* @param net_caffemodel The path of DNN caffemodel file. The default is "bvlc_alexnet.caffemodel"
* @param selected_layers The name of the layers you selected
* @param n_weights The number of weights to initialize
* @sa dnn::Net, dnn::readNetFromCaffe
*/
DeepGaze1( std::string net_proto, std::string net_caffemodel, std::vector<std::string> selected_layers, unsigned n_weights);
/** @brief This constructor works with your own DNN Caffe implementation and your own tuned weights.
* You can put the prototxt and caffemodel files in the project's default path or pass the paths of the required files into the constructor.
* @param net_proto The path of DNN prototxt file
* @param net_caffemodel The path of DNN caffemodel file
* @param selected_layers The name of the layers you selected
* @param i_weights The weights used for initialization
* @sa dnn::Net, dnn::readNetFromCaffe
*/
DeepGaze1( std::string net_proto, std::string net_caffemodel, std::vector<std::string> selected_layers, std::vector<double> i_weights);
virtual ~DeepGaze1();
CV_WRAP static Ptr<DeepGaze1> create()
{
return makePtr<DeepGaze1>();
}
CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap )
{
if( image.empty() )
return false;
return computeSaliencyImpl( image, saliencyMap );
}
/** @brief This is the method you can use to generate a saliency map with your own input image size.
* The input image size you set must fit your own DNN structure.
* @param input_image The Mat you want to be processed
* @param input_size The image size that fits your DNN structure. The default is 227 x 227, which fits AlexNet
*/
Mat saliencyMapGenerator( Mat input_image, Size input_size = Size(227, 227) );
/** @brief This is the method you can use to train the untuned linear-combination weights of the selected DNN layers.
* Be careful with the decayed-momentum SGD hyperparameters you set, such as the batch size, learning rate, momentum, number of iterations, and decay rate.
* The default hyperparameters are not meant for the pretrained AlexNet weights.
* Do not be surprised by the raw saliency map; it needs to be thresholded before the salient objects are revealed. You can use Otsu's method to threshold the saliency map and extract the foreground object, as sketched below.
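* A hedged sketch of that thresholding step (the variable names are illustrative):
* @code
* cv::Mat sal8u;
* cv::normalize( saliencyMap, sal8u, 0, 255, cv::NORM_MINMAX, CV_8U ); // stretch the softmax output to 8 bits
* cv::threshold( sal8u, sal8u, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU ); // Otsu picks the cut automatically
* @endcode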
* @param images The training set images
* @param fixMaps The saliency map of training set
* @param iteration The number of times you iterate on the same batch. The default is 1
* @param batch_size The size of the batch. The default is 100
* @param momentum The value of momentum. The default is 0.9
* @param alpha The value of the SGD learning rate. The default is 0.01. An improper setting may worsen performance
* @param decay The value of decay. The default is 0.01
* @param input_size The image size that fits your DNN structure. The default is 227 x 227, which fits AlexNet
*/
void training( std::vector<Mat>& images, std::vector<Mat>& fixMaps, int iteration = 1, unsigned batch_size = 100, double momentum = 0.9, double alpha = 0.01, double decay = 0.01, Size input_size = Size(227, 227) );
/** @brief This is the method you can use to calculate the AUC.
* @param _saliencyMap The saliency map to be tested
* @param _fixtionMap The ground-truth fixation map
*/
double computeAUC( InputArray _saliencyMap, InputArray _fixtionMap );
/** @brief This is the method you can use to visualize the saliency map.
* @param _saliencyMap The saliency map to be visualized
*/
void saliencyMapVisualize( InputArray _saliencyMap );
protected:
bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap );
/** @brief This is the method to retrieve layers from the DNN net, normalized so that each element has zero mean and unit standard deviation across all layers.
* @param img The image to be processed by the DNN
* @param input_size The rescaled image size that fits the DNN
*/
std::vector<Mat> featureMapGenerator( Mat img, Size input_size );
/** @brief This is the method to linearly combine the selected normalized layers and Gaussian-blur the result.
* @param featureMaps The selected normalized layers generated by method "featureMapGenerator"
* @param wei The weights of selected normalized layers
*/
static Mat comb( std::vector<Mat>& featureMaps, std::vector<double> wei );
/** @brief This is the method to apply a softmax over the whole map (each element x becomes exp(x) / sum(exp(x)))
* @param res The result of method "comb"
*/
static Mat softmax( Mat res );
/** @brief This is the method to numerically calculate the gradients of the loss function
* @param featureMaps The selected normalized layers generated by method "featureMapGenerator"
* @param randIndex The indices of the elements used to accumulate the loss
* @param wei The weights of selected normalized layers to be updated
* @param input_size The input image size which needs to fit the DNN you choose
*/
static std::vector<double> evalGrad( std::vector<Mat>& featureMaps, std::vector<unsigned>& randIndex, std::vector<double> wei, Size input_size );
/** @brief This is the method to randomly draw indices of images from the training dataset
* @param total The size of the training dataset
* @param batchSize The size of the batch you want to draw
*/
std::vector<unsigned> batchIndex( unsigned total, unsigned batchSize );
/** @brief This is the method to calculate the loss in order to update weights
* @param saliency_sample The value of sampled elements on saliency map which are used to accumulate the loss
* @param wei The weights of selected normalized layers in current iteration
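* In the implementation, this loss is the average negative log-likelihood of the sampled saliency
* values plus a small L1/sqrt(L2) regularizer on the weights:
*   loss = -(1/n) * sum_i log(s_i) + 0.001 * (sum_j |w_j|) / sqrt(sum_j w_j^2)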
*/
static double loss( std::vector<double> saliency_sample, std::vector<double> wei );
/** @brief This is the method to sample the elements of the saliency map in the current iteration
* @param saliency Saliency map to be sampled
* @param randIndex The index of elements to be sampled
*/
static std::vector<double> mapSampler( Mat saliency, std::vector<unsigned> randIndex );
/** @brief This is the method to determine which elements of the fixation map in the training dataset belong to salient objects and to generate the sampled element indices.
* @param img The fixation map in the training datasets
* @param input_size The input image size that fits the DNN you choose
*/
std::vector<unsigned> fixationLoc( Mat img, Size input_size );
};
/** @brief the Background Contrast Saliency approach from @cite zhu2014saliency
This method uses the SEEDS superpixel algorithm to partition the image and calculates the probability that each superpixel belongs to the background from CIE-Lab color contrast.
This method also provides an optimization framework that may help foreground-based saliency methods perform better.
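A minimal usage sketch:
@code
cv::saliency::BackgroundContrast bc;
cv::Mat img = cv::imread("image.jpg"); // hypothetical input image
cv::Mat saliency;
if ( bc.computeSaliency( img, saliency ) )
    bc.saliencyMapVisualize( saliency, 0 ); // option 0: show the raw map without thresholding
@endcode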
*/
class CV_EXPORTS_W BackgroundContrast : public StaticSaliency
{
private:
/** @brief
* limitOfSP is the maximum number of superpixels
* nOfLevel, usePrior, histBin are the same as in the SEEDS superpixel method
* bgWei determines the weight of the background when optimizing together with a foreground method
*/
int limitOfSP;
int nOfLevel;
int usePrior;
int histBin;
double bgWei;
public:
/** @brief This is the default constructor.
* It will initialize with bgWei: 5, limitOfSP: 600, nOfLevel: 4, usePrior: 2, histBin: 5
*/
BackgroundContrast();
/** @brief This is the constructor with which the user can customize those parameters (in the order bgWei, limitOfSP, nOfLevel, usePrior, histBin)
*/
BackgroundContrast( double, int, int, int, int );
virtual ~BackgroundContrast();
CV_WRAP static Ptr<BackgroundContrast> create()
{
return makePtr<BackgroundContrast>();
}
CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap )
{
if( image.empty() )
return false;
return computeSaliencyImpl( image, saliencyMap );
}
/** @brief This is the method you can use to generate a saliency map with or without an outside saliency map
* @param img The image to be processed
* @param fgImg The outside saliency map to be optimized with the aggregated optimization framework
* @param option The flag for using an outside saliency map or not. The default is 0, which means no outside saliency map is used; 1 means the outside saliency map will be optimized
*/
Mat saliencyMapGenerator( const Mat img, const Mat fgImg = Mat(), int option = 0 );
/** @brief This is the method you can use to visualize the saliency map
* This method offers 3 different threshold options to retrieve the salient object from the saliency map.
* @param _saliencyMap The saliency map to be visualized
* @param option 0(default): no threshold, 1: threshold with Otsu's method and binary threshold, 2: threshold with Otsu's method and tozero threshold
*/
Mat saliencyMapVisualize( InputArray _saliencyMap, int option = 0 );
protected:
/** @brief This is the method to use background probability to optimize saliency map
* This method essentially uses least squares to solve an optimization problem.
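* As a sketch of the cost function of @cite zhu2014saliency (the exact regularization terms
* follow the implementation), the optimized saliency values s_i minimize
*   sum_i bgWeight_i * s_i^2 + sum_i fgWeight_i * (s_i - 1)^2 + sum_{i,j} w_ij * (s_i - s_j)^2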
* @param adjcMatrix superpixel adjacency matrix calculated by method superpixelSplit
* @param colDistM superpixel CIE-Lab color distance matrix calculated by method getColorPosDis
* @param bgWeight The value of background probability
* @param fgWeight The value of foreground probability(saliency)
* @param saliencyOptimized The output optimized saliency map
* @param bgLambda The weight of background probability compared with foreground probability, default 5
* @param neiSigma The sigma used to transform the smoothness weight into the [0, 1] range, default 14
*/
void saliencyOptimize( const Mat adjcMatrix, const Mat colDistM, const Mat bgWeight, const Mat fgWeight, Mat& saliencyOptimized, double bgLambda = 5, double neiSigma = 14 );
bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap );
/** @brief This is the method to use the OpenCV SEEDS superpixel method to split the image and return the label of each pixel and the adjacency matrix
* @param img The image to be processed
* @param idxImg The output label map of each pixel
* @param adjcMatrix The output adjacency matrix
*/
void superpixelSplit( const Mat img, Mat& idxImg, Mat& adjcMatrix );
/** @brief This is the method to find the background superpixels along the image boundary
* @param idxImg The label of each pixel in the original image
* @param thickness The width of background boundary, default is 8
*/
std::vector<unsigned> getBndPatchIds( const Mat idxImg, int thickness = 8);
/** @brief This is the method to calculate the superpixel CIE-Lab color distance matrix and geographic distance matrix
* @param img The image to be processed
* @param idxImg The label of each pixel
* @param colDistM The output superpixel color distance matrix
* @param posDistM The output superpixel geographic distance matrix
* @param nOfSP The number of superpixels into which the image is split
*/
void getColorPosDis( const Mat img, const Mat idxImg, Mat& colDistM, Mat& posDistM, int nOfSP);
/** @brief This is the method to calculate the background probability of each superpixel
* This method uses the Floyd-Warshall algorithm to find all shortest paths between superpixels
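* Following @cite zhu2014saliency, the boundary connectivity of superpixel p is
*   BndCon(p) = Len_bnd(p) / sqrt(Area(p)),
* computed here from geodesic-distance weights; the background probability is then
* 1 - exp( -BndCon(p)^2 / 2 ) as in the implementation below.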
* @param adjcMatrix The adjacency matrix of superpixels
* @param colDistM The superpixel color distance matrix
* @param bdProb The output superpixel background probability
* @param bdIds The index of background superpixels
* @param clipVal The bias of distance between adjacent superpixels, default 3
* @param geoSigma The sigma used to transform the geodesic distance into the [0, 1] range, default 7
*/
void boundaryConnectivity( const Mat adjcMatrix, const Mat colDistM, Mat& bdProb, std::vector<unsigned> bdIds, double clipVal = 3.0, double geoSigma = 7.0 );
/** @brief This is the method to calculate the weighted background contrast
* @param colDistM The superpixel color distance matrix
* @param posDistM The superpixel geographic distance matrix
* @param bgProb The background probability of superpixels
* @param wCtr The background weighted contrast which can be treated as the saliency map generated by the background probability
*/
void getWeightedContrast( const Mat colDistM, const Mat posDistM, const Mat bgProb, Mat& wCtr );
/** @brief This is the method to transform each input distance d into a weight exp( -d^2 / (2 * sigma^2) ) in the [0, 1] range
*/
void dist2WeightMatrix( Mat&, Mat&, double );
/** @brief This is the method to transform OpenCV BGR color into CIE-Lab color space
*/
void rgb2lab( Mat&, Mat& );
};
@@ -279,6 +523,84 @@ private:
};
/** @brief the Discriminant Saliency approach from @cite mahadevan2010spatiotemporal
This method uses a dynamic texture model to capture the statistics of the dynamic background and separate it from non-background objects.
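A minimal usage sketch (frames is a hypothetical std::vector<cv::Mat> of grayscale frames):
@code
cv::saliency::DiscriminantSaliency ds;
std::vector<cv::Mat> saliency_sq;
if ( ds.computeSaliency( frames, saliency_sq ) )
    for ( size_t i = 0; i < saliency_sq.size(); i++ )
        ds.saliencyMapVisualize( saliency_sq[i] );
@endcode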
*/
class CV_EXPORTS_W DiscriminantSaliency : public MotionSaliency
{
private:
Size imgProcessingSize;
unsigned hiddenSpaceDimension;
unsigned centerSize;
unsigned windowSize;
unsigned patchSize;
unsigned temporalSize;
unsigned stride;
public:
/** @brief The structure to store dynamic texture model parameters (reading the model as the usual linear dynamical system x_t = A x_{t-1} + v_t, y_t = C x_t + w_t; the field comments below follow that interpretation)
*/
struct DT
{
Mat A; //!< state transition matrix
Mat C; //!< observation (output) matrix
Mat Q; //!< state noise covariance
Mat R; //!< observation noise covariance
Mat S; //!< initial state covariance
Mat MU; //!< initial state mean
double VAR; //!< scalar observation noise variance
};
// DiscriminantSaliency();
/** @brief Constructor
* @param _stride The parameter to adjust the saliency map resolution: the higher it is, the lower the resolution and the faster the processing, default 1
* @param _imgProcessingSize The image size to be processed. You can rescale the image with this parameter. The larger the image is, the slower the processing, default 127 * 127
* @param _hidden The hidden space dimension. The default is 10.
* @param _center The center window size. The default is 8 * 8
* @param _window The sliding window size. The default is 96 * 96
* @param _patch This parameter determines how many pixels are drawn when estimating the dynamic texture parameters, default is 400
* @param _temporal This parameter determines how many frames are considered when estimating the dynamic texture parameters, default is 11
*/
DiscriminantSaliency(unsigned _stride = 1, Size _imgProcessingSize = Size(127, 127), unsigned _hidden = 10, unsigned _center = 8, unsigned _window = 96, unsigned _patch = 400, unsigned _temporal = 11);
virtual ~DiscriminantSaliency();
CV_WRAP static Ptr<DiscriminantSaliency> create()
{
return makePtr<DiscriminantSaliency>();
}
CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap )
{
if( image.empty() )
return false;
return computeSaliencyImpl( image, saliencyMap );
}
/** @brief This is the method to estimate dynamic texture parameters
* @param img_sq The sequence of pixels of randomly sampled patches in each temporal patch. The 2D images are flattened into 1D column vectors
* @param para The output dynamic texture parameters
*/
void dynamicTextureEstimator( const Mat img_sq , DT& para );
/** @brief This method is used to randomly sample patches from each sliding window and center area
* @param img_sq frames sequence
* @param index the start index of the currently processed frames
* @param r the currently processed pixel vertical index
* @param c the currently processed pixel horizontal index
* @param center the center window
* @param surround the sliding-window pixels surrounding the center window
* @param all the whole sliding window
*/
void patchGenerator( const std::vector<Mat>& img_sq, unsigned index, unsigned r, unsigned c, Mat& center, Mat& surround, Mat& all );
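/** @brief This is the method to generate saliency maps for a frame sequence; the first argument takes the input frames and the second receives the per-frame saliency maps
*/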
std::vector<Mat> saliencyMapGenerator( std::vector<Mat>, std::vector<Mat>& );
/** @brief This is the method you can use to visualize saliency map.
* @param _saliencyMap The saliency map to be visualized
*/
void saliencyMapVisualize( InputArray _saliencyMap );
protected:
bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap );
/** @brief This is the core function to calculate the KL divergence between the center window and the sliding window with dynamic texture parameters
* @param para_c The center window dynamic texture parameters
* @param para_w The sliding window dynamic texture parameters
*/
double KLdivDT( const DT& para_c, const DT& para_w );
};
/************************************ Specific Objectness Specialized Classes ************************************/
/**

@@ -0,0 +1,52 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/saliency.hpp>
#include <opencv2/ximgproc.hpp>
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
using namespace cv;
using namespace saliency;
int main(int argc, char* argv[])
{
const char *keys =
"{ help h usage ? | | show this message }"
"{ img_path | | path to img }"
"{ foregroundImg_path| | path to saliency img generated by other method }"
"{ optimization |0 | 0 for aggregated optimized with no outside saliency, 1 for optimizaed with outside saliency }";
CommandLineParser parser(argc, argv, keys);
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string img_path = parser.get<string>("img_path");
string foregroundImg_path = parser.get<string>("foregroundImg_path");
Mat img = imread(img_path);
Mat fgImg = imread(foregroundImg_path);
BackgroundContrast bc = BackgroundContrast();
if ( parser.get<bool>( "optimization" ) )
{
bc.saliencyMapVisualize(bc.saliencyMapGenerator(img, fgImg, 1), 0);
}
else
{
Mat saliency;
bc.computeSaliency(img, saliency);
bc.saliencyMapVisualize(saliency, 0);
}
return 0;
} //main
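// Example invocation (hypothetical binary name and paths):
//   ./BackgroundContrastSample --img_path=image.jpg
//   ./BackgroundContrastSample --img_path=image.jpg --foregroundImg_path=fg.jpg --optimization=1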

@@ -0,0 +1,89 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/saliency.hpp>
#include <opencv2/datasets/saliency_mit1003.hpp>
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
using namespace cv;
using namespace cv::dnn;
using namespace cv::saliency;
using namespace cv::datasets;
int main(int argc, char* argv[])
{
const char *keys =
"{ help h usage ? | | show this message }"
"{ train |0 | set training on }"
"{ default |1 | use default deep net(AlexNet) and default weights }"
"{ AUC |0 | calculate AUC with input fixation map }"
"{ img_path | | path to folder with img }"
"{ fix_path | | path to folder with fixation img for compute AUC }"
"{ model_path |bvlc_alexnet.caffemodel | path to your caffe model }"
"{ proto_path |deploy.prototxt | path to your deep net caffe prototxt }"
"{ dataset_path d |./ | path to Dataset for training }";
CommandLineParser parser(argc, argv, keys);
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string img_path = parser.get<string>("img_path");
string model_path = parser.get<string>("model_path");
string proto_path = parser.get<string>("proto_path");
string dataset_path = parser.get<string>("dataset_path");
string fix_path = parser.get<string>("fix_path");
DeepGaze1 g;
if ( parser.get<bool>( "default" ) )
{
g = DeepGaze1( proto_path, model_path );
}
else
{
g = DeepGaze1( proto_path, model_path, vector<string>(1, "conv5"), 257 );
}
//Download the mit1003 saliency dataset into the working directory
//The ALLSTIMULI folder stores the images
//The ALLFIXATIONMAPS folder stores the training eye-fixation maps
if ( parser.get<bool>( "train" ) )
{
Ptr<SALIENCY_mit1003> datasetConnector = SALIENCY_mit1003::create();
datasetConnector->load( dataset_path );
vector<vector<Mat> > dataset( datasetConnector->getDataset() );
g.training( dataset[0], dataset[1], 1, 200, 0.9, 0.000001, 0.01);
}
ofstream file;
Mat res2;
g.computeSaliency( imread( img_path ), res2 );
resize( res2, res2, Size( 1024, 768 ) );
if ( parser.get<bool>( "AUC") )
cout << "AUC = " << g.computeAUC( res2, imread( fix_path, 0 ) ) << endl;;
g.saliencyMapVisualize( res2 );
file.open( "saliency.csv" );
for ( int i = 0; i < res2.rows; i++)
{
for ( int j=0; j < res2.cols; j++)
{
file << res2.at<double>( i, j ) << " ";
}
file << endl;
}
file.close();
return 0;
} //main
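// Example invocation (hypothetical binary name and paths):
//   ./DeepGaze1Sample --img_path=image.jpg
//   ./DeepGaze1Sample --train=1 --default=0 --dataset_path=./MIT1003 --img_path=image.jpg --AUC=1 --fix_path=fix.jpg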

@@ -0,0 +1,64 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/saliency.hpp>
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
using namespace cv;
using namespace saliency;
int main(int argc, char* argv[])
{
const char *keys =
"{ help h usage ? | | show this message }"
"{ start_frame |0 | start frame index }"
"{ length |12 | # of frames video contain }"
"{ default |1 | use default deep net(AlexNet) and default weights }"
"{ video_name |skiing| the name of video in UCSD background subtraction }"
"{ img_folder_path|JPEGS| path to folder with frames }"
"{ res_level | 3 | resolution level of output saliency map. Suggested Range [0, 4]. The higher the level is, the fast the processing is, the lower the resolution is }";
CommandLineParser parser(argc, argv, keys);
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
vector<Mat> img_sq;
DiscriminantSaliency t;
if ( parser.get<bool>( "default" ) )
{
t = DiscriminantSaliency();
}
else
{
t = DiscriminantSaliency(parser.get<int>( "res_level" ));
}
for ( unsigned i = 1; i < parser.get<unsigned>( "length" ); i++ )
{
char index[256] = {0};
sprintf(index, "%d", i + parser.get<int>( "start_frame" ));
Mat temp = imread(parser.get<string>("img_folder_path") + "/" + parser.get<string>("video_name") + "/frame_" + index + ".jpg", 0);
//Mat temp = imread(string("JPEGS/traffic/frame_") + index + ".jpg", 0);
//resize(temp, temp, Size(127, 127));
img_sq.push_back(temp);
}
vector<Mat> saliency_sq;
t.computeSaliency(img_sq, saliency_sq);
for ( unsigned i = 0; i < saliency_sq.size(); i++ )
{
resize(saliency_sq[i], saliency_sq[i], Size(1024, 768));
t.saliencyMapVisualize(saliency_sq[i]);
}
return 0;
} //main
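// Example invocation (hypothetical binary name and UCSD dataset layout):
//   ./DiscriminantSaliencySample --img_folder_path=JPEGS --video_name=skiing --start_frame=0 --length=12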

@@ -0,0 +1,149 @@
name: "CaffeNet"
input: "data"
input_dim: 10
input_dim: 3
input_dim: 227
input_dim: 227
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "norm1"
top: "conv2"
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}

@@ -0,0 +1,376 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core.hpp>
#include <opencv2/ximgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/utility.hpp>
#include "precomp.hpp"
#include <vector>
#include <functional>
#include <cmath>
#include <string>
#include <iostream>
#include <cstdlib>
#include <algorithm>
#include <utility>
#include <numeric>
#include <fstream>
#include <set>
using namespace std;
using namespace cv;
using namespace cv::saliency;
using namespace cv::ximgproc;
namespace cv
{
namespace saliency
{
BackgroundContrast::BackgroundContrast()
{
bgWei = 5;
limitOfSP = 600;
nOfLevel = 4;
usePrior = 2;
histBin = 5;
}
BackgroundContrast::BackgroundContrast( double _bgWei, int _limitOfSP, int _nOfLevel, int _usePrior, int _histBin ): limitOfSP(_limitOfSP), nOfLevel(_nOfLevel), usePrior(_usePrior), histBin(_histBin), bgWei(_bgWei) {}
BackgroundContrast::~BackgroundContrast(){}
Mat BackgroundContrast::saliencyMapGenerator( const Mat img, const Mat fgImg, int option )
{
CV_Assert( !(img.empty()) );
CV_Assert( !(option == 1 && fgImg.empty()) );
Mat idxImg, adjcMatrix, colDistM, posDistM, bdProb, wCtr, saliency;
superpixelSplit(img, idxImg, adjcMatrix);
vector<unsigned> bdIds = getBndPatchIds(idxImg);
for (unsigned i = 0; i < bdIds.size(); i++)
{
for (unsigned j = 0; j < bdIds.size(); j++)
{
adjcMatrix.at<uchar>(bdIds[i], bdIds[j]) = 1;
adjcMatrix.at<uchar>(bdIds[j], bdIds[i]) = 1;
}
}
getColorPosDis(img, idxImg, colDistM, posDistM, adjcMatrix.size[0]);
boundaryConnectivity(adjcMatrix, colDistM, bdProb, bdIds);
if ( option == 0 )
{
getWeightedContrast(colDistM, posDistM, bdProb, wCtr);
}
else
{
Mat temp = fgImg.clone();
if (temp.channels() == 3) cvtColor(temp, temp, COLOR_BGR2GRAY);
wCtr = Mat(adjcMatrix.size[0], 1, CV_64F, Scalar::all(0.0));
resize(temp, temp, img.size());
temp.convertTo(temp, CV_64F); // the map is read below with at<double>, so make sure it is CV_64F
vector<int> szOfSP = vector<int>(adjcMatrix.size[0], 0);
for ( int i = 0; i < img.size[0]; i++ )
{
for ( int j = 0; j < img.size[1]; j++ )
{
szOfSP[idxImg.at<unsigned>(i, j)] += 1;
wCtr.at<double>(idxImg.at<unsigned>(i, j), 0) += temp.at<double>(i, j);
}
}
for ( unsigned i = 0; i < szOfSP.size(); i++ )
{
wCtr.at<double>(i, 0) /= szOfSP[i];
}
}
saliencyOptimize(adjcMatrix, colDistM, bdProb, wCtr, wCtr, bgWei);
saliency = Mat(img.size[0], img.size[1], CV_64F, Scalar::all(0.0));
for (int i = 0; i < img.size[0]; i++)
{
for (int j = 0; j < img.size[1]; j++)
{
saliency.at<double>(i, j) = wCtr.at<double>(idxImg.at<unsigned>(i, j), 0);
}
}
return saliency;
}
void BackgroundContrast::saliencyOptimize( const Mat adjcMatrix, const Mat colDistM, const Mat bgWeight, const Mat fgWeight, Mat& saliencyOptimized, double bgLambda, double neiSigma )
{
Mat smoothWeight = colDistM.clone();
Mat smoothDeri = Mat(smoothWeight.size[0], smoothWeight.size[1], CV_64F, Scalar::all(0.0));
Mat bgWeightDig = Mat(smoothWeight.size[0], smoothWeight.size[1], CV_64F, Scalar::all(0.0));
Mat fgWeightDig = Mat(smoothWeight.size[0], smoothWeight.size[1], CV_64F, Scalar::all(0.0));
Mat temp;
double mi = 0, ma = 0;
minMaxLoc( fgWeight, &mi, &ma );
Mat fg = fgWeight.clone();
fg -= mi;
fg /= ( ma - mi + 0.000001 );
fg *= 255;
fg.convertTo(fg, CV_8U);
threshold(fg, fg, 0, 255, THRESH_TOZERO | THRESH_OTSU);
fg.convertTo(fg, CV_64F);
fg /= 255; // clean foreground cue
minMaxLoc( smoothWeight, NULL, &ma );
for ( int i = 0; i < smoothWeight.size[0]; i++ )
{
for ( int j = 0; j < smoothWeight.size[1]; j++ )
{
if ( adjcMatrix.at<uchar>(i, j) == 0 )
{
smoothWeight.at<double>(i, j) = ma * adjcMatrix.size[0];
}
}
}
dist2WeightMatrix(smoothWeight, smoothWeight, neiSigma);
adjcMatrix.convertTo(temp, CV_64F);
smoothWeight += temp * 0.1;//add small coefficients for regularization term
reduce(smoothWeight, temp, 0, REDUCE_SUM);
for ( int i = 0; i < smoothDeri.size[0]; i++ )
{
smoothDeri.at<double>(i, i) = temp.at<double>(0, i);
}
for ( int i = 0; i < bgWeightDig.size[0]; i++ )
{
bgWeightDig.at<double>(i, i) = bgWeight.at<double>(i, 0) * bgLambda;
}
for ( int i = 0; i < fgWeightDig.size[0]; i++ )
{
fgWeightDig.at<double>(i, i) = fg.at<double>(i, 0);
}
//temp = (smoothDeri - smoothWeight + bgWeightDig + fgWeightDig);
//saliencyOptimized = temp.inv() * fgWeight;
solve((smoothDeri - smoothWeight + bgWeightDig + fgWeightDig), fg, saliencyOptimized, DECOMP_NORMAL);
}
bool BackgroundContrast::computeSaliencyImpl( InputArray image, OutputArray saliencyMap )
{
CV_Assert( !(image.getMat().empty()) );
saliencyMap.assign(saliencyMapGenerator(image.getMat()));
return true;
}
void BackgroundContrast::superpixelSplit( const Mat img, Mat& idxImg, Mat& adjcMatrix)
{
Ptr<SuperpixelSEEDS> seeds;
seeds = createSuperpixelSEEDS( img.size().width, img.size().height, img.channels(), max(min(img.size().width * img.size().height / 600, limitOfSP), 10), nOfLevel, usePrior, histBin, false);
seeds->iterate( img, 4 );
Mat mask;
adjcMatrix = Mat::eye( seeds->getNumberOfSuperpixels(), seeds->getNumberOfSuperpixels(), CV_8U );
seeds->getLabels(idxImg);
seeds->getLabelContourMask(mask, true);
for ( int i = 0; i < mask.size[0]; i++ )
{
for (int j = 0; j < mask.size[1]; j++ )
{
if (mask.at<uchar>(i, j) != 0)
{
if ( idxImg.at<unsigned>(i, j) != idxImg.at<unsigned>(max(i - 1, 0), j) )
{
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i, j), idxImg.at<unsigned>(i - 1, j)) = 2;
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i - 1, j), idxImg.at<unsigned>(i, j)) = 2;
}
if ( idxImg.at<unsigned>(i, j) != idxImg.at<unsigned>(min(i + 1, mask.size[0] - 1), j) )
{
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i, j), idxImg.at<unsigned>(i + 1, j)) = 2;
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i + 1, j), idxImg.at<unsigned>(i, j)) = 2;
}
if ( idxImg.at<unsigned>(i, j) != idxImg.at<unsigned>(i, max(j - 1, 0)) )
{
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i, j), idxImg.at<unsigned>(i, j - 1)) = 2;
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i, j - 1), idxImg.at<unsigned>(i, j)) = 2;
}
if ( idxImg.at<unsigned>(i, j) != idxImg.at<unsigned>(i, min(j + 1, mask.size[1] - 1)) )
{
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i, j), idxImg.at<unsigned>(i, j + 1)) = 2;
adjcMatrix.at<uchar>(idxImg.at<unsigned>(i, j + 1), idxImg.at<unsigned>(i, j)) = 2;
}
}
}
}
}
vector<unsigned> BackgroundContrast::getBndPatchIds( const Mat idxImg, int thickness )
{
set<unsigned> BndPatchIds;
CV_Assert(idxImg.size[0] > 2 * thickness && idxImg.size[1] > 2 * thickness);
for ( int i = 0; i < idxImg.size[0]; i++)
{
for (int j = 0; j < idxImg.size[1]; j++)
{
if ( ((i >= 0 && i < thickness) || (i >= idxImg.size[0] - thickness && i < idxImg.size[0])) || ((j >= 0 && j < thickness) || (j >= idxImg.size[1] - thickness && j < idxImg.size[1])))
{
BndPatchIds.insert(idxImg.at<unsigned>(i, j));
}
}
}
vector<unsigned> res(BndPatchIds.begin(), BndPatchIds.end());
return res;
}
void BackgroundContrast::getColorPosDis( const Mat img, const Mat idxImg, Mat& colDistM, Mat& posDistM, int nOfSP )
{
vector<int> szOfSP = vector<int>(nOfSP, 0);
Mat meanCol = Mat(nOfSP, img.channels(), CV_64F, Scalar::all(0.0));
Mat meanPos = Mat(nOfSP, 2, CV_64F, Scalar::all(0.0));
colDistM = Mat(nOfSP, nOfSP, CV_64F, Scalar::all(0.0));
posDistM = Mat(nOfSP, nOfSP, CV_64F, Scalar::all(0.0));
for (int i = 0; i < img.size[0]; i++ )
{
for (int j = 0; j < img.size[1]; j++ )
{
szOfSP[idxImg.at<unsigned>(i, j)] += 1;
for (int k = 0; k < img.channels(); k++)
{
meanCol.at<double>(idxImg.at<unsigned>(i, j), k) += (double)img.at<Vec3b>(i, j)[k];
}
meanPos.at<double>(idxImg.at<unsigned>(i, j), 0) += (double)i;
meanPos.at<double>(idxImg.at<unsigned>(i, j), 1) += (double)j;
}
}
for (int i = 0; i < nOfSP; i++)
{
meanCol.row(i) /= szOfSP[i];
meanPos.row(i) /= szOfSP[i];
}
meanPos.col(0) /= img.size[0];
meanPos.col(1) /= img.size[1];
rgb2lab(meanCol, meanCol);
for ( int i = 0; i < meanCol.size[1]; i++)
{
Mat temp = ( repeat(meanCol.col(i), 1, nOfSP) - repeat(meanCol.col(i).t(), nOfSP, 1) );
pow(temp, 2, temp);
colDistM += temp;
}
sqrt(colDistM, colDistM);
for ( int i = 0; i < meanPos.size[1]; i++)
{
Mat temp = ( repeat(meanPos.col(i), 1, nOfSP) - repeat(meanPos.col(i).t(), nOfSP, 1) );
pow(temp, 2, temp);
posDistM += temp;
}
sqrt(posDistM, posDistM);
}
void BackgroundContrast::boundaryConnectivity(const Mat adjcMatrix, const Mat colDistM, Mat& bdProb, vector<unsigned> bdIds, double clipVal, double geoSigma)
{
Mat geoDistMatrix = Mat(adjcMatrix.size[0], adjcMatrix.size[1], CV_64F, Scalar::all(0.0));
double ma = 0;
minMaxLoc( colDistM, NULL, &ma );
for ( int i = 0; i < adjcMatrix.size[0]; i++ )
{
for (int j = 0; j < adjcMatrix.size[1]; j++ )
{
if ( adjcMatrix.at<uchar>(i, j) == 0 )
{
geoDistMatrix.at<double>(i, j) = ma * adjcMatrix.size[0]; //fake inf
}
else
{
geoDistMatrix.at<double>(i, j) = max(0.0, colDistM.at<double>(i, j) - clipVal);
}
}
}
for ( int k = 0; k < adjcMatrix.size[0]; k++ ) // Floyd-Warshall algorithm; Johnson's algorithm would also work but is longer to implement
{
for ( int i = 0; i < adjcMatrix.size[0]; i++ )
{
for (int j = 0; j < adjcMatrix.size[1]; j++ )
{
geoDistMatrix.at<double>(i, j) = min(geoDistMatrix.at<double>(i, j), geoDistMatrix.at<double>(i, k) + geoDistMatrix.at<double>(k, j));
geoDistMatrix.at<double>(j, i) = geoDistMatrix.at<double>(i, j);
}
}
}
dist2WeightMatrix(geoDistMatrix, geoDistMatrix, geoSigma);
bdProb = Mat(adjcMatrix.size[0], 1, CV_64F, Scalar::all(0.0));
for ( int i = 0; i < adjcMatrix.size[0]; i++ )
{
for ( unsigned j = 0; j < bdIds.size(); j++ )
{
bdProb.at<double>(i, 0) += geoDistMatrix.at<double>(i, bdIds[j]);
}
bdProb.at<double>(i, 0) /= sqrt(sum(geoDistMatrix.row(i)).val[0]);
}
dist2WeightMatrix(bdProb, bdProb, 1);
bdProb = 1 - bdProb;
}
void BackgroundContrast::getWeightedContrast( const Mat colDistM, const Mat posDistM, const Mat bgProb, Mat& wCtr )
{
wCtr = posDistM.clone();
dist2WeightMatrix(wCtr,wCtr, 0.4);
multiply(colDistM, wCtr, wCtr);
wCtr *= bgProb;
}
void BackgroundContrast::dist2WeightMatrix( Mat& input, Mat& output, double sigma )
{
Mat temp = input.clone();
output = input.clone();
for ( int i = 0; i < output.size[0]; i++ )
{
for ( int j = 0; j < output.size[1]; j++ )
{
//if (temp.at<double>(i, j) > 3 * sigma) output.at<double>(i, j) = 0;
//else
//{
output.at<double>(i, j) = exp(-1 * temp.at<double>(i, j) * temp.at<double>(i, j) / 2 / sigma / sigma);
//}
}
}
}
void BackgroundContrast::rgb2lab( Mat& inputMeanCol, Mat& outputMeanCol )
{
Mat temp = Mat(inputMeanCol.size[0], 1, CV_32FC3, Scalar::all(0));
for ( int i = 0; i < inputMeanCol.size[0]; i++ )
{
temp.at<Vec3f>(i, 0)[0] = (float)inputMeanCol.at<double>(i, 0) / 255;
temp.at<Vec3f>(i, 0)[1] = (float)inputMeanCol.at<double>(i, 1) / 255;
temp.at<Vec3f>(i, 0)[2] = (float)inputMeanCol.at<double>(i, 2) / 255;
}
cvtColor(temp, temp, COLOR_BGR2Lab);
outputMeanCol = inputMeanCol.clone();
for ( int i = 0; i < outputMeanCol.size[0]; i++ )
{
outputMeanCol.at<double>(i, 0) = temp.at<Vec3f>(i, 0)[0];
outputMeanCol.at<double>(i, 1) = temp.at<Vec3f>(i, 0)[1];
outputMeanCol.at<double>(i, 2) = temp.at<Vec3f>(i, 0)[2];
}
}
Mat BackgroundContrast::saliencyMapVisualize( InputArray _saliencyMap, int option )
{
Mat saliency = _saliencyMap.getMat().clone();
double mi = 0, ma = 0;
minMaxLoc( saliency, &mi, &ma );
saliency -= mi;
saliency /= ( ma - mi + 0.000001 );
if (option != 0 )
{
saliency *= 255;
saliency.convertTo(saliency, CV_8U);
if (option == 1) threshold(saliency, saliency, 0, 255, THRESH_BINARY | THRESH_OTSU);
else if (option == 2) threshold(saliency, saliency, 0, 255, THRESH_TOZERO | THRESH_OTSU);
//threshold(saliency, saliency, 0, 255, THRESH_TOZERO | THRESH_OTSU);
}
imshow( "saliencyVisual", saliency );
waitKey( 0 );
return saliency;
}
}
}

@@ -0,0 +1,339 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "precomp.hpp"
#include <vector>
#include <functional>
#include <cmath>
#include <string>
#include <iostream>
#include <cstdlib>
#include <algorithm>
#include <utility>
#include <numeric>
#include <fstream>
using namespace std;
using namespace cv;
using namespace cv::dnn;
using namespace cv::saliency;
namespace cv
{
namespace saliency
{
DeepGaze1::DeepGaze1( string net_proto, string net_caffemodel )
{
net = dnn::readNetFromCaffe( net_proto, net_caffemodel );
layers_names.push_back("conv5");
double tmp[] = {0.471836,0.459718,0.457611,0.480085,0.48823,0.462776,0.483445,0.461641,0.483469,0.47846,0.470172,0.383401,0.459975,0.478768,0.576305,0.469454,0.480448,0.435255,0.493571,0.48153,0.441619,0.45436,0.451791,0.476142,0.475955,0.468466,0.479836,0.530578,0.481301,0.454639,0.464725,0.45218,0.502245,0.44709,0.483469,0.500928,0.469562,0.46483,0.453324,0.461538,0.475741,0.480432,0.485595,0.462417,0.495092,0.471557,0.495046,0.459551,0.43668,0.505385,0.478419,0.492535,0.303292,0.475142,0.459992,0.454734,0.466868,0.450649,0.479587,0.434151,0.471309,0.460742,0.49318,0.524707,0.470968,0.478263,0.469935,0.459639,0.490684,0.465349,0.44842,0.481436,0.488862,0.468849,0.492233,0.467677,0.448416,0.474485,0.47684,0.492617,0.455164,0.46794,0.463009,0.47758,0.46629,0.495621,0.464325,0.473217,0.459664,0.478029,0.438637,0.447406,0.438148,0.455966,0.473499,0.473359,0.466213,0.525776,0.434224,0.464641,0.475869,0.501644,0.485892,0.483617,0.47226,0.482615,0.448091,0.460951,0.470457,0.469719,0.474948,0.516341,0.474467,0.429576,0.460061,0.446831,0.429813,0.479859,0.509008,0.504804,0.477351,0.461487,0.445481,0.44935,0.482019,0.469048,0.473205,0.460742,0.474685,0.461985,0.497119,0.464336,0.469783,0.464748,0.477133,0.484101,0.491574,0.591169,0.47327,0.467959,0.479773,0.465179,0.456533,0.42534,0.457655,0.474379,0.482501,0.491678,0.558077,0.473311,0.483722,0.474757,0.46874,0.459033,0.483051,0.475974,0.449861,0.456586,0.462686,0.46992,0.424458,0.492504,0.450006,0.468069,0.450585,0.442672,0.460277,0.460656,0.449303,0.470552,0.433665,0.47603,0.449626,0.471062,0.481555,0.427269,0.424295,0.588326,0.475818,0.484487,0.496265,0.480074,0.45834,0.469174,0.474869,0.49295,0.458737,0.461799,0.487588,0.488148,0.47734,0.480953,0.478616,0.470873,0.456516,0.461151,0.497269,0.449723,0.414189,0.473214,0.47472,0.478068,0.454312,0.485553,0.43564,0.469596,0.450846,0.488699,0.481056,0.419303,0.479696,0.471458,0.456179,0.465579,0.449656,0.459427,0.475431,0.518732,0.45971,0.51276,0.475805,0.467066,0.455423,0.462425,0.468577,0.429871,0.467098,0.467196,0.48245,0.496047,0.439613,0.446267,0.478326,0.463222,0.466251,0.475164,0.460792,0.407577,0.475157,0.465814,0.480478,0.490252,0.485834,0.455555,0.488025,0.472621,0.482393,0.48254,0.500558,0.466278,0.478975,0.423606,0.482795,0.486593,0.488191,0.483121,5.96006};
weights = vector<double>( tmp, tmp + 257 );
}
DeepGaze1::DeepGaze1( string net_proto, string net_caffemodel, vector<string> selected_layers, unsigned n_weights )
{
net = dnn::readNetFromCaffe( net_proto, net_caffemodel );
layers_names = selected_layers;
weights = vector<double>( n_weights, 0 );
for ( unsigned i = 0; i < weights.size(); i++ )
{
weights[i] = (double)( rand() % 10 ) / 10.0;
}
}
DeepGaze1::DeepGaze1( string net_proto, string net_caffemodel, vector<string> selected_layers, vector<double> i_weights )
{
net = dnn::readNetFromCaffe( net_proto, net_caffemodel );
layers_names = selected_layers;
weights = i_weights;
}
DeepGaze1::~DeepGaze1(){}
vector<Mat> DeepGaze1::featureMapGenerator( Mat img, Size input_size )
{
vector<Mat> featureMaps;
Mat me(input_size, CV_64F, Scalar::all(0.0));
Mat std(input_size, CV_64F, Scalar::all(0.0));
Mat temp(input_size, CV_64F, Scalar::all(0.0));
resize( img, img, input_size, 0, 0, INTER_AREA );//hard coded
Mat inputBlob = blobFromImage( img ); //Convert Mat to batch of images
net.setInput( inputBlob, "data" );
//net.forward();
//net.setBlob(".data", inputBlob); //set the network input
//net.forward(); //compute output
for ( unsigned i = 0; i < layers_names.size(); i++ )
{
//Mat blob_set = net.getBlob(layers_names[i]);
Mat blob_set = net.forward( layers_names[i] );
for ( int j = 0; j < blob_set.size[1]; j++ )
{
Mat blob = Mat( blob_set.size[2], blob_set.size[3], CV_32F, blob_set.ptr(0, j) ).clone(); // dnn blobs are CV_32F
blob.convertTo( blob, CV_64F ); // the rest of the pipeline works in CV_64F
resize( blob, blob, input_size, 0, 0, INTER_CUBIC );
featureMaps.push_back( blob );
}
}
for ( unsigned i = 0; i < featureMaps.size(); i++ )
{
me += ( featureMaps[i] / (double)featureMaps.size() );
}
for ( unsigned i = 0; i < featureMaps.size(); i++ )
{
pow( featureMaps[i] - me, 2, temp );
std += (temp / (double)featureMaps.size());
}
pow( std, 0.5, std );
for ( unsigned i = 0; i < featureMaps.size(); i++ )
{
featureMaps[i] -= me;
divide( featureMaps[i], std, featureMaps[i] );
}
Mat centerBias = ( Mat_<double>(15, 15) << 0.136605132339619,0.254911885194451,0.354975848201554,0.432118422919642,0.492788960570534,0.537169372900984,0.559893362856313,0.570408994503682,0.558327108322080,0.529592014101891,0.483151700229565,0.421134751334104,0.339516463333039,0.240333898610746,0.119040524151926,0.255057036816424,0.373463164527964,0.471695245848319,0.548331995771048,0.610621682003216,0.652904303445073,0.678084927709815,0.686289988540627,0.675100404076781,0.648787013529810,0.601363163606808,0.539724655027488,0.457897183544702,0.359051684872921,0.237121060715778,0.354996162889476,0.471574664262948,0.569824563418442,0.647926075225057,0.710453524703313,0.751807537518647,0.777982678740224,0.785604558552769,0.773105643206983,0.747143093637550,0.700960041197720,0.637036644172802,0.555488832006236,0.456278688906414,0.336404833368141,0.432114886533253,0.548519754764192,0.647919735424171,0.726252091271344,0.787633820027685,0.830453686804553,0.855159388723024,0.864261146191168,0.851130919200443,0.823782075450280,0.777941301454146,0.714197202377273,0.632042767206447,0.533375632722201,0.413459898633110,0.492846224526104,0.610458063998438,0.710273041291813,0.787934734560166,0.848818377335713,0.892669644305876,0.915962511030596,0.925983774442787,0.914078884852605,0.886340426175551,0.838921069614458,0.776522338759063,0.694910735314247,0.595717337417090,0.474404562107594,0.537258377108661,0.652654274132924,0.751668431422994,0.830557252057301,0.892264877678679,0.934387776714834,0.960294475640559,0.968153776825753,0.957118891968944,0.930719438034762,0.882668936995775,0.818004698514852,0.736335421611659,0.637250235468980,0.518321616011027,0.559912410908083,0.678060440904210,0.778035916610351,0.855337573494623,0.916090961317328,0.960268532575822,0.983151329506359,0.993265865328518,0.981393158586731,0.953452714649956,0.906806535966130,0.844266579567292,0.762471436321223,0.663436280862256,0.542292468493236,0.570471859736804,0.686367557103361,0.785571470982568,0.863984557875191,0.925787582513478,0.968184312606637,0.993160551514547,1.00071969560062,0.990820131015439,0.963788669203278,0.916062100871062,0.851941275823728,0.770061877665113,0.671049079061439,0.551460475500773,0.558354004396870,0.675218828625687,0.773141022132423,0.851023228938218,0.914111180726610,0.957097877210958,0.981435236380489,0.990830375805955,0.979069435484073,0.951285941586670,0.904001536547754,0.841050003441305,0.759279958742245,0.660299547838899,0.539966072338975,0.529794530767286,0.648426268685882,0.747085236556856,0.823784188854240,0.886061506412046,0.930850736147926,0.953129788839557,0.963752221613668,0.951512839210700,0.924189623933934,0.877773421560175,0.814688659956347,0.733050627339970,0.633967523008819,0.512144357629488,0.483094193799984,0.601317535642541,0.700936072455337,0.778084086878178,0.839052826457435,0.882842688790194,0.906345815545706,0.916276056981117,0.904027650739533,0.877827433807473,0.831763094963022,0.767578814024913,0.685653480474771,0.586759596897176,0.465090771371211,0.421344630481700,0.539705161668206,0.637059989497880,0.714213691485248,0.776688317470029,0.818187784047592,0.844251497483870,0.851844570961231,0.841044566248117,0.814778195034621,0.767546270221294,0.705699445037065,0.623983961385159,0.525082406012993,0.403548799412053,0.339352953813616,0.458020160692771,0.555130309401582,0.632633247554868,0.694581915649390,0.736338072460011,0.762425604772228,0.770108192322197,0.759369952472952,0.732901801094476,0.685633947666787,0.623974569677122,0.542296497992531,0.443313473703102,0.321653790809172,
0.240452590442239,0.358845767555164,0.456323501276454,0.533311273531990,0.595861650031113,0.637378281509690,0.663434282045727,0.670883353894946,0.660356369332519,0.633940197197224,0.586949913320820,0.525019901253340,0.443239391952182,0.341581133756428,0.221496442250004,0.118883254021697,0.237092401238019,0.336442576365934,0.413042016689593,0.474758090922195,0.518099556977193,0.541973522382581,0.551465525482662,0.539575142928788,0.512205675560351,0.465197135656097,0.403297133574821,0.321504846482331,0.221359680451730,0.100335071938095);
resize( centerBias, centerBias, input_size, 0, 0, INTER_CUBIC );
featureMaps.push_back( centerBias );
return featureMaps;
}
bool DeepGaze1::computeSaliencyImpl( InputArray image, OutputArray saliencyMap )
{
CV_Assert( !(image.getMat().empty()) );
vector<Mat> featureMaps = featureMapGenerator( image.getMat(), Size(227, 227) );
saliencyMap.assign( softmax( comb( featureMaps, weights ) ) );
for ( unsigned i = 0; i < weights.size(); i++ ) cout << weights[i] << ",";
cout << endl;
return true;
}
Mat DeepGaze1::saliencyMapGenerator( Mat input_image, Size input_size )
{
//raw saliency map generate
CV_Assert( !(input_image.empty()) );
vector<Mat> featureMaps = featureMapGenerator( input_image, input_size );
return softmax( comb( featureMaps, weights ) );
}
Mat DeepGaze1::comb( vector<Mat>& featureMaps, vector<double> wei )
{
Mat res( featureMaps[0].rows, featureMaps[0].cols, CV_64F, Scalar::all(0.0) );
for ( unsigned i = 0; i < featureMaps.size(); i++ )
{
Mat temp = featureMaps[i].clone();
temp *= wei[i];
res += temp;
}
GaussianBlur( res, res, Size(35, 35), 0, 0, BORDER_CONSTANT );
return res;
}
Mat DeepGaze1::softmax( Mat res )
{
exp(res, res);
double temp = sum(res).val[0];
res = res / temp;
return res;
}
vector<unsigned> DeepGaze1::batchIndex( unsigned total, unsigned batchSize )
{
srand(0);
vector<unsigned> allIndex(total, 0);
for ( unsigned i = 0; i < total; i++ )
{
allIndex[i] = i;
}
for ( int i = batchSize - 1; i >= 0; i-- )
{
swap(allIndex[i], allIndex[rand() % total]);
}
return vector<unsigned>( allIndex.begin(), allIndex.begin() + batchSize );
}
vector<unsigned> DeepGaze1::fixationLoc( Mat img, Size input_size )
{
CV_Assert( !(img.empty()) );
vector<unsigned> randIndex;
resize( img, img, input_size, 0, 0, INTER_AREA );
vector<unsigned> fixation;
vector<pair<unsigned, unsigned> > match;
fixation.assign(img.datastart, img.dataend);
for ( unsigned i = 0; i < fixation.size(); i++ )
{
match.push_back( pair<unsigned, unsigned>(fixation[i], i));
}
sort( match.begin(), match.end(), greater<pair<unsigned, unsigned> >() );
//sort(match.begin(), match.end(), [](pair<unsigned, unsigned> a, pair<unsigned, unsigned> b) {
// return b.first < a.first;
//});
for ( unsigned i = 0 ; ((i < match.size()) && (match[i].first > 0)) ; i++ )
{
randIndex.push_back( match[i].second );
}
img.release();
return randIndex;
}
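// loss(): negative mean log-likelihood of the saliency values sampled at the
// fixated pixels, plus a scale-invariant L1/L2 weight regularizer:
//   loss = -(1/N) * sum_i log s_i + 0.001 * ||w||_1 / ||w||_2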
double DeepGaze1::loss( vector<double> saliency_sample, vector<double> wei )
{
    double res = 0, l1 = 0, l2 = 0;
    for ( unsigned i = 0; i < wei.size(); i++ )
    {
        l1 += abs(wei[i]);
        l2 += wei[i] * wei[i];
    }
for ( unsigned i = 0; i < saliency_sample.size(); i++ )
{
res -= log( saliency_sample[i] ) / saliency_sample.size();
}
return res + 0.001 * l1 / sqrt(l2);
}
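// mapSampler(): reads the saliency value at each flattened fixation index,
// converting the index back to (row, col) coordinates with a divide/modulo
// by the map width.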
vector<double> DeepGaze1::mapSampler( Mat saliency, vector<unsigned> randIndex )
{
vector<double> saliency_sample;
for ( unsigned i = 0 ; i < randIndex.size() ; i++ )
{
saliency_sample.push_back( saliency.at<double>( randIndex[i] / saliency.size[1], randIndex[i] % saliency.size[1] ) );
}
return saliency_sample;
}
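// evalGrad(): numerical gradient of the loss with respect to the feature-map
// weights. Each weight is perturbed by a small step eps and the loss is
// re-evaluated:
//   grad[i] = ( loss(w + eps * e_i) - loss(w) ) / eps
// Because comb() is linear, only eps times the (blurred) i-th feature map has
// to be added to the cached raw map for each perturbation.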
vector<double> DeepGaze1::evalGrad( vector<Mat>& featureMaps, vector<unsigned>& randIndex, vector<double> wei, Size input_size )
{
    const double eps = 1e-10; // finite-difference step size
    vector<double> grad( featureMaps.size(), 0 );
    Mat c = comb( featureMaps, wei );
    Mat saliency_old = softmax( c.clone() );
    vector<double> tt_old = mapSampler( saliency_old, randIndex );
    double loss_old = loss( tt_old, wei );
    for ( unsigned i = 0; i < wei.size(); i++ )
    {
        // perturb only the i-th weight: add eps times the blurred i-th
        // feature map to the cached raw map, then recompute the loss
        Mat saliency_new = c.clone();
        Mat temp( input_size, CV_64F, Scalar::all(0.0) );
        temp += eps * featureMaps[i];
        GaussianBlur( temp, temp, Size(35, 35), 0, 0, BORDER_CONSTANT );
        saliency_new += temp;
        saliency_new = softmax( saliency_new );
        vector<double> weights_new( wei );
        weights_new[i] += eps;
        vector<double> tt = mapSampler( saliency_new, randIndex );
        double loss_new = loss( tt, weights_new );
        grad[i] = ( loss_new - loss_old ) / eps;
    }
    return grad;
}
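// training(): stochastic gradient descent on the feature-map weights, with
// momentum and an exponentially decaying learning rate:
//   vel[j] = momentum * vel[j] + grad[j];
//   weights[j] -= alpha * vel[j] * exp(-decay * n);
// A minimal usage sketch (hypothetical values; it assumes the caller has
// loaded paired images and binary fixation maps, e.g. from MIT1003, and that
// the default constructor provides initial weights):
//   vector<Mat> imgs, fixMaps;                  // filled by the caller
//   DeepGaze1 g;
//   g.training( imgs, fixMaps, 10, 100, 0.9, 0.01, 0.0001, Size(227, 227) );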
void DeepGaze1::training( vector<Mat>& images, vector<Mat>& fixMaps, int iteration, unsigned batch_size, double momentum, double alpha, double decay, Size input_size )
{
vector<unsigned> randIndex = batchIndex( (unsigned)images.size(), min( batch_size, (unsigned)images.size() ) );
vector<unsigned> fixLoc;
vector<Mat> featureMaps;
vector<double> grad;
vector<double> vel(weights.size(), 0);
unsigned n = 0;
while ( iteration > 0 )
{
for ( unsigned i = 0; i < randIndex.size(); i++ )
{
featureMaps = featureMapGenerator( images[randIndex[i]], input_size );
fixLoc = fixationLoc( fixMaps[randIndex[i]], input_size );
grad = evalGrad( featureMaps, fixLoc, weights, input_size );
for ( unsigned j = 0; j < grad.size(); j++ )
{
vel[j] = momentum * vel[j] + grad[j];
weights[j] -= alpha * vel[j] * exp(-decay * n);
}
n++;
double avgGrad = accumulate( grad.begin(), grad.end(), 0.0 ) / weights.size();
double avgWeight = accumulate( weights.begin(), weights.end(), 0.0 ) / weights.size();
cout << n << " " << avgGrad << " " << avgWeight << endl;
}
iteration--;
}
}
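// computeAUC(): area under the ROC curve of the saliency map against a binary
// fixation map. The saliency value at every fixated pixel serves as a
// threshold; for each threshold the true-positive rate is the fraction of
// fixations at or above it and the false-positive rate is the fraction of the
// remaining (non-fixated) pixels above it. The curve is integrated by the
// trapezoid rule.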
double DeepGaze1::computeAUC( InputArray _saliencyMap, InputArray _fixtionMap )
{
    Mat saliency = _saliencyMap.getMat().clone();
    Mat fixation = _fixtionMap.getMat().clone();
    // both maps must be non-empty single-channel 2-D matrices
    CV_Assert( !( saliency.empty() || fixation.empty() || saliency.dims > 2 || fixation.dims > 2 ) );
    resize( fixation, fixation, Size(saliency.cols, saliency.rows), 0, 0, INTER_CUBIC );
    // normalize the saliency map to [0, 1]
    double mi = 0, ma = 0;
    minMaxLoc( saliency, &mi, &ma );
    saliency -= mi;
    saliency /= ( ma - mi );
    // the saliency value at every fixated pixel becomes a threshold
    vector<double> threshold_list;
    for ( int i = 0; i < saliency.rows; i++ )
    {
        for ( int j = 0; j < saliency.cols; j++ )
        {
            if ( fixation.at<uchar>(i, j) > 0 ) threshold_list.push_back( saliency.at<double>(i, j) );
        }
    }
sort( threshold_list.begin(), threshold_list.end(), greater<double>() );
vector<double> tp(1, 0), fp(1, 0);
for ( unsigned i = 0; i < threshold_list.size(); i++ )
{
unsigned aboveth = 0;
for ( int m = 0; m < saliency.rows; m++ )
{
for ( int n = 0; n < saliency.cols; n++ )
{
if ( saliency.at<double>(m, n) >= threshold_list[i] ) aboveth++;
}
}
tp.push_back( (i + 1.0) / (double)threshold_list.size() );
fp.push_back( (aboveth - i - 1.0) / (double)(saliency.rows * saliency.cols - threshold_list.size()) );
}
tp.push_back(1);
fp.push_back(1);
double auc = 0;
for ( unsigned i = 1; i < tp.size(); i++ )
{
auc += (tp[i - 1] + tp[i]) * (fp[i] - fp[i - 1]) / 2;
}
return auc;
}
void DeepGaze1::saliencyMapVisualize( InputArray _saliencyMap )
{
Mat saliency = _saliencyMap.getMat().clone();
double mi = 0, ma = 0;
minMaxLoc( saliency, &mi, &ma );
saliency -= mi;
saliency /= ( ma - mi );
imshow( "saliencyVisual", saliency );
waitKey( 0 );
}
}
}

@ -0,0 +1,340 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core.hpp>
#include "precomp.hpp"
#include <vector>
#include <functional>
#include <cmath>
#include <string>
#include <iostream>
#include <cstdlib>
#include <algorithm>
#include <utility>
#include <numeric>
#include <fstream>
using namespace std;
using namespace cv;
using namespace cv::saliency;
namespace cv
{
namespace saliency
{
DiscriminantSaliency::DiscriminantSaliency(unsigned _stride, Size _imgProcessingSize, unsigned _hidden, unsigned _center, unsigned _window, unsigned _patch, unsigned _temporal)
{
imgProcessingSize = _imgProcessingSize;
hiddenSpaceDimension = _hidden;
centerSize = _center;
windowSize = _window;
patchSize = _patch;
temporalSize = _temporal;
stride = _stride;
CV_Assert( hiddenSpaceDimension <= temporalSize && temporalSize <= (unsigned)imgProcessingSize.width * imgProcessingSize.height && stride <= (centerSize - 1) / 2 );
}
DiscriminantSaliency::~DiscriminantSaliency(){}
bool DiscriminantSaliency::computeSaliencyImpl( InputArray image, OutputArray saliencyMap )
{
vector<Mat> img_sq;
image.getMatVector(img_sq);
vector<Mat>& saliency_sq = *( std::vector<Mat>* ) saliencyMap.getObj();
    CV_Assert( !img_sq.empty() && !img_sq[0].empty() );
saliencyMapGenerator(img_sq, saliency_sq);
return true;
}
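// saliencyMapGenerator(): discriminant center-surround saliency. For every
// sliding-window position, one dynamic texture is fit to the center patch and
// one to the whole center+surround window; the saliency score is the KL
// divergence between the two models, so locations whose center statistics are
// poorly explained by the surrounding statistics score high.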
vector<Mat> DiscriminantSaliency::saliencyMapGenerator( vector<Mat> img_sq, vector<Mat>& saliency_sq )
{
CV_Assert( img_sq.size() >= temporalSize );
for ( unsigned i = 0; i < img_sq.size(); i++ )
{
resize(img_sq[i], img_sq[i], imgProcessingSize);
}
    for ( unsigned i = temporalSize - 1; i < img_sq.size(); i++ )
    {
        saliency_sq.push_back(Mat(imgProcessingSize, CV_64F, Scalar::all(0.0)));
        for ( unsigned r = (centerSize - 1) / 2; r < imgProcessingSize.height - (centerSize - (centerSize - 1) / 2 - 1); r += 2 * stride + 1 )
        {
            for ( unsigned c = (centerSize - 1) / 2; c < imgProcessingSize.width - (centerSize - (centerSize - 1) / 2 - 1); c += 2 * stride + 1 )
            {
                // fit one dynamic texture to the center patch and one to the
                // whole center+surround window, then score the location by
                // the KL divergence between the two models
                Mat center, surround, all;
                DT para_c1, para_w;
                patchGenerator(img_sq, i, r, c, center, surround, all);
                dynamicTextureEstimator( center, para_c1 );
                dynamicTextureEstimator( all, para_w );
                double kl1 = KLdivDT( para_c1, para_w );
                // spread the score over the (2*stride+1) x (2*stride+1)
                // neighborhood covered by this stride step
                for ( unsigned rn = r - stride; rn <= r + stride; rn++)
                {
                    for ( unsigned cn = c - stride; cn <= c + stride; cn++)
                    {
                        saliency_sq.back().at<double>(rn, cn) = max(saliency_sq.back().at<double>(rn, cn), kl1);
                    }
                }
}
}
}
return saliency_sq;
}
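// patchGenerator(): samples patchSize random pixel trajectories over the last
// temporalSize frames. 'center' draws from the centerSize x centerSize block
// around (r, c), 'surround' from the windowSize window excluding the center
// block, and 'all' from the whole window. Each output row is one pixel
// trajectory; each column is one frame.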
void DiscriminantSaliency::patchGenerator( const vector<Mat>& img_sq, unsigned index, unsigned r, unsigned c, Mat& center, Mat& surround, Mat& all )
{
unsigned r1 = max((int)r - ((int)windowSize - 1) / 2, 0), c1 = max((int)c - ((int)windowSize - 1) / 2, 0);
unsigned r2 = min(r1 + windowSize, (unsigned)imgProcessingSize.height), c2 = min(c1 + windowSize, (unsigned)imgProcessingSize.width);
all = Mat(patchSize, temporalSize, CV_64F, Scalar::all(0.0));
surround = Mat(patchSize, temporalSize, CV_64F, Scalar::all(0.0));
center = Mat(patchSize, temporalSize, CV_64F, Scalar::all(0.0));
srand(0);
for ( int i = 0; i < all.size[0]; i++ )
{
unsigned rt = rand() % (r2 - r1) + r1, ct = rand() % (c2 - c1) + c1;
for ( int j = 0; j < all.size[1]; j++ )
{
all.at<double>(i, j) = (double)img_sq[j + 1 + index - temporalSize].at<uchar>(rt, ct);
}
}
srand(0);
for ( int i = 0; i < center.size[0]; i++ )
{
unsigned rt = rand() % centerSize + r - (centerSize - 1) / 2, ct = rand() % centerSize + c - (centerSize - 1) / 2;
for ( int j = 0; j < center.size[1]; j++ )
{
center.at<double>(i, j) = (double)img_sq[j + 1 + index - temporalSize].at<uchar>(rt, ct);
}
}
srand(0);
for ( int i = 0; i < surround.size[0]; i++ )
{
unsigned rt = rand() % (r2 - r1) + r1, ct = rand() % (c2 - c1) + c1;
        // re-draw until the sample falls outside the center block; cast to
        // int so the distance test is correct when rt < r or ct < c
        while ( abs((int)rt - (int)r) < (int)(centerSize / 2) && abs((int)ct - (int)c) < (int)(centerSize / 2) )
{
rt = rand() % (r2 - r1) + r1;
ct = rand() % (c2 - c1) + c1;
}
for ( int j = 0; j < surround.size[1]; j++ )
{
surround.at<double>(i, j) = (double)img_sq[j + 1 + index - temporalSize].at<uchar>(rt, ct);
}
}
}
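// KLdivDT(): KL divergence between the distributions over image sequences
// induced by two dynamic-texture (linear dynamical system) models, the center
// model para_c and the window model para_w. The divergence is accumulated
// recursively over the temporalSize frames by propagating the state mean MU
// and covariance S of each model, Kalman-filter style; larger values mean the
// center is explained very differently from the surrounding window.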
double DiscriminantSaliency::KLdivDT( const DT& para_c, const DT& para_w )
{
Mat temp1, temp2, Eig;
Mat MU_c = para_c.MU.clone();//1
Mat MU_w = para_w.MU.clone();//1
Mat S_c = para_c.S.clone();//1
Mat S_w = para_w.S.clone();//1
Mat Beta = ((para_w.S.inv()) + Mat::eye(para_w.S.size(), CV_64F) / (para_w.VAR)).inv();//1
Mat Beta_c = ((para_c.S.inv()) + Mat::eye(para_c.S.size(), CV_64F) / (para_c.VAR)).inv();//1
Mat Omega = -1 * para_w.Q.inv() * para_w.A;//const
Mat Omega_c = -1 * para_c.Q.inv() * para_c.A;
Mat Theta = (para_w.S.inv()) + (para_w.A.t()) * (para_w.Q.inv()) * (para_w.A);//const
Mat Theta_c = (para_c.S.inv()) + (para_c.A.t()) * (para_c.Q.inv()) * (para_c.A);
Mat Tc = para_w.C.t() * para_c.C;//const
Mat Vc = para_w.C.t() * para_c.C * MU_c - MU_w;//1
double Omegac = trace(para_c.S).val[0] / para_w.VAR + imgProcessingSize.width * imgProcessingSize.height * para_c.VAR / para_w.VAR - para_c.VAR / pow(para_w.VAR, 2) * trace(Beta).val[0];
Mat Psic = 1 / pow(para_w.VAR, 2) * Tc * para_c.S * Tc.t();
Mat U_c = para_c.A * para_c.S;//2
Mat U_w = para_w.A * para_w.S;//2
Mat G = -1 * Beta * Omega;//2
Mat G_c = -1 * Beta_c * Omega_c;
Mat H = Theta + Mat::eye(Theta.size(), CV_64F) / (para_w.VAR) - Omega.t() * Beta * Omega;//2
Mat H_c = Theta_c + Mat::eye(Theta_c.size(), CV_64F) / (para_c.VAR) - Omega_c.t() * Beta_c * Omega_c;
S_w = para_w.A * S_w * para_w.A.t() + para_w.Q;//2
MU_c = para_c.A * MU_c;//2
MU_w = para_w.A * MU_w;//2
Mat Zc = 1 / para_w.VAR * para_w.C * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * Vc - para_c.C * MU_c + para_w.C * MU_w;//2
PCA pt_pca((S_w - 1 / para_w.VAR * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * U_w.t()), cv::Mat(), 1, 0);
Eig = pt_pca.eigenvalues;
//eigen((S_w - 1 / para_w.VAR * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * U_w.t()), Eig);
Eig /= para_w.VAR;
Eig += Mat::ones(Eig.size(), CV_64F);
log(Eig, Eig);
double det_w = imgProcessingSize.width * imgProcessingSize.height * log(para_w.VAR) + sum(Eig).val[0];
pt_pca = PCA((S_c - 1 / para_c.VAR * U_c * (Mat::eye(Beta_c.size(), CV_64F) - Beta_c / para_c.VAR) * U_c.t()), cv::Mat(), 1, 0);
Eig = pt_pca.eigenvalues;
//eigen((S_c - 1 / para_c.VAR * U_c * (Mat::eye(Beta.size(), CV_64F) - Beta_c / para_c.VAR) * U_c.t()), Eig);
Eig /= para_c.VAR;
Eig += Mat::ones(Eig.size(), CV_64F);
log(Eig, Eig);
double det_c = imgProcessingSize.width * imgProcessingSize.height * log(para_c.VAR) + sum(Eig).val[0];
Mat Gama = (S_w - 1 / para_w.VAR * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * U_w.t()).inv() + Mat::eye(S_w.size(), CV_64F) / para_w.VAR;//2
temp1 = (1 / para_w.VAR / para_w.VAR * Zc.t() * para_w.C * Gama.inv() * para_w.C.t() * Zc);//2
double update_term = pow(norm(Zc), 2) / para_w.VAR - temp1.at<double>(0, 0);
Mat Xic = 1 / pow(para_w.VAR, 2) * para_c.A * para_c.S * Tc.t();
S_c = para_c.A * S_c * para_c.A.t() + para_c.Q;//2
Omegac += 1 / para_w.VAR * trace(S_c).val[0] + imgProcessingSize.width * imgProcessingSize.height * para_c.VAR / para_w.VAR - para_c.VAR / pow(para_w.VAR, 2) * trace(H.inv()).val[0] - para_c.VAR / pow(para_w.VAR, 2) * trace(H.inv() * G.t() * G).val[0];
hconcat(Psic, Xic.t() * Tc.t(), temp1);
hconcat(Tc * Xic, 1 / pow(para_w.VAR, 2) * Tc * S_c * Tc.t(), temp2);
vconcat(temp1, temp2, Psic);//Psic 2
hconcat(H.inv(), H.inv() * G.t(), temp1);
hconcat(G * H.inv(), Beta + G * H.inv() * G.t(), temp2);
vconcat(temp1, temp2, Beta);//Beta 2
hconcat(H_c.inv(), H_c.inv() * G_c.t(), temp1);
hconcat(G_c * H_c.inv(), Beta_c + G_c * H_c.inv() * G_c.t(), temp2);
vconcat(temp1, temp2, Beta_c);
vconcat(Vc, para_w.C.t() * para_c.C * MU_c - MU_w, Vc);//2
for ( unsigned i = 2; i < temporalSize; i++)
{
hconcat(para_c.A * U_c, para_c.A * S_c, U_c);
hconcat(para_w.A * U_w, para_w.A * S_w, U_w);
vconcat(-1 * H.inv() * Omega, -1 * G * H.inv() * Omega, G);
vconcat(-1 * H_c.inv() * Omega_c, -1 * G_c * H_c.inv() * Omega_c, G_c);
H = Theta + Mat::eye(Theta.size(), CV_64F) / para_w.VAR - Omega.t() * H.inv() * Omega;
H_c = Theta_c + Mat::eye(Theta_c.size(), CV_64F) / para_c.VAR - Omega_c.t() * H_c.inv() * Omega_c;
S_w = para_w.A * S_w * para_w.A.t() + para_w.Q;
MU_c = para_c.A * MU_c;
MU_w = para_w.A * MU_w;
Zc = 1 / para_w.VAR * para_w.C * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * Vc - para_c.C * MU_c + para_w.C * MU_w;
pt_pca = PCA((S_w - 1 / para_w.VAR * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * U_w.t()), cv::Mat(), 1, 0);
//eigen((S_w - 1 / para_w.VAR * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * U_w.t()), Eig);
Eig = pt_pca.eigenvalues;
Eig /= para_w.VAR;
Eig += Mat::ones(Eig.size(), CV_64F);
log(Eig, Eig);
det_w += imgProcessingSize.width * imgProcessingSize.height * log(para_w.VAR) + sum(Eig).val[0];
pt_pca = PCA((S_c - 1 / para_c.VAR * U_c * (Mat::eye(Beta_c.size(), CV_64F) - Beta_c / para_c.VAR) * U_c.t()), cv::Mat(), 1, 0);
//eigen((S_c - 1 / para_c.VAR * U_c * (Mat::eye(Beta.size(), CV_64F) - Beta_c / para_c.VAR) * U_c.t()), Eig);
Eig = pt_pca.eigenvalues;
Eig /= para_c.VAR;
Eig += Mat::ones(Eig.size(), CV_64F);
log(Eig, Eig);
det_c += imgProcessingSize.width * imgProcessingSize.height * log(para_c.VAR) + sum(Eig).val[0];
Gama = (S_w - 1 / para_w.VAR * U_w * (Mat::eye(Beta.size(), CV_64F) - Beta / para_w.VAR) * U_w.t()).inv() + Mat::eye(S_w.size(), CV_64F) / para_w.VAR;
temp1 = pow(norm(Zc), 2) / para_w.VAR - 1 / para_w.VAR / para_w.VAR * Zc.t() * para_w.C * Gama.inv() * para_w.C.t() * Zc;
update_term += temp1.at<double>(0, 0);
hconcat(1/pow(para_w.VAR, 2) * para_c.A * Xic, 1/pow(para_w.VAR, 2) * para_c.A * S_c * Tc.t(), Xic);
S_c = para_c.A * S_c * para_c.A.t() + para_c.Q;
Omegac += 1 / para_w.VAR * trace(S_c).val[0] + imgProcessingSize.width * imgProcessingSize.height * para_c.VAR / para_w.VAR;
Omegac -= para_c.VAR / pow(para_w.VAR, 2) * trace(H.inv()).val[0] - para_c.VAR / pow(para_w.VAR, 2) * trace((H.inv()) * (G.t()) * G).val[0];
hconcat(Psic, Xic.t() * Tc.t(), temp1);
hconcat(Tc * Xic, 1 / pow(para_w.VAR, 2) * Tc * S_c * Tc.t(), temp2);
vconcat(temp1, temp2, Psic);
hconcat(H.inv(), H.inv() * G.t(), temp1);
hconcat(G * H.inv(), Beta + G * H.inv() * G.t(), temp2);
vconcat(temp1, temp2, Beta);
hconcat(H_c.inv(), H_c.inv() * G_c.t(), temp1);
hconcat(G_c * H_c.inv(), Beta_c + G_c * H_c.inv() * G_c.t(), temp2);
vconcat(temp1, temp2, Beta_c);
vconcat(Vc, para_w.C.t() * para_c.C * MU_c - MU_w, Vc);
}
return det_w - det_c + update_term + Omegac - trace(Beta * Psic).val[0] - imgProcessingSize.width * imgProcessingSize.height * temporalSize;
}
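// dynamicTextureEstimator(): fits a linear dynamical system (dynamic texture)
//   x(t+1) = A x(t) + v(t),  v ~ N(0, Q)
//   y(t)   = C x(t) + w(t)
// to a patch sequence via the standard SVD-based method: C is taken from the
// leading left singular vectors of the mean-centered data, the hidden states
// are X = diag(w) * Vt, and A is the least-squares solution of
// X(t+1) = A X(t). Q comes from the state residuals; MU and S are the sample
// mean and covariance of the states.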
void DiscriminantSaliency::dynamicTextureEstimator( const Mat img_sq, DT& para)
{
unsigned tau = img_sq.size[1]; //row represents pixel location and column represents temporal location
Mat me( img_sq.size[0], 1, CV_64F, Scalar::all(0.0) );
Mat temp( img_sq.size[0], img_sq.size[1], CV_64F, Scalar::all(0.0) );
Mat X, V, B, W, Y;
for ( unsigned i = 0; i < tau; i++ )
{
me += img_sq.col(i) / tau;
}
temp += me * Mat::ones(1, temp.size[1], CV_64F) * (-1);
temp += img_sq;
Y = temp.clone();
temp = temp.colRange(0, tau);
SVD s = SVD(temp, SVD::MODIFY_A);
para.C = s.u.colRange(0, hiddenSpaceDimension);
para.C = para.C.clone();
X = Mat::diag(s.w.rowRange(0, hiddenSpaceDimension)) * s.vt.rowRange(0, hiddenSpaceDimension);
para.A = (X.colRange(1, tau) * (X.colRange(0, tau - 1)).inv(DECOMP_SVD));
para.A = para.A.clone();
V = (X.colRange(1, tau) - para.A * X.colRange(0, tau - 1));
V = V.clone();
SVD sv = SVD(V, SVD::MODIFY_A);
B = sv.u * Mat::diag(sv.w) / sqrt(tau - 1);
para.Q = (B * B.t());
para.Q = para.Q.clone();
/*Mat rnd = Mat(B.size[1], 1, CV_64F, Scalar::all(0.0));
cv::Mat mean = cv::Mat::zeros(1,1,CV_64FC1);
cv::Mat sigma= cv::Mat::ones(1,1,CV_64FC1);
cv::randn(rnd, mean, sigma);
Mat Xt = X.col(tau - 1);
W = (Y.col(tau) - para.C * (para.A * Xt + B * rnd));
Mat mea, std;
meanStdDev(W, me, std);
W -= me;
//para.VAR = pow(std.at<double>(0, 0), 2);
SVD sw = SVD(W, SVD::MODIFY_A);
B = sw.u * Mat::diag(sw.w) / sqrt(tau - 1);
para.R = (B * B.t());
para.R = para.R.clone();
para.VAR = (trace(para.R).val[0] / para.R.size[0]);*/
    para.VAR = 1; // observation-noise estimation (commented out above) is disabled; assume unit variance
para.MU = Mat( hiddenSpaceDimension, 1, CV_64F, Scalar::all(0.0) );
for (unsigned i = 0; i < tau; i++ )
{
para.MU += X.col(i) / tau;
}
para.S = (X - para.MU * Mat::ones(1, tau, CV_64F)) * ((X - para.MU * Mat::ones(1, tau, CV_64F)).t()) / (tau - 1);
para.S = para.S.clone();
}
void DiscriminantSaliency::saliencyMapVisualize( InputArray _saliencyMap )
{
Mat saliency = _saliencyMap.getMat().clone();
double mi = 0, ma = 0;
minMaxLoc( saliency, &mi, &ma );
saliency -= mi;
saliency /= ( ma - mi );
imshow( "saliencyVisual", saliency );
waitKey( 0 );
}
}
}

@ -40,6 +40,9 @@
//M*/
#include "precomp.hpp"
#include <iostream>
#include <vector>
#include <algorithm>
namespace cv
{
@ -50,6 +53,7 @@ namespace saliency
* StaticSaliency
*/
bool StaticSaliency::computeBinaryMap( InputArray _saliencyMap, OutputArray _binaryMap )
{
Mat saliencyMap = _saliencyMap.getMat();

@ -167,6 +167,7 @@ namespace cv {
// Generate mask
Mat mask = regions == r;
    // Compute histogram for each channel
float tt = 0;
