diff --git a/modules/contrib/src/basicretinafilter.cpp b/modules/contrib/src/basicretinafilter.cpp index 06320f8117..a4270aa74d 100644 --- a/modules/contrib/src/basicretinafilter.cpp +++ b/modules/contrib/src/basicretinafilter.cpp @@ -316,28 +316,50 @@ void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const std::valarray _spatiotemporalLPfilter(get_data(inputFrame), &_filterOutput[0]); _localLuminanceAdaptation(get_data(inputFrame), &_filterOutput[0], &outputFrame[0]); } -// local luminance adaptation of the input in regard of localLuminance buffer -void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame) + +// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output +void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance) { - float meanLuminance=0; - const float *luminancePTR=inputFrame; - for (unsigned int i=0;i<_filterOutput.getNBpixels();++i) - meanLuminance+=*(luminancePTR++); - meanLuminance/=_filterOutput.getNBpixels(); - //float tempMeanValue=meanLuminance+_meanInputValue*_tau; + _localLuminanceAdaptation(inputOutputFrame, localLuminance, inputOutputFrame, false); + + /* const float *localLuminancePTR=localLuminance; + float *inputOutputFramePTR=inputOutputFrame; + + for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR) + { + float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon; + *(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0+0.00000000001); + } + */ +} - updateCompressionParameter(meanLuminance); +// local luminance adaptation of the input in regard of localLuminance buffer +void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean) +{ + if (updateLuminanceMean) + { float meanLuminance=0; + const float *luminancePTR=inputFrame; + for (unsigned int i=0;i<_filterOutput.getNBpixels();++i) + meanLuminance+=*(luminancePTR++); + meanLuminance/=_filterOutput.getNBpixels(); + //float tempMeanValue=meanLuminance+_meanInputValue*_tau; + updateCompressionParameter(meanLuminance); + } +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue), tbb::auto_partitioner()); +#else //std::cout<<meanLuminance<<std::endl; const float *localLuminancePTR=localLuminance; const float *inputFramePTR=inputFrame; float *outputFramePTR=outputFrame; - for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR) + for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR) { float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon; // TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values... - *(outputFramePTR++) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001f); + *(outputFramePTR) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001); //std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]); } +#endif } // local adaptation applied on a range of values which can be positive and negative @@ -355,27 +377,6 @@ void BasicRetinaFilter::_localLuminanceAdaptationPosNegValues(const float *input } } -// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output -void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance) -{ - /*float meanLuminance=0; - const float *luminancePTR=inputOutputFrame; - for (unsigned int i=0;i<_filterOutput.getNBpixels();++i) - meanLuminance+=*(luminancePTR++); - meanLuminance/=_filterOutput.getNBpixels(); - //float tempMeanValue=meanLuminance+_meanInputValue*_tau; - - updateCompressionParameter(meanLuminance); - */ - const float *localLuminancePTR=localLuminance; - float *inputOutputFramePTR=inputOutputFrame; - - for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR) - { - float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon; - *(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0); - } -} /////////////////////////////////////////////////////////////////////// /// Spatio temporal Low Pass filter functions // run LP filter and save result in the basic retina element buffer @@ -465,7 +466,9 @@ void BasicRetinaFilter::_horizontalCausalFilter(float *outputFrame, unsigned int // horizontal causal filter which adds the input inside void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) { - //#pragma omp parallel for +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau), tbb::auto_partitioner()); +#else for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow) { register float* outputPTR=outputFrame+(IDrowStart+IDrow)*_filterOutput.getNBcolumns(); @@ -477,14 +480,16 @@ void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame *(outputPTR++) = result; } } - +#endif } // horizontal anticausal filter (basic way, no add on) void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) { - //#pragma omp parallel for +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner()); +#else for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow) { register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(_filterOutput.getNBcolumns())-1; @@ -495,9 +500,9 @@ void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned *(outputPTR--) = result; } } - - +#endif } + // horizontal anticausal filter which multiplies the output by _gain void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) { @@ -518,8 +523,10 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame, // vertical anticausal filter void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) { - //#pragma omp parallel for - for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner()); +#else + for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) { register float result=0; register float *outputPTR=outputFrame+IDcolumn; @@ -532,6 +539,7 @@ void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int I } } +#endif } @@ -558,7 +566,10 @@ void BasicRetinaFilter::_verticalAnticausalFilter(float *outputFrame, unsigned i // vertical anticausal filter which multiplies the output by _gain void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) { - float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns(); +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain ), tbb::auto_partitioner()); +#else + float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns(); //#pragma omp parallel for for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) { @@ -573,7 +584,7 @@ void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, u } } - +#endif } ///////////////////////////////////////// @@ -745,8 +756,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(float *inputOutputFram // launch the serie of 1D directional filters in order to compute the 2D low pass filter _horizontalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows()); - _horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows()); - _verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns()); + _horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]); + _verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]); _verticalAnticausalFilter_Irregular_multGain(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns()); } @@ -765,8 +776,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(const float *inputFram // launch the serie of 1D directional filters in order to compute the 2D low pass filter _horizontalCausalFilter_Irregular_addInput(inputFrame, outputFrame, 0, (int)_filterOutput.getNBrows()); - _horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows()); - _verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns()); + _horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]); + _verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]); _verticalAnticausalFilter_Irregular_multGain(outputFrame, 0, (int)_filterOutput.getNBcolumns()); } @@ -806,10 +817,13 @@ void BasicRetinaFilter::_horizontalCausalFilter_Irregular_addInput(const float * } // horizontal anticausal filter (basic way, no add on) -void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) +void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer) { +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#else register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1; - register const float* spatialConstantPTR=&_progressiveSpatialConstant[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1; + register const float* spatialConstantPTR=spatialConstantBuffer+IDrowEnd*(_filterOutput.getNBcolumns())-1; for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow) { @@ -820,18 +834,21 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame *(outputPTR--) = result; } } - +#endif } // vertical anticausal filter -void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) +void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer) { +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#else for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) { register float result=0; register float *outputPTR=outputFrame+IDcolumn; - register const float *spatialConstantPTR=&_progressiveSpatialConstant[0]+IDcolumn; + register const float *spatialConstantPTR=spatialConstantBuffer+IDcolumn; for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index) { result = *(outputPTR) + *(spatialConstantPTR) * result; @@ -840,6 +857,7 @@ void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsi spatialConstantPTR+=_filterOutput.getNBcolumns(); } } +#endif } // vertical anticausal filter which multiplies the output by _gain diff --git a/modules/contrib/src/basicretinafilter.hpp b/modules/contrib/src/basicretinafilter.hpp index a8f61f1bc3..c0cf100a4c 100644 --- a/modules/contrib/src/basicretinafilter.hpp +++ b/modules/contrib/src/basicretinafilter.hpp @@ -393,48 +393,261 @@ protected: // LP filter that squares the input and computes the output ONLY on the areas where the integrationAreas map are TRUE void _localSquaringSpatioTemporalLPfilter(const float *inputFrame, float *LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex=0); - // local luminance adaptation of the input in regard of localLuminance buffer - void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame); - // local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output - void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance); - // local adaptation applied on a range of values which can be positive and negative - void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame); + // local luminance adaptation of the input in regard of localLuminance buffer + void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean=true); + // local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output + void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance); + // local adaptation applied on a range of values which can be positive and negative + void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame); ////////////////////////////////////////////////////////////// // 1D directional filters used for the 2D low pass filtering - // 1D filters with image input - void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); - // 1D filters with image input that is squared in the function - void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); - // vertical anticausal filter that returns the mean value of its result - float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); - - // most simple functions: only perform 1D filtering with output=input (no add on) - void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); - void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); - void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); - void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); - - // perform 1D filtering with output with varrying spatial coefficient - void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); - void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); - void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); - void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); - void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); - - - // 1D filters in which the output is multiplied by _gain - void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output - void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output - - // LP filter on specific parts of the picture instead of all the image - // same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output... - void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas); - void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas); - void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); - void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output + // 1D filters with image input + void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); + // 1D filters with image input that is squared in the function // parallelized with TBB + void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); + // vertical anticausal filter that returns the mean value of its result + float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); + + // most simple functions: only perform 1D filtering with output=input (no add on) + void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); + void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); // parallelized with TBB + void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // parallelized with TBB + void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); + + // perform 1D filtering with output with varrying spatial coefficient + void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); + void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); + void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer); // parallelized with TBB + void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer); // parallelized with TBB + void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); + + + // 1D filters in which the output is multiplied by _gain + void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output // parallelized with TBB + void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output + + // LP filter on specific parts of the picture instead of all the image + // same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output... + void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas); + void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas); + void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); + void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output + +#ifdef HAVE_TBB +/****************************************************** +** IF TBB is useable, then, main loops are parallelized using these functors +** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary +** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised +** ==> functors constructors can differ from the parameters used with their related serial functions +*/ + +#define _DEBUG_TBB // define DEBUG_TBB in order to display additionnal data on stdout + class Parallel_horizontalAnticausalFilter + { + private: + float *outputFrame; + const unsigned int IDrowEnd, nbColumns; + const float filterParam_a; + public: + // constructor which takes the input image pointer reference reference and limits + Parallel_horizontalAnticausalFilter(float *bufferToProcess, const unsigned int idEnd, const unsigned int nbCols, const float a ) + :outputFrame(bufferToProcess), IDrowEnd(idEnd), nbColumns(nbCols), filterParam_a(a) + { +#ifdef DEBUG_TBB + std::cout<<"Parallel_horizontalAnticausalFilter::Parallel_horizontalAnticausalFilter :" + <<"\n\t idEnd="<<IDrowEnd + <<"\n\t nbCols="<<nbColumns + <<"\n\t filterParam="<<filterParam_a + <<std::endl; +#endif + } + + void operator()( const tbb::blocked_range<size_t>& r ) const { + +#ifdef DEBUG_TBB + std::cout<<"Parallel_horizontalAnticausalFilter::operator() :" + <<"\n\t range size="<<r.size() + <<"\n\t first index="<<r.begin() + //<<"\n\t last index="<<filterParam + <<std::endl; +#endif + for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + { + register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1; + register float result=0; + for (unsigned int index=0; index<nbColumns; ++index) + { + result = *(outputPTR)+ filterParam_a* result; + *(outputPTR--) = result; + } + } + } + }; + + class Parallel_horizontalCausalFilter_addInput + { + private: + const float *inputFrame; + float *outputFrame; + const unsigned int IDrowStart, nbColumns; + const float filterParam_a, filterParam_tau; + public: + Parallel_horizontalCausalFilter_addInput(const float *bufferToAddAsInputProcess, float *bufferToProcess, const unsigned int idStart, const unsigned int nbCols, const float a, const float tau) + :inputFrame(bufferToAddAsInputProcess), outputFrame(bufferToProcess), IDrowStart(idStart), nbColumns(nbCols), filterParam_a(a), filterParam_tau(tau){} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + { + register float* outputPTR=outputFrame+(IDrowStart+IDrow)*nbColumns; + register const float* inputPTR=inputFrame+(IDrowStart+IDrow)*nbColumns; + register float result=0; + for (unsigned int index=0; index<nbColumns; ++index) + { + result = *(inputPTR++) + filterParam_tau**(outputPTR)+ filterParam_a* result; + *(outputPTR++) = result; + } + } + } + }; + + class Parallel_verticalCausalFilter + { + private: + float *outputFrame; + const unsigned int nbRows, nbColumns; + const float filterParam_a; + public: + Parallel_verticalCausalFilter(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a ) + :outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a){} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + { + register float result=0; + register float *outputPTR=outputFrame+IDcolumn; + + for (unsigned int index=0; index<nbRows; ++index) + { + result = *(outputPTR) + filterParam_a * result; + *(outputPTR) = result; + outputPTR+=nbColumns; + + } + } + } + }; + + class Parallel_verticalAnticausalFilter_multGain + { + private: + float *outputFrame; + const unsigned int nbRows, nbColumns; + const float filterParam_a, filterParam_gain; + public: + Parallel_verticalAnticausalFilter_multGain(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a, const float gain) + :outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a), filterParam_gain(gain){} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + float* offset=outputFrame+nbColumns*nbRows-nbColumns; + for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + { + register float result=0; + register float *outputPTR=offset+IDcolumn; + + for (unsigned int index=0; index<nbRows; ++index) + { + result = *(outputPTR) + filterParam_a * result; + *(outputPTR) = filterParam_gain*result; + outputPTR-=nbColumns; + + } + } + } + }; + + class Parallel_localAdaptation + { + private: + const float *localLuminance, *inputFrame; + float *outputFrame; + const float localLuminanceFactor, localLuminanceAddon, maxInputValue; + public: + Parallel_localAdaptation(const float *localLum, const float *inputImg, float *bufferToProcess, const float localLuminanceFact, const float localLuminanceAdd, const float maxInputVal) + :localLuminance(localLum), inputFrame(inputImg),outputFrame(bufferToProcess), localLuminanceFactor(localLuminanceFact), localLuminanceAddon(localLuminanceAdd), maxInputValue(maxInputVal) {}; + + void operator()( const tbb::blocked_range<size_t>& r ) const { + const float *localLuminancePTR=localLuminance+r.begin(); + const float *inputFramePTR=inputFrame+r.begin(); + float *outputFramePTR=outputFrame+r.begin(); + for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR) + { + float X0=*(localLuminancePTR++)*localLuminanceFactor+localLuminanceAddon; + // TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values... + *(outputFramePTR) = (maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001); + //std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]); + } + } + }; + +////////////////////////////////////////// +/// Specific filtering methods which manage non const spatial filtering parameter (used By retinacolor and LogProjectors) + class Parallel_horizontalAnticausalFilter_Irregular + { + private: + float *outputFrame; + const float *spatialConstantBuffer; + const unsigned int IDrowEnd, nbColumns; + public: + Parallel_horizontalAnticausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int idEnd, const unsigned int nbCols) + :outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), IDrowEnd(idEnd), nbColumns(nbCols){} + +void operator()( const tbb::blocked_range<size_t>& r ) const { + + for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + { + register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1; + register const float* spatialConstantPTR=spatialConstantBuffer+(IDrowEnd-IDrow)*(nbColumns)-1; + register float result=0; + for (unsigned int index=0; index<nbColumns; ++index) + { + result = *(outputPTR)+ *(spatialConstantPTR--)* result; + *(outputPTR--) = result; + } + } + } + }; + + class Parallel_verticalCausalFilter_Irregular + { + private: + float *outputFrame; + const float *spatialConstantBuffer; + const unsigned int nbRows, nbColumns; + public: + Parallel_verticalCausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int nbRws, const unsigned int nbCols) + :outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), nbRows(nbRws), nbColumns(nbCols){} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + { + register float result=0; + register float *outputPTR=outputFrame+IDcolumn; + register const float* spatialConstantPTR=spatialConstantBuffer+IDcolumn; + for (unsigned int index=0; index<nbRows; ++index) + { + result = *(outputPTR) + *(spatialConstantPTR) * result; + *(outputPTR) = result; + outputPTR+=nbColumns; + spatialConstantPTR+=nbColumns; + } + } + } + }; + +#endif }; diff --git a/modules/contrib/src/magnoretinafilter.cpp b/modules/contrib/src/magnoretinafilter.cpp index ea5fdf8037..4626133efa 100644 --- a/modules/contrib/src/magnoretinafilter.cpp +++ b/modules/contrib/src/magnoretinafilter.cpp @@ -153,6 +153,9 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *OPL_OFF) { +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient), tbb::auto_partitioner()); +#else register const float *OPL_ON_PTR=OPL_ON; register const float *OPL_OFF_PTR=OPL_OFF; register float *previousInput_ON_PTR= &_previousInput_ON[0]; @@ -175,6 +178,7 @@ void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++); } +#endif } // launch filter that runs all the IPL filter diff --git a/modules/contrib/src/magnoretinafilter.hpp b/modules/contrib/src/magnoretinafilter.hpp index b5c9015ac6..daefb74485 100644 --- a/modules/contrib/src/magnoretinafilter.hpp +++ b/modules/contrib/src/magnoretinafilter.hpp @@ -190,10 +190,52 @@ private: // varialbles float _temporalCoefficient; - // amacrine cells filter : high pass temporal filter - void _amacrineCellsComputing(const float *ONinput, const float *OFFinput); - - + // amacrine cells filter : high pass temporal filter + void _amacrineCellsComputing(const float *ONinput, const float *OFFinput); +#ifdef HAVE_TBB +/****************************************************** +** IF TBB is useable, then, main loops are parallelized using these functors +** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary +** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised +** ==> functors constructors can differ from the parameters used with their related serial functions +*/ + class Parallel_amacrineCellsComputing + { + private: + const float *OPL_ON, *OPL_OFF; + float *previousInput_ON, *previousInput_OFF, *amacrinCellsTempOutput_ON, *amacrinCellsTempOutput_OFF; + const float temporalCoefficient; + public: + Parallel_amacrineCellsComputing(const float *OPL_ON_PTR, const float *OPL_OFF_PTR, float *previousInput_ON_PTR, float *previousInput_OFF_PTR, float *amacrinCellsTempOutput_ON_PTR, float *amacrinCellsTempOutput_OFF_PTR, float temporalCoefficientVal) + :OPL_ON(OPL_ON_PTR), OPL_OFF(OPL_OFF_PTR), previousInput_ON(previousInput_ON_PTR), previousInput_OFF(previousInput_OFF_PTR), amacrinCellsTempOutput_ON(amacrinCellsTempOutput_ON_PTR), amacrinCellsTempOutput_OFF(amacrinCellsTempOutput_OFF_PTR), temporalCoefficient(temporalCoefficientVal) {} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + register const float *OPL_ON_PTR=OPL_ON+r.begin(); + register const float *OPL_OFF_PTR=OPL_OFF+r.begin(); + register float *previousInput_ON_PTR= previousInput_ON+r.begin(); + register float *previousInput_OFF_PTR= previousInput_OFF+r.begin(); + register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.begin(); + register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.begin(); + + for (unsigned int IDpixel=r.begin() ; IDpixel!=r.end(); ++IDpixel) + { + + /* Compute ON and OFF amacrin cells high pass temporal filter */ + float magnoXonPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_ON_PTR+ *OPL_ON_PTR-*previousInput_ON_PTR); + *(amacrinCellsTempOutput_ON_PTR++)=((float)(magnoXonPixelResult>0))*magnoXonPixelResult; + + float magnoXoffPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_OFF_PTR+ *OPL_OFF_PTR-*previousInput_OFF_PTR); + *(amacrinCellsTempOutput_OFF_PTR++)=((float)(magnoXoffPixelResult>0))*magnoXoffPixelResult; + + /* prepare next loop */ + *(previousInput_ON_PTR++)=*(OPL_ON_PTR++); + *(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++); + + } + } + + }; +#endif }; } diff --git a/modules/contrib/src/parvoretinafilter.cpp b/modules/contrib/src/parvoretinafilter.cpp index a6cbf1b801..50d1b0ba68 100644 --- a/modules/contrib/src/parvoretinafilter.cpp +++ b/modules/contrib/src/parvoretinafilter.cpp @@ -199,17 +199,20 @@ const std::valarray<float> &ParvoRetinaFilter::runFilter(const std::valarray<flo return (*_parvocellularOutputONminusOFF); } -void ParvoRetinaFilter::_OPL_OnOffWaysComputing() +void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requires many buffer accesses, parallelizing can increase bandwith & core efficacy { // loop that makes the difference between photoreceptor cells output and horizontal cells // positive part goes on the ON way, negative pat goes on the OFF way - register float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0]; - register float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0]; - register float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0]; - register float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0]; - register float *parvocellularOutputON_PTR= &_parvocellularOutputON[0]; - register float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0]; +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0]), tbb::auto_partitioner()); +#else + float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0]; + float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0]; + float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0]; + float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0]; + float *parvocellularOutputON_PTR= &_parvocellularOutputON[0]; + float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0]; // compute bipolar cells response equal to photoreceptors minus horizontal cells response // and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel) @@ -222,6 +225,7 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing() *(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference; *(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference; } +#endif } } diff --git a/modules/contrib/src/parvoretinafilter.hpp b/modules/contrib/src/parvoretinafilter.hpp index 54521b17b8..76f5506855 100644 --- a/modules/contrib/src/parvoretinafilter.hpp +++ b/modules/contrib/src/parvoretinafilter.hpp @@ -216,6 +216,45 @@ private: // private functions void _OPL_OnOffWaysComputing(); +#ifdef HAVE_TBB +/****************************************************** +** IF TBB is useable, then, main loops are parallelized using these functors +** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary +** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised +** ==> functors constructors can differ from the parameters used with their related serial functions +*/ + class Parallel_OPL_OnOffWaysComputing + { + private: + float *photoreceptorsOutput, *horizontalCellsOutput, *bipolarCellsON, *bipolarCellsOFF, *parvocellularOutputON, *parvocellularOutputOFF; + public: + Parallel_OPL_OnOffWaysComputing(float *photoreceptorsOutput_PTR, float *horizontalCellsOutput_PTR, float *bipolarCellsON_PTR, float *bipolarCellsOFF_PTR, float *parvocellularOutputON_PTR, float *parvocellularOutputOFF_PTR) + :photoreceptorsOutput(photoreceptorsOutput_PTR), horizontalCellsOutput(horizontalCellsOutput_PTR), bipolarCellsON(bipolarCellsON_PTR), bipolarCellsOFF(bipolarCellsOFF_PTR), parvocellularOutputON(parvocellularOutputON_PTR), parvocellularOutputOFF(parvocellularOutputOFF_PTR) {} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + // compute bipolar cells response equal to photoreceptors minus horizontal cells response + // and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result + float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.begin(); + float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.begin(); + float *bipolarCellsON_PTR = bipolarCellsON+r.begin(); + float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.begin(); + float *parvocellularOutputON_PTR= parvocellularOutputON+r.begin(); + float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.begin(); + + for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel) + { + float pixelDifference = *(photoreceptorsOutput_PTR++) -*(horizontalCellsOutput_PTR++); + // test condition to allow write pixelDifference in ON or OFF buffer and 0 in the over + float isPositive=(float) (pixelDifference>0.0f); + + // ON and OFF channels writing step + *(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference; + *(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference; + } + } + }; +#endif + }; } #endif diff --git a/modules/contrib/src/retinacolor.cpp b/modules/contrib/src/retinacolor.cpp index f98b0507dc..b452956997 100644 --- a/modules/contrib/src/retinacolor.cpp +++ b/modules/contrib/src/retinacolor.cpp @@ -89,7 +89,7 @@ RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns _demultiplexedColorFrame(NBrows*NBcolumns*3), _chrominance(NBrows*NBcolumns*3), _colorLocalDensity(NBrows*NBcolumns*3), - _imageGradient(NBrows*NBcolumns*3) + _imageGradient(NBrows*NBcolumns*2) { // link to parent buffers (let's recycle !) _luminance=&_filterOutput; @@ -126,12 +126,12 @@ RetinaColor::~RetinaColor() void RetinaColor::clearAllBuffers() { BasicRetinaFilter::clearAllBuffers(); - _tempMultiplexedFrame=0; - _demultiplexedTempBuffer=0; + _tempMultiplexedFrame=0.f; + _demultiplexedTempBuffer=0.f; - _demultiplexedColorFrame=0; - _chrominance=0; - _imageGradient=1; + _demultiplexedColorFrame=0.f; + _chrominance=0.f; + _imageGradient=0.57f; } /** @@ -149,7 +149,7 @@ void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns _demultiplexedColorFrame.resize(NBrows*NBcolumns*3); _chrominance.resize(NBrows*NBcolumns*3); _colorLocalDensity.resize(NBrows*NBcolumns*3); - _imageGradient.resize(NBrows*NBcolumns*3); + _imageGradient.resize(NBrows*NBcolumns*2); // link to parent buffers (let's recycle !) _luminance=&_filterOutput; @@ -325,15 +325,15 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed }else { - register const float *multiplexedColorFramePTR1= get_data(multiplexedColorFrame); - for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR1) + register const float *multiplexedColorFramePTR= get_data(multiplexedColorFrame); + for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR) { // normalize by photoreceptors density float Cr=*(chrominancePTR)*_colorLocalDensity[indexc]; float Cg=*(chrominancePTR+_filterOutput.getNBpixels())*_colorLocalDensity[indexc+_filterOutput.getNBpixels()]; float Cb=*(chrominancePTR+_filterOutput.getDoubleNBpixels())*_colorLocalDensity[indexc+_filterOutput.getDoubleNBpixels()]; *luminance=(Cr+Cg+Cb)*_pG; - _demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR1 - *luminance; + _demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR - *luminance; } @@ -349,8 +349,9 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed _adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getNBpixels()); _adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getDoubleNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getDoubleNBpixels()); - for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage - _demultiplexedColorFrame[index] /= _chrominance[index]; +/* for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage + _demultiplexedColorFrame[index] /= _chrominance[index];*/ + _demultiplexedColorFrame/=_chrominance; // more optimal ;o) // compute and substract the residual luminance for (unsigned int index=0; index<_filterOutput.getNBpixels() ; ++index) @@ -432,6 +433,9 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const if (inputOutputBuffer==NULL) inputOutputBuffer= &_demultiplexedColorFrame[0]; +#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method + tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue), tbb::auto_partitioner()); +#else register float *inputOutputBufferPTR=inputOutputBuffer; for (register unsigned int jf = 0; jf < _filterOutput.getNBpixels()*3; ++jf, ++inputOutputBufferPTR) { @@ -440,6 +444,7 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const else if (*inputOutputBufferPTR<0) *inputOutputBufferPTR=0; } +#endif //std::cout<<"RetinaColor::...normalizing RGB frame OK"<<std::endl; } @@ -535,8 +540,8 @@ void RetinaColor::_applyRIFfilter(const float *sourceBuffer, float *destinationB void RetinaColor::_getNormalizedContoursImage(const float *inputFrame, float *outputFrame) { - float maxValue=0; - float normalisationFactor=1.f/3; + float maxValue=0.f; + float normalisationFactor=1.f/3.f; for (unsigned int indexr=1 ; indexr<_filterOutput.getNBrows()-1; ++indexr) { for (unsigned int indexc=1 ; indexc<_filterOutput.getNBcolumns()-1; ++indexc) @@ -564,19 +569,23 @@ void RetinaColor::_adaptiveSpatialLPfilter(const float *inputFrame, float *outpu _gain = (1-0.57f)*(1-0.57f)*(1-0.06f)*(1-0.06f); // launch the serie of 1D directional filters in order to compute the 2D low pass filter + // -> horizontal filters work with the first layer of imageGradient _adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, 0, _filterOutput.getNBrows()); - _adaptiveHorizontalAnticausalFilter(outputFrame, 0, _filterOutput.getNBrows()); - _adaptiveVerticalCausalFilter(outputFrame, 0, _filterOutput.getNBcolumns()); + _horizontalAnticausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBrows(), &_imageGradient[0]); + // -> horizontal filters work with the second layer of imageGradient + _verticalCausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBcolumns(), &_imageGradient[0]+_filterOutput.getNBpixels()); _adaptiveVerticalAnticausalFilter_multGain(outputFrame, 0, _filterOutput.getNBcolumns()); - } -// horizontal causal filter which adds the input inside +// horizontal causal filter which adds the input inside... replaces the parent _horizontalCausalFilter_Irregular_addInput by avoiding a product for each pixel void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) { +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#else register float* outputPTR=outputFrame+IDrowStart*_filterOutput.getNBcolumns(); register const float* inputPTR=inputFrame+IDrowStart*_filterOutput.getNBcolumns(); - register float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns(); + register const float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns(); for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow) { register float result=0; @@ -589,51 +598,17 @@ void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFra } // std::cout<<" "<<std::endl; } +#endif } -// horizontal anticausal filter (basic way, no add on) -void RetinaColor::_adaptiveHorizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) -{ - register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1; - register float *imageGradientPTR= &_imageGradient[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1; - - for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow) - { - register float result=0; - for (unsigned int index=0; index<_filterOutput.getNBcolumns(); ++index) - { - result = *(outputPTR)+ (*imageGradientPTR)* result; - *(outputPTR--) = result; - --imageGradientPTR; - } - } -} - -// vertical anticausal filter -void RetinaColor::_adaptiveVerticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) -{ - for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) - { - register float result=0; - register float *outputPTR=outputFrame+IDcolumn; - register float *imageGradientPTR= &_imageGradient[0]+IDcolumn; - - for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index) - { - result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result; - *(outputPTR) = result; - outputPTR+=_filterOutput.getNBcolumns(); - imageGradientPTR+=_filterOutput.getNBcolumns(); - - } - } -} - -// vertical anticausal filter which multiplies the output by _gain +// vertical anticausal filter which multiplies the output by _gain... replaces the parent _verticalAnticausalFilter_multGain by avoiding a product for each pixel and taking into account the second layer of the _imageGradient buffer void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) { +#ifdef HAVE_TBB + tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain), tbb::auto_partitioner()); +#else float* outputOffset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns(); - float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns(); + float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()*2-_filterOutput.getNBcolumns(); for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) { @@ -642,12 +617,13 @@ void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, register float *imageGradientPTR=gradOffset+IDcolumn; for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index) { - result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result; + result = *(outputPTR) + (*(imageGradientPTR)) * result; *(outputPTR) = _gain*result; outputPTR-=_filterOutput.getNBcolumns(); imageGradientPTR-=_filterOutput.getNBcolumns(); } } +#endif } /////////////////////////// diff --git a/modules/contrib/src/retinacolor.hpp b/modules/contrib/src/retinacolor.hpp index 622f9a09bb..ec5d060c56 100644 --- a/modules/contrib/src/retinacolor.hpp +++ b/modules/contrib/src/retinacolor.hpp @@ -248,9 +248,7 @@ protected: void _getNormalizedContoursImage(const float *inputFrame, float *outputFrame); // -> special adaptive filters dedicated to low pass filtering on the chrominance (skeeps filtering on the edges) void _adaptiveSpatialLPfilter(const float *inputFrame, float *outputFrame); - void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); - void _adaptiveHorizontalAnticausalFilter(float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); - void _adaptiveVerticalCausalFilter(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd); + void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); // TBB parallelized void _adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd); void _computeGradient(const float *luminance); void _normalizeOutputs_0_maxOutputValue(void); @@ -258,6 +256,84 @@ protected: // color space transform void _applyImageColorSpaceConversion(const std::valarray<float> &inputFrame, std::valarray<float> &outputFrame, const float *transformTable); +#ifdef HAVE_TBB +/****************************************************** +** IF TBB is useable, then, main loops are parallelized using these functors +** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary +** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised +** ==> functors constructors can differ from the parameters used with their related serial functions +*/ + +/* Template : + class + { + private: + + public: + Parallel_() + : {} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + + } + }: +*/ + class Parallel_adaptiveHorizontalCausalFilter_addInput + { + private: + float *outputFrame; + const float *inputFrame, *imageGradient; + const unsigned int nbColumns; + public: + Parallel_adaptiveHorizontalCausalFilter_addInput(const float *inputImg, float *bufferToProcess, const float *imageGrad, const unsigned int nbCols) + :outputFrame(bufferToProcess), inputFrame(inputImg), imageGradient(imageGrad), nbColumns(nbCols) {}; + + void operator()( const tbb::blocked_range<size_t>& r ) const { + register float* outputPTR=outputFrame+r.begin()*nbColumns; + register const float* inputPTR=inputFrame+r.begin()*nbColumns; + register const float *imageGradientPTR= imageGradient+r.begin()*nbColumns; + for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + { + register float result=0; + for (unsigned int index=0; index<nbColumns; ++index) + { + result = *(inputPTR++) + (*imageGradientPTR++)* result; + *(outputPTR++) = result; + } + } + } + }; + + class Parallel_adaptiveVerticalAnticausalFilter_multGain + { + private: + float *outputFrame; + const float *imageGradient; + const unsigned int nbRows, nbColumns; + const float filterParam_gain; + public: + Parallel_adaptiveVerticalAnticausalFilter_multGain(float *bufferToProcess, const float *imageGrad, const unsigned int nbRws, const unsigned int nbCols, const float gain) + :outputFrame(bufferToProcess), imageGradient(imageGrad), nbRows(nbRws), nbColumns(nbCols), filterParam_gain(gain){} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + float* offset=outputFrame+nbColumns*nbRows-nbColumns; + const float* gradOffset= imageGradient+nbColumns*nbRows-nbColumns; + for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + { + register float result=0; + register float *outputPTR=offset+IDcolumn; + register const float *imageGradientPTR=gradOffset+IDcolumn; + for (unsigned int index=0; index<nbRows; ++index) + { + result = *(outputPTR) + *(imageGradientPTR) * result; + *(outputPTR) = filterParam_gain*result; + outputPTR-=nbColumns; + imageGradientPTR-=nbColumns; + } + } + } + }; +#endif }; } diff --git a/modules/contrib/src/templatebuffer.hpp b/modules/contrib/src/templatebuffer.hpp index 06a519ec6e..b119b95089 100644 --- a/modules/contrib/src/templatebuffer.hpp +++ b/modules/contrib/src/templatebuffer.hpp @@ -70,6 +70,38 @@ #include <iostream> #include <cmath> + +//// If TBB is used +// ==> then include required includes +#ifdef HAVE_TBB +#include "tbb/parallel_for.h" +#include "tbb/blocked_range.h" + +// ==> declare usefull generic tools +template <class type> +class Parallel_clipBufferValues +{ +private: + type *bufferToClip; + const type minValue, maxValue; + +public: + Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max) + : bufferToClip(bufferToProcess), minValue(min), maxValue(max){} + + void operator()( const tbb::blocked_range<size_t>& r ) const { + register type *inputOutputBufferPTR=bufferToClip+r.begin(); + for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR) + { + if (*inputOutputBufferPTR>maxValue) + *inputOutputBufferPTR=maxValue; + else if (*inputOutputBufferPTR<minValue) + *inputOutputBufferPTR=minValue; + } + } +}; +#endif + //#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information namespace cv @@ -351,21 +383,25 @@ public: } } - std::cout<<"Tdebug"<<std::endl; - std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl; - std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl; - std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl; - // clipping values outside than the updated thresholds - bufferPTR=this->Buffer(); - for (unsigned int i=0;i<this->size();++i, ++bufferPTR) - { - if (*bufferPTR<updatedLowValue) - *bufferPTR=updatedLowValue; - else if (*bufferPTR>updatedHighValue) - *bufferPTR=updatedHighValue; - } - - normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue); + std::cout<<"Tdebug"<<std::endl; + std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl; + std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl; + std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl; + // clipping values outside than the updated thresholds + bufferPTR=this->Buffer(); +#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method + tbb::parallel_for(tbb::blocked_range<size_t>(0,this->size()), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner()); +#else + + for (unsigned int i=0;i<this->size();++i, ++bufferPTR) + { + if (*bufferPTR<updatedLowValue) + *bufferPTR=updatedLowValue; + else if (*bufferPTR>updatedHighValue) + *bufferPTR=updatedHighValue; + } +#endif + normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue); }