Commit 424bc609 authored by noob's avatar noob

Retina module is now parallelized thanks to the TBB library. Speed increase…

Retina module is now parallelized thanks to the TBB library. Speed increase expected on multicore plateforms
parent c78884c7
......@@ -316,28 +316,50 @@ void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const std::valarray
_spatiotemporalLPfilter(get_data(inputFrame), &_filterOutput[0]);
_localLuminanceAdaptation(get_data(inputFrame), &_filterOutput[0], &outputFrame[0]);
}
// local luminance adaptation of the input in regard of localLuminance buffer
void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame)
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
{
float meanLuminance=0;
const float *luminancePTR=inputFrame;
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
meanLuminance+=*(luminancePTR++);
meanLuminance/=_filterOutput.getNBpixels();
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
_localLuminanceAdaptation(inputOutputFrame, localLuminance, inputOutputFrame, false);
/* const float *localLuminancePTR=localLuminance;
float *inputOutputFramePTR=inputOutputFrame;
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
{
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
*(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0+0.00000000001);
}
*/
}
updateCompressionParameter(meanLuminance);
// local luminance adaptation of the input in regard of localLuminance buffer
void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean)
{
if (updateLuminanceMean)
{ float meanLuminance=0;
const float *luminancePTR=inputFrame;
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
meanLuminance+=*(luminancePTR++);
meanLuminance/=_filterOutput.getNBpixels();
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
updateCompressionParameter(meanLuminance);
}
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue), tbb::auto_partitioner());
#else
//std::cout<<meanLuminance<<std::endl;
const float *localLuminancePTR=localLuminance;
const float *inputFramePTR=inputFrame;
float *outputFramePTR=outputFrame;
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR)
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
{
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
// TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...
*(outputFramePTR++) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001f);
*(outputFramePTR) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
//std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
}
#endif
}
// local adaptation applied on a range of values which can be positive and negative
......@@ -355,27 +377,6 @@ void BasicRetinaFilter::_localLuminanceAdaptationPosNegValues(const float *input
}
}
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
{
/*float meanLuminance=0;
const float *luminancePTR=inputOutputFrame;
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
meanLuminance+=*(luminancePTR++);
meanLuminance/=_filterOutput.getNBpixels();
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
updateCompressionParameter(meanLuminance);
*/
const float *localLuminancePTR=localLuminance;
float *inputOutputFramePTR=inputOutputFrame;
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
{
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
*(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0);
}
}
///////////////////////////////////////////////////////////////////////
/// Spatio temporal Low Pass filter functions
// run LP filter and save result in the basic retina element buffer
......@@ -465,7 +466,9 @@ void BasicRetinaFilter::_horizontalCausalFilter(float *outputFrame, unsigned int
// horizontal causal filter which adds the input inside
void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
//#pragma omp parallel for
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau), tbb::auto_partitioner());
#else
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowStart+IDrow)*_filterOutput.getNBcolumns();
......@@ -477,14 +480,16 @@ void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame
*(outputPTR++) = result;
}
}
#endif
}
// horizontal anticausal filter (basic way, no add on)
void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
//#pragma omp parallel for
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
#else
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(_filterOutput.getNBcolumns())-1;
......@@ -495,9 +500,9 @@ void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned
*(outputPTR--) = result;
}
}
#endif
}
// horizontal anticausal filter which multiplies the output by _gain
void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
......@@ -518,8 +523,10 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame,
// vertical anticausal filter
void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
//#pragma omp parallel for
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
#else
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
......@@ -532,6 +539,7 @@ void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int I
}
}
#endif
}
......@@ -558,7 +566,10 @@ void BasicRetinaFilter::_verticalAnticausalFilter(float *outputFrame, unsigned i
// vertical anticausal filter which multiplies the output by _gain
void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain ), tbb::auto_partitioner());
#else
float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
//#pragma omp parallel for
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
......@@ -573,7 +584,7 @@ void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, u
}
}
#endif
}
/////////////////////////////////////////
......@@ -745,8 +756,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(float *inputOutputFram
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
_horizontalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
_horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
_verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
_horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
_verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
_verticalAnticausalFilter_Irregular_multGain(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
}
......@@ -765,8 +776,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(const float *inputFram
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
_horizontalCausalFilter_Irregular_addInput(inputFrame, outputFrame, 0, (int)_filterOutput.getNBrows());
_horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows());
_verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns());
_horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
_verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
_verticalAnticausalFilter_Irregular_multGain(outputFrame, 0, (int)_filterOutput.getNBcolumns());
}
......@@ -806,10 +817,13 @@ void BasicRetinaFilter::_horizontalCausalFilter_Irregular_addInput(const float *
}
// horizontal anticausal filter (basic way, no add on)
void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns()), tbb::auto_partitioner());
#else
register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
register const float* spatialConstantPTR=&_progressiveSpatialConstant[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
register const float* spatialConstantPTR=spatialConstantBuffer+IDrowEnd*(_filterOutput.getNBcolumns())-1;
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
......@@ -820,18 +834,21 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame
*(outputPTR--) = result;
}
}
#endif
}
// vertical anticausal filter
void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns()), tbb::auto_partitioner());
#else
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
register const float *spatialConstantPTR=&_progressiveSpatialConstant[0]+IDcolumn;
register const float *spatialConstantPTR=spatialConstantBuffer+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + *(spatialConstantPTR) * result;
......@@ -840,6 +857,7 @@ void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsi
spatialConstantPTR+=_filterOutput.getNBcolumns();
}
}
#endif
}
// vertical anticausal filter which multiplies the output by _gain
......
......@@ -393,48 +393,261 @@ protected:
// LP filter that squares the input and computes the output ONLY on the areas where the integrationAreas map are TRUE
void _localSquaringSpatioTemporalLPfilter(const float *inputFrame, float *LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex=0);
// local luminance adaptation of the input in regard of localLuminance buffer
void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame);
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
// local adaptation applied on a range of values which can be positive and negative
void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
// local luminance adaptation of the input in regard of localLuminance buffer
void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean=true);
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
// local adaptation applied on a range of values which can be positive and negative
void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
//////////////////////////////////////////////////////////////
// 1D directional filters used for the 2D low pass filtering
// 1D filters with image input
void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// 1D filters with image input that is squared in the function
void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// vertical anticausal filter that returns the mean value of its result
float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// most simple functions: only perform 1D filtering with output=input (no add on)
void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// perform 1D filtering with output with varrying spatial coefficient
void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// 1D filters in which the output is multiplied by _gain
void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
// LP filter on specific parts of the picture instead of all the image
// same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
// 1D filters with image input
void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// 1D filters with image input that is squared in the function // parallelized with TBB
void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// vertical anticausal filter that returns the mean value of its result
float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// most simple functions: only perform 1D filtering with output=input (no add on)
void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); // parallelized with TBB
void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // parallelized with TBB
void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// perform 1D filtering with output with varrying spatial coefficient
void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer); // parallelized with TBB
void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer); // parallelized with TBB
void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// 1D filters in which the output is multiplied by _gain
void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output // parallelized with TBB
void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
// LP filter on specific parts of the picture instead of all the image
// same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
#define _DEBUG_TBB // define DEBUG_TBB in order to display additionnal data on stdout
class Parallel_horizontalAnticausalFilter
{
private:
float *outputFrame;
const unsigned int IDrowEnd, nbColumns;
const float filterParam_a;
public:
// constructor which takes the input image pointer reference reference and limits
Parallel_horizontalAnticausalFilter(float *bufferToProcess, const unsigned int idEnd, const unsigned int nbCols, const float a )
:outputFrame(bufferToProcess), IDrowEnd(idEnd), nbColumns(nbCols), filterParam_a(a)
{
#ifdef DEBUG_TBB
std::cout<<"Parallel_horizontalAnticausalFilter::Parallel_horizontalAnticausalFilter :"
<<"\n\t idEnd="<<IDrowEnd
<<"\n\t nbCols="<<nbColumns
<<"\n\t filterParam="<<filterParam_a
<<std::endl;
#endif
}
void operator()( const tbb::blocked_range<size_t>& r ) const {
#ifdef DEBUG_TBB
std::cout<<"Parallel_horizontalAnticausalFilter::operator() :"
<<"\n\t range size="<<r.size()
<<"\n\t first index="<<r.begin()
//<<"\n\t last index="<<filterParam
<<std::endl;
#endif
for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(outputPTR)+ filterParam_a* result;
*(outputPTR--) = result;
}
}
}
};
class Parallel_horizontalCausalFilter_addInput
{
private:
const float *inputFrame;
float *outputFrame;
const unsigned int IDrowStart, nbColumns;
const float filterParam_a, filterParam_tau;
public:
Parallel_horizontalCausalFilter_addInput(const float *bufferToAddAsInputProcess, float *bufferToProcess, const unsigned int idStart, const unsigned int nbCols, const float a, const float tau)
:inputFrame(bufferToAddAsInputProcess), outputFrame(bufferToProcess), IDrowStart(idStart), nbColumns(nbCols), filterParam_a(a), filterParam_tau(tau){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowStart+IDrow)*nbColumns;
register const float* inputPTR=inputFrame+(IDrowStart+IDrow)*nbColumns;
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(inputPTR++) + filterParam_tau**(outputPTR)+ filterParam_a* result;
*(outputPTR++) = result;
}
}
}
};
class Parallel_verticalCausalFilter
{
private:
float *outputFrame;
const unsigned int nbRows, nbColumns;
const float filterParam_a;
public:
Parallel_verticalCausalFilter(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a )
:outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + filterParam_a * result;
*(outputPTR) = result;
outputPTR+=nbColumns;
}
}
}
};
class Parallel_verticalAnticausalFilter_multGain
{
private:
float *outputFrame;
const unsigned int nbRows, nbColumns;
const float filterParam_a, filterParam_gain;
public:
Parallel_verticalAnticausalFilter_multGain(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a, const float gain)
:outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a), filterParam_gain(gain){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
float* offset=outputFrame+nbColumns*nbRows-nbColumns;
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=offset+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + filterParam_a * result;
*(outputPTR) = filterParam_gain*result;
outputPTR-=nbColumns;
}
}
}
};
class Parallel_localAdaptation
{
private:
const float *localLuminance, *inputFrame;
float *outputFrame;
const float localLuminanceFactor, localLuminanceAddon, maxInputValue;
public:
Parallel_localAdaptation(const float *localLum, const float *inputImg, float *bufferToProcess, const float localLuminanceFact, const float localLuminanceAdd, const float maxInputVal)
:localLuminance(localLum), inputFrame(inputImg),outputFrame(bufferToProcess), localLuminanceFactor(localLuminanceFact), localLuminanceAddon(localLuminanceAdd), maxInputValue(maxInputVal) {};
void operator()( const tbb::blocked_range<size_t>& r ) const {
const float *localLuminancePTR=localLuminance+r.begin();
const float *inputFramePTR=inputFrame+r.begin();
float *outputFramePTR=outputFrame+r.begin();
for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
{
float X0=*(localLuminancePTR++)*localLuminanceFactor+localLuminanceAddon;
// TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...
*(outputFramePTR) = (maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
//std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
}
}
};
//////////////////////////////////////////
/// Specific filtering methods which manage non const spatial filtering parameter (used By retinacolor and LogProjectors)
class Parallel_horizontalAnticausalFilter_Irregular
{
private:
float *outputFrame;
const float *spatialConstantBuffer;
const unsigned int IDrowEnd, nbColumns;
public:
Parallel_horizontalAnticausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int idEnd, const unsigned int nbCols)
:outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), IDrowEnd(idEnd), nbColumns(nbCols){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
register const float* spatialConstantPTR=spatialConstantBuffer+(IDrowEnd-IDrow)*(nbColumns)-1;
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(outputPTR)+ *(spatialConstantPTR--)* result;
*(outputPTR--) = result;
}
}
}
};
class Parallel_verticalCausalFilter_Irregular
{
private:
float *outputFrame;
const float *spatialConstantBuffer;
const unsigned int nbRows, nbColumns;
public:
Parallel_verticalCausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int nbRws, const unsigned int nbCols)
:outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), nbRows(nbRws), nbColumns(nbCols){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
register const float* spatialConstantPTR=spatialConstantBuffer+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + *(spatialConstantPTR) * result;
*(outputPTR) = result;
outputPTR+=nbColumns;
spatialConstantPTR+=nbColumns;
}
}
}
};
#endif
};
......
......@@ -153,6 +153,9 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons
void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *OPL_OFF)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient), tbb::auto_partitioner());
#else
register const float *OPL_ON_PTR=OPL_ON;
register const float *OPL_OFF_PTR=OPL_OFF;
register float *previousInput_ON_PTR= &_previousInput_ON[0];
......@@ -175,6 +178,7 @@ void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
}
#endif
}
// launch filter that runs all the IPL filter
......
......@@ -190,10 +190,52 @@ private:
// varialbles
float _temporalCoefficient;
// amacrine cells filter : high pass temporal filter
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
// amacrine cells filter : high pass temporal filter
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
class Parallel_amacrineCellsComputing
{
private:
const float *OPL_ON, *OPL_OFF;
float *previousInput_ON, *previousInput_OFF, *amacrinCellsTempOutput_ON, *amacrinCellsTempOutput_OFF;
const float temporalCoefficient;
public:
Parallel_amacrineCellsComputing(const float *OPL_ON_PTR, const float *OPL_OFF_PTR, float *previousInput_ON_PTR, float *previousInput_OFF_PTR, float *amacrinCellsTempOutput_ON_PTR, float *amacrinCellsTempOutput_OFF_PTR, float temporalCoefficientVal)
:OPL_ON(OPL_ON_PTR), OPL_OFF(OPL_OFF_PTR), previousInput_ON(previousInput_ON_PTR), previousInput_OFF(previousInput_OFF_PTR), amacrinCellsTempOutput_ON(amacrinCellsTempOutput_ON_PTR), amacrinCellsTempOutput_OFF(amacrinCellsTempOutput_OFF_PTR), temporalCoefficient(temporalCoefficientVal) {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
register const float *OPL_ON_PTR=OPL_ON+r.begin();
register const float *OPL_OFF_PTR=OPL_OFF+r.begin();
register float *previousInput_ON_PTR= previousInput_ON+r.begin();
register float *previousInput_OFF_PTR= previousInput_OFF+r.begin();
register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.begin();
register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.begin();
for (unsigned int IDpixel=r.begin() ; IDpixel!=r.end(); ++IDpixel)
{
/* Compute ON and OFF amacrin cells high pass temporal filter */
float magnoXonPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_ON_PTR+ *OPL_ON_PTR-*previousInput_ON_PTR);
*(amacrinCellsTempOutput_ON_PTR++)=((float)(magnoXonPixelResult>0))*magnoXonPixelResult;
float magnoXoffPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_OFF_PTR+ *OPL_OFF_PTR-*previousInput_OFF_PTR);
*(amacrinCellsTempOutput_OFF_PTR++)=((float)(magnoXoffPixelResult>0))*magnoXoffPixelResult;
/* prepare next loop */
*(previousInput_ON_PTR++)=*(OPL_ON_PTR++);
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
}
}
};
#endif
};
}
......
......@@ -199,17 +199,20 @@ const std::valarray<float> &ParvoRetinaFilter::runFilter(const std::valarray<flo
return (*_parvocellularOutputONminusOFF);
}
void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requires many buffer accesses, parallelizing can increase bandwith & core efficacy
{
// loop that makes the difference between photoreceptor cells output and horizontal cells
// positive part goes on the ON way, negative pat goes on the OFF way
register float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
register float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
register float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
register float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
register float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
register float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0]), tbb::auto_partitioner());
#else
float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel)
......@@ -222,6 +225,7 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
}
#endif
}
}
......@@ -216,6 +216,45 @@ private:
// private functions
void _OPL_OnOffWaysComputing();
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
class Parallel_OPL_OnOffWaysComputing
{
private:
float *photoreceptorsOutput, *horizontalCellsOutput, *bipolarCellsON, *bipolarCellsOFF, *parvocellularOutputON, *parvocellularOutputOFF;
public:
Parallel_OPL_OnOffWaysComputing(float *photoreceptorsOutput_PTR, float *horizontalCellsOutput_PTR, float *bipolarCellsON_PTR, float *bipolarCellsOFF_PTR, float *parvocellularOutputON_PTR, float *parvocellularOutputOFF_PTR)
:photoreceptorsOutput(photoreceptorsOutput_PTR), horizontalCellsOutput(horizontalCellsOutput_PTR), bipolarCellsON(bipolarCellsON_PTR), bipolarCellsOFF(bipolarCellsOFF_PTR), parvocellularOutputON(parvocellularOutputON_PTR), parvocellularOutputOFF(parvocellularOutputOFF_PTR) {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.begin();
float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.begin();
float *bipolarCellsON_PTR = bipolarCellsON+r.begin();
float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.begin();
float *parvocellularOutputON_PTR= parvocellularOutputON+r.begin();
float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.begin();
for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel)
{
float pixelDifference = *(photoreceptorsOutput_PTR++) -*(horizontalCellsOutput_PTR++);
// test condition to allow write pixelDifference in ON or OFF buffer and 0 in the over
float isPositive=(float) (pixelDifference>0.0f);
// ON and OFF channels writing step
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
}
}
};
#endif
};
}
#endif
......
......@@ -89,7 +89,7 @@ RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns
_demultiplexedColorFrame(NBrows*NBcolumns*3),
_chrominance(NBrows*NBcolumns*3),
_colorLocalDensity(NBrows*NBcolumns*3),
_imageGradient(NBrows*NBcolumns*3)
_imageGradient(NBrows*NBcolumns*2)
{
// link to parent buffers (let's recycle !)
_luminance=&_filterOutput;
......@@ -126,12 +126,12 @@ RetinaColor::~RetinaColor()
void RetinaColor::clearAllBuffers()
{
BasicRetinaFilter::clearAllBuffers();
_tempMultiplexedFrame=0;
_demultiplexedTempBuffer=0;
_tempMultiplexedFrame=0.f;
_demultiplexedTempBuffer=0.f;
_demultiplexedColorFrame=0;
_chrominance=0;
_imageGradient=1;
_demultiplexedColorFrame=0.f;
_chrominance=0.f;
_imageGradient=0.57f;
}
/**
......@@ -149,7 +149,7 @@ void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns
_demultiplexedColorFrame.resize(NBrows*NBcolumns*3);
_chrominance.resize(NBrows*NBcolumns*3);
_colorLocalDensity.resize(NBrows*NBcolumns*3);
_imageGradient.resize(NBrows*NBcolumns*3);
_imageGradient.resize(NBrows*NBcolumns*2);
// link to parent buffers (let's recycle !)
_luminance=&_filterOutput;
......@@ -325,15 +325,15 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
}else
{
register const float *multiplexedColorFramePTR1= get_data(multiplexedColorFrame);
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR1)
register const float *multiplexedColorFramePTR= get_data(multiplexedColorFrame);
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR)
{
// normalize by photoreceptors density
float Cr=*(chrominancePTR)*_colorLocalDensity[indexc];
float Cg=*(chrominancePTR+_filterOutput.getNBpixels())*_colorLocalDensity[indexc+_filterOutput.getNBpixels()];
float Cb=*(chrominancePTR+_filterOutput.getDoubleNBpixels())*_colorLocalDensity[indexc+_filterOutput.getDoubleNBpixels()];
*luminance=(Cr+Cg+Cb)*_pG;
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR1 - *luminance;
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR - *luminance;
}
......@@ -349,8 +349,9 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getNBpixels());
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getDoubleNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getDoubleNBpixels());
for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage
_demultiplexedColorFrame[index] /= _chrominance[index];
/* for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage
_demultiplexedColorFrame[index] /= _chrominance[index];*/
_demultiplexedColorFrame/=_chrominance; // more optimal ;o)
// compute and substract the residual luminance
for (unsigned int index=0; index<_filterOutput.getNBpixels() ; ++index)
......@@ -432,6 +433,9 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
if (inputOutputBuffer==NULL)
inputOutputBuffer= &_demultiplexedColorFrame[0];
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue), tbb::auto_partitioner());
#else
register float *inputOutputBufferPTR=inputOutputBuffer;
for (register unsigned int jf = 0; jf < _filterOutput.getNBpixels()*3; ++jf, ++inputOutputBufferPTR)
{
......@@ -440,6 +444,7 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
else if (*inputOutputBufferPTR<0)
*inputOutputBufferPTR=0;
}
#endif
//std::cout<<"RetinaColor::...normalizing RGB frame OK"<<std::endl;
}
......@@ -535,8 +540,8 @@ void RetinaColor::_applyRIFfilter(const float *sourceBuffer, float *destinationB
void RetinaColor::_getNormalizedContoursImage(const float *inputFrame, float *outputFrame)
{
float maxValue=0;
float normalisationFactor=1.f/3;
float maxValue=0.f;
float normalisationFactor=1.f/3.f;
for (unsigned int indexr=1 ; indexr<_filterOutput.getNBrows()-1; ++indexr)
{
for (unsigned int indexc=1 ; indexc<_filterOutput.getNBcolumns()-1; ++indexc)
......@@ -564,19 +569,23 @@ void RetinaColor::_adaptiveSpatialLPfilter(const float *inputFrame, float *outpu
_gain = (1-0.57f)*(1-0.57f)*(1-0.06f)*(1-0.06f);
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
// -> horizontal filters work with the first layer of imageGradient
_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, 0, _filterOutput.getNBrows());
_adaptiveHorizontalAnticausalFilter(outputFrame, 0, _filterOutput.getNBrows());
_adaptiveVerticalCausalFilter(outputFrame, 0, _filterOutput.getNBcolumns());
_horizontalAnticausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBrows(), &_imageGradient[0]);
// -> horizontal filters work with the second layer of imageGradient
_verticalCausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBcolumns(), &_imageGradient[0]+_filterOutput.getNBpixels());
_adaptiveVerticalAnticausalFilter_multGain(outputFrame, 0, _filterOutput.getNBcolumns());
}
// horizontal causal filter which adds the input inside
// horizontal causal filter which adds the input inside... replaces the parent _horizontalCausalFilter_Irregular_addInput by avoiding a product for each pixel
void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns()), tbb::auto_partitioner());
#else
register float* outputPTR=outputFrame+IDrowStart*_filterOutput.getNBcolumns();
register const float* inputPTR=inputFrame+IDrowStart*_filterOutput.getNBcolumns();
register float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
register const float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float result=0;
......@@ -589,51 +598,17 @@ void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFra
}
// std::cout<<" "<<std::endl;
}
#endif
}
// horizontal anticausal filter (basic way, no add on)
void RetinaColor::_adaptiveHorizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
register float *imageGradientPTR= &_imageGradient[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float result=0;
for (unsigned int index=0; index<_filterOutput.getNBcolumns(); ++index)
{
result = *(outputPTR)+ (*imageGradientPTR)* result;
*(outputPTR--) = result;
--imageGradientPTR;
}
}
}
// vertical anticausal filter
void RetinaColor::_adaptiveVerticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
register float *imageGradientPTR= &_imageGradient[0]+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
*(outputPTR) = result;
outputPTR+=_filterOutput.getNBcolumns();
imageGradientPTR+=_filterOutput.getNBcolumns();
}
}
}
// vertical anticausal filter which multiplies the output by _gain
// vertical anticausal filter which multiplies the output by _gain... replaces the parent _verticalAnticausalFilter_multGain by avoiding a product for each pixel and taking into account the second layer of the _imageGradient buffer
void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain), tbb::auto_partitioner());
#else
float* outputOffset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()*2-_filterOutput.getNBcolumns();
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
......@@ -642,12 +617,13 @@ void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame,
register float *imageGradientPTR=gradOffset+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
result = *(outputPTR) + (*(imageGradientPTR)) * result;
*(outputPTR) = _gain*result;
outputPTR-=_filterOutput.getNBcolumns();
imageGradientPTR-=_filterOutput.getNBcolumns();
}
}
#endif
}
///////////////////////////
......
......@@ -248,9 +248,7 @@ protected:
void _getNormalizedContoursImage(const float *inputFrame, float *outputFrame);
// -> special adaptive filters dedicated to low pass filtering on the chrominance (skeeps filtering on the edges)
void _adaptiveSpatialLPfilter(const float *inputFrame, float *outputFrame);
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
void _adaptiveHorizontalAnticausalFilter(float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
void _adaptiveVerticalCausalFilter(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); // TBB parallelized
void _adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
void _computeGradient(const float *luminance);
void _normalizeOutputs_0_maxOutputValue(void);
......@@ -258,6 +256,84 @@ protected:
// color space transform
void _applyImageColorSpaceConversion(const std::valarray<float> &inputFrame, std::valarray<float> &outputFrame, const float *transformTable);
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
/* Template :
class
{
private:
public:
Parallel_()
: {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
}
}:
*/
class Parallel_adaptiveHorizontalCausalFilter_addInput
{
private:
float *outputFrame;
const float *inputFrame, *imageGradient;
const unsigned int nbColumns;
public:
Parallel_adaptiveHorizontalCausalFilter_addInput(const float *inputImg, float *bufferToProcess, const float *imageGrad, const unsigned int nbCols)
:outputFrame(bufferToProcess), inputFrame(inputImg), imageGradient(imageGrad), nbColumns(nbCols) {};
void operator()( const tbb::blocked_range<size_t>& r ) const {
register float* outputPTR=outputFrame+r.begin()*nbColumns;
register const float* inputPTR=inputFrame+r.begin()*nbColumns;
register const float *imageGradientPTR= imageGradient+r.begin()*nbColumns;
for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(inputPTR++) + (*imageGradientPTR++)* result;
*(outputPTR++) = result;
}
}
}
};
class Parallel_adaptiveVerticalAnticausalFilter_multGain
{
private:
float *outputFrame;
const float *imageGradient;
const unsigned int nbRows, nbColumns;
const float filterParam_gain;
public:
Parallel_adaptiveVerticalAnticausalFilter_multGain(float *bufferToProcess, const float *imageGrad, const unsigned int nbRws, const unsigned int nbCols, const float gain)
:outputFrame(bufferToProcess), imageGradient(imageGrad), nbRows(nbRws), nbColumns(nbCols), filterParam_gain(gain){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
float* offset=outputFrame+nbColumns*nbRows-nbColumns;
const float* gradOffset= imageGradient+nbColumns*nbRows-nbColumns;
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=offset+IDcolumn;
register const float *imageGradientPTR=gradOffset+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + *(imageGradientPTR) * result;
*(outputPTR) = filterParam_gain*result;
outputPTR-=nbColumns;
imageGradientPTR-=nbColumns;
}
}
}
};
#endif
};
}
......
......@@ -70,6 +70,38 @@
#include <iostream>
#include <cmath>
//// If TBB is used
// ==> then include required includes
#ifdef HAVE_TBB
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
// ==> declare usefull generic tools
template <class type>
class Parallel_clipBufferValues
{
private:
type *bufferToClip;
const type minValue, maxValue;
public:
Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max)
: bufferToClip(bufferToProcess), minValue(min), maxValue(max){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
register type *inputOutputBufferPTR=bufferToClip+r.begin();
for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR)
{
if (*inputOutputBufferPTR>maxValue)
*inputOutputBufferPTR=maxValue;
else if (*inputOutputBufferPTR<minValue)
*inputOutputBufferPTR=minValue;
}
}
};
#endif
//#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information
namespace cv
......@@ -351,21 +383,25 @@ public:
}
}
std::cout<<"Tdebug"<<std::endl;
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
// clipping values outside than the updated thresholds
bufferPTR=this->Buffer();
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
{
if (*bufferPTR<updatedLowValue)
*bufferPTR=updatedLowValue;
else if (*bufferPTR>updatedHighValue)
*bufferPTR=updatedHighValue;
}
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
std::cout<<"Tdebug"<<std::endl;
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
// clipping values outside than the updated thresholds
bufferPTR=this->Buffer();
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
tbb::parallel_for(tbb::blocked_range<size_t>(0,this->size()), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner());
#else
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
{
if (*bufferPTR<updatedLowValue)
*bufferPTR=updatedLowValue;
else if (*bufferPTR>updatedHighValue)
*bufferPTR=updatedHighValue;
}
#endif
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment