Commit 424bc609 authored by noob's avatar noob

Retina module is now parallelized thanks to the TBB library. Speed increase…

Retina module is now parallelized thanks to the TBB library. Speed increase expected on multicore plateforms
parent c78884c7
This diff is collapsed.
This diff is collapsed.
......@@ -153,6 +153,9 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons
void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *OPL_OFF)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient), tbb::auto_partitioner());
#else
register const float *OPL_ON_PTR=OPL_ON;
register const float *OPL_OFF_PTR=OPL_OFF;
register float *previousInput_ON_PTR= &_previousInput_ON[0];
......@@ -175,6 +178,7 @@ void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
}
#endif
}
// launch filter that runs all the IPL filter
......
......@@ -190,10 +190,52 @@ private:
// varialbles
float _temporalCoefficient;
// amacrine cells filter : high pass temporal filter
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
// amacrine cells filter : high pass temporal filter
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
class Parallel_amacrineCellsComputing
{
private:
const float *OPL_ON, *OPL_OFF;
float *previousInput_ON, *previousInput_OFF, *amacrinCellsTempOutput_ON, *amacrinCellsTempOutput_OFF;
const float temporalCoefficient;
public:
Parallel_amacrineCellsComputing(const float *OPL_ON_PTR, const float *OPL_OFF_PTR, float *previousInput_ON_PTR, float *previousInput_OFF_PTR, float *amacrinCellsTempOutput_ON_PTR, float *amacrinCellsTempOutput_OFF_PTR, float temporalCoefficientVal)
:OPL_ON(OPL_ON_PTR), OPL_OFF(OPL_OFF_PTR), previousInput_ON(previousInput_ON_PTR), previousInput_OFF(previousInput_OFF_PTR), amacrinCellsTempOutput_ON(amacrinCellsTempOutput_ON_PTR), amacrinCellsTempOutput_OFF(amacrinCellsTempOutput_OFF_PTR), temporalCoefficient(temporalCoefficientVal) {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
register const float *OPL_ON_PTR=OPL_ON+r.begin();
register const float *OPL_OFF_PTR=OPL_OFF+r.begin();
register float *previousInput_ON_PTR= previousInput_ON+r.begin();
register float *previousInput_OFF_PTR= previousInput_OFF+r.begin();
register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.begin();
register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.begin();
for (unsigned int IDpixel=r.begin() ; IDpixel!=r.end(); ++IDpixel)
{
/* Compute ON and OFF amacrin cells high pass temporal filter */
float magnoXonPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_ON_PTR+ *OPL_ON_PTR-*previousInput_ON_PTR);
*(amacrinCellsTempOutput_ON_PTR++)=((float)(magnoXonPixelResult>0))*magnoXonPixelResult;
float magnoXoffPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_OFF_PTR+ *OPL_OFF_PTR-*previousInput_OFF_PTR);
*(amacrinCellsTempOutput_OFF_PTR++)=((float)(magnoXoffPixelResult>0))*magnoXoffPixelResult;
/* prepare next loop */
*(previousInput_ON_PTR++)=*(OPL_ON_PTR++);
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
}
}
};
#endif
};
}
......
......@@ -199,17 +199,20 @@ const std::valarray<float> &ParvoRetinaFilter::runFilter(const std::valarray<flo
return (*_parvocellularOutputONminusOFF);
}
void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requires many buffer accesses, parallelizing can increase bandwith & core efficacy
{
// loop that makes the difference between photoreceptor cells output and horizontal cells
// positive part goes on the ON way, negative pat goes on the OFF way
register float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
register float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
register float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
register float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
register float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
register float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0]), tbb::auto_partitioner());
#else
float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel)
......@@ -222,6 +225,7 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
}
#endif
}
}
......@@ -216,6 +216,45 @@ private:
// private functions
void _OPL_OnOffWaysComputing();
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
class Parallel_OPL_OnOffWaysComputing
{
private:
float *photoreceptorsOutput, *horizontalCellsOutput, *bipolarCellsON, *bipolarCellsOFF, *parvocellularOutputON, *parvocellularOutputOFF;
public:
Parallel_OPL_OnOffWaysComputing(float *photoreceptorsOutput_PTR, float *horizontalCellsOutput_PTR, float *bipolarCellsON_PTR, float *bipolarCellsOFF_PTR, float *parvocellularOutputON_PTR, float *parvocellularOutputOFF_PTR)
:photoreceptorsOutput(photoreceptorsOutput_PTR), horizontalCellsOutput(horizontalCellsOutput_PTR), bipolarCellsON(bipolarCellsON_PTR), bipolarCellsOFF(bipolarCellsOFF_PTR), parvocellularOutputON(parvocellularOutputON_PTR), parvocellularOutputOFF(parvocellularOutputOFF_PTR) {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.begin();
float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.begin();
float *bipolarCellsON_PTR = bipolarCellsON+r.begin();
float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.begin();
float *parvocellularOutputON_PTR= parvocellularOutputON+r.begin();
float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.begin();
for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel)
{
float pixelDifference = *(photoreceptorsOutput_PTR++) -*(horizontalCellsOutput_PTR++);
// test condition to allow write pixelDifference in ON or OFF buffer and 0 in the over
float isPositive=(float) (pixelDifference>0.0f);
// ON and OFF channels writing step
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
}
}
};
#endif
};
}
#endif
......
......@@ -89,7 +89,7 @@ RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns
_demultiplexedColorFrame(NBrows*NBcolumns*3),
_chrominance(NBrows*NBcolumns*3),
_colorLocalDensity(NBrows*NBcolumns*3),
_imageGradient(NBrows*NBcolumns*3)
_imageGradient(NBrows*NBcolumns*2)
{
// link to parent buffers (let's recycle !)
_luminance=&_filterOutput;
......@@ -126,12 +126,12 @@ RetinaColor::~RetinaColor()
void RetinaColor::clearAllBuffers()
{
BasicRetinaFilter::clearAllBuffers();
_tempMultiplexedFrame=0;
_demultiplexedTempBuffer=0;
_tempMultiplexedFrame=0.f;
_demultiplexedTempBuffer=0.f;
_demultiplexedColorFrame=0;
_chrominance=0;
_imageGradient=1;
_demultiplexedColorFrame=0.f;
_chrominance=0.f;
_imageGradient=0.57f;
}
/**
......@@ -149,7 +149,7 @@ void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns
_demultiplexedColorFrame.resize(NBrows*NBcolumns*3);
_chrominance.resize(NBrows*NBcolumns*3);
_colorLocalDensity.resize(NBrows*NBcolumns*3);
_imageGradient.resize(NBrows*NBcolumns*3);
_imageGradient.resize(NBrows*NBcolumns*2);
// link to parent buffers (let's recycle !)
_luminance=&_filterOutput;
......@@ -325,15 +325,15 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
}else
{
register const float *multiplexedColorFramePTR1= get_data(multiplexedColorFrame);
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR1)
register const float *multiplexedColorFramePTR= get_data(multiplexedColorFrame);
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR)
{
// normalize by photoreceptors density
float Cr=*(chrominancePTR)*_colorLocalDensity[indexc];
float Cg=*(chrominancePTR+_filterOutput.getNBpixels())*_colorLocalDensity[indexc+_filterOutput.getNBpixels()];
float Cb=*(chrominancePTR+_filterOutput.getDoubleNBpixels())*_colorLocalDensity[indexc+_filterOutput.getDoubleNBpixels()];
*luminance=(Cr+Cg+Cb)*_pG;
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR1 - *luminance;
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR - *luminance;
}
......@@ -349,8 +349,9 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getNBpixels());
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getDoubleNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getDoubleNBpixels());
for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage
_demultiplexedColorFrame[index] /= _chrominance[index];
/* for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage
_demultiplexedColorFrame[index] /= _chrominance[index];*/
_demultiplexedColorFrame/=_chrominance; // more optimal ;o)
// compute and substract the residual luminance
for (unsigned int index=0; index<_filterOutput.getNBpixels() ; ++index)
......@@ -432,6 +433,9 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
if (inputOutputBuffer==NULL)
inputOutputBuffer= &_demultiplexedColorFrame[0];
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue), tbb::auto_partitioner());
#else
register float *inputOutputBufferPTR=inputOutputBuffer;
for (register unsigned int jf = 0; jf < _filterOutput.getNBpixels()*3; ++jf, ++inputOutputBufferPTR)
{
......@@ -440,6 +444,7 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
else if (*inputOutputBufferPTR<0)
*inputOutputBufferPTR=0;
}
#endif
//std::cout<<"RetinaColor::...normalizing RGB frame OK"<<std::endl;
}
......@@ -535,8 +540,8 @@ void RetinaColor::_applyRIFfilter(const float *sourceBuffer, float *destinationB
void RetinaColor::_getNormalizedContoursImage(const float *inputFrame, float *outputFrame)
{
float maxValue=0;
float normalisationFactor=1.f/3;
float maxValue=0.f;
float normalisationFactor=1.f/3.f;
for (unsigned int indexr=1 ; indexr<_filterOutput.getNBrows()-1; ++indexr)
{
for (unsigned int indexc=1 ; indexc<_filterOutput.getNBcolumns()-1; ++indexc)
......@@ -564,19 +569,23 @@ void RetinaColor::_adaptiveSpatialLPfilter(const float *inputFrame, float *outpu
_gain = (1-0.57f)*(1-0.57f)*(1-0.06f)*(1-0.06f);
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
// -> horizontal filters work with the first layer of imageGradient
_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, 0, _filterOutput.getNBrows());
_adaptiveHorizontalAnticausalFilter(outputFrame, 0, _filterOutput.getNBrows());
_adaptiveVerticalCausalFilter(outputFrame, 0, _filterOutput.getNBcolumns());
_horizontalAnticausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBrows(), &_imageGradient[0]);
// -> horizontal filters work with the second layer of imageGradient
_verticalCausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBcolumns(), &_imageGradient[0]+_filterOutput.getNBpixels());
_adaptiveVerticalAnticausalFilter_multGain(outputFrame, 0, _filterOutput.getNBcolumns());
}
// horizontal causal filter which adds the input inside
// horizontal causal filter which adds the input inside... replaces the parent _horizontalCausalFilter_Irregular_addInput by avoiding a product for each pixel
void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns()), tbb::auto_partitioner());
#else
register float* outputPTR=outputFrame+IDrowStart*_filterOutput.getNBcolumns();
register const float* inputPTR=inputFrame+IDrowStart*_filterOutput.getNBcolumns();
register float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
register const float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float result=0;
......@@ -589,51 +598,17 @@ void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFra
}
// std::cout<<" "<<std::endl;
}
#endif
}
// horizontal anticausal filter (basic way, no add on)
void RetinaColor::_adaptiveHorizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
register float *imageGradientPTR= &_imageGradient[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float result=0;
for (unsigned int index=0; index<_filterOutput.getNBcolumns(); ++index)
{
result = *(outputPTR)+ (*imageGradientPTR)* result;
*(outputPTR--) = result;
--imageGradientPTR;
}
}
}
// vertical anticausal filter
void RetinaColor::_adaptiveVerticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
register float *imageGradientPTR= &_imageGradient[0]+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
*(outputPTR) = result;
outputPTR+=_filterOutput.getNBcolumns();
imageGradientPTR+=_filterOutput.getNBcolumns();
}
}
}
// vertical anticausal filter which multiplies the output by _gain
// vertical anticausal filter which multiplies the output by _gain... replaces the parent _verticalAnticausalFilter_multGain by avoiding a product for each pixel and taking into account the second layer of the _imageGradient buffer
void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain), tbb::auto_partitioner());
#else
float* outputOffset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()*2-_filterOutput.getNBcolumns();
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
......@@ -642,12 +617,13 @@ void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame,
register float *imageGradientPTR=gradOffset+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
result = *(outputPTR) + (*(imageGradientPTR)) * result;
*(outputPTR) = _gain*result;
outputPTR-=_filterOutput.getNBcolumns();
imageGradientPTR-=_filterOutput.getNBcolumns();
}
}
#endif
}
///////////////////////////
......
......@@ -248,9 +248,7 @@ protected:
void _getNormalizedContoursImage(const float *inputFrame, float *outputFrame);
// -> special adaptive filters dedicated to low pass filtering on the chrominance (skeeps filtering on the edges)
void _adaptiveSpatialLPfilter(const float *inputFrame, float *outputFrame);
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
void _adaptiveHorizontalAnticausalFilter(float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
void _adaptiveVerticalCausalFilter(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); // TBB parallelized
void _adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
void _computeGradient(const float *luminance);
void _normalizeOutputs_0_maxOutputValue(void);
......@@ -258,6 +256,84 @@ protected:
// color space transform
void _applyImageColorSpaceConversion(const std::valarray<float> &inputFrame, std::valarray<float> &outputFrame, const float *transformTable);
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
/* Template :
class
{
private:
public:
Parallel_()
: {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
}
}:
*/
class Parallel_adaptiveHorizontalCausalFilter_addInput
{
private:
float *outputFrame;
const float *inputFrame, *imageGradient;
const unsigned int nbColumns;
public:
Parallel_adaptiveHorizontalCausalFilter_addInput(const float *inputImg, float *bufferToProcess, const float *imageGrad, const unsigned int nbCols)
:outputFrame(bufferToProcess), inputFrame(inputImg), imageGradient(imageGrad), nbColumns(nbCols) {};
void operator()( const tbb::blocked_range<size_t>& r ) const {
register float* outputPTR=outputFrame+r.begin()*nbColumns;
register const float* inputPTR=inputFrame+r.begin()*nbColumns;
register const float *imageGradientPTR= imageGradient+r.begin()*nbColumns;
for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(inputPTR++) + (*imageGradientPTR++)* result;
*(outputPTR++) = result;
}
}
}
};
class Parallel_adaptiveVerticalAnticausalFilter_multGain
{
private:
float *outputFrame;
const float *imageGradient;
const unsigned int nbRows, nbColumns;
const float filterParam_gain;
public:
Parallel_adaptiveVerticalAnticausalFilter_multGain(float *bufferToProcess, const float *imageGrad, const unsigned int nbRws, const unsigned int nbCols, const float gain)
:outputFrame(bufferToProcess), imageGradient(imageGrad), nbRows(nbRws), nbColumns(nbCols), filterParam_gain(gain){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
float* offset=outputFrame+nbColumns*nbRows-nbColumns;
const float* gradOffset= imageGradient+nbColumns*nbRows-nbColumns;
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=offset+IDcolumn;
register const float *imageGradientPTR=gradOffset+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + *(imageGradientPTR) * result;
*(outputPTR) = filterParam_gain*result;
outputPTR-=nbColumns;
imageGradientPTR-=nbColumns;
}
}
}
};
#endif
};
}
......
......@@ -70,6 +70,38 @@
#include <iostream>
#include <cmath>
//// If TBB is used
// ==> then include required includes
#ifdef HAVE_TBB
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
// ==> declare usefull generic tools
template <class type>
class Parallel_clipBufferValues
{
private:
type *bufferToClip;
const type minValue, maxValue;
public:
Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max)
: bufferToClip(bufferToProcess), minValue(min), maxValue(max){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
register type *inputOutputBufferPTR=bufferToClip+r.begin();
for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR)
{
if (*inputOutputBufferPTR>maxValue)
*inputOutputBufferPTR=maxValue;
else if (*inputOutputBufferPTR<minValue)
*inputOutputBufferPTR=minValue;
}
}
};
#endif
//#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information
namespace cv
......@@ -351,21 +383,25 @@ public:
}
}
std::cout<<"Tdebug"<<std::endl;
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
// clipping values outside than the updated thresholds
bufferPTR=this->Buffer();
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
{
if (*bufferPTR<updatedLowValue)
*bufferPTR=updatedLowValue;
else if (*bufferPTR>updatedHighValue)
*bufferPTR=updatedHighValue;
}
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
std::cout<<"Tdebug"<<std::endl;
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
// clipping values outside than the updated thresholds
bufferPTR=this->Buffer();
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
tbb::parallel_for(tbb::blocked_range<size_t>(0,this->size()), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner());
#else
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
{
if (*bufferPTR<updatedLowValue)
*bufferPTR=updatedLowValue;
else if (*bufferPTR>updatedHighValue)
*bufferPTR=updatedHighValue;
}
#endif
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment