Commit 12999416 authored by Leonid Beynenson's avatar Leonid Beynenson

Made small optimization: for some processors using float accumulator gives speedup.

parent 6aabf72b
...@@ -982,24 +982,47 @@ inline int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvalu ...@@ -982,24 +982,47 @@ inline int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvalu
CascadeClassifier::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0]; CascadeClassifier::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0];
CascadeClassifier::Data::Stage* cascadeStages = &cascade.data.stages[0]; CascadeClassifier::Data::Stage* cascadeStages = &cascade.data.stages[0];
#ifdef HAVE_TEGRA_OPTIMIZATION
float tmp; // float accumulator -- float operations are quicker
#endif
for( int si = 0; si < nstages; si++ ) for( int si = 0; si < nstages; si++ )
{ {
CascadeClassifier::Data::Stage& stage = cascadeStages[si]; CascadeClassifier::Data::Stage& stage = cascadeStages[si];
int wi, ntrees = stage.ntrees; int wi, ntrees = stage.ntrees;
#ifdef HAVE_TEGRA_OPTIMIZATION
tmp = 0;
#else
sum = 0; sum = 0;
#endif
for( wi = 0; wi < ntrees; wi++ ) for( wi = 0; wi < ntrees; wi++ )
{ {
CascadeClassifier::Data::DTreeNode& node = cascadeNodes[nodeOfs]; CascadeClassifier::Data::DTreeNode& node = cascadeNodes[nodeOfs];
int c = featureEvaluator(node.featureIdx); int c = featureEvaluator(node.featureIdx);
const int* subset = &cascadeSubsets[nodeOfs*subsetSize]; const int* subset = &cascadeSubsets[nodeOfs*subsetSize];
#ifdef HAVE_TEGRA_OPTIMIZATION
tmp += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1];
#else
sum += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1]; sum += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1];
#endif
nodeOfs++; nodeOfs++;
leafOfs += 2; leafOfs += 2;
} }
#ifdef HAVE_TEGRA_OPTIMIZATION
if( tmp < stage.threshold ) {
sum = (double)tmp;
return -si;
}
#else
if( sum < stage.threshold ) if( sum < stage.threshold )
return -si; return -si;
#endif
} }
#ifdef HAVE_TEGRA_OPTIMIZATION
sum = (double)tmp;
#endif
return 1; return 1;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment