Commit e4965b14 authored by Andrey Kamaev

Fixed discrepancy between SSE and regular branches in FREAK

parent 739da31a
...@@ -83,7 +83,7 @@ ...@@ -83,7 +83,7 @@
# if defined WIN32 # if defined WIN32
# include <intrin.h> # include <intrin.h>
# endif # endif
# if __SSE2__ || !defined __GNUC__ # if defined __SSE2__ || !defined __GNUC__
# include <emmintrin.h> # include <emmintrin.h>
# endif # endif
#endif #endif
...@@ -304,7 +304,7 @@ enum { ...@@ -304,7 +304,7 @@ enum {
CV_INLINE int cvRound( double value ) CV_INLINE int cvRound( double value )
{ {
#if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && __SSE2__ && !defined __APPLE__) #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__ && !defined __APPLE__)
__m128d t = _mm_set_sd( value ); __m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t); return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
......
...@@ -150,7 +150,7 @@ static void updateContinuityFlag(Mat& m) ...@@ -150,7 +150,7 @@ static void updateContinuityFlag(Mat& m)
break; break;
} }
int64 t = (int64)m.step[0]*m.size[0]; uint64 t = (uint64)m.step[0]*m.size[0];
if( j <= i && t == (size_t)t ) if( j <= i && t == (size_t)t )
m.flags |= Mat::CONTINUOUS_FLAG; m.flags |= Mat::CONTINUOUS_FLAG;
else else
......
...@@ -44,16 +44,6 @@ ...@@ -44,16 +44,6 @@
namespace cv namespace cv
{ {
#if CV_SSSE3
// binary: 10000000 => char: 128 or hex: 0x80
static const __m128i binMask = _mm_set_epi8(0x80, 0x80, 0x80,
0x80, 0x80, 0x80,
0x80, 0x80, 0x80,
0x80, 0x80, 0x80,
0x80, 0x80, 0x80,
0x80);
#endif
static const double FREAK_SQRT2 = 1.4142135623731; static const double FREAK_SQRT2 = 1.4142135623731;
static const double FREAK_INV_SQRT2 = 1.0 / FREAK_SQRT2; static const double FREAK_INV_SQRT2 = 1.0 / FREAK_SQRT2;
static const double FREAK_LOG2 = 0.693147180559945; static const double FREAK_LOG2 = 0.693147180559945;
...@@ -234,13 +224,6 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat ...@@ -234,13 +224,6 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
((FREAK*)this)->buildPattern(); ((FREAK*)this)->buildPattern();
#if CV_SSSE3
register __m128i operand1;
register __m128i operand2;
register __m128i workReg;
register __m128i result128;
#endif
Mat imgIntegral; Mat imgIntegral;
integral(image, imgIntegral); integral(image, imgIntegral);
std::vector<int> kpScaleIdx(keypoints.size()); // used to save pattern scale index corresponding to each keypoints std::vector<int> kpScaleIdx(keypoints.size()); // used to save pattern scale index corresponding to each keypoints
...@@ -292,8 +275,13 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat ...@@ -292,8 +275,13 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
if( !extAll ) { if( !extAll ) {
// extract the best comparisons only // extract the best comparisons only
descriptors = cv::Mat::zeros(keypoints.size(), FREAK_NB_PAIRS/8, CV_8U); descriptors = cv::Mat::zeros(keypoints.size(), FREAK_NB_PAIRS/8, CV_8U);
#if CV_SSSE3 #if CV_SSE2
__m128i* ptr= (__m128i*) (descriptors.data+(keypoints.size()-1)*descriptors.step[0]); __m128i* ptr= (__m128i*) (descriptors.data+(keypoints.size()-1)*descriptors.step[0]);
// binary: 10000000 => char: 128 or hex: 0x80
const __m128i binMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80);
#else #else
std::bitset<FREAK_NB_PAIRS>* ptr = (std::bitset<FREAK_NB_PAIRS>*) (descriptors.data+(keypoints.size()-1)*descriptors.step[0]); std::bitset<FREAK_NB_PAIRS>* ptr = (std::bitset<FREAK_NB_PAIRS>*) (descriptors.data+(keypoints.size()-1)*descriptors.step[0]);
#endif #endif
...@@ -329,25 +317,52 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat ...@@ -329,25 +317,52 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
for( int i = FREAK_NB_POINTS; i--; ) { for( int i = FREAK_NB_POINTS; i--; ) {
pointsValue[i] = meanIntensity(image, imgIntegral, keypoints[k].pt.x,keypoints[k].pt.y, kpScaleIdx[k], thetaIdx, i); pointsValue[i] = meanIntensity(image, imgIntegral, keypoints[k].pt.x,keypoints[k].pt.y, kpScaleIdx[k], thetaIdx, i);
} }
#if CV_SSSE3 #if CV_SSE2
// extracting descriptor by blocks of 128 bits using SSE instructions
// note that comparisons order is modified in each block (but first 128 comparisons remain globally the same-->does not affect the 128,384 bits segmented matching strategy) // note that comparisons order is modified in each block (but first 128 comparisons remain globally the same-->does not affect the 128,384 bits segmented matching strategy)
int cnt(0); int cnt = 0;
for( int n = 4; n-- ; ) { for( int n = FREAK_NB_PAIRS/128; n-- ; )
result128 = _mm_setzero_si128(); {
for( int m = 8; m--; cnt+=16 ) { __m128i result128 = _mm_setzero_si128();
operand1 = _mm_set_epi8(pointsValue[descriptionPairs[cnt].i],pointsValue[descriptionPairs[cnt+1].i],pointsValue[descriptionPairs[cnt+2].i],pointsValue[descriptionPairs[cnt+3].i], for( int m = 128/16; m--; cnt += 16 )
pointsValue[descriptionPairs[cnt+4].i],pointsValue[descriptionPairs[cnt+5].i],pointsValue[descriptionPairs[cnt+6].i],pointsValue[descriptionPairs[cnt+7].i], {
pointsValue[descriptionPairs[cnt+8].i],pointsValue[descriptionPairs[cnt+9].i],pointsValue[descriptionPairs[cnt+10].i],pointsValue[descriptionPairs[cnt+11].i], __m128i operand1 = _mm_set_epi8(
pointsValue[descriptionPairs[cnt+12].i],pointsValue[descriptionPairs[cnt+13].i],pointsValue[descriptionPairs[cnt+14].i],pointsValue[descriptionPairs[cnt+15].i]); pointsValue[descriptionPairs[cnt+0].i],
pointsValue[descriptionPairs[cnt+1].i],
operand2 = _mm_set_epi8(pointsValue[descriptionPairs[cnt].j],pointsValue[descriptionPairs[cnt+1].j],pointsValue[descriptionPairs[cnt+2].j],pointsValue[descriptionPairs[cnt+3].j], pointsValue[descriptionPairs[cnt+2].i],
pointsValue[descriptionPairs[cnt+4].j],pointsValue[descriptionPairs[cnt+5].j],pointsValue[descriptionPairs[cnt+6].j],pointsValue[descriptionPairs[cnt+7].j], pointsValue[descriptionPairs[cnt+3].i],
pointsValue[descriptionPairs[cnt+8].j],pointsValue[descriptionPairs[cnt+9].j],pointsValue[descriptionPairs[cnt+10].j],pointsValue[descriptionPairs[cnt+11].j], pointsValue[descriptionPairs[cnt+4].i],
pointsValue[descriptionPairs[cnt+12].j],pointsValue[descriptionPairs[cnt+13].j],pointsValue[descriptionPairs[cnt+14].j],pointsValue[descriptionPairs[cnt+15].j]); pointsValue[descriptionPairs[cnt+5].i],
pointsValue[descriptionPairs[cnt+6].i],
workReg = _mm_min_epu8(operand1, operand2); // emulated "greater than" for UNSIGNED int pointsValue[descriptionPairs[cnt+7].i],
workReg = _mm_cmpeq_epi8(workReg, operand2); // emulated "greater than" for UNSIGNED int pointsValue[descriptionPairs[cnt+8].i],
pointsValue[descriptionPairs[cnt+9].i],
pointsValue[descriptionPairs[cnt+10].i],
pointsValue[descriptionPairs[cnt+11].i],
pointsValue[descriptionPairs[cnt+12].i],
pointsValue[descriptionPairs[cnt+13].i],
pointsValue[descriptionPairs[cnt+14].i],
pointsValue[descriptionPairs[cnt+15].i]);
__m128i operand2 = _mm_set_epi8(
pointsValue[descriptionPairs[cnt+0].j],
pointsValue[descriptionPairs[cnt+1].j],
pointsValue[descriptionPairs[cnt+2].j],
pointsValue[descriptionPairs[cnt+3].j],
pointsValue[descriptionPairs[cnt+4].j],
pointsValue[descriptionPairs[cnt+5].j],
pointsValue[descriptionPairs[cnt+6].j],
pointsValue[descriptionPairs[cnt+7].j],
pointsValue[descriptionPairs[cnt+8].j],
pointsValue[descriptionPairs[cnt+9].j],
pointsValue[descriptionPairs[cnt+10].j],
pointsValue[descriptionPairs[cnt+11].j],
pointsValue[descriptionPairs[cnt+12].j],
pointsValue[descriptionPairs[cnt+13].j],
pointsValue[descriptionPairs[cnt+14].j],
pointsValue[descriptionPairs[cnt+15].j]);
__m128i workReg = _mm_min_epu8(operand1, operand2); // emulated "not less than" for 8-bit UNSIGNED integers
workReg = _mm_cmpeq_epi8(workReg, operand2); // emulated "not less than" for 8-bit UNSIGNED integers
workReg = _mm_and_si128(_mm_srli_epi16(binMask, m), workReg); // merge the last 16 bits with the 128bits std::vector until full workReg = _mm_and_si128(_mm_srli_epi16(binMask, m), workReg); // merge the last 16 bits with the 128bits std::vector until full
result128 = _mm_or_si128(result128, workReg); result128 = _mm_or_si128(result128, workReg);
...@@ -355,10 +370,20 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat ...@@ -355,10 +370,20 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
(*ptr) = result128; (*ptr) = result128;
++ptr; ++ptr;
} }
ptr-=8; ptr -= 8;
#else #else
for( int m = FREAK_NB_PAIRS; m--; ) { // extracting descriptor preserving the order of SSE version
ptr->set(m, pointsValue[descriptionPairs[m].i]> pointsValue[descriptionPairs[m].j ] ); int cnt = 0;
for( int n = 7; n < FREAK_NB_PAIRS; n += 128)
{
for( int m = 8; m--; )
{
int nm = n-m;
for(int kk = nm+15*8; kk >= nm; kk-=8, ++cnt)
{
ptr->set(kk, pointsValue[descriptionPairs[cnt].i] >= pointsValue[descriptionPairs[cnt].j]);
}
}
} }
--ptr; --ptr;
#endif #endif
...@@ -407,7 +432,7 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat ...@@ -407,7 +432,7 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
for( int i = 1; i < FREAK_NB_POINTS; ++i ) { for( int i = 1; i < FREAK_NB_POINTS; ++i ) {
//(generate all the pairs) //(generate all the pairs)
for( int j = 0; j < i; ++j ) { for( int j = 0; j < i; ++j ) {
ptr->set(cnt, pointsValue[i]>pointsValue[j] ); ptr->set(cnt, pointsValue[i] >= pointsValue[j] );
++cnt; ++cnt;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment