Fixed discrepancy between SSE and regular baranches in FREAK

e4965b14 · Andrey Kamaev · 739da31a · e4965b14 · e4965b14 · e4965b14
Commit e4965b14 authored Jun 28, 2012 by Andrey Kamaev
Hide whitespace changes
Inline Side-by-side

Showing with 68 additions and 43 deletions

types_c.h modules/core/include/opencv2/core/types_c.h +2 -2

matrix.cpp modules/core/src/matrix.cpp +1 -1

freak.cpp modules/features2d/src/freak.cpp +65 -40

No files found.
--- a/modules/core/include/opencv2/core/types_c.h
+++ b/modules/core/include/opencv2/core/types_c.h
@@ -83,7 +83,7 @@
 #  if defined WIN32
 #    include <intrin.h>
 #  endif
-#  if __SSE2__ || !defined __GNUC__
+#  if defined __SSE2__ || !defined __GNUC__
 #    include <emmintrin.h>
 #  endif
 #endif
@@ -304,7 +304,7 @@ enum {
 CV_INLINE  int  cvRound( double value )
 {
-#if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && __SSE2__ && !defined __APPLE__)
+#if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__ && !defined __APPLE__)
    __m128d t = _mm_set_sd( value );
    return _mm_cvtsd_si32(t);
 #elif defined _MSC_VER && defined _M_IX86

--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -150,7 +150,7 @@ static void updateContinuityFlag(Mat& m)
            break;
    }
-    int64 t = (int64)m.step[0]*m.size[0];
+    uint64 t = (uint64)m.step[0]*m.size[0];
    if( j <= i && t == (size_t)t )
        m.flags |= Mat::CONTINUOUS_FLAG;
    else

--- a/modules/features2d/src/freak.cpp
+++ b/modules/features2d/src/freak.cpp
@@ -44,16 +44,6 @@
 namespace cv
 {
-#if CV_SSSE3
-// binary: 10000000 => char: 128 or hex: 0x80
-static const __m128i binMask = _mm_set_epi8(0x80, 0x80, 0x80,
-                                            0x80, 0x80, 0x80,
-                                            0x80, 0x80, 0x80,
-                                            0x80, 0x80, 0x80,
-                                            0x80, 0x80, 0x80,
-                                            0x80);
-#endif
 static const double FREAK_SQRT2 = 1.4142135623731;
 static const double FREAK_INV_SQRT2 = 1.0 / FREAK_SQRT2;
 static const double FREAK_LOG2 = 0.693147180559945;
@@ -234,13 +224,6 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
    ((FREAK*)this)->buildPattern();
-#if CV_SSSE3
-    register __m128i operand1;
-    register __m128i operand2;
-    register __m128i workReg;
-    register __m128i result128;
-#endif
    Mat imgIntegral;
    integral(image, imgIntegral);
    std::vector<int> kpScaleIdx(keypoints.size()); // used to save pattern scale index corresponding to each keypoints
@@ -292,8 +275,13 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
    if( !extAll ) {
        // extract the best comparisons only
        descriptors = cv::Mat::zeros(keypoints.size(), FREAK_NB_PAIRS/8, CV_8U);
-#if CV_SSSE3
+#if CV_SSE2
        __m128i* ptr= (__m128i*) (descriptors.data+(keypoints.size()-1)*descriptors.step[0]);
+        // binary: 10000000 => char: 128 or hex: 0x80
+        const __m128i binMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80,
+                                             0x80, 0x80, 0x80, 0x80,
+                                             0x80, 0x80, 0x80, 0x80,
+                                             0x80, 0x80, 0x80, 0x80);
 #else
        std::bitset<FREAK_NB_PAIRS>* ptr = (std::bitset<FREAK_NB_PAIRS>*) (descriptors.data+(keypoints.size()-1)*descriptors.step[0]);
 #endif
@@ -329,25 +317,52 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
            for( int i = FREAK_NB_POINTS; i--; ) {
                pointsValue[i] = meanIntensity(image, imgIntegral, keypoints[k].pt.x,keypoints[k].pt.y, kpScaleIdx[k], thetaIdx, i);
            }
-#if CV_SSSE3
+#if CV_SSE2
-            // extracting descriptor by blocks of 128 bits using SSE instructions
            // note that comparisons order is modified in each block (but first 128 comparisons remain globally the same-->does not affect the 128,384 bits segmanted matching strategy)
-            int cnt(0);
+            int cnt = 0;
-            for( int n = 4; n-- ; ) {
+            for( int n = FREAK_NB_PAIRS/128; n-- ; )
-                result128 = _mm_setzero_si128();
+            {
-                for( int m = 8; m--; cnt+=16 ) {
+                __m128i result128 = _mm_setzero_si128();
-                    operand1 = _mm_set_epi8(pointsValue[descriptionPairs[cnt].i],pointsValue[descriptionPairs[cnt+1].i],pointsValue[descriptionPairs[cnt+2].i],pointsValue[descriptionPairs[cnt+3].i],
+                for( int m = 128/16; m--; cnt += 16 )
-                                          pointsValue[descriptionPairs[cnt+4].i],pointsValue[descriptionPairs[cnt+5].i],pointsValue[descriptionPairs[cnt+6].i],pointsValue[descriptionPairs[cnt+7].i],
+                {
-                                          pointsValue[descriptionPairs[cnt+8].i],pointsValue[descriptionPairs[cnt+9].i],pointsValue[descriptionPairs[cnt+10].i],pointsValue[descriptionPairs[cnt+11].i],
+                    __m128i operand1 = _mm_set_epi8(
-                                          pointsValue[descriptionPairs[cnt+12].i],pointsValue[descriptionPairs[cnt+13].i],pointsValue[descriptionPairs[cnt+14].i],pointsValue[descriptionPairs[cnt+15].i]);
+                        pointsValue[descriptionPairs[cnt+0].i],
+                        pointsValue[descriptionPairs[cnt+1].i],
-                    operand2 = _mm_set_epi8(pointsValue[descriptionPairs[cnt].j],pointsValue[descriptionPairs[cnt+1].j],pointsValue[descriptionPairs[cnt+2].j],pointsValue[descriptionPairs[cnt+3].j],
+                        pointsValue[descriptionPairs[cnt+2].i],
-                                          pointsValue[descriptionPairs[cnt+4].j],pointsValue[descriptionPairs[cnt+5].j],pointsValue[descriptionPairs[cnt+6].j],pointsValue[descriptionPairs[cnt+7].j],
+                        pointsValue[descriptionPairs[cnt+3].i],
-                                          pointsValue[descriptionPairs[cnt+8].j],pointsValue[descriptionPairs[cnt+9].j],pointsValue[descriptionPairs[cnt+10].j],pointsValue[descriptionPairs[cnt+11].j],
+                        pointsValue[descriptionPairs[cnt+4].i],
-                                          pointsValue[descriptionPairs[cnt+12].j],pointsValue[descriptionPairs[cnt+13].j],pointsValue[descriptionPairs[cnt+14].j],pointsValue[descriptionPairs[cnt+15].j]);
+                        pointsValue[descriptionPairs[cnt+5].i],
+                        pointsValue[descriptionPairs[cnt+6].i],
-                    workReg = _mm_min_epu8(operand1, operand2); // emulated "greater than" for UNSIGNED int
+                        pointsValue[descriptionPairs[cnt+7].i],
-                    workReg = _mm_cmpeq_epi8(workReg, operand2); // emulated "greater than" for UNSIGNED int
+                        pointsValue[descriptionPairs[cnt+8].i],
+                        pointsValue[descriptionPairs[cnt+9].i],
+                        pointsValue[descriptionPairs[cnt+10].i],
+                        pointsValue[descriptionPairs[cnt+11].i],
+                        pointsValue[descriptionPairs[cnt+12].i],
+                        pointsValue[descriptionPairs[cnt+13].i],
+                        pointsValue[descriptionPairs[cnt+14].i],
+                        pointsValue[descriptionPairs[cnt+15].i]);
+                    __m128i operand2 = _mm_set_epi8(
+                        pointsValue[descriptionPairs[cnt+0].j],
+                        pointsValue[descriptionPairs[cnt+1].j],
+                        pointsValue[descriptionPairs[cnt+2].j],
+                        pointsValue[descriptionPairs[cnt+3].j],
+                        pointsValue[descriptionPairs[cnt+4].j],
+                        pointsValue[descriptionPairs[cnt+5].j],
+                        pointsValue[descriptionPairs[cnt+6].j],
+                        pointsValue[descriptionPairs[cnt+7].j],
+                        pointsValue[descriptionPairs[cnt+8].j],
+                        pointsValue[descriptionPairs[cnt+9].j],
+                        pointsValue[descriptionPairs[cnt+10].j],
+                        pointsValue[descriptionPairs[cnt+11].j],
+                        pointsValue[descriptionPairs[cnt+12].j],
+                        pointsValue[descriptionPairs[cnt+13].j],
+                        pointsValue[descriptionPairs[cnt+14].j],
+                        pointsValue[descriptionPairs[cnt+15].j]);
+                    __m128i workReg = _mm_min_epu8(operand1, operand2); // emulated "not less than" for 8-bit UNSIGNED integers
+                    workReg = _mm_cmpeq_epi8(workReg, operand2);        // emulated "not less than" for 8-bit UNSIGNED integers
                    workReg = _mm_and_si128(_mm_srli_epi16(binMask, m), workReg); // merge the last 16 bits with the 128bits std::vector until full
                    result128 = _mm_or_si128(result128, workReg);
@@ -355,10 +370,20 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
                (*ptr) = result128;
                ++ptr;
            }
-            ptr-=8;
+            ptr -= 8;
 #else
-            for( int m = FREAK_NB_PAIRS; m--; ) {
+            // extracting descriptor preserving the order of SSE version
-                ptr->set(m, pointsValue[descriptionPairs[m].i]>  pointsValue[descriptionPairs[m].j ] );
+            int cnt = 0;
+            for( int n = 7; n < FREAK_NB_PAIRS; n += 128)
+            {
+                for( int m = 8; m--; )
+                {
+                    int nm = n-m;
+                    for(int kk = nm+15*8; kk >= nm; kk-=8, ++cnt)
+                    {
+                        ptr->set(kk, pointsValue[descriptionPairs[cnt].i] >= pointsValue[descriptionPairs[cnt].j]);
+                    }
+                }
            }
            --ptr;
 #endif
@@ -407,7 +432,7 @@ void FREAK::computeImpl( const Mat& image, std::vector<KeyPoint>& keypoints, Mat
            for( int i = 1; i < FREAK_NB_POINTS; ++i ) {
                //(generate all the pairs)
                for( int j = 0; j < i; ++j ) {
-                    ptr->set(cnt, pointsValue[i]>pointsValue[j] );
+                    ptr->set(cnt, pointsValue[i] >= pointsValue[j] );
                    ++cnt;
                }
            }