Commit 0cea828a authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #2006 from krodyush:pullreq/2.4-opt-131120-gfft

parents bc741ab2 ea0c9b7f
...@@ -1380,8 +1380,10 @@ namespace cv ...@@ -1380,8 +1380,10 @@ namespace cv
oclMat Dx_; oclMat Dx_;
oclMat Dy_; oclMat Dy_;
oclMat eig_; oclMat eig_;
oclMat eig_minmax_;
oclMat minMaxbuf_; oclMat minMaxbuf_;
oclMat tmpCorners_; oclMat tmpCorners_;
oclMat counter_;
}; };
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_, inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
......
This diff is collapsed.
...@@ -46,33 +46,26 @@ ...@@ -46,33 +46,26 @@
#ifndef WITH_MASK #ifndef WITH_MASK
#define WITH_MASK 0 #define WITH_MASK 0
#endif #endif
//macro to read eigenvalue matrix
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; #define GET_SRC_32F(_x, _y) ((__global const float*)(eig + (_y)*eig_pitch))[_x]
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
{
return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
}
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
{
return read_imagef(_eig, sampler, pt).x;
}
__kernel __kernel
void findCorners void findCorners
( (
image2d_t eig, __global const char* eig,
__global const char * mask, const int eig_pitch,
__global float2 * corners, __global const char* mask,
const int mask_strip,// in pixels __global float2* corners,
const float threshold, const int mask_strip,// in pixels
const int rows, __global const float* pMinMax,
const int cols, const float qualityLevel,
const int max_count, const int rows,
__global int * g_counter const int cols,
const int max_count,
__global int* g_counter
) )
{ {
float threshold = qualityLevel*pMinMax[1];
const int j = get_global_id(0); const int j = get_global_id(0);
const int i = get_global_id(1); const int i = get_global_id(1);
...@@ -82,39 +75,42 @@ __kernel ...@@ -82,39 +75,42 @@ __kernel
#endif #endif
) )
{ {
const float val = ELEM_INT2(eig, j, i); const float val = GET_SRC_32F(j, i);
if (val > threshold) if (val > threshold)
{ {
float maxVal = val; float maxVal = val;
maxVal = fmax(GET_SRC_32F(j - 1, i - 1), maxVal);
maxVal = fmax(GET_SRC_32F(j , i - 1), maxVal);
maxVal = fmax(GET_SRC_32F(j + 1, i - 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal); maxVal = fmax(GET_SRC_32F(j - 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal); maxVal = fmax(GET_SRC_32F(j + 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal); maxVal = fmax(GET_SRC_32F(j - 1, i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal); maxVal = fmax(GET_SRC_32F(j , i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal); maxVal = fmax(GET_SRC_32F(j + 1, i + 1), maxVal);
if (val == maxVal) if (val == maxVal)
{ {
const int ind = atomic_inc(g_counter); const int ind = atomic_inc(g_counter);
if (ind < max_count) if (ind < max_count)
corners[ind] = (float2)(j, i); {// pack and store eigenvalue and its coordinates
corners[ind].x = val;
corners[ind].y = as_float(j|(i<<16));
}
} }
} }
} }
} }
#undef GET_SRC_32F
//bitonic sort //bitonic sort
__kernel __kernel
void sortCorners_bitonicSort void sortCorners_bitonicSort
( (
image2d_t eig,
__global float2 * corners, __global float2 * corners,
const int count, const int count,
const int stage, const int stage,
...@@ -140,8 +136,8 @@ __kernel ...@@ -140,8 +136,8 @@ __kernel
const float2 leftPt = corners[leftId]; const float2 leftPt = corners[leftId];
const float2 rightPt = corners[rightId]; const float2 rightPt = corners[rightId];
const float leftVal = ELEM_FLT2(eig, leftPt); const float leftVal = leftPt.x;
const float rightVal = ELEM_FLT2(eig, rightPt); const float rightVal = rightPt.x;
const bool compareResult = leftVal > rightVal; const bool compareResult = leftVal > rightVal;
...@@ -152,124 +148,22 @@ __kernel ...@@ -152,124 +148,22 @@ __kernel
corners[rightId] = sortOrder ? greater : lesser; corners[rightId] = sortOrder ? greater : lesser;
} }
//selection sort for gfft // this is a simple, short serial kernel that performs a small reduction and some initialization work
//kernel is ported from Bolt library: // it does HOST-like work to avoid an additional sync with the HOST for this short task
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl // data - input/output float2.
// Local sort will first sort the elements of each workgroup using selection sort // input data are several (min,max) pairs
// its performance is O(n) // output data is one reduced (min,max) pair
__kernel // g_counter - counter that has to be initialized to 0 for the next findCorners call.
void sortCorners_selectionSortLocal __kernel void arithm_op_minMax_final(__global float * data, int groupnum,__global int * g_counter)
(
image2d_t eig,
__global float2 * corners,
const int count,
__local float2 * scratch
)
{ {
int i = get_local_id(0); // index in workgroup g_counter[0] = 0;
int numOfGroups = get_num_groups(0); // index in workgroup float minVal = data[0];
int groupID = get_group_id(0); float maxVal = data[groupnum];
int wg = get_local_size(0); // workgroup size = block size for(int i=1;i<groupnum;++i)
int n; // number of elements to be processed for this work group
int offset = groupID * wg;
int same = 0;
corners += offset;
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
float2 pt1, pt2;
pt1 = corners[min(i, n)];
scratch[i] = pt1;
barrier(CLK_LOCAL_MEM_FENCE);
if(i >= n)
{ {
return; minVal = min(minVal,data[i]);
maxVal = max(maxVal,data[i+groupnum]);
} }
data[0] = minVal;
float val1 = ELEM_FLT2(eig, pt1); data[1] = maxVal;
float val2; }
\ No newline at end of file
int pos = 0;
for (int j=0;j<n;++j)
{
pt2 = scratch[j];
val2 = ELEM_FLT2(eig, pt2);
if(val2 > val1)
pos++;//calculate the rank of this element in this work group
else
{
if(val1 > val2)
continue;
else
{
// val1 and val2 are same
same++;
}
}
}
for (int j=0; j< same; j++)
corners[pos + j] = pt1;
}
// Final pass of the selection sort over `corners`.
//
// Each work-item owns one entry of `corners` (its global position is
// group index * work-group size + local index). It looks up its own value
// through ELEM_FLT2(eig, pt), then walks over `corners` one work-group-sized
// chunk at a time and counts
//   - rank: entries whose value compares greater than its own,
//   - ties: entries whose value compares neither greater nor smaller.
// The scan of a chunk is abandoned at the first entry whose value is smaller
// than the work-item's own value. Finally the work-item's point is written to
// corners[rank .. rank + ties - 1].
//
// Parameters:
//   eig     - image the values are read from via ELEM_FLT2
//   corners - points to be ordered; read and written in place
//   count   - number of valid entries in `corners`
//
// NOTE(review): the early break looks like it relies on every chunk already
// being ordered from larger to smaller values by an earlier pass - confirm.
// NOTE(review): `corners` is both scanned and overwritten by this kernel, so
// other work-items may observe already-rewritten entries - confirm intended.
__kernel
void sortCorners_selectionSortFinal
(
    image2d_t eig,
    __global float2 * corners,
    const int count
)
{
    const int lid       = get_local_id(0);   // index inside the work-group
    const int groupSize = get_local_size(0); // work-items per work-group
    const int groupIdx  = get_group_id(0);   // index of this work-group
    const int groupCnt  = get_num_groups(0); // total number of work-groups

    // Work-items past the end of the data have nothing to place.
    const int chunkStart = get_group_id(0) * groupSize;
    if ((chunkStart + lid) >= count)
        return;

    // The last chunk holds whatever is left after the full-sized ones.
    const int lastGroup = groupCnt - 1;
    const int tailLen   = count - groupSize * lastGroup;

    const float2 myPt  = corners[groupIdx * groupSize + lid];
    const float  myVal = ELEM_FLT2(eig, myPt);

    int rank = 0; // entries with a strictly greater value
    int ties = 0; // entries that compare neither greater nor smaller

    for (int g = 0; g < groupCnt; ++g)
    {
        const int base = g * groupSize;
        const int len  = (g == lastGroup) ? tailLen : groupSize;

        for (int k = 0; k < len; ++k)
        {
            const float2 otherPt  = corners[base + k];
            const float  otherVal = ELEM_FLT2(eig, otherPt);

            // Stop scanning this chunk once a smaller value shows up.
            if (myVal > otherVal)
                break;

            if (otherVal > myVal)
                ++rank;
            else
                ++ties;
        }
    }

    // Fill every slot that belongs to this value.
    for (int t = 0; t < ties; ++t)
        corners[rank + t] = myPt;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment