Commit a6592b07, authored by acyen, committed by Vladislav Sovrasov

OCL SURF: Fix descriptor calculation.

parent cdbdb573
...@@ -875,9 +875,6 @@ inline float linearFilter( ...@@ -875,9 +875,6 @@ inline float linearFilter(
float centerX, float centerY, float win_offset, float centerX, float centerY, float win_offset,
float cos_dir, float sin_dir, float y, float x ) float cos_dir, float sin_dir, float y, float x )
{ {
x -= 0.5f;
y -= 0.5f;
float out = 0.0f; float out = 0.0f;
const int x1 = round(x); const int x1 = round(x);
...@@ -900,6 +897,60 @@ inline float linearFilter( ...@@ -900,6 +897,60 @@ inline float linearFilter(
return out; return out;
} }
// Area-weighted accumulation over an s-by-s window of samples.
//
// The window covers [x*s, x*s + s) along the x index and [y*s, y*s + s)
// along the y index. Samples whose integer index lies fully inside the window
// are weighted by 1/(s*s). The partially covered border rows/columns are
// additionally weighted by the covered fraction, and the four corners by the
// product of both fractions.
//
// centerX, centerY, win_offset, cos_dir and sin_dir are forwarded unchanged to
// readerGet; the y index is passed before the x index.
// NOTE(review): img_rows / img_cols are not referenced directly in this body;
// presumably they are consumed by the readerGet / imgTex macros -- confirm.
//
// Returns the weighted sum of all samples read.
inline float areaFilter( __PARAM_imgTex__, int img_rows, int img_cols,
                         float centerX, float centerY, float win_offset,
                         float cos_dir, float sin_dir, float x, float y, float s)
{
    // Fractional window bounds along each axis.
    const float left   = x * s;
    const float right  = left + s;
    const float top    = y * s;
    const float bottom = top + s;

    // Fully covered integer range: lower bound rounded toward +inf,
    // upper bound rounded toward -inf.
    const int colBegin = convert_int_rtp(left);
    const int colEnd   = convert_int_rtn(right);
    const int rowBegin = convert_int_rtp(top);
    const int rowEnd   = convert_int_rtn(bottom);

    const float scale = 1.f / (s * s);

    // Covered fraction of the partially overlapped sample on each side.
    const float fracLeft   = colBegin - left;
    const float fracRight  = right - colEnd;
    const float fracTop    = rowBegin - top;
    const float fracBottom = bottom - rowEnd;

    // Whether the window overlaps a partial sample on each side.
    const bool cutLeft   = colBegin > left;
    const bool cutRight  = colEnd < right;
    const bool cutTop    = rowBegin > top;
    const bool cutBottom = rowEnd < bottom;

    float acc = 0.f;

    // Fully covered rows: interior samples, then the partial left/right columns.
    for (int row = rowBegin; row < rowEnd; ++row)
    {
        for (int col = colBegin; col < colEnd; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, col) * scale;
        }

        if (cutLeft)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, (colBegin - 1)) * (fracLeft * scale);
        }

        if (cutRight)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, colEnd) * (fracRight * scale);
        }
    }

    // Partial row above the fully covered range.
    if (cutTop)
    {
        for (int col = colBegin; col < colEnd; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (rowBegin - 1), col) * (fracTop * scale);
        }
    }

    // Partial row below the fully covered range.
    if (cutBottom)
    {
        for (int col = colBegin; col < colEnd; ++col)
        {
            acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowEnd, col) * (fracBottom * scale);
        }
    }

    // Corners, in the order: top-left, top-right, bottom-right, bottom-left.
    if (cutTop && cutLeft)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (rowBegin - 1), (colBegin - 1)) * (fracTop * fracLeft * scale);
    }

    if (cutTop && cutRight)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (rowBegin - 1), colEnd) * (fracTop * fracRight * scale);
    }

    if (cutBottom && cutRight)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowEnd, colEnd) * (fracBottom * fracRight * scale);
    }

    if (cutBottom && cutLeft)
    {
        acc += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowEnd, (colBegin - 1)) * (fracBottom * fracLeft * scale);
    }

    return acc;
}
void calc_dx_dy( void calc_dx_dy(
__PARAM_imgTex__, __PARAM_imgTex__,
int img_rows, int img_cols, int img_rows, int img_cols,
...@@ -946,9 +997,18 @@ void calc_dx_dy( ...@@ -946,9 +997,18 @@ void calc_dx_dy(
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size; const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size; const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
if (s > 1)
{
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
areaFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
win_offset, cos_dir, sin_dir, xIndex, yIndex, s);
}
else
{
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] = s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY, linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
win_offset, cos_dir, sin_dir, icoo, jcoo); win_offset, cos_dir, sin_dir, icoo, jcoo);
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -1075,19 +1135,17 @@ void SURF_computeDescriptors64( ...@@ -1075,19 +1135,17 @@ void SURF_computeDescriptors64(
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid); reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25) if (tid == 0)
{ {
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2); __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
// write dx, dy, |dx|, |dy| // write dx, dy, |dx|, |dy|
if (tid == 0)
{
descriptors_block[0] = sdx[0]; descriptors_block[0] = sdx[0];
descriptors_block[1] = sdy[0]; descriptors_block[1] = sdy[0];
descriptors_block[2] = sdxabs[0]; descriptors_block[2] = sdxabs[0];
descriptors_block[3] = sdyabs[0]; descriptors_block[3] = sdyabs[0];
} }
}
} }
__kernel __kernel
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment