Commit bd7930ad authored by Roman Donchenko's avatar Roman Donchenko Committed by OpenCV Buildbot

Merge pull request #1345 from pengx17:2.4_tvl1ocl_opt

parents 7e4c0ac4 040b3b82
...@@ -341,7 +341,8 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx ...@@ -341,7 +341,8 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
int u1_offset_x, int u1_offset_x,
int u1_offset_y, int u1_offset_y,
int u2_offset_x, int u2_offset_x,
int u2_offset_y) int u2_offset_y,
char calc_error)
{ {
//const int x = blockIdx.x * blockDim.x + threadIdx.x; //const int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -399,9 +400,12 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx ...@@ -399,9 +400,12 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
if(calc_error)
{
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error[y * I1wx_step + x] = n1 + n2; error[y * I1wx_step + x] = n1 + n2;
} }
}
} }
...@@ -172,7 +172,7 @@ namespace ocl_tvl1flow ...@@ -172,7 +172,7 @@ namespace ocl_tvl1flow
void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
oclMat &rho_c, oclMat &p11, oclMat &p12, oclMat &rho_c, oclMat &p11, oclMat &p12,
oclMat &p21, oclMat &p22, oclMat &u1, oclMat &p21, oclMat &p22, oclMat &u1,
oclMat &u2, oclMat &error, float l_t, float theta); oclMat &u2, oclMat &error, float l_t, float theta, char calc_error);
void estimateDualVariables(oclMat &u1, oclMat &u2, void estimateDualVariables(oclMat &u1, oclMat &u2,
oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut); oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
...@@ -229,18 +229,29 @@ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const ocl ...@@ -229,18 +229,29 @@ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const ocl
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
double error = numeric_limits<double>::max(); double error = numeric_limits<double>::max();
double prev_error = 0;
for (int n = 0; error > scaledEpsilon && n < iterations; ++n) for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
{ {
// some tweaks to make sum operation less frequently
char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
u1, u2, diff, l_t, static_cast<float>(theta)); u1, u2, diff, l_t, static_cast<float>(theta), calc_error);
if(calc_error)
{
error = ocl::sum(diff)[0]; error = ocl::sum(diff)[0];
prev_error = error;
}
else
{
error = numeric_limits<double>::max();
prev_error -= scaledEpsilon;
}
estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
} }
} }
} }
void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage() void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
...@@ -348,7 +359,7 @@ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oc ...@@ -348,7 +359,7 @@ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oc
void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
oclMat &rho_c, oclMat &p11, oclMat &p12, oclMat &rho_c, oclMat &p11, oclMat &p12,
oclMat &p21, oclMat &p22, oclMat &u1, oclMat &p21, oclMat &p22, oclMat &u1,
oclMat &u2, oclMat &error, float l_t, float theta) oclMat &u2, oclMat &error, float l_t, float theta, char calc_error)
{ {
Context* clCxt = I1wx.clCxt; Context* clCxt = I1wx.clCxt;
...@@ -401,6 +412,7 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, ...@@ -401,6 +412,7 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error));
openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment