Commit dcc47661 authored by peng xiao

Fix white-spacing

parent 92702059
...@@ -136,11 +136,13 @@ PERFTEST(PyrLKOpticalFlow) ...@@ -136,11 +136,13 @@ PERFTEST(PyrLKOpticalFlow)
size_t mismatch = 0; size_t mismatch = 0;
for (int i = 0; i < (int)nextPts.size(); ++i) for (int i = 0; i < (int)nextPts.size(); ++i)
{ {
if(status[i] != ocl_status.at<unsigned char>(0, i)){ if(status[i] != ocl_status.at<unsigned char>(0, i))
{
mismatch++; mismatch++;
continue; continue;
} }
if(status[i]){ if(status[i])
{
Point2f gpu_rst = ocl_nextPts.at<Point2f>(0, i); Point2f gpu_rst = ocl_nextPts.at<Point2f>(0, i);
Point2f cpu_rst = nextPts[i]; Point2f cpu_rst = nextPts[i];
if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.) if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.)
...@@ -193,24 +195,24 @@ PERFTEST(tvl1flow) ...@@ -193,24 +195,24 @@ PERFTEST(tvl1flow)
WARMUP_ON; WARMUP_ON;
d_alg(d0, d1, d_flowx, d_flowy); d_alg(d0, d1, d_flowx, d_flowy);
WARMUP_OFF; WARMUP_OFF;
/* /*
double diff1 = 0.0, diff2 = 0.0; double diff1 = 0.0, diff2 = 0.0;
if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1 if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1
&&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1) &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1)
TestSystem::instance().setAccurate(1); TestSystem::instance().setAccurate(1);
else else
TestSystem::instance().setAccurate(0); TestSystem::instance().setAccurate(0);
TestSystem::instance().setDiff(diff1); TestSystem::instance().setDiff(diff1);
TestSystem::instance().setDiff(diff2); TestSystem::instance().setDiff(diff2);
*/ */
GPU_ON; GPU_ON;
d_alg(d0, d1, d_flowx, d_flowy); d_alg(d0, d1, d_flowx, d_flowy);
d_alg.collectGarbage(); d_alg.collectGarbage();
GPU_OFF; GPU_OFF;
cv::Mat flowx, flowy; cv::Mat flowx, flowy;
...@@ -352,4 +354,3 @@ PERFTEST(FarnebackOpticalFlow) ...@@ -352,4 +354,3 @@ PERFTEST(FarnebackOpticalFlow)
} }
} }
} }
...@@ -71,7 +71,7 @@ __kernel void polynomialExpansion(__global float * dst, ...@@ -71,7 +71,7 @@ __kernel void polynomialExpansion(__global float * dst,
dstStep /= sizeof(*dst); dstStep /= sizeof(*dst);
srcStep /= sizeof(*src); srcStep /= sizeof(*src);
int xWarped; int xWarped;
__local float *row = smem + tx; __local float *row = smem + tx;
...@@ -168,7 +168,7 @@ __kernel void gaussianBlur(__global float * dst, ...@@ -168,7 +168,7 @@ __kernel void gaussianBlur(__global float * dst,
srcStep /= sizeof(*src); srcStep /= sizeof(*src);
__local float *row = smem + ty * (bdx + 2*ksizeHalf); __local float *row = smem + ty * (bdx + 2*ksizeHalf);
if (y < height) if (y < height)
{ {
// Vertical pass // Vertical pass
...@@ -184,7 +184,7 @@ __kernel void gaussianBlur(__global float * dst, ...@@ -184,7 +184,7 @@ __kernel void gaussianBlur(__global float * dst,
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (y < height && y >= 0 && x < width && x >= 0) if (y < height && y >= 0 && x < width && x >= 0)
{ {
// Horizontal pass // Horizontal pass
...@@ -207,7 +207,7 @@ __kernel void updateMatrices(__global float * M, ...@@ -207,7 +207,7 @@ __kernel void updateMatrices(__global float * M,
{ {
const int y = get_global_id(1); const int y = get_global_id(1);
const int x = get_global_id(0); const int x = get_global_id(0);
mStep /= sizeof(*M); mStep /= sizeof(*M);
xStep /= sizeof(*flowx); xStep /= sizeof(*flowx);
yStep /= sizeof(*flowy); yStep /= sizeof(*flowy);
...@@ -223,7 +223,8 @@ __kernel void updateMatrices(__global float * M, ...@@ -223,7 +223,8 @@ __kernel void updateMatrices(__global float * M,
int x1 = convert_int(floor(fx)); int x1 = convert_int(floor(fx));
int y1 = convert_int(floor(fy)); int y1 = convert_int(floor(fy));
fx -= x1; fy -= y1; fx -= x1;
fy -= y1;
float r2, r3, r4, r5, r6; float r2, r3, r4, r5, r6;
...@@ -278,13 +279,16 @@ __kernel void updateMatrices(__global float * M, ...@@ -278,13 +279,16 @@ __kernel void updateMatrices(__global float * M,
r3 += r6*dy + r5*dx; r3 += r6*dy + r5*dx;
float scale = float scale =
c_border[min(x, BORDER_SIZE)] * c_border[min(x, BORDER_SIZE)] *
c_border[min(y, BORDER_SIZE)] * c_border[min(y, BORDER_SIZE)] *
c_border[min(width - x - 1, BORDER_SIZE)] * c_border[min(width - x - 1, BORDER_SIZE)] *
c_border[min(height - y - 1, BORDER_SIZE)]; c_border[min(height - y - 1, BORDER_SIZE)];
r2 *= scale; r3 *= scale; r4 *= scale; r2 *= scale;
r5 *= scale; r6 *= scale; r3 *= scale;
r4 *= scale;
r5 *= scale;
r6 *= scale;
M[mad24(y, mStep, x)] = r4*r4 + r6*r6; M[mad24(y, mStep, x)] = r4*r4 + r6*r6;
M[mad24(height + y, mStep, x)] = (r4 + r5)*r6; M[mad24(height + y, mStep, x)] = (r4 + r5)*r6;
...@@ -303,7 +307,7 @@ __kernel void boxFilter5(__global float * dst, ...@@ -303,7 +307,7 @@ __kernel void boxFilter5(__global float * dst,
{ {
const int y = get_global_id(1); const int y = get_global_id(1);
const int x = get_global_id(0); const int x = get_global_id(0);
const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf)); const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
const int smw = bdx + 2*ksizeHalf; // shared memory "width" const int smw = bdx + 2*ksizeHalf; // shared memory "width"
__local float *row = smem + 5 * ty * smw; __local float *row = smem + 5 * ty * smw;
...@@ -319,16 +323,16 @@ __kernel void boxFilter5(__global float * dst, ...@@ -319,16 +323,16 @@ __kernel void boxFilter5(__global float * dst,
int xExt = (int)(bx * bdx) + i - ksizeHalf; int xExt = (int)(bx * bdx) + i - ksizeHalf;
xExt = min(max(xExt, 0), width - 1); xExt = min(max(xExt, 0), width - 1);
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)]; row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)];
for (int j = 1; j <= ksizeHalf; ++j) for (int j = 1; j <= ksizeHalf; ++j)
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
row[k*smw + i] += row[k*smw + i] +=
src[mad24(k*height + max(y - j, 0), srcStep, xExt)] + src[mad24(k*height + max(y - j, 0), srcStep, xExt)] +
src[mad24(k*height + min(y + j, height - 1), srcStep, xExt)]; src[mad24(k*height + min(y + j, height - 1), srcStep, xExt)];
} }
} }
...@@ -341,16 +345,16 @@ __kernel void boxFilter5(__global float * dst, ...@@ -341,16 +345,16 @@ __kernel void boxFilter5(__global float * dst,
row += tx + ksizeHalf; row += tx + ksizeHalf;
float res[5]; float res[5];
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
res[k] = row[k*smw]; res[k] = row[k*smw];
for (int i = 1; i <= ksizeHalf; ++i) for (int i = 1; i <= ksizeHalf; ++i)
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
res[k] += row[k*smw - i] + row[k*smw + i]; res[k] += row[k*smw - i] + row[k*smw + i];
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
dst[mad24(k*height + y, dstStep, x)] = res[k] * boxAreaInv; dst[mad24(k*height + y, dstStep, x)] = res[k] * boxAreaInv;
} }
...@@ -372,7 +376,7 @@ __kernel void updateFlow(__global float4 * flowx, __global float4 * flowy, ...@@ -372,7 +376,7 @@ __kernel void updateFlow(__global float4 * flowx, __global float4 * flowy,
{ {
float4 g11 = M[mad24(y, mStep, x)]; float4 g11 = M[mad24(y, mStep, x)];
float4 g12 = M[mad24(height + y, mStep, x)]; float4 g12 = M[mad24(height + y, mStep, x)];
float4 g22 = M[mad24(2*height + y, mStep, x)]; float4 g22 = M[mad24(2*height + y, mStep, x)];
float4 h1 = M[mad24(3*height + y, mStep, x)]; float4 h1 = M[mad24(3*height + y, mStep, x)];
float4 h2 = M[mad24(4*height + y, mStep, x)]; float4 h2 = M[mad24(4*height + y, mStep, x)];
...@@ -408,16 +412,16 @@ __kernel void gaussianBlur5(__global float * dst, ...@@ -408,16 +412,16 @@ __kernel void gaussianBlur5(__global float * dst,
int xExt = (int)(bx * bdx) + i - ksizeHalf; int xExt = (int)(bx * bdx) + i - ksizeHalf;
xExt = idx_col(xExt, width - 1); xExt = idx_col(xExt, width - 1);
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)] * c_gKer[0]; row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)] * c_gKer[0];
for (int j = 1; j <= ksizeHalf; ++j) for (int j = 1; j <= ksizeHalf; ++j)
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
row[k*smw + i] += row[k*smw + i] +=
(src[mad24(k*height + idx_row_low(y - j, height - 1), srcStep, xExt)] + (src[mad24(k*height + idx_row_low(y - j, height - 1), srcStep, xExt)] +
src[mad24(k*height + idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j]; src[mad24(k*height + idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j];
} }
} }
...@@ -430,16 +434,16 @@ __kernel void gaussianBlur5(__global float * dst, ...@@ -430,16 +434,16 @@ __kernel void gaussianBlur5(__global float * dst,
row += tx + ksizeHalf; row += tx + ksizeHalf;
float res[5]; float res[5];
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
res[k] = row[k*smw] * c_gKer[0]; res[k] = row[k*smw] * c_gKer[0];
for (int i = 1; i <= ksizeHalf; ++i) for (int i = 1; i <= ksizeHalf; ++i)
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i]; res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];
#pragma unroll #pragma unroll
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
dst[mad24(k*height + y, dstStep, x)] = res[k]; dst[mad24(k*height + y, dstStep, x)] = res[k];
} }
......
...@@ -55,178 +55,184 @@ using namespace cv::ocl; ...@@ -55,178 +55,184 @@ using namespace cv::ocl;
namespace cv namespace cv
{ {
namespace ocl namespace ocl
{ {
///////////////////////////OpenCL kernel strings/////////////////////////// ///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *optical_flow_farneback; extern const char *optical_flow_farneback;
} }
} }
namespace cv { namespace ocl { namespace optflow_farneback namespace cv {
namespace ocl {
namespace optflow_farneback
{ {
oclMat g; oclMat g;
oclMat xg; oclMat xg;
oclMat xxg; oclMat xxg;
oclMat gKer; oclMat gKer;
float ig[4]; float ig[4];
inline int divUp(int total, int grain) inline int divUp(int total, int grain)
{ {
return (total + grain - 1) / grain; return (total + grain - 1) / grain;
} }
inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf) inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
{ {
cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer)); cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
gKer.upload(t_gKer); gKer.upload(t_gKer);
} }
static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst) static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
{ {
string kernelName("gaussianBlur"); string kernelName("gaussianBlur");
size_t localThreads[3] = { 256, 1, 1 }; size_t localThreads[3] = { 256, 1, 1 };
size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 }; size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 };
int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float); int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
CV_Assert(dst.size() == src.size()); CV_Assert(dst.size() == src.size());
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(smem_size, (void *)NULL));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1); globalThreads, localThreads, args, -1, -1);
} }
static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst) static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
{ {
string kernelName("polynomialExpansion"); string kernelName("polynomialExpansion");
size_t localThreads[3] = { 256, 1, 1 }; size_t localThreads[3] = { 256, 1, 1 };
size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 }; size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
int smem_size = 3 * localThreads[0] * sizeof(float); int smem_size = 3 * localThreads[0] * sizeof(float);
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(smem_size, (void *)NULL));
args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig)); args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
char opt [128]; char opt [128];
sprintf(opt, "-D polyN=%d", polyN); sprintf(opt, "-D polyN=%d", polyN);
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1, opt); globalThreads, localThreads, args, -1, -1, opt);
} }
static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M) static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
{ {
string kernelName("updateMatrices"); string kernelName("updateMatrices");
size_t localThreads[3] = { 32, 8, 1 }; size_t localThreads[3] = { 32, 8, 1 };
size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0], size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0],
divUp(flowx.rows, localThreads[1]) * localThreads[1], divUp(flowx.rows, localThreads[1]) * localThreads[1],
1 }; 1
};
std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step));
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
} globalThreads, localThreads, args, -1, -1);
}
static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
{ {
string kernelName("boxFilter5"); string kernelName("boxFilter5");
int height = src.rows / 5; int height = src.rows / 5;
size_t localThreads[3] = { 256, 1, 1 }; size_t localThreads[3] = { 256, 1, 1 };
size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 }; size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 };
int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(smem_size, (void *)NULL));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1); globalThreads, localThreads, args, -1, -1);
} }
static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy) static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
{ {
string kernelName("updateFlow"); string kernelName("updateFlow");
int cols = divUp(flowx.cols, 4); int cols = divUp(flowx.cols, 4);
size_t localThreads[3] = { 32, 8, 1 }; size_t localThreads[3] = { 32, 8, 1 };
size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
divUp(flowx.rows, localThreads[1]) * localThreads[0], divUp(flowx.rows, localThreads[1]) * localThreads[0],
1 }; 1
};
std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
} globalThreads, localThreads, args, -1, -1);
}
static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
{ {
string kernelName("gaussianBlur5"); string kernelName("gaussianBlur5");
int height = src.rows / 5; int height = src.rows / 5;
int width = src.cols; int width = src.cols;
size_t localThreads[3] = { 256, 1, 1 }; size_t localThreads[3] = { 256, 1, 1 };
size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 }; size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 };
int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(smem_size, (void *)NULL));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&width)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&width));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
globalThreads, localThreads, args, -1, -1); globalThreads, localThreads, args, -1, -1);
} }
}}} // namespace cv { namespace ocl { namespace optflow_farneback }
}
} // namespace cv { namespace ocl { namespace optflow_farneback
static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat) static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
{ {
...@@ -236,8 +242,8 @@ static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat) ...@@ -236,8 +242,8 @@ static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
} }
void cv::ocl::FarnebackOpticalFlow::prepareGaussian( void cv::ocl::FarnebackOpticalFlow::prepareGaussian(
int n, double sigma, float *g, float *xg, float *xxg, int n, double sigma, float *g, float *xg, float *xxg,
double &ig11, double &ig03, double &ig33, double &ig55) double &ig11, double &ig03, double &ig33, double &ig55)
{ {
double s = 0.; double s = 0.;
for (int x = -n; x <= n; x++) for (int x = -n; x <= n; x++)
...@@ -316,8 +322,8 @@ void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s ...@@ -316,8 +322,8 @@ void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s
} }
void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter( void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy, const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
{ {
optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM); optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM);
...@@ -333,8 +339,8 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter( ...@@ -333,8 +339,8 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur( void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy, const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
{ {
optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM); optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM);
...@@ -348,7 +354,7 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur( ...@@ -348,7 +354,7 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
void cv::ocl::FarnebackOpticalFlow::operator ()( void cv::ocl::FarnebackOpticalFlow::operator ()(
const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy) const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
{ {
CV_Assert(frame0.channels() == 1 && frame1.channels() == 1); CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
CV_Assert(frame0.size() == frame1.size()); CV_Assert(frame0.size() == frame1.size());
...@@ -504,4 +510,3 @@ void cv::ocl::FarnebackOpticalFlow::operator ()( ...@@ -504,4 +510,3 @@ void cv::ocl::FarnebackOpticalFlow::operator ()(
flowx = curFlowX; flowx = curFlowX;
flowy = curFlowY; flowy = curFlowY;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment