Commit 0904f10a authored by Konstantin Matskevich

optimizations

parent bfc843a5
......@@ -51,7 +51,7 @@ namespace ocl {
typedef std::tr1::tuple<int, int> StereoBMFixture_t;
typedef TestBaseWithParam<StereoBMFixture_t> StereoBMFixture;
OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32, 64, 128), OCL_PERF_ENUM(11,21) ) )
OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32, 64), OCL_PERF_ENUM(11,21) ) )
{
const int n_disp = get<0>(GetParam()), winSize = get<1>(GetParam());
UMat left, right, disp;
......@@ -64,11 +64,11 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32,
declare.in(left, right);
Ptr<StereoBM> bm = createStereoBM( n_disp, winSize );
bm->setPreFilterType(bm->PREFILTER_NORMALIZED_RESPONSE);
bm->setPreFilterType(bm->PREFILTER_XSOBEL);
OCL_TEST_CYCLE() bm->compute(left, right, disp);
SANITY_CHECK(disp, 0.05, ERROR_RELATIVE);
SANITY_CHECK(disp, 1e-3, ERROR_RELATIVE);
}
}//ocl
......
This diff is collapsed.
......@@ -180,13 +180,11 @@ static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int pre
_output.create(input.size(), input.type());
output = _output.getUMat();
size_t blockSize = 1;
size_t globalThreads[3] = { input.cols, input.rows, 1 };
size_t localThreads[3] = { blockSize, blockSize, 1 };
k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap);
return k.run(2, globalThreads, localThreads, false);
return k.run(2, globalThreads, NULL, false);
}
static void
......@@ -655,6 +653,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
mind = d;
}
}
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
if( tsum < textureThreshold )
{
......@@ -738,9 +737,9 @@ struct PrefilterInvoker : public ParallelLoopBody
static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
OutputArray _disp, StereoBMParams* state)
{
{//printf("opt\n");
int ndisp = state->numDisparities;
ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D tsize=%d", ndisp*ndisp, ndisp) );
ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D tsize=%d -D wsz=%d", ndisp*ndisp, ndisp, state->SADWindowSize) );
if(k.empty())
return false;
......@@ -748,8 +747,9 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
_disp.create(_left.size(), CV_16S);
UMat disp = _disp.getUMat();
size_t globalThreads[3] = { left.cols, left.rows, 1 };
size_t localThreads[3] = {ndisp, 1, 1};
int nthreads = (ndisp <= 64) ? 2 : 4;
size_t globalThreads[3] = { left.cols, (left.rows - left.rows%ndisp + ndisp), nthreads};
size_t localThreads[3] = {1, ndisp, nthreads};
int idx = 0;
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
......@@ -758,11 +758,11 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
idx = k.set(idx, state->minDisparity);
idx = k.set(idx, ndisp);
idx = k.set(idx, state->preFilterCap);
idx = k.set(idx, state->SADWindowSize);
idx = k.set(idx, nthreads);
idx = k.set(idx, state->textureThreshold);
idx = k.set(idx, state->uniquenessRatio);
return k.run(2, globalThreads, localThreads, false);
return k.run(3, globalThreads, localThreads, false);
}
static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
......@@ -790,15 +790,16 @@ static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
idx = k.set(idx, state->uniquenessRatio);
return k.run(2, globalThreads, NULL, false);
return false;
}
static bool ocl_stereo(InputArray _left, InputArray _right,
OutputArray _disp, StereoBMParams* state)
{
if(ocl::Device::getDefault().localMemSize() > state->numDisparities * state->numDisparities * sizeof(int) )
if(ocl::Device::getDefault().localMemSize() > state->numDisparities * state->numDisparities * sizeof(short) )
return ocl_stereobm_opt(_left, _right, _disp, state);
else
return ocl_stereobm_bf(_left, _right, _disp, state);
return false;//ocl_stereobm_bf(_left, _right, _disp, state);
}
struct FindStereoCorrespInvoker : public ParallelLoopBody
......@@ -992,7 +993,7 @@ public:
bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));
#if CV_SSE2
bool useShorts = false;//params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2);
bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2);
#else
const bool useShorts = false;
#endif
......
......@@ -81,11 +81,27 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
{
Ptr<StereoBM> bm = createStereoBM( n_disp, winSize);
bm->setPreFilterType(bm->PREFILTER_XSOBEL);
// bm->setMinDisparity(15);
long t1 = clock();
OCL_OFF(bm->compute(left, right, disp));
long t2 = clock();
OCL_ON(bm->compute(uleft, uright, udisp));
Near(0.05);
cv::ocl::finish();
long t3 = clock();
std::cout << (double)(t2-t1)/CLOCKS_PER_SEC << " " << (double)(t3-t2)/CLOCKS_PER_SEC << std::endl;
/*
Mat t; absdiff(disp, udisp, t);
/* for(int i = 0; i<t.rows; i++)
for(int j = 0; j< t.cols; j++)
// if(t.at<short>(i,j) > 0)
if(i>=5 && i <=16 && j == 36+15)
printf("%d %d cv: %d ocl: %d\n", i, j, disp.at<short>(i,j), udisp.getMat(ACCESS_READ).at<short>(i,j) );*/
/* imshow("diff.png", t*100);
imshow("cv.png", disp*100);
imshow("ocl.png", udisp.getMat(ACCESS_READ)*100);
waitKey(0);*/
Near(1e-3);
}
OCL_INSTANTIATE_TEST_CASE_P(StereoMatcher, StereoBMFixture, testing::Combine(testing::Values(32, 64, 128),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment