Commit 0904f10a authored by Konstantin Matskevich

optimizations

parent bfc843a5
...@@ -51,7 +51,7 @@ namespace ocl { ...@@ -51,7 +51,7 @@ namespace ocl {
typedef std::tr1::tuple<int, int> StereoBMFixture_t; typedef std::tr1::tuple<int, int> StereoBMFixture_t;
typedef TestBaseWithParam<StereoBMFixture_t> StereoBMFixture; typedef TestBaseWithParam<StereoBMFixture_t> StereoBMFixture;
OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32, 64, 128), OCL_PERF_ENUM(11,21) ) ) OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32, 64), OCL_PERF_ENUM(11,21) ) )
{ {
const int n_disp = get<0>(GetParam()), winSize = get<1>(GetParam()); const int n_disp = get<0>(GetParam()), winSize = get<1>(GetParam());
UMat left, right, disp; UMat left, right, disp;
...@@ -64,11 +64,11 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32, ...@@ -64,11 +64,11 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32,
declare.in(left, right); declare.in(left, right);
Ptr<StereoBM> bm = createStereoBM( n_disp, winSize ); Ptr<StereoBM> bm = createStereoBM( n_disp, winSize );
bm->setPreFilterType(bm->PREFILTER_NORMALIZED_RESPONSE); bm->setPreFilterType(bm->PREFILTER_XSOBEL);
OCL_TEST_CYCLE() bm->compute(left, right, disp); OCL_TEST_CYCLE() bm->compute(left, right, disp);
SANITY_CHECK(disp, 0.05, ERROR_RELATIVE); SANITY_CHECK(disp, 1e-3, ERROR_RELATIVE);
} }
}//ocl }//ocl
......
This diff is collapsed.
...@@ -180,13 +180,11 @@ static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int pre ...@@ -180,13 +180,11 @@ static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int pre
_output.create(input.size(), input.type()); _output.create(input.size(), input.type());
output = _output.getUMat(); output = _output.getUMat();
size_t blockSize = 1;
size_t globalThreads[3] = { input.cols, input.rows, 1 }; size_t globalThreads[3] = { input.cols, input.rows, 1 };
size_t localThreads[3] = { blockSize, blockSize, 1 };
k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap); k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap);
return k.run(2, globalThreads, localThreads, false); return k.run(2, globalThreads, NULL, false);
} }
static void static void
...@@ -655,6 +653,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, ...@@ -655,6 +653,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
mind = d; mind = d;
} }
} }
tsum += htext[y + wsz2] - htext[y - wsz2 - 1]; tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
if( tsum < textureThreshold ) if( tsum < textureThreshold )
{ {
...@@ -738,9 +737,9 @@ struct PrefilterInvoker : public ParallelLoopBody ...@@ -738,9 +737,9 @@ struct PrefilterInvoker : public ParallelLoopBody
static bool ocl_stereobm_opt( InputArray _left, InputArray _right, static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
OutputArray _disp, StereoBMParams* state) OutputArray _disp, StereoBMParams* state)
{ {//printf("opt\n");
int ndisp = state->numDisparities; int ndisp = state->numDisparities;
ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D tsize=%d", ndisp*ndisp, ndisp) ); ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D tsize=%d -D wsz=%d", ndisp*ndisp, ndisp, state->SADWindowSize) );
if(k.empty()) if(k.empty())
return false; return false;
...@@ -748,8 +747,9 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right, ...@@ -748,8 +747,9 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
_disp.create(_left.size(), CV_16S); _disp.create(_left.size(), CV_16S);
UMat disp = _disp.getUMat(); UMat disp = _disp.getUMat();
size_t globalThreads[3] = { left.cols, left.rows, 1 }; int nthreads = (ndisp <= 64) ? 2 : 4;
size_t localThreads[3] = {ndisp, 1, 1}; size_t globalThreads[3] = { left.cols, (left.rows - left.rows%ndisp + ndisp), nthreads};
size_t localThreads[3] = {1, ndisp, nthreads};
int idx = 0; int idx = 0;
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left)); idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
...@@ -758,11 +758,11 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right, ...@@ -758,11 +758,11 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
idx = k.set(idx, state->minDisparity); idx = k.set(idx, state->minDisparity);
idx = k.set(idx, ndisp); idx = k.set(idx, ndisp);
idx = k.set(idx, state->preFilterCap); idx = k.set(idx, state->preFilterCap);
idx = k.set(idx, state->SADWindowSize); idx = k.set(idx, nthreads);
idx = k.set(idx, state->textureThreshold); idx = k.set(idx, state->textureThreshold);
idx = k.set(idx, state->uniquenessRatio); idx = k.set(idx, state->uniquenessRatio);
return k.run(2, globalThreads, localThreads, false); return k.run(3, globalThreads, localThreads, false);
} }
static bool ocl_stereobm_bf(InputArray _left, InputArray _right, static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
...@@ -790,15 +790,16 @@ static bool ocl_stereobm_bf(InputArray _left, InputArray _right, ...@@ -790,15 +790,16 @@ static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
idx = k.set(idx, state->uniquenessRatio); idx = k.set(idx, state->uniquenessRatio);
return k.run(2, globalThreads, NULL, false); return k.run(2, globalThreads, NULL, false);
return false;
} }
static bool ocl_stereo(InputArray _left, InputArray _right, static bool ocl_stereo(InputArray _left, InputArray _right,
OutputArray _disp, StereoBMParams* state) OutputArray _disp, StereoBMParams* state)
{ {
if(ocl::Device::getDefault().localMemSize() > state->numDisparities * state->numDisparities * sizeof(int) ) if(ocl::Device::getDefault().localMemSize() > state->numDisparities * state->numDisparities * sizeof(short) )
return ocl_stereobm_opt(_left, _right, _disp, state); return ocl_stereobm_opt(_left, _right, _disp, state);
else else
return ocl_stereobm_bf(_left, _right, _disp, state); return false;//ocl_stereobm_bf(_left, _right, _disp, state);
} }
struct FindStereoCorrespInvoker : public ParallelLoopBody struct FindStereoCorrespInvoker : public ParallelLoopBody
...@@ -992,7 +993,7 @@ public: ...@@ -992,7 +993,7 @@ public:
bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar)); bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));
#if CV_SSE2 #if CV_SSE2
bool useShorts = false;//params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2); bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2);
#else #else
const bool useShorts = false; const bool useShorts = false;
#endif #endif
......
...@@ -81,11 +81,27 @@ OCL_TEST_P(StereoBMFixture, StereoBM) ...@@ -81,11 +81,27 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
{ {
Ptr<StereoBM> bm = createStereoBM( n_disp, winSize); Ptr<StereoBM> bm = createStereoBM( n_disp, winSize);
bm->setPreFilterType(bm->PREFILTER_XSOBEL); bm->setPreFilterType(bm->PREFILTER_XSOBEL);
// bm->setMinDisparity(15);
long t1 = clock();
OCL_OFF(bm->compute(left, right, disp)); OCL_OFF(bm->compute(left, right, disp));
long t2 = clock();
OCL_ON(bm->compute(uleft, uright, udisp)); OCL_ON(bm->compute(uleft, uright, udisp));
cv::ocl::finish();
Near(0.05); long t3 = clock();
std::cout << (double)(t2-t1)/CLOCKS_PER_SEC << " " << (double)(t3-t2)/CLOCKS_PER_SEC << std::endl;
/*
Mat t; absdiff(disp, udisp, t);
/* for(int i = 0; i<t.rows; i++)
for(int j = 0; j< t.cols; j++)
// if(t.at<short>(i,j) > 0)
if(i>=5 && i <=16 && j == 36+15)
printf("%d %d cv: %d ocl: %d\n", i, j, disp.at<short>(i,j), udisp.getMat(ACCESS_READ).at<short>(i,j) );*/
/* imshow("diff.png", t*100);
imshow("cv.png", disp*100);
imshow("ocl.png", udisp.getMat(ACCESS_READ)*100);
waitKey(0);*/
Near(1e-3);
} }
OCL_INSTANTIATE_TEST_CASE_P(StereoMatcher, StereoBMFixture, testing::Combine(testing::Values(32, 64, 128), OCL_INSTANTIATE_TEST_CASE_P(StereoMatcher, StereoBMFixture, testing::Combine(testing::Values(32, 64, 128),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment