Unverified Commit 53c77155 authored by Alexander Alekhin, committed by GitHub

Merge pull request #14473 from alalek:video_dis_update_opencl

video(DISOpticalFlow): update OpenCL implementation (#14473)

* video(DIS): add code for profiling

* video(DIS): fix test parameters

* video(DIS): simplify OpenCL kernels

- parameters -> defines
- avoid float3
- const / local scope
- improve readability, replace Kernel::set() -> args()

* video(DIS): use CV_32FC2 buffers
parent 43467a2a
This diff is collapsed.
This diff is collapsed.
...@@ -133,20 +133,28 @@ class VariationalRefinementImpl CV_FINAL : public VariationalRefinement ...@@ -133,20 +133,28 @@ class VariationalRefinementImpl CV_FINAL : public VariationalRefinement
}; };
void gradHorizAndSplitOp(void *src, void *dst, void *dst_split) void gradHorizAndSplitOp(void *src, void *dst, void *dst_split)
{ {
CV_INSTRUMENT_REGION();
Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE); Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE);
splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst);
} }
void gradVertAndSplitOp(void *src, void *dst, void *dst_split) void gradVertAndSplitOp(void *src, void *dst, void *dst_split)
{ {
CV_INSTRUMENT_REGION();
Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE); Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE);
splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst);
} }
void averageOp(void *src1, void *src2, void *dst) void averageOp(void *src1, void *src2, void *dst)
{ {
CV_INSTRUMENT_REGION();
addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F); addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F);
} }
void subtractOp(void *src1, void *src2, void *dst) void subtractOp(void *src1, void *src2, void *dst)
{ {
CV_INSTRUMENT_REGION();
subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F); subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F);
} }
...@@ -206,6 +214,8 @@ class VariationalRefinementImpl CV_FINAL : public VariationalRefinement ...@@ -206,6 +214,8 @@ class VariationalRefinementImpl CV_FINAL : public VariationalRefinement
VariationalRefinementImpl::VariationalRefinementImpl() VariationalRefinementImpl::VariationalRefinementImpl()
{ {
CV_INSTRUMENT_REGION();
fixedPointIterations = 5; fixedPointIterations = 5;
sorIterations = 5; sorIterations = 5;
alpha = 20.0f; alpha = 20.0f;
...@@ -222,6 +232,8 @@ VariationalRefinementImpl::VariationalRefinementImpl() ...@@ -222,6 +232,8 @@ VariationalRefinementImpl::VariationalRefinementImpl()
*/ */
void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src) void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src)
{ {
CV_INSTRUMENT_REGION();
int buf_j, j; int buf_j, j;
int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders
...@@ -288,6 +300,8 @@ void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src) ...@@ -288,6 +300,8 @@ void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src)
*/ */
void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src) void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src)
{ {
CV_INSTRUMENT_REGION();
int buf_j, j; int buf_j, j;
for (int i = 0; i < dst.rows; i++) for (int i = 0; i < dst.rows; i++)
{ {
...@@ -326,6 +340,8 @@ void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src) ...@@ -326,6 +340,8 @@ void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src)
*/ */
void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst) void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst)
{ {
CV_INSTRUMENT_REGION();
int buf_w = dst.red.cols; int buf_w = dst.red.cols;
for (int i = 0; i < dst.red.rows - 2; i++) for (int i = 0; i < dst.red.rows - 2; i++)
{ {
...@@ -369,10 +385,14 @@ void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst) ...@@ -369,10 +385,14 @@ void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst)
VariationalRefinementImpl::RedBlackBuffer::RedBlackBuffer() VariationalRefinementImpl::RedBlackBuffer::RedBlackBuffer()
{ {
CV_INSTRUMENT_REGION();
release(); release();
} }
void VariationalRefinementImpl::RedBlackBuffer::create(Size s) void VariationalRefinementImpl::RedBlackBuffer::create(Size s)
{ {
CV_INSTRUMENT_REGION();
/* Allocate enough memory to include borders */ /* Allocate enough memory to include borders */
int w = (int)ceil(s.width / 2.0) + 2; int w = (int)ceil(s.width / 2.0) + 2;
red.create(s.height + 2, w); red.create(s.height + 2, w);
...@@ -389,6 +409,8 @@ void VariationalRefinementImpl::RedBlackBuffer::create(Size s) ...@@ -389,6 +409,8 @@ void VariationalRefinementImpl::RedBlackBuffer::create(Size s)
void VariationalRefinementImpl::RedBlackBuffer::release() void VariationalRefinementImpl::RedBlackBuffer::release()
{ {
CV_INSTRUMENT_REGION();
red.release(); red.release();
black.release(); black.release();
red_even_len = red_odd_len = black_even_len = black_odd_len = 0; red_even_len = red_odd_len = black_even_len = black_odd_len = 0;
...@@ -403,12 +425,16 @@ VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRef ...@@ -403,12 +425,16 @@ VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRef
void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const
{ {
CV_INSTRUMENT_REGION();
for (int i = range.start; i < range.end; i++) for (int i = range.start; i < range.end; i++)
(var->*ops[i])(op1s[i], op2s[i], op3s[i]); (var->*ops[i])(op1s[i], op2s[i], op3s[i]);
} }
void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v) void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v)
{ {
CV_INSTRUMENT_REGION();
for (int i = 0; i < flow_u.rows; i++) for (int i = 0; i < flow_u.rows; i++)
{ {
float *pFlowU = flow_u.ptr<float>(i); float *pFlowU = flow_u.ptr<float>(i);
...@@ -426,6 +452,8 @@ void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat & ...@@ -426,6 +452,8 @@ void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &
void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v) void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v)
{ {
CV_INSTRUMENT_REGION();
Size s = I0.size(); Size s = I0.size();
A11.create(s); A11.create(s);
A12.create(s); A12.create(s);
...@@ -550,6 +578,8 @@ VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(Vari ...@@ -550,6 +578,8 @@ VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(Vari
*/ */
void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const
{ {
CV_INSTRUMENT_REGION();
int start_i = min(range.start * stripe_sz, h); int start_i = min(range.start * stripe_sz, h);
int end_i = min(range.end * stripe_sz, h); int end_i = min(range.end * stripe_sz, h);
...@@ -709,6 +739,8 @@ VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothne ...@@ -709,6 +739,8 @@ VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothne
*/ */
void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const
{ {
CV_INSTRUMENT_REGION();
int start_i = min(range.start * stripe_sz, h); int start_i = min(range.start * stripe_sz, h);
int end_i = min(range.end * stripe_sz, h); int end_i = min(range.end * stripe_sz, h);
...@@ -873,6 +905,8 @@ VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothn ...@@ -873,6 +905,8 @@ VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothn
/* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b2. */ /* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b2. */
void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const
{ {
CV_INSTRUMENT_REGION();
int start_i = min(range.start * stripe_sz, h); int start_i = min(range.start * stripe_sz, h);
int end_i = min(range.end * stripe_sz, h); int end_i = min(range.end * stripe_sz, h);
...@@ -965,6 +999,8 @@ VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalR ...@@ -965,6 +999,8 @@ VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalR
*/ */
void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const
{ {
CV_INSTRUMENT_REGION();
int start = min(range.start * stripe_sz, h); int start = min(range.start * stripe_sz, h);
int end = min(range.end * stripe_sz, h); int end = min(range.end * stripe_sz, h);
...@@ -1079,6 +1115,8 @@ void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &ran ...@@ -1079,6 +1115,8 @@ void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &ran
void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow)
{ {
CV_INSTRUMENT_REGION();
CV_Assert(!I0.empty() && I0.channels() == 1); CV_Assert(!I0.empty() && I0.channels() == 1);
CV_Assert(!I1.empty() && I1.channels() == 1); CV_Assert(!I1.empty() && I1.channels() == 1);
CV_Assert(I0.sameSize(I1)); CV_Assert(I0.sameSize(I1));
...@@ -1095,6 +1133,8 @@ void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputAr ...@@ -1095,6 +1133,8 @@ void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputAr
void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v)
{ {
CV_INSTRUMENT_REGION();
CV_Assert(!I0.empty() && I0.channels() == 1); CV_Assert(!I0.empty() && I0.channels() == 1);
CV_Assert(!I1.empty() && I1.channels() == 1); CV_Assert(!I1.empty() && I1.channels() == 1);
CV_Assert(I0.sameSize(I1)); CV_Assert(I0.sameSize(I1));
...@@ -1124,6 +1164,8 @@ void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutput ...@@ -1124,6 +1164,8 @@ void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutput
for (int i = 0; i < fixedPointIterations; i++) for (int i = 0; i < fixedPointIterations; i++)
{ {
CV_TRACE_REGION("fixedPoint_iteration");
parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true));
parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false));
...@@ -1139,6 +1181,7 @@ void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutput ...@@ -1139,6 +1181,7 @@ void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutput
for (int j = 0; j < sorIterations; j++) for (int j = 0; j < sorIterations; j++)
{ {
CV_TRACE_REGION("SOR_iteration");
parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true));
parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false));
} }
...@@ -1155,6 +1198,8 @@ void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutput ...@@ -1155,6 +1198,8 @@ void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutput
} }
void VariationalRefinementImpl::collectGarbage() void VariationalRefinementImpl::collectGarbage()
{ {
CV_INSTRUMENT_REGION();
Ix.release(); Ix.release();
Iy.release(); Iy.release();
Iz.release(); Iz.release();
......
...@@ -46,18 +46,13 @@ ...@@ -46,18 +46,13 @@
namespace opencv_test { namespace { namespace opencv_test { namespace {
PARAM_TEST_CASE(OCL_DenseOpticalFlow_DIS, int) CV_ENUM(DIS_TestPresets, DISOpticalFlow::PRESET_ULTRAFAST, DISOpticalFlow::PRESET_FAST, DISOpticalFlow::PRESET_MEDIUM);
{
int preset;
virtual void SetUp() typedef ocl::TSTestWithParam<DIS_TestPresets> OCL_DenseOpticalFlow_DIS;
{
preset = GET_PARAM(0);
}
};
OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat) OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat)
{ {
int preset = (int)GetParam();
Mat frame1, frame2, GT; Mat frame1, frame2, GT;
frame1 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"); frame1 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale1.png");
...@@ -68,15 +63,11 @@ OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat) ...@@ -68,15 +63,11 @@ OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat)
cvtColor(frame1, frame1, COLOR_BGR2GRAY); cvtColor(frame1, frame1, COLOR_BGR2GRAY);
cvtColor(frame2, frame2, COLOR_BGR2GRAY); cvtColor(frame2, frame2, COLOR_BGR2GRAY);
Ptr<DenseOpticalFlow> algo;
// iterate over presets:
for (int i = 0; i < cvtest::ocl::test_loop_times; i++)
{ {
Mat flow; Mat flow;
UMat ocl_flow; UMat ocl_flow;
algo = DISOpticalFlow::create(preset); Ptr<DenseOpticalFlow> algo = DISOpticalFlow::create(preset);
OCL_OFF(algo->calc(frame1, frame2, flow)); OCL_OFF(algo->calc(frame1, frame2, flow));
OCL_ON(algo->calc(frame1, frame2, ocl_flow)); OCL_ON(algo->calc(frame1, frame2, ocl_flow));
ASSERT_EQ(flow.rows, ocl_flow.rows); ASSERT_EQ(flow.rows, ocl_flow.rows);
...@@ -87,9 +78,7 @@ OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat) ...@@ -87,9 +78,7 @@ OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat)
} }
OCL_INSTANTIATE_TEST_CASE_P(Video, OCL_DenseOpticalFlow_DIS, OCL_INSTANTIATE_TEST_CASE_P(Video, OCL_DenseOpticalFlow_DIS,
Values(DISOpticalFlow::PRESET_ULTRAFAST, DIS_TestPresets::all());
DISOpticalFlow::PRESET_FAST,
DISOpticalFlow::PRESET_MEDIUM));
}} // namespace }} // namespace
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment