Commit 05eb9f35 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #1062 from pengli:dis_optflow

parents 1ecbb8f0 7ed6f778
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
using std::tr1::tuple;
using std::tr1::get;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::optflow;
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2);
typedef tuple<String, Size> DISParams;
typedef TestBaseWithParam<DISParams> DenseOpticalFlow_DIS;
OCL_PERF_TEST_P(DenseOpticalFlow_DIS, perf,
Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p)))
{
DISParams params = GetParam();
// use strings to print preset names in the perf test results:
String preset_string = get<0>(params);
int preset = DISOpticalFlow::PRESET_FAST;
if (preset_string == "PRESET_ULTRAFAST")
preset = DISOpticalFlow::PRESET_ULTRAFAST;
else if (preset_string == "PRESET_FAST")
preset = DISOpticalFlow::PRESET_FAST;
else if (preset_string == "PRESET_MEDIUM")
preset = DISOpticalFlow::PRESET_MEDIUM;
Size sz = get<1>(params);
UMat frame1(sz, CV_8U);
UMat frame2(sz, CV_8U);
UMat flow;
MakeArtificialExample(frame1, frame2);
Ptr<DenseOpticalFlow> algo = createOptFlow_DIS(preset);
OCL_TEST_CYCLE_N(10)
{
algo->calc(frame1, frame2, flow);
}
SANITY_CHECK_NOTHING();
}
void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2)
{
int src_scale = 2;
int OF_scale = 6;
double sigma = dst_frame1.cols / 300;
UMat tmp(Size(dst_frame1.cols / (int)pow(2, src_scale), dst_frame1.rows / (int)pow(2, src_scale)), CV_8U);
randu(tmp, 0, 255);
resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR);
resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR);
Mat displacement_field(Size(dst_frame1.cols / (int)pow(2, OF_scale), dst_frame1.rows / (int)pow(2, OF_scale)),
CV_32FC2);
randn(displacement_field, 0.0, sigma);
resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC);
for (int i = 0; i < displacement_field.rows; i++)
for (int j = 0; j < displacement_field.cols; j++)
displacement_field.at<Vec2f>(i, j) += Vec2f((float)j, (float)i);
remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE);
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
......@@ -42,6 +42,8 @@
#include "opencv2/core/hal/intrin.hpp"
#include "precomp.hpp"
#include "opencl_kernels_optflow.hpp"
using namespace std;
#define EPS 0.001F
#define INF 1E+10F
......@@ -166,6 +168,50 @@ class DISOpticalFlowImpl : public DISOpticalFlow
Mat &src_Sy, Mat &_I0, Mat &_I1);
void operator()(const Range &range) const;
};
#ifdef HAVE_OPENCL
vector<UMat> u_I0s; //!< Gaussian pyramid for the current frame
vector<UMat> u_I1s; //!< Gaussian pyramid for the next frame
vector<UMat> u_I1s_ext; //!< I1s with borders
vector<UMat> u_I0xs; //!< Gaussian pyramid for the x gradient of the current frame
vector<UMat> u_I0ys; //!< Gaussian pyramid for the y gradient of the current frame
vector<UMat> u_Ux; //!< x component of the flow vectors
vector<UMat> u_Uy; //!< y component of the flow vectors
vector<UMat> u_initial_Ux; //!< x component of the initial flow field, if one was passed as an input
vector<UMat> u_initial_Uy; //!< y component of the initial flow field, if one was passed as an input
UMat u_U; //!< a buffer for the merged flow
UMat u_Sx; //!< intermediate sparse flow representation (x component)
UMat u_Sy; //!< intermediate sparse flow representation (y component)
/* Structure tensor components: */
UMat u_I0xx_buf; //!< sum of squares of x gradient values
UMat u_I0yy_buf; //!< sum of squares of y gradient values
UMat u_I0xy_buf; //!< sum of x and y gradient products
/* Extra buffers that are useful if patch mean-normalization is used: */
UMat u_I0x_buf; //!< sum of x gradient values
UMat u_I0y_buf; //!< sum of y gradient values
/* Auxiliary buffers used in structure tensor computation: */
UMat u_I0xx_buf_aux;
UMat u_I0yy_buf_aux;
UMat u_I0xy_buf_aux;
UMat u_I0x_buf_aux;
UMat u_I0y_buf_aux;
bool ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy,
UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y);
void ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow);
bool ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow);
bool ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1);
bool ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy,
UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level);
#endif
};
DISOpticalFlowImpl::DISOpticalFlowImpl()
......@@ -1004,6 +1050,319 @@ void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) c
#undef UPDATE_SPARSE_J_COORDINATES
}
#ifdef HAVE_OPENCL
bool DISOpticalFlowImpl::ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy,
UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level)
{
size_t globalSize[] = {(size_t)ws, (size_t)hs};
size_t localSize[] = {16, 16};
int idx;
int num_inner_iter = (int)floor(grad_descent_iter / (float)num_iter);
for (int iter = 0; iter < num_iter; iter++)
{
if (iter == 0)
{
ocl::Kernel k1("dis_patch_inverse_search_fwd_1", ocl::optflow::dis_flow_oclsrc);
size_t global_sz[] = {(size_t)hs * 8};
size_t local_sz[] = {8};
idx = 0;
idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux));
idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy));
idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I0));
idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I1));
idx = k1.set(idx, (int)border_size);
idx = k1.set(idx, (int)patch_size);
idx = k1.set(idx, (int)patch_stride);
idx = k1.set(idx, (int)w);
idx = k1.set(idx, (int)h);
idx = k1.set(idx, (int)ws);
idx = k1.set(idx, (int)hs);
idx = k1.set(idx, (int)pyr_level);
idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sx));
idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sy));
if (!k1.run(1, global_sz, local_sz, false))
return false;
ocl::Kernel k2("dis_patch_inverse_search_fwd_2", ocl::optflow::dis_flow_oclsrc);
idx = 0;
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I1));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0x));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0y));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf));
idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf));
idx = k2.set(idx, (int)border_size);
idx = k2.set(idx, (int)patch_size);
idx = k2.set(idx, (int)patch_stride);
idx = k2.set(idx, (int)w);
idx = k2.set(idx, (int)h);
idx = k2.set(idx, (int)ws);
idx = k2.set(idx, (int)hs);
idx = k2.set(idx, (int)num_inner_iter);
idx = k2.set(idx, (int)pyr_level);
idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx));
idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy));
if (!k2.run(2, globalSize, localSize, false))
return false;
}
else
{
ocl::Kernel k3("dis_patch_inverse_search_bwd_1", ocl::optflow::dis_flow_oclsrc);
size_t global_sz[] = {(size_t)hs * 8};
size_t local_sz[] = {8};
idx = 0;
idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I0));
idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I1));
idx = k3.set(idx, (int)border_size);
idx = k3.set(idx, (int)patch_size);
idx = k3.set(idx, (int)patch_stride);
idx = k3.set(idx, (int)w);
idx = k3.set(idx, (int)h);
idx = k3.set(idx, (int)ws);
idx = k3.set(idx, (int)hs);
idx = k3.set(idx, (int)pyr_level);
idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx));
idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy));
if (!k3.run(1, global_sz, local_sz, false))
return false;
ocl::Kernel k4("dis_patch_inverse_search_bwd_2", ocl::optflow::dis_flow_oclsrc);
idx = 0;
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I1));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0x));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0y));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf));
idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf));
idx = k4.set(idx, (int)border_size);
idx = k4.set(idx, (int)patch_size);
idx = k4.set(idx, (int)patch_stride);
idx = k4.set(idx, (int)w);
idx = k4.set(idx, (int)h);
idx = k4.set(idx, (int)ws);
idx = k4.set(idx, (int)hs);
idx = k4.set(idx, (int)num_inner_iter);
idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx));
idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy));
if (!k4.run(2, globalSize, localSize, false))
return false;
}
}
return true;
}
bool DISOpticalFlowImpl::ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1)
{
size_t globalSize[] = {(size_t)w, (size_t)h};
size_t localSize[] = {16, 16};
ocl::Kernel kernel("dis_densification", ocl::optflow::dis_flow_oclsrc);
kernel.args(ocl::KernelArg::PtrReadOnly(src_Sx),
ocl::KernelArg::PtrReadOnly(src_Sy),
ocl::KernelArg::PtrReadOnly(_I0),
ocl::KernelArg::PtrReadOnly(_I1),
(int)patch_size, (int)patch_stride,
(int)w, (int)h, (int)ws,
ocl::KernelArg::PtrWriteOnly(dst_Ux),
ocl::KernelArg::PtrWriteOnly(dst_Uy));
return kernel.run(2, globalSize, localSize, false);
}
void DISOpticalFlowImpl::ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow)
{
u_I0s.resize(coarsest_scale + 1);
u_I1s.resize(coarsest_scale + 1);
u_I1s_ext.resize(coarsest_scale + 1);
u_I0xs.resize(coarsest_scale + 1);
u_I0ys.resize(coarsest_scale + 1);
u_Ux.resize(coarsest_scale + 1);
u_Uy.resize(coarsest_scale + 1);
vector<UMat> flow_uv(2);
if (use_flow)
{
split(flow, flow_uv);
u_initial_Ux.resize(coarsest_scale + 1);
u_initial_Uy.resize(coarsest_scale + 1);
}
int fraction = 1;
int cur_rows = 0, cur_cols = 0;
for (int i = 0; i <= coarsest_scale; i++)
{
/* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */
if (i == finest_scale)
{
cur_rows = I0.rows / fraction;
cur_cols = I0.cols / fraction;
u_I0s[i].create(cur_rows, cur_cols, CV_8UC1);
resize(I0, u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA);
u_I1s[i].create(cur_rows, cur_cols, CV_8UC1);
resize(I1, u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA);
/* These buffers are reused in each scale so we initialize them once on the finest scale: */
u_Sx.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
u_Sy.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
u_I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
u_I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
u_I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
u_I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
u_I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
u_I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
u_I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
u_I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
u_I0x_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
u_I0y_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
u_U.create(cur_rows, cur_cols, CV_32FC2);
}
else if (i > finest_scale)
{
cur_rows = u_I0s[i - 1].rows / 2;
cur_cols = u_I0s[i - 1].cols / 2;
u_I0s[i].create(cur_rows, cur_cols, CV_8UC1);
resize(u_I0s[i - 1], u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA);
u_I1s[i].create(cur_rows, cur_cols, CV_8UC1);
resize(u_I1s[i - 1], u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA);
}
if (i >= finest_scale)
{
u_I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size, CV_8UC1);
copyMakeBorder(u_I1s[i], u_I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE);
u_I0xs[i].create(cur_rows, cur_cols, CV_16SC1);
u_I0ys[i].create(cur_rows, cur_cols, CV_16SC1);
spatialGradient(u_I0s[i], u_I0xs[i], u_I0ys[i]);
u_Ux[i].create(cur_rows, cur_cols, CV_32FC1);
u_Uy[i].create(cur_rows, cur_cols, CV_32FC1);
variational_refinement_processors[i]->setAlpha(variational_refinement_alpha);
variational_refinement_processors[i]->setDelta(variational_refinement_delta);
variational_refinement_processors[i]->setGamma(variational_refinement_gamma);
variational_refinement_processors[i]->setSorIterations(5);
variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter);
if (use_flow)
{
resize(flow_uv[0], u_initial_Ux[i], Size(cur_cols, cur_rows));
divide(u_initial_Ux[i], static_cast<float>(fraction), u_initial_Ux[i]);
resize(flow_uv[1], u_initial_Uy[i], Size(cur_cols, cur_rows));
divide(u_initial_Uy[i], static_cast<float>(fraction), u_initial_Uy[i]);
}
}
fraction *= 2;
}
}
bool DISOpticalFlowImpl::ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy,
UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y)
{
size_t globalSizeX[] = {(size_t)h};
size_t localSizeX[] = {16};
ocl::Kernel kernelX("dis_precomputeStructureTensor_hor", ocl::optflow::dis_flow_oclsrc);
kernelX.args(ocl::KernelArg::PtrReadOnly(I0x),
ocl::KernelArg::PtrReadOnly(I0y),
(int)patch_size, (int)patch_stride,
(int)w, (int)h, (int)ws,
ocl::KernelArg::PtrWriteOnly(u_I0xx_buf_aux),
ocl::KernelArg::PtrWriteOnly(u_I0yy_buf_aux),
ocl::KernelArg::PtrWriteOnly(u_I0xy_buf_aux),
ocl::KernelArg::PtrWriteOnly(u_I0x_buf_aux),
ocl::KernelArg::PtrWriteOnly(u_I0y_buf_aux));
if (!kernelX.run(1, globalSizeX, localSizeX, false))
return false;
size_t globalSizeY[] = {(size_t)ws};
size_t localSizeY[] = {16};
ocl::Kernel kernelY("dis_precomputeStructureTensor_ver", ocl::optflow::dis_flow_oclsrc);
kernelY.args(ocl::KernelArg::PtrReadOnly(u_I0xx_buf_aux),
ocl::KernelArg::PtrReadOnly(u_I0yy_buf_aux),
ocl::KernelArg::PtrReadOnly(u_I0xy_buf_aux),
ocl::KernelArg::PtrReadOnly(u_I0x_buf_aux),
ocl::KernelArg::PtrReadOnly(u_I0y_buf_aux),
(int)patch_size, (int)patch_stride,
(int)w, (int)h, (int)ws,
ocl::KernelArg::PtrWriteOnly(dst_I0xx),
ocl::KernelArg::PtrWriteOnly(dst_I0yy),
ocl::KernelArg::PtrWriteOnly(dst_I0xy),
ocl::KernelArg::PtrWriteOnly(dst_I0x),
ocl::KernelArg::PtrWriteOnly(dst_I0y));
return kernelY.run(1, globalSizeY, localSizeY, false);
}
bool DISOpticalFlowImpl::ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow)
{
UMat I0Mat = I0.getUMat();
UMat I1Mat = I1.getUMat();
bool use_input_flow = false;
if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2)
use_input_flow = true;
else
flow.create(I1Mat.size(), CV_32FC2);
UMat &u_flowMat = flow.getUMatRef();
coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size)) / log(2.0) + 0.5) - 1;
ocl_prepareBuffers(I0Mat, I1Mat, u_flowMat, use_input_flow);
u_Ux[coarsest_scale].setTo(0.0f);
u_Uy[coarsest_scale].setTo(0.0f);
for (int i = coarsest_scale; i >= finest_scale; i--)
{
w = u_I0s[i].cols;
h = u_I0s[i].rows;
ws = 1 + (w - patch_size) / patch_stride;
hs = 1 + (h - patch_size) / patch_stride;
if (!ocl_precomputeStructureTensor(u_I0xx_buf, u_I0yy_buf, u_I0xy_buf,
u_I0x_buf, u_I0y_buf, u_I0xs[i], u_I0ys[i]))
return false;
if (!ocl_PatchInverseSearch(u_Ux[i], u_Uy[i], u_I0s[i], u_I1s_ext[i], u_I0xs[i], u_I0ys[i], 2, i))
return false;
if (!ocl_Densification(u_Ux[i], u_Uy[i], u_Sx, u_Sy, u_I0s[i], u_I1s[i]))
return false;
if (variational_refinement_iter > 0)
variational_refinement_processors[i]->calcUV(u_I0s[i], u_I1s[i],
u_Ux[i].getMat(ACCESS_WRITE), u_Uy[i].getMat(ACCESS_WRITE));
if (i > finest_scale)
{
resize(u_Ux[i], u_Ux[i - 1], u_Ux[i - 1].size());
resize(u_Uy[i], u_Uy[i - 1], u_Uy[i - 1].size());
multiply(u_Ux[i - 1], 2, u_Ux[i - 1]);
multiply(u_Uy[i - 1], 2, u_Uy[i - 1]);
}
}
vector<UMat> uxy(2);
uxy[0] = u_Ux[finest_scale];
uxy[1] = u_Uy[finest_scale];
merge(uxy, u_U);
resize(u_U, u_flowMat, u_flowMat.size());
multiply(u_flowMat, 1 << finest_scale, u_flowMat);
return true;
}
#endif
void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow)
{
CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1);
......@@ -1012,6 +1371,10 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo
CV_Assert(I0.isContinuous());
CV_Assert(I1.isContinuous());
CV_OCL_RUN(ocl::Device::getDefault().isIntel() && flow.isUMat() &&
(patch_size == 8) && (use_spatial_propagation == true),
ocl_calc(I0, I1, flow));
Mat I0Mat = I0.getMat();
Mat I1Mat = I1.getMat();
bool use_input_flow = false;
......@@ -1019,7 +1382,7 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo
use_input_flow = true;
else
flow.create(I1Mat.size(), CV_32FC2);
Mat &flowMat = flow.getMatRef();
Mat flowMat = flow.getMat();
coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size)) / log(2.0) + 0.5) - 1;
int num_stripes = getNumThreads();
......@@ -1088,6 +1451,25 @@ void DISOpticalFlowImpl::collectGarbage()
I0yy_buf_aux.release();
I0xy_buf_aux.release();
#ifdef HAVE_OPENCL
u_I0s.clear();
u_I1s.clear();
u_I1s_ext.clear();
u_I0xs.clear();
u_I0ys.clear();
u_Ux.clear();
u_Uy.clear();
u_U.release();
u_Sx.release();
u_Sy.release();
u_I0xx_buf.release();
u_I0yy_buf.release();
u_I0xy_buf.release();
u_I0xx_buf_aux.release();
u_I0yy_buf_aux.release();
u_I0xy_buf_aux.release();
#endif
for (int i = finest_scale; i <= coarsest_scale; i++)
variational_refinement_processors[i]->collectGarbage();
variational_refinement_processors.clear();
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#define EPS 0.001f
#define INF 1E+10F
__kernel void dis_precomputeStructureTensor_hor(__global const short *I0x,
__global const short *I0y,
int patch_size, int patch_stride,
int w, int h, int ws,
__global float *I0xx_aux_ptr,
__global float *I0yy_aux_ptr,
__global float *I0xy_aux_ptr,
__global float *I0x_aux_ptr,
__global float *I0y_aux_ptr)
{
int i = get_global_id(0);
if (i >= h) return;
const __global short *x_row = I0x + i * w;
const __global short *y_row = I0y + i * w;
float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f;
float8 x_vec = convert_float8(vload8(0, x_row));
float8 y_vec = convert_float8(vload8(0, y_row));
sum_xx = dot(x_vec.lo, x_vec.lo) + dot(x_vec.hi, x_vec.hi);
sum_yy = dot(y_vec.lo, y_vec.lo) + dot(y_vec.hi, y_vec.hi);
sum_xy = dot(x_vec.lo, y_vec.lo) + dot(x_vec.hi, y_vec.hi);
sum_x = dot(x_vec.lo, 1.0f) + dot(x_vec.hi, 1.0f);
sum_y = dot(y_vec.lo, 1.0f) + dot(y_vec.hi, 1.0f);
I0xx_aux_ptr[i * ws] = sum_xx;
I0yy_aux_ptr[i * ws] = sum_yy;
I0xy_aux_ptr[i * ws] = sum_xy;
I0x_aux_ptr[i * ws] = sum_x;
I0y_aux_ptr[i * ws] = sum_y;
int js = 1;
for (int j = patch_size; j < w; j++)
{
short x_val1 = x_row[j];
short x_val2 = x_row[j - patch_size];
short y_val1 = y_row[j];
short y_val2 = y_row[j - patch_size];
sum_xx += (x_val1 * x_val1 - x_val2 * x_val2);
sum_yy += (y_val1 * y_val1 - y_val2 * y_val2);
sum_xy += (x_val1 * y_val1 - x_val2 * y_val2);
sum_x += (x_val1 - x_val2);
sum_y += (y_val1 - y_val2);
if ((j - patch_size + 1) % patch_stride == 0)
{
int index = i * ws + js;
I0xx_aux_ptr[index] = sum_xx;
I0yy_aux_ptr[index] = sum_yy;
I0xy_aux_ptr[index] = sum_xy;
I0x_aux_ptr[index] = sum_x;
I0y_aux_ptr[index] = sum_y;
js++;
}
}
}
__kernel void dis_precomputeStructureTensor_ver(__global const float *I0xx_aux_ptr,
__global const float *I0yy_aux_ptr,
__global const float *I0xy_aux_ptr,
__global const float *I0x_aux_ptr,
__global const float *I0y_aux_ptr,
int patch_size, int patch_stride,
int w, int h, int ws,
__global float *I0xx_ptr,
__global float *I0yy_ptr,
__global float *I0xy_ptr,
__global float *I0x_ptr,
__global float *I0y_ptr)
{
int j = get_global_id(0);
if (j >= ws) return;
float sum_xx, sum_yy, sum_xy, sum_x, sum_y;
sum_xx = sum_yy = sum_xy = sum_x = sum_y = 0.0f;
for (int i = 0; i < patch_size; i++)
{
sum_xx += I0xx_aux_ptr[i * ws + j];
sum_yy += I0yy_aux_ptr[i * ws + j];
sum_xy += I0xy_aux_ptr[i * ws + j];
sum_x += I0x_aux_ptr[i * ws + j];
sum_y += I0y_aux_ptr[i * ws + j];
}
I0xx_ptr[j] = sum_xx;
I0yy_ptr[j] = sum_yy;
I0xy_ptr[j] = sum_xy;
I0x_ptr[j] = sum_x;
I0y_ptr[j] = sum_y;
int is = 1;
for (int i = patch_size; i < h; i++)
{
sum_xx += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]);
sum_yy += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]);
sum_xy += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]);
sum_x += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]);
sum_y += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]);
if ((i - patch_size + 1) % patch_stride == 0)
{
I0xx_ptr[is * ws + j] = sum_xx;
I0yy_ptr[is * ws + j] = sum_yy;
I0xy_ptr[is * ws + j] = sum_xy;
I0x_ptr[is * ws + j] = sum_x;
I0y_ptr[is * ws + j] = sum_y;
is++;
}
}
}
__kernel void dis_densification(__global const float *sx, __global const float *sy,
__global const uchar *i0, __global const uchar *i1,
int psz, int pstr,
int w, int h, int ws,
__global float *ux, __global float *uy)
{
int x = get_global_id(0);
int y = get_global_id(1);
int i, j;
if (x >= w || y >= h) return;
int start_is, end_is;
int start_js, end_js;
end_is = min(y / pstr, (h - psz) / pstr);
start_is = max(0, y - psz + pstr) / pstr;
start_is = min(start_is, end_is);
end_js = min(x / pstr, (w - psz) / pstr);
start_js = max(0, x - psz + pstr) / pstr;
start_js = min(start_js, end_js);
float coef, sum_coef = 0.0f;
float sum_Ux = 0.0f;
float sum_Uy = 0.0f;
int i_l, i_u;
int j_l, j_u;
float i_m, j_m, diff;
i = y;
j = x;
/* Iterate through all the patches that overlap the current location (i,j) */
for (int is = start_is; is <= end_is; is++)
for (int js = start_js; js <= end_js; js++)
{
float sx_val = sx[is * ws + js];
float sy_val = sy[is * ws + js];
uchar2 i1_vec1, i1_vec2;
j_m = min(max(j + sx_val, 0.0f), w - 1.0f - EPS);
i_m = min(max(i + sy_val, 0.0f), h - 1.0f - EPS);
j_l = (int)j_m;
j_u = j_l + 1;
i_l = (int)i_m;
i_u = i_l + 1;
i1_vec1 = vload2(0, i1 + i_u * w + j_l);
i1_vec2 = vload2(0, i1 + i_l * w + j_l);
diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y +
(j_u - j_m) * (i_m - i_l) * i1_vec1.x +
(j_m - j_l) * (i_u - i_m) * i1_vec2.y +
(j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j];
coef = 1 / max(1.0f, fabs(diff));
sum_Ux += coef * sx_val;
sum_Uy += coef * sy_val;
sum_coef += coef;
}
ux[i * w + j] = sum_Ux / sum_coef;
uy[i * w + j] = sum_Uy / sum_coef;
}
#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \
i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \
j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \
\
w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \
w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \
w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \
w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1);
float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
int I0_stride, int I1_stride,
float w00, float w01, float w10, float w11, int patch_sz, int i)
{
float sum_diff = 0.0f, sum_diff_sq = 0.0f;
int n = patch_sz * patch_sz;
uchar8 I1_vec1, I1_vec2, I0_vec;
uchar I1_val1, I1_val2;
I0_vec = vload8(0, I0_ptr + i * I0_stride);
I1_vec1 = vload8(0, I1_ptr + i * I1_stride);
I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
I1_val1 = I1_ptr[i * I1_stride + 8];
I1_val2 = I1_ptr[(i + 1) * I1_stride + 8];
float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
convert_float8(I0_vec);
sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
sum_diff = sub_group_reduce_add(sum_diff);
sum_diff_sq = sub_group_reduce_add(sum_diff_sq);
return sum_diff_sq - sum_diff * sum_diff / n;
}
__kernel void dis_patch_inverse_search_fwd_1(__global const float *Ux_ptr, __global const float *Uy_ptr,
__global const uchar *I0_ptr, __global const uchar *I1_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int pyr_level,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int id = get_global_id(0);
int is = id / 8;
if (id >= (hs * 8)) return;
int i = is * patch_stride;
int j = 0;
int psz = patch_size;
int psz2 = psz / 2;
int w_ext = w + 2 * border_size;
int bsz = border_size;
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + w - 1.0f;
float i_I1, j_I1, w00, w01, w10, w11;
float prev_Ux = Ux_ptr[(i + psz2) * w + j + psz2];
float prev_Uy = Uy_ptr[(i + psz2) * w + j + psz2];
Sx_ptr[is * ws] = prev_Ux;
Sy_ptr[is * ws] = prev_Uy;
j += patch_stride;
int sid = get_sub_group_local_id();
for (int js = 1; js < ws; js++, j += patch_stride)
{
float min_SSD, cur_SSD;
float Ux = Ux_ptr[(i + psz2) * w + j + psz2];
float Uy = Uy_ptr[(i + psz2) * w + j + psz2];
INIT_BILINEAR_WEIGHTS(Ux, Uy);
min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
INIT_BILINEAR_WEIGHTS(prev_Ux, prev_Uy);
cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
if (cur_SSD < min_SSD)
{
Ux = prev_Ux;
Uy = prev_Uy;
}
prev_Ux = Ux;
prev_Uy = Uy;
Sx_ptr[is * ws + js] = Ux;
Sy_ptr[is * ws + js] = Uy;
}
}
float3 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
const __global short *I0x_ptr, const __global short *I0y_ptr,
int I0_stride, int I1_stride, float w00, float w01, float w10,
float w11, int patch_sz, float x_grad_sum, float y_grad_sum)
{
float sum_diff = 0.0, sum_diff_sq = 0.0;
float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
int n = patch_sz * patch_sz;
uchar8 I1_vec1, I1_vec2;
uchar I1_val1, I1_val2;
for (int i = 0; i < 8; i++)
{
uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride);
I1_vec1 = (i == 0) ? vload8(0, I1_ptr + i * I1_stride) : I1_vec2;
I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
I1_val1 = (i == 0) ? I1_ptr[i * I1_stride + patch_sz] : I1_val2;
I1_val2 = I1_ptr[(i + 1) * I1_stride + patch_sz];
float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
convert_float8(I0_vec);
sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride);
short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride);
sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo));
sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi));
sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo));
sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi));
}
float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n;
float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n;
float SSD = sum_diff_sq - sum_diff * sum_diff / n;
return (float3)(SSD, dst_dUx, dst_dUy);
}
__kernel void dis_patch_inverse_search_fwd_2(__global const float *Ux_ptr, __global const float *Uy_ptr,
__global const uchar *I0_ptr, __global const uchar *I1_ptr,
__global const short *I0x_ptr, __global const short *I0y_ptr,
__global const float *xx_ptr, __global const float *yy_ptr,
__global const float *xy_ptr,
__global const float *x_ptr, __global const float *y_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int num_inner_iter, int pyr_level,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int js = get_global_id(0);
int is = get_global_id(1);
int i = is * patch_stride;
int j = js * patch_stride;
int psz = patch_size;
int psz2 = psz / 2;
int w_ext = w + 2 * border_size;
int bsz = border_size;
int index = is * ws + js;
if (js >= ws || is >= hs) return;
float Ux = Sx_ptr[index];
float Uy = Sy_ptr[index];
float cur_Ux = Ux;
float cur_Uy = Uy;
float cur_xx = xx_ptr[index];
float cur_yy = yy_ptr[index];
float cur_xy = xy_ptr[index];
float detH = cur_xx * cur_yy - cur_xy * cur_xy;
if (fabs(detH) < EPS) detH = EPS;
float invH11 = cur_yy / detH;
float invH12 = -cur_xy / detH;
float invH22 = cur_xx / detH;
float prev_SSD = INF, SSD;
float x_grad_sum = x_ptr[index];
float y_grad_sum = y_ptr[index];
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + w - 1.0f;
float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy;
float3 res;
for (int t = 0; t < num_inner_iter; t++)
{
INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy);
res = processPatchMeanNorm(I0_ptr + i * w + j,
I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j,
I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz,
x_grad_sum, y_grad_sum);
SSD = res.x;
dUx = res.y;
dUy = res.z;
dx = invH11 * dUx + invH12 * dUy;
dy = invH12 * dUx + invH22 * dUy;
cur_Ux -= dx;
cur_Uy -= dy;
if (SSD >= prev_SSD)
break;
prev_SSD = SSD;
}
float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy);
if (dot(vec, vec) <= (float)(psz * psz))
{
Sx_ptr[index] = cur_Ux;
Sy_ptr[index] = cur_Uy;
}
}
__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int pyr_level,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int id = get_global_id(0);
int is = id / 8;
if (id >= (hs * 8)) return;
is = (hs - 1 - is);
int i = is * patch_stride;
int j = (ws - 2) * patch_stride;
int psz = patch_size;
int psz2 = psz / 2;
int w_ext = w + 2 * border_size;
int bsz = border_size;
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + w - 1.0f;
float i_I1, j_I1, w00, w01, w10, w11;
int sid = get_sub_group_local_id();
for (int js = (ws - 2); js > -1; js--, j -= patch_stride)
{
float min_SSD, cur_SSD;
float2 Ux = vload2(0, Sx_ptr + is * ws + js);
float2 Uy = vload2(0, Sy_ptr + is * ws + js);
INIT_BILINEAR_WEIGHTS(Ux.x, Uy.x);
min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
INIT_BILINEAR_WEIGHTS(Ux.y, Uy.y);
cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
if (cur_SSD < min_SSD)
{
Sx_ptr[is * ws + js] = Ux.y;
Sy_ptr[is * ws + js] = Uy.y;
}
}
}
__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
__global const short *I0x_ptr, __global const short *I0y_ptr,
__global const float *xx_ptr, __global const float *yy_ptr,
__global const float *xy_ptr,
__global const float *x_ptr, __global const float *y_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int num_inner_iter,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int js = get_global_id(0);
int is = get_global_id(1);
if (js >= ws || is >= hs) return;
js = (ws - 1 - js);
is = (hs - 1 - is);
int j = js * patch_stride;
int i = is * patch_stride;
int psz = patch_size;
int psz2 = psz / 2;
int w_ext = w + 2 * border_size;
int bsz = border_size;
int index = is * ws + js;
float Ux = Sx_ptr[index];
float Uy = Sy_ptr[index];
float cur_Ux = Ux;
float cur_Uy = Uy;
float cur_xx = xx_ptr[index];
float cur_yy = yy_ptr[index];
float cur_xy = xy_ptr[index];
float detH = cur_xx * cur_yy - cur_xy * cur_xy;
if (fabs(detH) < EPS) detH = EPS;
float invH11 = cur_yy / detH;
float invH12 = -cur_xy / detH;
float invH22 = cur_xx / detH;
float prev_SSD = INF, SSD;
float x_grad_sum = x_ptr[index];
float y_grad_sum = y_ptr[index];
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + w - 1.0f;
float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy;
float3 res;
for (int t = 0; t < num_inner_iter; t++)
{
INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy);
res = processPatchMeanNorm(I0_ptr + i * w + j,
I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j,
I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz,
x_grad_sum, y_grad_sum);
SSD = res.x;
dUx = res.y;
dUy = res.z;
dx = invH11 * dUx + invH12 * dUy;
dy = invH12 * dUx + invH22 * dUy;
cur_Ux -= dx;
cur_Uy -= dy;
if (SSD >= prev_SSD)
break;
prev_SSD = SSD;
}
float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy);
if ((dot(vec, vec)) <= (float)(psz * psz))
{
Sx_ptr[index] = cur_Ux;
Sy_ptr[index] = cur_Uy;
}
}
......@@ -45,6 +45,8 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/optflow.hpp>
#include <opencv2/video.hpp>
#include <opencv2/imgproc.hpp>
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#include <algorithm>
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
using namespace cv;
using namespace optflow;
namespace cvtest {
namespace ocl {
PARAM_TEST_CASE(OCL_DenseOpticalFlow_DIS, int)
{
int preset;
virtual void SetUp()
{
preset = GET_PARAM(0);
}
};
OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat)
{
Mat frame1, frame2, GT;
frame1 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale1.png");
frame2 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale2.png");
CV_Assert(!frame1.empty() && !frame2.empty());
cvtColor(frame1, frame1, COLOR_BGR2GRAY);
cvtColor(frame2, frame2, COLOR_BGR2GRAY);
Ptr<DenseOpticalFlow> algo;
// iterate over presets:
for (int i = 0; i < test_loop_times; i++)
{
Mat flow;
UMat ocl_flow;
algo = createOptFlow_DIS(preset);
OCL_OFF(algo->calc(frame1, frame2, flow));
OCL_ON(algo->calc(frame1, frame2, ocl_flow));
ASSERT_EQ(flow.rows, ocl_flow.rows);
ASSERT_EQ(flow.cols, ocl_flow.cols);
EXPECT_MAT_SIMILAR(flow, ocl_flow, 6e-3);
}
}
OCL_INSTANTIATE_TEST_CASE_P(Contrib, OCL_DenseOpticalFlow_DIS,
Values(DISOpticalFlow::PRESET_ULTRAFAST,
DISOpticalFlow::PRESET_FAST,
DISOpticalFlow::PRESET_MEDIUM));
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment