Commit c57e427f authored by vbystricky's avatar vbystricky

Move OpticalFlowPyrLK from ocl module to video module

parent ac3f06bc
......@@ -60,6 +60,7 @@ class CV_EXPORTS Program;
class CV_EXPORTS ProgramSource2;
class CV_EXPORTS Queue;
class CV_EXPORTS PlatformInfo2;
class CV_EXPORTS Image2D;
class CV_EXPORTS Device
{
......@@ -89,6 +90,7 @@ public:
String vendor() const;
String OpenCL_C_Version() const;
String OpenCLVersion() const;
String deviceVersion() const;
String driverVersion() const;
void* ptr() const;
......@@ -325,6 +327,7 @@ public:
const String& buildopts, String* errmsg=0);
int set(int i, const void* value, size_t sz);
int set(int i, const Image2D& image2D);
int set(int i, const UMat& m);
int set(int i, const KernelArg& arg);
template<typename _Tp> int set(int i, const _Tp& value)
......@@ -574,6 +577,19 @@ CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo2>& platform_info);
class CV_EXPORTS Image2D
{
public:
Image2D();
Image2D(const UMat &src);
~Image2D();
void* ptr() const;
protected:
struct Impl;
Impl* p;
};
}}
#endif
......@@ -1705,6 +1705,9 @@ String Device::OpenCL_C_Version() const
String Device::OpenCLVersion() const
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
String Device::deviceVersion() const
{ return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
String Device::driverVersion() const
{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
......@@ -2689,6 +2692,12 @@ int Kernel::set(int i, const void* value, size_t sz)
return i+1;
}
int Kernel::set(int i, const Image2D& image2D)
{
cl_mem h = (cl_mem)image2D.ptr();
return set(i, &h, sizeof(h));
}
int Kernel::set(int i, const UMat& m)
{
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
......@@ -3785,4 +3794,149 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
return buf;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// deviceVersion has format
// OpenCL<space><major_version.minor_version><space><vendor-specific information>
// by specification
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
{
major = minor = 0;
if (10 >= deviceVersion.length())
return;
const char *pstr = deviceVersion.c_str();
if (0 != strncmp(pstr, "OpenCL ", 7))
return;
size_t ppos = deviceVersion.find('.', 7);
if (String::npos == ppos)
return;
String temp = deviceVersion.substr(7, ppos - 7);
major = atoi(temp.c_str());
temp = deviceVersion.substr(ppos + 1);
minor = atoi(temp.c_str());
}
struct Image2D::Impl
{
Impl(const UMat &src)
{
init(src);
}
~Impl()
{
if (handle)
clReleaseMemObject(handle);
}
void init(const UMat &src)
{
cl_image_format format;
int err;
int depth = src.depth();
int channels = src.channels();
switch(depth)
{
case CV_8U:
format.image_channel_data_type = CL_UNSIGNED_INT8;
break;
case CV_32S:
format.image_channel_data_type = CL_UNSIGNED_INT32;
break;
case CV_32F:
format.image_channel_data_type = CL_FLOAT;
break;
default:
CV_Error(-1, "Image forma is not supported");
break;
}
switch(channels)
{
case 1:
format.image_channel_order = CL_R;
break;
case 3:
format.image_channel_order = CL_RGB;
break;
case 4:
format.image_channel_order = CL_RGBA;
break;
default:
CV_Error(-1, "Image format is not supported");
break;
}
#ifdef CL_VERSION_1_2
//this enables backwards portability to
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
int minor, major;
parseDeviceVersion(Device::getDefault().deviceVersion(), major, minor);
if ((1 < major) || ((1 == major) && (2 <= minor)))
{
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = src.cols;
desc.image_height = src.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
handle = clCreateImage((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
}
else
#endif
{
handle = clCreateImage2D((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
}
size_t origin[] = { 0, 0, 0 };
size_t region[] = { src.cols, src.rows, 1 };
cl_mem devData;
if (!src.isContinuous())
{
devData = clCreateBuffer((cl_context)Context2::getDefault().ptr(), CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, NULL);
const size_t roi[3] = {src.cols * src.elemSize(), src.rows, 1};
clEnqueueCopyBufferRect((cl_command_queue)Queue::getDefault().ptr(), (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL);
clFlush((cl_command_queue)Queue::getDefault().ptr());
}
else
{
devData = (cl_mem)src.handle(ACCESS_READ);
}
clEnqueueCopyBufferToImage((cl_command_queue)Queue::getDefault().ptr(), devData, handle, 0, origin, region, 0, NULL, 0);
if (!src.isContinuous())
{
clFlush((cl_command_queue)Queue::getDefault().ptr());
clReleaseMemObject(devData);
}
}
IMPLEMENT_REFCOUNTABLE();
cl_mem handle;
};
Image2D::Image2D()
{
p = NULL;
}
Image2D::Image2D(const UMat &src)
{
p = new Impl(src);
}
Image2D::~Image2D()
{
if (p)
p->release();
}
void* Image2D::ptr() const
{
return p ? p->handle : 0;
}
}}
......@@ -43,6 +43,7 @@
#include <float.h>
#include <stdio.h>
#include "lkpyramid.hpp"
#include "opencl_kernels.hpp"
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
......@@ -590,6 +591,231 @@ int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Si
return maxLevel;
}
namespace cv
{
class PyrLKOpticalFlow
{
struct dim3
{
unsigned int x, y, z;
};
public:
PyrLKOpticalFlow()
{
winSize = Size(21, 21);
maxLevel = 3;
iters = 30;
derivLambda = 0.5;
useInitialFlow = false;
//minEigThreshold = 1e-4f;
//getMinEigenVals = false;
}
bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
{
if (prevPts.empty())
{
nextPts.release();
status.release();
return false;
}
derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
if (derivLambda < 0)
return false;
if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
return false;
iters = std::min(std::max(iters, 0), 100);
if (prevPts.rows != 1 || prevPts.type() != CV_32FC2)
return false;
dim3 patch;
calcPatchSize(patch);
if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
return false;
if (!initWaveSize())
return false;
if (useInitialFlow)
{
if (nextPts.size() != prevPts.size() || nextPts.type() != CV_32FC2)
return false;
}
else
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
UMat temp2 = nextPts.reshape(1);
multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2);
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
status.setTo(Scalar::all(1));
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err);
// build the image pyramids.
std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);
prevImg.convertTo(prevPyr[0], CV_32F);
nextImg.convertTo(nextPyr[0], CV_32F);
for (int level = 1; level <= maxLevel; ++level)
{
pyrDown(prevPyr[level - 1], prevPyr[level]);
pyrDown(nextPyr[level - 1], nextPyr[level]);
}
// dI/dx ~ Ix, dI/dy ~ Iy
for (int level = maxLevel; level >= 0; level--)
{
lkSparse_run(prevPyr[level], nextPyr[level],
prevPts, nextPts, status, err, prevPts.cols,
level, patch);
}
return true;
}
Size winSize;
int maxLevel;
int iters;
double derivLambda;
bool useInitialFlow;
//float minEigThreshold;
//bool getMinEigenVals;
private:
void calcPatchSize(dim3 &patch)
{
dim3 block;
//winSize.width *= cn;
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
{
block.x = 32;
block.y = 8;
}
else
{
block.x = 16;
block.y = 16;
}
patch.x = (winSize.width + block.x - 1) / block.x;
patch.y = (winSize.height + block.y - 1) / block.y;
block.z = patch.z = 1;
}
private:
int waveSize;
bool initWaveSize()
{
waveSize = 1;
if (isDeviceCPU())
return true;
ocl::Kernel kernel;
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
return false;
waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
return true;
}
bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
int ptcount, int level, dim3 patch)
{
size_t localThreads[3] = { 8, 8};
size_t globalThreads[3] = { 8 * ptcount, 8};
char calcErr = (0 == level) ? 1 : 0;
cv::String build_options;
if (isDeviceCPU())
build_options = " -D CPU";
else
build_options = cv::format("-D WAVE_SIZE=%d", waveSize);
ocl::Kernel kernel;
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
return false;
ocl::Image2D imageI(I);
ocl::Image2D imageJ(J);
int idxArg = 0;
idxArg = kernel.set(idxArg, imageI); //image2d_t I
idxArg = kernel.set(idxArg, imageJ); //image2d_t J
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
idxArg = kernel.set(idxArg, (int)prevPts.step); // int prevPtsStep
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
idxArg = kernel.set(idxArg, (int)nextPts.step); // int nextPtsStep
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
idxArg = kernel.set(idxArg, (int)level); // const int level
idxArg = kernel.set(idxArg, (int)I.rows); // const int rows
idxArg = kernel.set(idxArg, (int)I.cols); // const int cols
idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X
idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y
idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x
idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
idxArg = kernel.set(idxArg, (int)iters); // int c_iters
idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
return kernel.run(2, globalThreads, localThreads, true);
}
private:
inline static bool isDeviceCPU()
{
return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
}
inline static void ensureSizeIsEnough(int rows, int cols, int type, UMat &m)
{
if (m.type() == type && m.rows >= rows && m.cols >= cols)
m = m(Rect(0, 0, cols, rows));
else
m.create(rows, cols, type);
}
};
bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status, OutputArray _err,
Size winSize, int maxLevel,
TermCriteria criteria,
int flags/*, double minEigThreshold*/ )
{
if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
return false;
if (!cv::ocl::Device::getDefault().imageSupport())
return false;
if (_nextImg.size() != _prevImg.size())
return false;
int typePrev = _prevImg.type();
int typeNext = _nextImg.type();
if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
return false;
if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
return false;
PyrLKOpticalFlow opticalFlow;
opticalFlow.winSize = winSize;
opticalFlow.maxLevel = maxLevel;
opticalFlow.iters = criteria.maxCount;
opticalFlow.derivLambda = criteria.epsilon;
opticalFlow.useInitialFlow = (0 != (flags & OPTFLOW_USE_INITIAL_FLOW));
UMat umatErr;
if (_err.needed())
{
_err.create(_prevPts.size(), CV_8UC1);
umatErr = _err.getUMat();
}
_nextPts.create(_prevPts.size(), _prevPts.type());
_status.create(_prevPts.size(), CV_8UC1);
UMat umatNextPts = _nextPts.getUMat();
UMat umatStatus = _status.getUMat();
return opticalFlow.sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
}
};
void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status, OutputArray _err,
......@@ -597,6 +823,10 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
TermCriteria criteria,
int flags, double minEigThreshold )
{
bool use_opencl = ocl::useOpenCL() && (_prevImg.isUMat() || _nextImg.isUMat());
if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/))
return;
Mat prevPtsMat = _prevPts.getMat();
const int derivDepth = DataType<cv::detail::deriv_type>::depth;
......
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
/////////////////////////////////////////////////////////////////////////////////////////////////
// PyrLKOpticalFlow
PARAM_TEST_CASE(PyrLKOpticalFlow, int, int)
{
Size winSize;
int maxLevel;
TermCriteria criteria;
int flags;
double minEigThreshold;
virtual void SetUp()
{
winSize = Size(GET_PARAM(0), GET_PARAM(0));
maxLevel = GET_PARAM(1);
criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01);
flags = 0;
minEigThreshold = 1e-4f;
}
};
OCL_TEST_P(PyrLKOpticalFlow, Mat)
{
cv::Mat frame0 = readImage("optflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
UMat umatFrame0; frame0.copyTo(umatFrame0);
cv::Mat frame1 = readImage("optflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
UMat umatFrame1; frame1.copyTo(umatFrame1);
std::vector<cv::Point2f> pts;
cv::goodFeaturesToTrack(frame0, pts, 1000, 0.01, 0.0);
std::vector<cv::Point2f> nextPtsCPU;
std::vector<unsigned char> statusCPU;
std::vector<float> errCPU;
OCL_OFF(cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPtsCPU, statusCPU, errCPU, winSize, maxLevel, criteria, flags, minEigThreshold));
UMat umatNextPts, umatStatus, umatErr;
OCL_ON(cv::calcOpticalFlowPyrLK(umatFrame0, umatFrame1, pts, umatNextPts, umatStatus, umatErr, winSize, maxLevel, criteria, flags, minEigThreshold));
std::vector<cv::Point2f> nextPts(umatNextPts.cols); umatNextPts.copyTo(nextPts);
std::vector<unsigned char> status; umatStatus.copyTo(status);
std::vector<float> err; umatErr.copyTo(err);
ASSERT_EQ(nextPtsCPU.size(), nextPts.size());
ASSERT_EQ(statusCPU.size(), status.size());
size_t mistmatch = 0;
for (size_t i = 0; i < nextPts.size(); ++i)
{
if (status[i] != statusCPU[i])
{
++mistmatch;
continue;
}
if (status[i])
{
cv::Point2i a = nextPts[i];
cv::Point2i b = nextPtsCPU[i];
bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
float errdiff = 0.0f;
if (!eq || errdiff > 1e-1)
++mistmatch;
}
}
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size());
ASSERT_LE(bad_ratio, 0.02f);
}
OCL_INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlow,
Combine(
Values(21, 25),
Values(3, 5)
)
);
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment