Commit 0239c195 authored by Juan María Gómez López's avatar Juan María Gómez López Committed by Alexander Alekhin

Merge pull request #11060 from juanecito:2.4

* Thread-safe version of sparse function in cv::gpu::PyrLKOpticalFlow
class. The new function name is sparse_multi

* Thread-safe sparse function in cv::gpu::PyrLKOpticalFlow. Tests

* Thread-safe sparse function in cv::gpu::PyrLKOpticalFlow class.

Add intel_TBB conditional compilation
parent a32aec5b
...@@ -60,6 +60,10 @@ ...@@ -60,6 +60,10 @@
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif #endif
// Helper for the stubs that stand in for TBB-only entry points: when the
// library is built without HAVE_TBB, calling such a stub raises
// CV_StsNotImplemented with the message below.
#if !defined(HAVE_TBB)
#define throw_notbb() CV_Error(CV_StsNotImplemented, "The library is compiled without TBB support")
#endif
namespace cv { namespace gpu { namespace cv { namespace gpu {
//////////////////////////////// CudaMem //////////////////////////////// //////////////////////////////// CudaMem ////////////////////////////////
...@@ -1824,6 +1828,14 @@ public: ...@@ -1824,6 +1828,14 @@ public:
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
GpuMat& status, GpuMat* err = 0); GpuMat& status, GpuMat* err = 0);
// Variant of sparse() meant to be called from several host threads; the
// tracking kernels are issued on the caller-supplied stream.
// Only implemented when the library is built with TBB. Without TBB the call
// raises CV_StsNotImplemented.
// The trailing error-output pointer defaults to 0 in BOTH configurations so
// that a six-argument call compiles regardless of how the library was built.
#if !defined(HAVE_TBB)
void sparse_multi(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&,
GpuMat&, Stream&, GpuMat* = 0) {throw_notbb();}
#else
void sparse_multi(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
GpuMat& status, Stream& stream, GpuMat* err = 0);
#endif
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0); void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
void releaseMemory(); void releaseMemory();
......
...@@ -303,6 +303,88 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse ...@@ -303,6 +303,88 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse
} }
} }
//////////////////////////////////////////////////////
// PyrLKOpticalFlowSparseMulti
#ifdef HAVE_TBB
DEF_PARAM_TEST(ImagePair_Gray_NPts_WinSz_Levels_Iters, pair_string, bool, int, int, int, int);

// Performance test for PyrLKOpticalFlow::sparse_multi. The GPU path times
// sparse_multi followed by a wait on its stream; the CPU path times
// cv::calcOpticalFlowPyrLK with the same window size, pyramid depth and
// iteration count.
PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparseMulti,
            Combine(Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
                    Bool(),
                    Values(8000),
                    Values(21),
                    Values(1, 3),
                    Values(1, 30)))
{
    declare.time(20.0);

    // Unpack the test parameters.
    const pair_string framePaths = GET_PARAM(0);
    const bool grayInput = GET_PARAM(1);
    const int maxCorners = GET_PARAM(2);
    const int window = GET_PARAM(3);
    const int pyramidLevels = GET_PARAM(4);
    const int iterations = GET_PARAM(5);

    const int readFlags = grayInput ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR;

    const cv::Mat frame0 = readImage(framePaths.first, readFlags);
    ASSERT_FALSE(frame0.empty());

    const cv::Mat frame1 = readImage(framePaths.second, readFlags);
    ASSERT_FALSE(frame1.empty());

    // Corner detection always runs on a single-channel image.
    cv::Mat cornerSource;
    if (!grayInput)
        cv::cvtColor(frame0, cornerSource, cv::COLOR_BGR2GRAY);
    else
        cornerSource = frame0;

    cv::Mat pts;
    cv::goodFeaturesToTrack(cornerSource, pts, maxCorners, 0.01, 0.0);

    const cv::Size searchWindow(window, window);
    const int topLevel = pyramidLevels - 1;

    if (!PERF_RUN_GPU())
    {
        // NOTE: the names nextPts/status are kept on purpose; the sanity-check
        // macros are assumed to derive their record keys from them.
        cv::Mat nextPts;
        cv::Mat status;

        const cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iterations, 0.01);

        TEST_CYCLE()
        {
            cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
                                     searchWindow, topLevel, criteria);
        }

        CPU_SANITY_CHECK(nextPts);
        CPU_SANITY_CHECK(status);
    }
    else
    {
        const cv::gpu::GpuMat d_pts(pts.reshape(2, 1));

        cv::gpu::PyrLKOpticalFlow tracker;
        tracker.winSize = searchWindow;
        tracker.maxLevel = topLevel;
        tracker.iters = iterations;

        const cv::gpu::GpuMat d_frame0(frame0);
        const cv::gpu::GpuMat d_frame1(frame1);

        cv::gpu::GpuMat nextPts;
        cv::gpu::GpuMat status;
        cv::gpu::Stream stream;

        TEST_CYCLE()
        {
            tracker.sparse_multi(d_frame0, d_frame1, d_pts, nextPts, status, stream);
            // Include the asynchronous work on the stream in the measured time.
            stream.waitForCompletion();
        }

        GPU_SANITY_CHECK(nextPts);
        GPU_SANITY_CHECK(status);
    }
}
#endif // HAVE_TBB
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// PyrLKOpticalFlowDense // PyrLKOpticalFlowDense
......
This diff is collapsed.
...@@ -42,6 +42,11 @@ ...@@ -42,6 +42,11 @@
#include "precomp.hpp" #include "precomp.hpp"
#ifdef HAVE_TBB
#include <tbb/compat/condition_variable>
#include <tbb/mutex.h>
#endif
using namespace std; using namespace std;
using namespace cv; using namespace cv;
using namespace cv::gpu; using namespace cv::gpu;
...@@ -64,6 +69,22 @@ namespace pyrlk ...@@ -64,6 +69,22 @@ namespace pyrlk
void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream = 0); int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
// "_multi" counterparts of the constant loader and the sparse kernels' host
// entry points. They take an extra `index` argument; sparse_multi passes the
// constant-slot index it reserved for the current call.
#if !defined(HAVE_TBB)
// Built without TBB: these are stubs that only raise CV_StsNotImplemented.
void loadConstants_multi(int2, int, int, cudaStream_t) { throw_notbb(); }
void sparse1_multi(PtrStepSzf, PtrStepSzf, const float2*, float2*, uchar*, float*, int,
int, dim3, dim3, cudaStream_t, int) { throw_notbb(); }
void sparse4_multi(PtrStepSz<float4>, PtrStepSz<float4>, const float2*, float2*, uchar*, float*, int,
int, dim3, dim3, cudaStream_t, int) { throw_notbb(); }
#else
// Built with TBB: declarations only; the definitions are not part of this file section.
void loadConstants_multi(int2 winSize, int iters, int index = 0, cudaStream_t stream = 0);
// Single-channel (float) image variant.
void sparse1_multi(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream = 0, int index = 0);
// Four-channel (float4) image variant.
void sparse4_multi(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream = 0, int index = 0);
#endif
void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
PtrStepSzf err, int2 winSize, cudaStream_t stream = 0); PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
} }
...@@ -98,7 +119,9 @@ namespace ...@@ -98,7 +119,9 @@ namespace
} }
} }
void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err) void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg,
const GpuMat& nextImg, const GpuMat& prevPts,
GpuMat& nextPts, GpuMat& status, GpuMat* err)
{ {
if (prevPts.empty()) if (prevPts.empty())
{ {
...@@ -181,6 +204,130 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next ...@@ -181,6 +204,130 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
} }
} }
#ifdef HAVE_TBB
//--------------------------------------------------------------------------
// Multi-threading support
// Table of the five constant slots shared by all sparse_multi callers:
// true means the slot is free, false means a call currently owns it.
static bool index_vector_use[5] = {true, true, true, true, true}; // all free
// Guards every read and write of index_vector_use.
static tbb::mutex s_PyrLKOpticalFlow_Mutex;
// Signalled when a slot is handed back, so a caller waiting for a free slot can retry.
static condition_variable s_PyrLKOpticalFlow_ConditionVariable;
// Sparse pyramidal Lucas-Kanade tracking that may be called from several
// host threads at once.
//
// prevImg, nextImg : input frames; same size and type, with 1, 3 or 4 channels.
// prevPts          : 1 x N row of CV_32FC2 points to track.
// nextPts          : receives the tracked positions; when useInitialFlow is
//                    set it must already hold the initial guesses
//                    (same size as prevPts, CV_32FC2).
// status           : receives a 1 x N CV_8UC1 row, initialised to 1 before tracking.
// stream           : stream on which the tracking kernels are launched.
// err              : optional 1 x N CV_32FC1 error output; may be 0.
//
// Concurrent calls share a fixed table of constant slots. A call reserves one
// slot, blocking while none is free, and returns it when it leaves the
// function, including when it leaves through an exception.
//
// NOTE(review): the pyramids live in the member buffers prevPyr_/nextPyr_/buf_,
// so each thread presumably needs its own PyrLKOpticalFlow object - confirm.
void cv::gpu::PyrLKOpticalFlow::sparse_multi(const GpuMat& prevImg,
        const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
        GpuMat& status, Stream& stream, GpuMat* err)
{
    if (prevPts.empty())
    {
        nextPts.release();
        status.release();
        if (err) err->release();
        return;
    }

    dim3 block, patch;
    calcPatchSize(winSize, block, patch);

    CV_Assert(prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4);
    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
    CV_Assert(maxLevel >= 0);
    CV_Assert(winSize.width > 2 && winSize.height > 2);
    CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
    CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);

    if (useInitialFlow)
        CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
    else
        ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);

    // Scale the starting points down to the coordinate system used when the
    // coarsest pyramid level is entered.
    GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
    GpuMat temp2 = nextPts.reshape(1);
    multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);

    ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
    status.setTo(Scalar::all(1));

    if (err)
        ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);

    // build the image pyramids.
    prevPyr_.resize(maxLevel + 1);
    nextPyr_.resize(maxLevel + 1);

    int cn = prevImg.channels();

    if (cn == 1 || cn == 4)
    {
        prevImg.convertTo(prevPyr_[0], CV_32F);
        nextImg.convertTo(nextPyr_[0], CV_32F);
    }
    else
    {
        // 3-channel input is expanded to 4 channels before the float conversion.
        buf_.resize(1);

        cvtColor(prevImg, buf_[0], COLOR_BGR2BGRA);
        buf_[0].convertTo(prevPyr_[0], CV_32F);

        cvtColor(nextImg, buf_[0], COLOR_BGR2BGRA);
        buf_[0].convertTo(nextPyr_[0], CV_32F);
    }

    for (int level = 1; level <= maxLevel; ++level)
    {
        pyrDown(prevPyr_[level - 1], prevPyr_[level]);
        pyrDown(nextPyr_[level - 1], nextPyr_[level]);
    }

    //--------------------------------------------------------------------------
    // Multithreading support: reserve a free constant slot, sleeping on the
    // condition variable until another call hands one back.
    const int slotCount = static_cast<int>(sizeof(index_vector_use) / sizeof(index_vector_use[0]));

    int index = -1;
    {
        unique_lock<tbb::mutex> ul(s_PyrLKOpticalFlow_Mutex);
        for (;;)
        {
            for (int slot = 0; slot < slotCount; ++slot)
            {
                if (index_vector_use[slot])
                {
                    index_vector_use[slot] = false;
                    index = slot;
                    break;
                }
            }

            if (index >= 0)
                break;

            // wait() releases the mutex while sleeping and re-acquires it
            // before returning, so the table is rescanned under the lock.
            s_PyrLKOpticalFlow_ConditionVariable.wait(ul);
        }
    }

    // Hands the slot back on every exit path. Without this, an exception
    // thrown by the calls below would leave the slot marked busy forever and
    // eventually block every caller.
    struct SlotGuard
    {
        explicit SlotGuard(int slot) : slot_(slot) {}
        ~SlotGuard()
        {
            unique_lock<tbb::mutex> ul(s_PyrLKOpticalFlow_Mutex);
            index_vector_use[slot_] = true;
            s_PyrLKOpticalFlow_ConditionVariable.notify_one();
        }
        const int slot_;
    } slotGuard(index);
    //--------------------------------------------------------------------------

    // NOTE(review): the constants are uploaded with the default stream
    // argument rather than `stream`; kept as in the original - confirm intent.
    pyrlk::loadConstants_multi(make_int2(winSize.width, winSize.height), iters, index);

    const cudaStream_t cudaStream = StreamAccessor::getStream(stream);

    // Track from the coarsest level down to the full-resolution level.
    for (int level = maxLevel; level >= 0; level--)
    {
        // The error output is only written at the finest level.
        float* const errPtr = (level == 0 && err) ? err->ptr<float>() : 0;

        if (cn == 1)
        {
            pyrlk::sparse1_multi(prevPyr_[level], nextPyr_[level],
                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
                errPtr, prevPts.cols,
                level, block, patch, cudaStream, index);
        }
        else
        {
            pyrlk::sparse4_multi(prevPyr_[level], nextPyr_[level],
                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
                errPtr, prevPts.cols,
                level, block, patch, cudaStream, index);
        }
    }
}
#endif
void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err) void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
{ {
CV_Assert(prevImg.type() == CV_8UC1); CV_Assert(prevImg.type() == CV_8UC1);
......
...@@ -44,6 +44,10 @@ ...@@ -44,6 +44,10 @@
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
#ifdef HAVE_TBB
#include <tbb/tbb.h>
#endif
using namespace cvtest; using namespace cvtest;
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
...@@ -322,6 +326,134 @@ GPU_TEST_P(PyrLKOpticalFlow, Sparse) ...@@ -322,6 +326,134 @@ GPU_TEST_P(PyrLKOpticalFlow, Sparse)
ASSERT_LE(bad_ratio, 0.01); ASSERT_LE(bad_ratio, 0.01);
} }
#ifdef HAVE_TBB
// Body object for tbb::parallel_for: for every index in the assigned range it
// uploads the shared point set, runs sparse_multi on a fresh PyrLKOpticalFlow
// instance with that index's output buffers and stream, then waits for the
// stream to finish.
struct Sparse_Multi_Functor
{
    explicit Sparse_Multi_Functor(const cv::Mat& in_frame0, const cv::Mat& in_frame1,
                                  const cv::Mat& in_pts_mat,
                                  cv::gpu::GpuMat* in_d_pts,
                                  cv::gpu::GpuMat* in_d_nextPts,
                                  cv::gpu::GpuMat* in_d_status,
                                  cv::gpu::Stream* in_streams)
        : m_frame0(in_frame0)
        , m_frame1(in_frame1)
        , m_pts_mat(in_pts_mat)
        , m_d_pts(in_d_pts)
        , m_d_nextPts(in_d_nextPts)
        , m_d_status(in_d_status)
        , m_streams(in_streams)
    {
    }

    void operator()(const tbb::blocked_range<size_t>& r) const
    {
        const size_t last = r.end();
        for (size_t lane = r.begin(); lane != last; ++lane)
        {
            cv::gpu::GpuMat& lanePts = m_d_pts[lane];
            cv::gpu::Stream& laneStream = m_streams[lane];

            lanePts.upload(m_pts_mat);

            // One tracker object per invocation.
            cv::gpu::PyrLKOpticalFlow pyrLK;
            pyrLK.sparse_multi(loadMat(m_frame0), loadMat(m_frame1), lanePts,
                               m_d_nextPts[lane], m_d_status[lane], laneStream);

            laneStream.waitForCompletion();
        }
    }

    // Host-side inputs shared by all indices (held by reference).
    const cv::Mat& m_frame0;
    const cv::Mat& m_frame1;
    const cv::Mat& m_pts_mat;

    // Caller-owned arrays, one element per parallel_for index.
    cv::gpu::GpuMat* m_d_pts;
    cv::gpu::GpuMat* m_d_nextPts;
    cv::gpu::GpuMat* m_d_status;
    cv::gpu::Stream* m_streams;
};
// Runs sparse_multi NB_EXEC_LINES times through tbb::parallel_for, each run
// with its own buffers and stream, and compares every result against the CPU
// cv::calcOpticalFlowPyrLK output. At most 1% of all points may disagree.
GPU_TEST_P(PyrLKOpticalFlow, Sparse_Multi)
{
    const int readFlags = useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR;

    cv::Mat frame0 = readImage("opticalflow/frame0.png", readFlags);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/frame1.png", readFlags);
    ASSERT_FALSE(frame1.empty());

    // Corner detection needs a single-channel image.
    cv::Mat gray_frame;
    if (!useGray)
        cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);
    else
        gray_frame = frame0;

    std::vector<cv::Point2f> pts;
    cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);

    //--------------------------------------------------------------------------
    // GPU
    const unsigned int NB_EXEC_LINES = 27;

    cv::gpu::GpuMat d_pts[NB_EXEC_LINES];
    cv::gpu::GpuMat d_nextPts[NB_EXEC_LINES];
    cv::gpu::GpuMat d_status[NB_EXEC_LINES];
    cv::gpu::Stream streams[NB_EXEC_LINES];

    // Header over the vector's storage; no copy is made.
    cv::Mat pts_mat(1, (int) pts.size(), CV_32FC2, (void*) &pts[0]);

    tbb::parallel_for(tbb::blocked_range<size_t>(0, NB_EXEC_LINES),
                      Sparse_Multi_Functor(frame0, frame1, pts_mat,
                                           d_pts, d_nextPts, d_status, streams));

    // Download each run's results into host vectors.
    std::vector<cv::Point2f> nextPts[NB_EXEC_LINES];
    std::vector<unsigned char> status[NB_EXEC_LINES];

    for (unsigned int run = 0; run < NB_EXEC_LINES; ++run)
    {
        const int ptCols = d_nextPts[run].cols;
        nextPts[run].resize(ptCols);
        cv::Mat nextPts_mat(1, ptCols, CV_32FC2, (void*) &(nextPts[run][0]));
        d_nextPts[run].download(nextPts_mat);

        const int statusCols = d_status[run].cols;
        status[run].resize(statusCols);
        cv::Mat status_mat(1, statusCols, CV_8UC1, (void*) &(status[run][0]));
        d_status[run].download(status_mat);
    }

    //--------------------------------------------------------------------------
    // CPU
    std::vector<cv::Point2f> nextPts_gold;
    std::vector<unsigned char> status_gold;
    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, cv::noArray());

    //--------------------------------------------------------------------------
    // CHECKS
    for (unsigned int run = 0; run < NB_EXEC_LINES; ++run)
    {
        ASSERT_EQ(nextPts_gold.size(), nextPts[run].size());
        ASSERT_EQ(status_gold.size(), status[run].size());
    }

    size_t mismatchCount = 0;

    for (unsigned int run = 0; run < NB_EXEC_LINES; ++run)
    {
        const std::vector<cv::Point2f>& runPts = nextPts[run];
        const std::vector<unsigned char>& runStatus = status[run];

        for (size_t k = 0; k < runPts.size(); ++k)
        {
            // A differing status flag counts as a mismatch on its own.
            if (runStatus[k] != status_gold[k])
            {
                ++mismatchCount;
                continue;
            }

            // Untracked points carry no meaningful position.
            if (!runStatus[k])
                continue;

            // Positions are compared as integer points with a one-pixel tolerance.
            cv::Point2i gpuPt = runPts[k];
            cv::Point2i cpuPt = nextPts_gold[k];

            if (std::abs(gpuPt.x - cpuPt.x) > 1 || std::abs(gpuPt.y - cpuPt.y) > 1)
                ++mismatchCount;
        }
    }

    double bad_ratio = static_cast<double>(mismatchCount) / (nextPts[0].size() * NB_EXEC_LINES);

    ASSERT_LE(bad_ratio, 0.01);
}
#endif // HAVE_TBB
INSTANTIATE_TEST_CASE_P(GPU_Video, PyrLKOpticalFlow, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Video, PyrLKOpticalFlow, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
testing::Values(UseGray(true), UseGray(false)))); testing::Values(UseGray(true), UseGray(false))));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment