Commit 54e746be authored by Vladislav Samsonov

Added OpenCL support

parent d8aa1625
...@@ -88,10 +88,10 @@ public: ...@@ -88,10 +88,10 @@ public:
void collectGarbage(); void collectGarbage();
private: private:
void findSparseFeatures( Mat &from, Mat &to, std::vector<Point2f> &features, void findSparseFeatures( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const; std::vector<Point2f> &predictedFeatures ) const;
void removeOcclusions( Mat &from, Mat &to, std::vector<Point2f> &features, void removeOcclusions( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const; std::vector<Point2f> &predictedFeatures ) const;
void getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector<Point2f> &features, void getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector<Point2f> &features,
......
#include "opencv2/highgui.hpp" #include "opencv2/highgui.hpp"
#include "opencv2/video.hpp" #include "opencv2/video.hpp"
#include "opencv2/optflow.hpp" #include "opencv2/optflow.hpp"
#include "opencv2/core/ocl.hpp"
#include <fstream> #include <fstream>
#include <limits> #include <limits>
...@@ -15,7 +16,8 @@ const String keys = "{help h usage ? | | print this message }" ...@@ -15,7 +16,8 @@ const String keys = "{help h usage ? | | print this message }"
"{@groundtruth | | path to the .flo file (optional), Middlebury format }" "{@groundtruth | | path to the .flo file (optional), Middlebury format }"
"{m measure |endpoint| error measure - [endpoint or angular] }" "{m measure |endpoint| error measure - [endpoint or angular] }"
"{r region |all | region to compute stats about [all, discontinuities, untextured] }" "{r region |all | region to compute stats about [all, discontinuities, untextured] }"
"{d display | | display additional info images (pauses program execution) }"; "{d display | | display additional info images (pauses program execution) }"
"{g gpu | | use OpenCL}";
inline bool isFlowCorrect( const Point2f u ) inline bool isFlowCorrect( const Point2f u )
{ {
...@@ -200,6 +202,7 @@ int main( int argc, char** argv ) ...@@ -200,6 +202,7 @@ int main( int argc, char** argv )
String error_measure = parser.get<String>("measure"); String error_measure = parser.get<String>("measure");
String region = parser.get<String>("region"); String region = parser.get<String>("region");
bool display_images = parser.has("display"); bool display_images = parser.has("display");
const bool useGpu = parser.has("gpu");
if ( !parser.check() ) if ( !parser.check() )
{ {
...@@ -207,6 +210,9 @@ int main( int argc, char** argv ) ...@@ -207,6 +210,9 @@ int main( int argc, char** argv )
return 0; return 0;
} }
cv::ocl::setUseOpenCL(useGpu);
printf("OpenCL Enabled: %u\n", useGpu && cv::ocl::haveOpenCL());
Mat i1, i2; Mat i1, i2;
Mat_<Point2f> flow, ground_truth; Mat_<Point2f> flow, ground_truth;
Mat computed_errors; Mat computed_errors;
......
...@@ -140,13 +140,15 @@ static void solveLSQR( const Mat &A, const Mat &b, OutputArray xOut, const doubl ...@@ -140,13 +140,15 @@ static void solveLSQR( const Mat &A, const Mat &b, OutputArray xOut, const doubl
for ( unsigned itn = 0; itn < iter_lim; ++itn ) for ( unsigned itn = 0; itn < iter_lim; ++itn )
{ {
u = A * v - alfa * u; u *= -alfa;
u += A * v;
beta = cv::norm( u, NORM_L2 ); beta = cv::norm( u, NORM_L2 );
if ( beta > 0 ) if ( beta > 0 )
{ {
u *= 1 / beta; u *= 1 / beta;
v = AT * u - beta * v; v *= -beta;
v += AT * u;
alfa = cv::norm( v, NORM_L2 ); alfa = cv::norm( v, NORM_L2 );
if ( alfa > 0 ) if ( alfa > 0 )
v = ( 1 / alfa ) * v; v = ( 1 / alfa ) * v;
...@@ -173,7 +175,7 @@ static void solveLSQR( const Mat &A, const Mat &b, OutputArray xOut, const doubl ...@@ -173,7 +175,7 @@ static void solveLSQR( const Mat &A, const Mat &b, OutputArray xOut, const doubl
} }
} }
void OpticalFlowPCAFlow::findSparseFeatures( Mat &from, Mat &to, std::vector<Point2f> &features, void OpticalFlowPCAFlow::findSparseFeatures( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const std::vector<Point2f> &predictedFeatures ) const
{ {
Size size = from.size(); Size size = from.size();
...@@ -207,7 +209,7 @@ void OpticalFlowPCAFlow::findSparseFeatures( Mat &from, Mat &to, std::vector<Poi ...@@ -207,7 +209,7 @@ void OpticalFlowPCAFlow::findSparseFeatures( Mat &from, Mat &to, std::vector<Poi
predictedFeatures.resize( j ); predictedFeatures.resize( j );
} }
void OpticalFlowPCAFlow::removeOcclusions( Mat &from, Mat &to, std::vector<Point2f> &features, void OpticalFlowPCAFlow::removeOcclusions( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const std::vector<Point2f> &predictedFeatures ) const
{ {
std::vector<uchar> predictedStatus; std::vector<uchar> predictedStatus;
...@@ -234,6 +236,27 @@ void OpticalFlowPCAFlow::removeOcclusions( Mat &from, Mat &to, std::vector<Point ...@@ -234,6 +236,27 @@ void OpticalFlowPCAFlow::removeOcclusions( Mat &from, Mat &to, std::vector<Point
predictedFeatures.resize( j ); predictedFeatures.resize( j );
} }
// Fills one row of the DCT sampling matrix for a single feature point (CPU path).
//
// For every basis index pair (n1, n2) the value
//   cos( n1 * pi / size.width  * (p.x + 0.5) ) * cos( n2 * pi / size.height * (p.y + 0.5) )
// is written to row[n1 * basisSize.height + n2].
//
// row       - destination buffer; must hold at least basisSize.width * basisSize.height floats.
// p         - feature point whose coordinates are sampled.
// basisSize - number of basis functions along x (width) and y (height).
// size      - dimensions used to normalise the cosine arguments.
static inline void _cpu_fillDCTSampledPoints( float *row, const Point2f &p, const Size &basisSize, const Size &size )
{
  for ( int n1 = 0; n1 < basisSize.width; ++n1 )
  {
    // The horizontal factor does not depend on n2, so evaluate it once per n1
    // instead of once per (n1, n2) pair. The argument is computed in double
    // (M_PI and 0.5 are doubles) and narrowed to float for cosf, as before.
    const float cosX = cosf( static_cast<float>( ( n1 * M_PI / size.width ) * ( p.x + 0.5 ) ) );
    float *const out = row + n1 * basisSize.height;
    for ( int n2 = 0; n2 < basisSize.height; ++n2 )
      out[n2] = cosX * cosf( static_cast<float>( ( n2 * M_PI / size.height ) * ( p.y + 0.5 ) ) );
  }
}
// OpenCL kernel source that fills the DCT sampling matrix, one work-item per
// (feature i, basis index n1, basis index n2) triple.
//
// Kernel arguments:
//   features, fstep, foff - byte pointer, per-row byte step and byte offset of the feature
//                           buffer; each row is read as one float2 (x, y).
//   A, Astep, Aoff        - byte pointer, per-row byte step and byte offset of the output
//                           buffer; element (i, n1 * bsh + n2) is written as a float.
//   fs                    - number of features (bound for global id 0).
//   bsw, bsh              - basis width / height (bounds for global ids 1 and 2).
//   sw, sh                - width / height used to normalise the cosine arguments.
//
// The byte pointers are cast explicitly to their typed __global pointers: OpenCL C does
// not implicitly convert between incompatible pointer types. The math uses the
// single-precision constant M_PI_F and float literals so the kernel does not depend on
// double-precision (fp64) support on the device; values are therefore computed in float.
static ocl::ProgramSource _ocl_fillDCTSampledPointsSource(
  "__kernel void fillDCTSampledPoints(__global const uchar* features, int fstep, int foff, __global "
  "uchar* A, int Astep, int Aoff, int fs, int bsw, int bsh, int sw, int sh) {"
  "const int i = get_global_id(0);"
  "const int n1 = get_global_id(1);"
  "const int n2 = get_global_id(2);"
  "if (i >= fs || n1 >= bsw || n2 >= bsh) return;"
  "__global const float2* f = (__global const float2*)(features + (fstep * i + foff));"
  "__global float* a = (__global float*)(A + (Astep * i + Aoff + (n1 * bsh + n2) * (int)sizeof(float)));"
  "const float2 p = f[0];"
  "a[0] = cos((n1 * M_PI_F / sw) * (p.x + 0.5f)) * cos((n2 * M_PI_F / sh) * (p.y + 0.5f));"
  "}" );
void OpticalFlowPCAFlow::getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, void OpticalFlowPCAFlow::getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out,
const std::vector<Point2f> &features, const std::vector<Point2f> &predictedFeatures, const std::vector<Point2f> &features, const std::vector<Point2f> &predictedFeatures,
const Size size ) const Size size )
...@@ -241,20 +264,40 @@ void OpticalFlowPCAFlow::getSystem( OutputArray AOut, OutputArray b1Out, OutputA ...@@ -241,20 +264,40 @@ void OpticalFlowPCAFlow::getSystem( OutputArray AOut, OutputArray b1Out, OutputA
AOut.create( features.size(), basisSize.area(), CV_32F ); AOut.create( features.size(), basisSize.area(), CV_32F );
b1Out.create( features.size(), 1, CV_32F ); b1Out.create( features.size(), 1, CV_32F );
b2Out.create( features.size(), 1, CV_32F ); b2Out.create( features.size(), 1, CV_32F );
Mat A = AOut.getMat(); if ( ocl::useOpenCL() )
Mat b1 = b1Out.getMat();
Mat b2 = b2Out.getMat();
for ( size_t i = 0; i < features.size(); ++i )
{ {
const Point2f &p = features[i]; UMat A = AOut.getUMat();
float *row = A.ptr<float>( i ); Mat b1 = b1Out.getMat();
for ( int n1 = 0; n1 < basisSize.width; ++n1 ) Mat b2 = b2Out.getMat();
for ( int n2 = 0; n2 < basisSize.height; ++n2 )
row[n1 * basisSize.height + n2] = ocl::Kernel kernel( "fillDCTSampledPoints", _ocl_fillDCTSampledPointsSource );
cosf( ( n1 * M_PI / size.width ) * ( p.x + 0.5 ) ) * cosf( ( n2 * M_PI / size.height ) * ( p.y + 0.5 ) ); size_t globSize[] = {features.size(), basisSize.width, basisSize.height};
const Point2f flow = predictedFeatures[i] - features[i]; kernel
b1.at<float>( i ) = flow.x; .args( cv::ocl::KernelArg::ReadOnlyNoSize( Mat( features ).getUMat( ACCESS_READ ) ),
b2.at<float>( i ) = flow.y; cv::ocl::KernelArg::WriteOnlyNoSize( A ), (int)features.size(), (int)basisSize.width,
(int)basisSize.height, (int)size.width, (int)size.height )
.run( 3, globSize, 0, true );
for ( size_t i = 0; i < features.size(); ++i )
{
const Point2f flow = predictedFeatures[i] - features[i];
b1.at<float>( i ) = flow.x;
b2.at<float>( i ) = flow.y;
}
}
else
{
Mat A = AOut.getMat();
Mat b1 = b1Out.getMat();
Mat b2 = b2Out.getMat();
for ( size_t i = 0; i < features.size(); ++i )
{
_cpu_fillDCTSampledPoints( A.ptr<float>( i ), features[i], basisSize, size );
const Point2f flow = predictedFeatures[i] - features[i];
b1.at<float>( i ) = flow.x;
b2.at<float>( i ) = flow.y;
}
} }
} }
...@@ -268,29 +311,54 @@ void OpticalFlowPCAFlow::getSystem( OutputArray A1Out, OutputArray A2Out, Output ...@@ -268,29 +311,54 @@ void OpticalFlowPCAFlow::getSystem( OutputArray A1Out, OutputArray A2Out, Output
A2Out.create( features.size() + prior->getPadding(), basisSize.area(), CV_32F ); A2Out.create( features.size() + prior->getPadding(), basisSize.area(), CV_32F );
b1Out.create( features.size() + prior->getPadding(), 1, CV_32F ); b1Out.create( features.size() + prior->getPadding(), 1, CV_32F );
b2Out.create( features.size() + prior->getPadding(), 1, CV_32F ); b2Out.create( features.size() + prior->getPadding(), 1, CV_32F );
if ( ocl::useOpenCL() )
{
UMat A = A1Out.getUMat();
Mat b1 = b1Out.getMat();
Mat b2 = b2Out.getMat();
ocl::Kernel kernel( "fillDCTSampledPoints", _ocl_fillDCTSampledPointsSource );
size_t globSize[] = {features.size(), basisSize.width, basisSize.height};
kernel
.args( cv::ocl::KernelArg::ReadOnlyNoSize( Mat( features ).getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::WriteOnlyNoSize( A ), (int)features.size(), (int)basisSize.width,
(int)basisSize.height, (int)size.width, (int)size.height )
.run( 3, globSize, 0, true );
for ( size_t i = 0; i < features.size(); ++i )
{
const Point2f flow = predictedFeatures[i] - features[i];
b1.at<float>( i ) = flow.x;
b2.at<float>( i ) = flow.y;
}
}
else
{
Mat A1 = A1Out.getMat();
Mat b1 = b1Out.getMat();
Mat b2 = b2Out.getMat();
for ( size_t i = 0; i < features.size(); ++i )
{
_cpu_fillDCTSampledPoints( A1.ptr<float>( i ), features[i], basisSize, size );
const Point2f flow = predictedFeatures[i] - features[i];
b1.at<float>( i ) = flow.x;
b2.at<float>( i ) = flow.y;
}
}
Mat A1 = A1Out.getMat(); Mat A1 = A1Out.getMat();
Mat A2 = A2Out.getMat(); Mat A2 = A2Out.getMat();
Mat b1 = b1Out.getMat(); Mat b1 = b1Out.getMat();
Mat b2 = b2Out.getMat(); Mat b2 = b2Out.getMat();
for ( size_t i = 0; i < features.size(); ++i )
{
const Point2f &p = features[i];
float *row = A1.ptr<float>( i );
for ( int n1 = 0; n1 < basisSize.width; ++n1 )
for ( int n2 = 0; n2 < basisSize.height; ++n2 )
row[n1 * basisSize.height + n2] =
cosf( ( n1 * M_PI / size.width ) * ( p.x + 0.5 ) ) * cosf( ( n2 * M_PI / size.height ) * ( p.y + 0.5 ) );
const Point2f flow = predictedFeatures[i] - features[i];
b1.at<float>( i ) = flow.x;
b2.at<float>( i ) = flow.y;
}
memcpy( A2.ptr<float>(), A1.ptr<float>(), features.size() * basisSize.area() * sizeof( float ) ); memcpy( A2.ptr<float>(), A1.ptr<float>(), features.size() * basisSize.area() * sizeof( float ) );
prior->fillConstraints( A1.ptr<float>( features.size(), 0 ), A2.ptr<float>( features.size(), 0 ), prior->fillConstraints( A1.ptr<float>( features.size(), 0 ), A2.ptr<float>( features.size(), 0 ),
b1.ptr<float>( features.size(), 0 ), b2.ptr<float>( features.size(), 0 ) ); b1.ptr<float>( features.size(), 0 ), b2.ptr<float>( features.size(), 0 ) );
} }
static void applyCLAHE( Mat &img ) static void applyCLAHE( UMat &img )
{ {
Ptr<CLAHE> clahe = createCLAHE(); Ptr<CLAHE> clahe = createCLAHE();
clahe->setClipLimit( 14 ); clahe->setClipLimit( 14 );
...@@ -334,7 +402,7 @@ void OpticalFlowPCAFlow::calc( InputArray I0, InputArray I1, InputOutputArray fl ...@@ -334,7 +402,7 @@ void OpticalFlowPCAFlow::calc( InputArray I0, InputArray I1, InputOutputArray fl
const Size size = I0.size(); const Size size = I0.size();
CV_Assert( size == I1.size() ); CV_Assert( size == I1.size() );
Mat from, to; UMat from, to;
if ( I0.channels() == 3 ) if ( I0.channels() == 3 )
{ {
cvtColor( I0, from, COLOR_BGR2GRAY ); cvtColor( I0, from, COLOR_BGR2GRAY );
...@@ -357,7 +425,7 @@ void OpticalFlowPCAFlow::calc( InputArray I0, InputArray I1, InputOutputArray fl ...@@ -357,7 +425,7 @@ void OpticalFlowPCAFlow::calc( InputArray I0, InputArray I1, InputOutputArray fl
CV_Assert( from.channels() == 1 ); CV_Assert( from.channels() == 1 );
CV_Assert( to.channels() == 1 ); CV_Assert( to.channels() == 1 );
const Mat fromOrig = from.clone(); const Mat fromOrig = from.getMat( ACCESS_READ ).clone();
applyCLAHE( from ); applyCLAHE( from );
applyCLAHE( to ); applyCLAHE( to );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment