Commit 97156897 authored by niko

format files to ANSI C style with coolformat

change the download channels to oclchannels()
fix bugs of arithm functions
perf fix of bilateral
bug fix of split test case
add build_warps functions
parent 69fbc610
This diff is collapsed.
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__ #ifndef __OPENCV_TEST_INTERPOLATION_HPP__
#define __OPENCV_TEST_INTERPOLATION_HPP__ #define __OPENCV_TEST_INTERPOLATION_HPP__
template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{ {
if (border_type == cv::BORDER_CONSTANT) if (border_type == cv::BORDER_CONSTANT)
return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]); return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]);
...@@ -52,7 +52,7 @@ template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int bor ...@@ -52,7 +52,7 @@ template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int bor
template <typename T> struct NearestInterpolator template <typename T> struct NearestInterpolator
{ {
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{ {
return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal); return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal);
} }
...@@ -60,7 +60,7 @@ template <typename T> struct NearestInterpolator ...@@ -60,7 +60,7 @@ template <typename T> struct NearestInterpolator
template <typename T> struct LinearInterpolator template <typename T> struct LinearInterpolator
{ {
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{ {
x -= 0.5f; x -= 0.5f;
y -= 0.5f; y -= 0.5f;
...@@ -85,7 +85,7 @@ template <typename T> struct CubicInterpolator ...@@ -85,7 +85,7 @@ template <typename T> struct CubicInterpolator
{ {
static float getValue(float p[4], float x) static float getValue(float p[4], float x)
{ {
return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0]))); return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
} }
static float getValue(float p[4][4], float x, float y) static float getValue(float p[4][4], float x, float y)
...@@ -100,7 +100,7 @@ template <typename T> struct CubicInterpolator ...@@ -100,7 +100,7 @@ template <typename T> struct CubicInterpolator
return getValue(arr, y); return getValue(arr, y);
} }
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{ {
int ix = cvRound(x); int ix = cvRound(x);
int iy = cvRound(y); int iy = cvRound(y);
......
...@@ -50,46 +50,46 @@ using namespace cvtest; ...@@ -50,46 +50,46 @@ using namespace cvtest;
using namespace testing; using namespace testing;
void print_info() void print_info()
{ {
printf("\n"); printf("\n");
#if defined _WIN32 #if defined _WIN32
# if defined _WIN64 # if defined _WIN64
puts("OS: Windows 64"); puts("OS: Windows 64");
# else # else
puts("OS: Windows 32"); puts("OS: Windows 32");
# endif # endif
#elif defined linux #elif defined linux
# if defined _LP64 # if defined _LP64
puts("OS: Linux 64"); puts("OS: Linux 64");
# else # else
puts("OS: Linux 32"); puts("OS: Linux 32");
# endif # endif
#elif defined __APPLE__ #elif defined __APPLE__
# if defined _LP64 # if defined _LP64
puts("OS: Apple 64"); puts("OS: Apple 64");
# else # else
puts("OS: Apple 32"); puts("OS: Apple 32");
# endif # endif
#endif #endif
} }
int main(int argc, char** argv) int main(int argc, char **argv)
{ {
std::vector<cv::ocl::Info> oclinfo; std::vector<cv::ocl::Info> oclinfo;
TS::ptr()->init("ocl"); TS::ptr()->init("ocl");
InitGoogleTest(&argc, argv); InitGoogleTest(&argc, argv);
print_info(); print_info();
int devnums = getDevice(oclinfo); int devnums = getDevice(oclinfo);
if(devnums<1) if(devnums < 1)
{ {
std::cout << "no device found\n"; std::cout << "no device found\n";
return -1; return -1;
} }
//if you want to use undefault device, set it here //if you want to use undefault device, set it here
//setDevice(oclinfo[0]); //setDevice(oclinfo[0]);
setBinpath(CLBINPATH); setBinpath(CLBINPATH);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
......
This diff is collapsed.
...@@ -55,66 +55,66 @@ using namespace std; ...@@ -55,66 +55,66 @@ using namespace std;
PARAM_TEST_CASE(Blend, MatType, int) PARAM_TEST_CASE(Blend, MatType, int)
{ {
int type; int type;
int channels; int channels;
std::vector<cv::ocl::Info> oclinfo; std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); type = GET_PARAM(0);
channels = GET_PARAM(1); channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo); //int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0); //CV_Assert(devnums > 0);
//cv::ocl::setBinpath(CLBINPATH); //cv::ocl::setBinpath(CLBINPATH);
} }
}; };
TEST_P(Blend, Performance) TEST_P(Blend, Performance)
{ {
cv::Size size(MWIDTH, MHEIGHT); cv::Size size(MWIDTH, MHEIGHT);
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0); cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0); cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F); cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels)); cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
double totalgputick_all = 0; double totalgputick_all = 0;
double totalgputick_kernel = 0; double totalgputick_kernel = 0;
double t1 = 0; double t1 = 0;
double t2 = 0; double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100 for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
{ {
t1 = (double)cvGetTickCount(); t1 = (double)cvGetTickCount();
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host); cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host); cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1); cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1); cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
t2 = (double)cvGetTickCount(); t2 = (double)cvGetTickCount();
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst); cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
t2 = (double)cvGetTickCount() - t2; t2 = (double)cvGetTickCount() - t2;
cv::Mat m; cv::Mat m;
gdst.download(m); gdst.download(m);
t1 = (double)cvGetTickCount() - t1; t1 = (double)cvGetTickCount() - t1;
if (j == 0) if (j == 0)
{ {
continue; continue;
} }
totalgputick_all = t1 + totalgputick_all; totalgputick_all = t1 + totalgputick_all;
totalgputick_kernel = t2 + totalgputick_kernel; totalgputick_kernel = t2 + totalgputick_kernel;
}; };
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
} }
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
......
...@@ -85,70 +85,70 @@ IMPLEMENT_PARAM_CLASS(L2gradient, bool); ...@@ -85,70 +85,70 @@ IMPLEMENT_PARAM_CLASS(L2gradient, bool);
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient) PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
{ {
int apperture_size; int apperture_size;
bool useL2gradient; bool useL2gradient;
//std::vector<cv::ocl::Info> oclinfo; //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp() virtual void SetUp()
{ {
apperture_size = GET_PARAM(0); apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1); useL2gradient = GET_PARAM(1);
//int devnums = getDevice(oclinfo); //int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0); //CV_Assert(devnums > 0);
} }
}; };
TEST_P(Canny1, Performance) TEST_P(Canny1, Performance)
{ {
cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE); cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty()); ASSERT_FALSE(img.empty());
double low_thresh = 100.0;
double high_thresh = 150.0;
cv::Mat edges_gold;
cv::ocl::oclMat edges;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
edges.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
} double low_thresh = 100.0;
double high_thresh = 150.0;
cv::Mat edges_gold;
cv::ocl::oclMat edges;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
edges.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
} }
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)), testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true)))); testing::Values(L2gradient(false), L2gradient(true))));
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
// //
// @Authors // @Authors
// Fangfang Bai fangfang@multicorewareinc.com // Fangfang Bai fangfang@multicorewareinc.com
// //
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
...@@ -63,53 +63,53 @@ using namespace std; ...@@ -63,53 +63,53 @@ using namespace std;
PARAM_TEST_CASE(ColumnSum) PARAM_TEST_CASE(ColumnSum)
{ {
cv::Mat src; cv::Mat src;
//std::vector<cv::ocl::Info> oclinfo; //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp() virtual void SetUp()
{ {
//int devnums = getDevice(oclinfo); //int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0); //CV_Assert(devnums > 0);
} }
}; };
TEST_F(ColumnSum, Performance) TEST_F(ColumnSum, Performance)
{ {
cv::Size size(MWIDTH,MHEIGHT); cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_32FC1); cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst; cv::ocl::oclMat d_dst;
double totalgputick=0; double totalgputick = 0;
double totalgputick_kernel=0; double totalgputick_kernel = 0;
double t1=0; double t1 = 0;
double t2=0; double t2 = 0;
for(int j = 0; j < LOOP_TIMES+1; j ++) for(int j = 0; j < LOOP_TIMES + 1; j ++)
{ {
t1 = (double)cvGetTickCount();//gpu start1 t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat d_src(src); cv::ocl::oclMat d_src(src);
t2=(double)cvGetTickCount();//kernel t2 = (double)cvGetTickCount(); //kernel
cv::ocl::columnSum(d_src,d_dst); cv::ocl::columnSum(d_src, d_dst);
t2 = (double)cvGetTickCount() - t2;//kernel t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst; cv::Mat cpu_dst;
d_dst.download (cpu_dst);//download d_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1 t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0) if(j == 0)
continue; continue;
totalgputick=t1+totalgputick; totalgputick = t1 + totalgputick;
totalgputick_kernel=t2+totalgputick_kernel; totalgputick_kernel = t2 + totalgputick_kernel;
} }
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
...@@ -117,4 +117,4 @@ TEST_F(ColumnSum, Performance) ...@@ -117,4 +117,4 @@ TEST_F(ColumnSum, Performance)
#endif #endif
\ No newline at end of file \ No newline at end of file
...@@ -48,75 +48,75 @@ using namespace std; ...@@ -48,75 +48,75 @@ using namespace std;
#ifdef HAVE_CLAMDFFT #ifdef HAVE_CLAMDFFT
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Dft // Dft
PARAM_TEST_CASE(Dft, cv::Size, bool) PARAM_TEST_CASE(Dft, cv::Size, bool)
{ {
cv::Size dft_size; cv::Size dft_size;
bool dft_rows; bool dft_rows;
vector<cv::ocl::Info> info; vector<cv::ocl::Info> info;
virtual void SetUp() virtual void SetUp()
{ {
dft_size = GET_PARAM(0); dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1); dft_rows = GET_PARAM(1);
cv::ocl::getDevice(info); cv::ocl::getDevice(info);
} }
}; };
TEST_P(Dft, C2C) TEST_P(Dft, C2C)
{ {
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
int flags = 0; int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0; flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::ocl::oclMat d_b; cv::ocl::oclMat d_b;
double totalgputick=0; double totalgputick = 0;
double totalgputick_kernel=0; double totalgputick_kernel = 0;
double t1=0; double t1 = 0;
double t2=0; double t2 = 0;
for(int j = 0; j < LOOP_TIMES+1; j ++) for(int j = 0; j < LOOP_TIMES + 1; j ++)
{ {
t1 = (double)cvGetTickCount();//gpu start1 t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga=cv::ocl::oclMat(a);//upload cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload
t2=(double)cvGetTickCount();//kernel t2 = (double)cvGetTickCount(); //kernel
cv::ocl::dft(ga, d_b, a.size(), flags); cv::ocl::dft(ga, d_b, a.size(), flags);
t2 = (double)cvGetTickCount() - t2;//kernel t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst; cv::Mat cpu_dst;
d_b.download (cpu_dst);//download d_b.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1 t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0) if(j == 0)
continue; continue;
totalgputick=t1+totalgputick; totalgputick = t1 + totalgputick;
totalgputick_kernel=t2+totalgputick_kernel; totalgputick_kernel = t2 + totalgputick_kernel;
} }
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
} }
TEST_P(Dft, R2CthenC2R) TEST_P(Dft, R2CthenC2R)
{ {
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0; int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c; cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
} }
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine( //INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
......
This diff is collapsed.
...@@ -48,66 +48,66 @@ using namespace std; ...@@ -48,66 +48,66 @@ using namespace std;
#ifdef HAVE_CLAMDBLAS #ifdef HAVE_CLAMDBLAS
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// GEMM // GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int) PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{ {
int type; int type;
cv::Size mat_size; cv::Size mat_size;
int flags; int flags;
vector<cv::ocl::Info> info; vector<cv::ocl::Info> info;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); type = GET_PARAM(0);
mat_size = GET_PARAM(1); mat_size = GET_PARAM(1);
flags = GET_PARAM(2); flags = GET_PARAM(2);
cv::ocl::getDevice(info); cv::ocl::getDevice(info);
} }
}; };
TEST_P(Gemm, Performance) TEST_P(Gemm, Performance)
{ {
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0); cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst; cv::ocl::oclMat ocl_dst;
double totalgputick=0; double totalgputick = 0;
double totalgputick_kernel=0; double totalgputick_kernel = 0;
double t1=0; double t1 = 0;
double t2=0; double t2 = 0;
for(int j = 0; j < LOOP_TIMES+1; j ++) for(int j = 0; j < LOOP_TIMES + 1; j ++)
{ {
t1 = (double)cvGetTickCount();//gpu start1 t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
t2=(double)cvGetTickCount();//kernel t2 = (double)cvGetTickCount(); //kernel
cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags); cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst; cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download ocl_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end t1 = (double)cvGetTickCount() - t1;//gpu end
if(j == 0) if(j == 0)
continue; continue;
totalgputick=t1+totalgputick; totalgputick = t1 + totalgputick;
totalgputick_kernel=t2+totalgputick_kernel; totalgputick_kernel = t2 + totalgputick_kernel;
} }
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
} }
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/), testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)), testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T))); testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif #endif
\ No newline at end of file
...@@ -53,118 +53,125 @@ using namespace testing; ...@@ -53,118 +53,125 @@ using namespace testing;
using namespace std; using namespace std;
using namespace cv; using namespace cv;
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; struct getRect
{
Rect operator ()(const CvAvgComp &e) const
{
return e.rect;
}
};
PARAM_TEST_CASE(HaarTestBase, int, int) PARAM_TEST_CASE(HaarTestBase, int, int)
{ {
//std::vector<cv::ocl::Info> oclinfo; //std::vector<cv::ocl::Info> oclinfo;
cv::ocl::OclCascadeClassifier cascade, nestedCascade; cv::ocl::OclCascadeClassifier cascade, nestedCascade;
cv::CascadeClassifier cpucascade, cpunestedCascade; cv::CascadeClassifier cpucascade, cpunestedCascade;
// Mat img; // Mat img;
double scale; double scale;
int index; int index;
virtual void SetUp() virtual void SetUp()
{ {
scale = 1.0; scale = 1.0;
index=0; index = 0;
string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml"; string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName))) if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
{ {
cout << "ERROR: Could not load classifier cascade" << endl; cout << "ERROR: Could not load classifier cascade" << endl;
cout << "Usage: facedetect [--cascade=<cascade_path>]\n" cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
" [--scale[=<image scale>\n" " [--scale[=<image scale>\n"
" [filename|camera_index]\n" << endl ; " [filename|camera_index]\n" << endl ;
return; return;
} }
//int devnums = getDevice(oclinfo); //int devnums = getDevice(oclinfo);
//CV_Assert(devnums>0); //CV_Assert(devnums>0);
////if you want to use undefault device, set it here ////if you want to use undefault device, set it here
////setDevice(oclinfo[0]); ////setDevice(oclinfo[0]);
//cv::ocl::setBinpath("E:\\"); //cv::ocl::setBinpath("E:\\");
} }
}; };
////////////////////////////////faceDetect///////////////////////////////////////////////// ////////////////////////////////faceDetect/////////////////////////////////////////////////
struct Haar : HaarTestBase {}; struct Haar : HaarTestBase {};
TEST_F(Haar, FaceDetect) TEST_F(Haar, FaceDetect)
{ {
string imgName = "../../../samples/c/lena.jpg"; string imgName = "../../../samples/c/lena.jpg";
Mat img = imread( imgName, 1 ); Mat img = imread( imgName, 1 );
if(img.empty()) if(img.empty())
{ {
std::cout << "Couldn't read test" << index <<".jpg" << std::endl; std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
return ; return ;
} }
int i = 0; int i = 0;
double t = 0; double t = 0;
vector<Rect> faces, oclfaces; vector<Rect> faces, oclfaces;
const static Scalar colors[] = { CV_RGB(0,0,255), const static Scalar colors[] = { CV_RGB(0, 0, 255),
CV_RGB(0,128,255), CV_RGB(0, 128, 255),
CV_RGB(0,255,255), CV_RGB(0, 255, 255),
CV_RGB(0,255,0), CV_RGB(0, 255, 0),
CV_RGB(255,128,0), CV_RGB(255, 128, 0),
CV_RGB(255,255,0), CV_RGB(255, 255, 0),
CV_RGB(255,0,0), CV_RGB(255, 0, 0),
CV_RGB(255,0,255)} ; CV_RGB(255, 0, 255)
} ;
Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0)); Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
cvtColor( img, gray, CV_BGR2GRAY ); MemStorage storage(cvCreateMemStorage(0));
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); cvtColor( img, gray, CV_BGR2GRAY );
equalizeHist( smallImg, smallImg ); resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
t = (double)cvGetTickCount();
for(int k= 0; k<LOOP_TIMES; k++) t = (double)cvGetTickCount();
{ for(int k = 0; k < LOOP_TIMES; k++)
cpucascade.detectMultiScale( smallImg, faces, 1.1, {
3, 0 cpucascade.detectMultiScale( smallImg, faces, 1.1,
|CV_HAAR_SCALE_IMAGE 3, 0
, Size(30,30), Size(0, 0) ); | CV_HAAR_SCALE_IMAGE
} , Size(30, 30), Size(0, 0) );
t = (double)cvGetTickCount() - t ; }
printf( "cpudetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) ); t = (double)cvGetTickCount() - t ;
printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
cv::ocl::oclMat image;
CvSeq* _objects; cv::ocl::oclMat image;
t = (double)cvGetTickCount(); CvSeq *_objects;
for(int k= 0; k<LOOP_TIMES; k++) t = (double)cvGetTickCount();
{ for(int k = 0; k < LOOP_TIMES; k++)
image.upload(smallImg); {
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1, image.upload(smallImg);
3, 0 _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
|CV_HAAR_SCALE_IMAGE 3, 0
, Size(30,30), Size(0, 0) ); | CV_HAAR_SCALE_IMAGE
} , Size(30, 30), Size(0, 0) );
t = (double)cvGetTickCount() - t ; }
printf( "ocldetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) ); t = (double)cvGetTickCount() - t ;
vector<CvAvgComp> vecAvgComp; printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp); vector<CvAvgComp> vecAvgComp;
oclfaces.resize(vecAvgComp.size()); Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
//for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
//{ //for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
// Mat smallImgROI; //{
// Point center; // Mat smallImgROI;
// Scalar color = colors[i%8]; // Point center;
// int radius; // Scalar color = colors[i%8];
// center.x = cvRound((r->x + r->width*0.5)*scale); // int radius;
// center.y = cvRound((r->y + r->height*0.5)*scale); // center.x = cvRound((r->x + r->width*0.5)*scale);
// radius = cvRound((r->width + r->height)*0.25*scale); // center.y = cvRound((r->y + r->height*0.5)*scale);
// circle( img, center, radius, color, 3, 8, 0 ); // radius = cvRound((r->width + r->height)*0.25*scale);
//} // circle( img, center, radius, color, 3, 8, 0 );
//namedWindow("result"); //}
//imshow("result",img); //namedWindow("result");
//waitKey(0); //imshow("result",img);
//destroyAllWindows(); //waitKey(0);
//destroyAllWindows();
} }
#endif // HAVE_OPENCL #endif // HAVE_OPENCL
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -42,4 +42,3 @@ ...@@ -42,4 +42,3 @@
#include "precomp.hpp" #include "precomp.hpp"
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -63,10 +63,10 @@ namespace cv ...@@ -63,10 +63,10 @@ namespace cv
DISABLE DISABLE
}; };
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
}//namespace ocl }//namespace ocl
}//namespace cv }//namespace cv
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment