Commit 97156897 authored by niko's avatar niko

format files to ANSI C style with coolformat

change the download channels to oclchannles()
fix bugs of arithm functions
perf fix of bilateral
bug fix of split test case
add build_warps functions
parent 69fbc610
This diff is collapsed.
......@@ -42,7 +42,7 @@
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
#define __OPENCV_TEST_INTERPOLATION_HPP__
template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
if (border_type == cv::BORDER_CONSTANT)
return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]);
......@@ -52,7 +52,7 @@ template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int bor
template <typename T> struct NearestInterpolator
{
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal);
}
......@@ -60,7 +60,7 @@ template <typename T> struct NearestInterpolator
template <typename T> struct LinearInterpolator
{
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
x -= 0.5f;
y -= 0.5f;
......@@ -85,7 +85,7 @@ template <typename T> struct CubicInterpolator
{
static float getValue(float p[4], float x)
{
return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0])));
return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
}
static float getValue(float p[4][4], float x, float y)
......@@ -100,7 +100,7 @@ template <typename T> struct CubicInterpolator
return getValue(arr, y);
}
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
int ix = cvRound(x);
int iy = cvRound(y);
......
......@@ -50,46 +50,46 @@ using namespace cvtest;
using namespace testing;
void print_info()
{
{
printf("\n");
#if defined _WIN32
# if defined _WIN64
puts("OS: Windows 64");
puts("OS: Windows 64");
# else
puts("OS: Windows 32");
puts("OS: Windows 32");
# endif
#elif defined linux
# if defined _LP64
puts("OS: Linux 64");
puts("OS: Linux 64");
# else
puts("OS: Linux 32");
puts("OS: Linux 32");
# endif
#elif defined __APPLE__
# if defined _LP64
puts("OS: Apple 64");
puts("OS: Apple 64");
# else
puts("OS: Apple 32");
puts("OS: Apple 32");
# endif
#endif
}
int main(int argc, char** argv)
int main(int argc, char **argv)
{
std::vector<cv::ocl::Info> oclinfo;
std::vector<cv::ocl::Info> oclinfo;
TS::ptr()->init("ocl");
InitGoogleTest(&argc, argv);
print_info();
int devnums = getDevice(oclinfo);
if(devnums<1)
{
std::cout << "no device found\n";
return -1;
}
//if you want to use undefault device, set it here
//setDevice(oclinfo[0]);
setBinpath(CLBINPATH);
int devnums = getDevice(oclinfo);
if(devnums < 1)
{
std::cout << "no device found\n";
return -1;
}
//if you want to use undefault device, set it here
//setDevice(oclinfo[0]);
setBinpath(CLBINPATH);
return RUN_ALL_TESTS();
}
......
This diff is collapsed.
......@@ -55,66 +55,66 @@ using namespace std;
PARAM_TEST_CASE(Blend, MatType, int)
{
int type;
int channels;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
//cv::ocl::setBinpath(CLBINPATH);
}
int type;
int channels;
std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
//cv::ocl::setBinpath(CLBINPATH);
}
};
TEST_P(Blend, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
double totalgputick_all = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
{
t1 = (double)cvGetTickCount();
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
t2 = (double)cvGetTickCount();
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
t2 = (double)cvGetTickCount() - t2;
cv::Mat m;
gdst.download(m);
t1 = (double)cvGetTickCount() - t1;
if (j == 0)
{
continue;
}
totalgputick_all = t1 + totalgputick_all;
totalgputick_kernel = t2 + totalgputick_kernel;
};
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
double totalgputick_all = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
{
t1 = (double)cvGetTickCount();
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
t2 = (double)cvGetTickCount();
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
t2 = (double)cvGetTickCount() - t2;
cv::Mat m;
gdst.download(m);
t1 = (double)cvGetTickCount() - t1;
if (j == 0)
{
continue;
}
totalgputick_all = t1 + totalgputick_all;
totalgputick_kernel = t2 + totalgputick_kernel;
};
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
......
......@@ -85,70 +85,70 @@ IMPLEMENT_PARAM_CLASS(L2gradient, bool);
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
{
int apperture_size;
bool useL2gradient;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
int apperture_size;
bool useL2gradient;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_P(Canny1, Performance)
{
cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
double low_thresh = 100.0;
double high_thresh = 150.0;
cv::Mat edges_gold;
cv::ocl::oclMat edges;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
edges.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
}
double low_thresh = 100.0;
double high_thresh = 150.0;
cv::Mat edges_gold;
cv::ocl::oclMat edges;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
edges.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
......
......@@ -16,7 +16,7 @@
//
// @Authors
// Fangfang Bai fangfang@multicorewareinc.com
//
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -63,53 +63,53 @@ using namespace std;
PARAM_TEST_CASE(ColumnSum)
{
cv::Mat src;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
cv::Mat src;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_F(ColumnSum, Performance)
{
cv::Size size(MWIDTH,MHEIGHT);
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat d_src(src);
cv::ocl::oclMat d_src(src);
t2=(double)cvGetTickCount();//kernel
cv::ocl::columnSum(d_src,d_dst);
t2 = (double)cvGetTickCount() - t2;//kernel
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::columnSum(d_src, d_dst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
d_dst.download (cpu_dst);//download
cv::Mat cpu_dst;
d_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
......@@ -117,4 +117,4 @@ TEST_F(ColumnSum, Performance)
#endif
\ No newline at end of file
#endif
\ No newline at end of file
......@@ -48,75 +48,75 @@ using namespace std;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, bool)
PARAM_TEST_CASE(Dft, cv::Size, bool)
{
cv::Size dft_size;
bool dft_rows;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
cv::ocl::getDevice(info);
}
cv::Size dft_size;
bool dft_rows;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
cv::ocl::getDevice(info);
}
};
TEST_P(Dft, C2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::ocl::oclMat d_b;
cv::ocl::oclMat d_b;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga=cv::ocl::oclMat(a);//upload
cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::dft(ga, d_b, a.size(), flags);
t2 = (double)cvGetTickCount() - t2;//kernel
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::dft(ga, d_b, a.size(), flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
d_b.download (cpu_dst);//download
cv::Mat cpu_dst;
d_b.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c;
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
......
This diff is collapsed.
......@@ -48,66 +48,66 @@ using namespace std;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
cv::ocl::getDevice(info);
}
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
cv::ocl::getDevice(info);
}
};
TEST_P(Gemm, Performance)
{
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst;
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download
cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end
t1 = (double)cvGetTickCount() - t1;//gpu end
if(j == 0)
continue;
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif
\ No newline at end of file
......@@ -53,118 +53,125 @@ using namespace testing;
using namespace std;
using namespace cv;
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
struct getRect
{
Rect operator ()(const CvAvgComp &e) const
{
return e.rect;
}
};
PARAM_TEST_CASE(HaarTestBase, int, int)
{
//std::vector<cv::ocl::Info> oclinfo;
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
cv::CascadeClassifier cpucascade, cpunestedCascade;
// Mat img;
double scale;
int index;
virtual void SetUp()
{
scale = 1.0;
index=0;
string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml";
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
{
cout << "ERROR: Could not load classifier cascade" << endl;
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
" [--scale[=<image scale>\n"
" [filename|camera_index]\n" << endl ;
return;
}
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums>0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//cv::ocl::setBinpath("E:\\");
}
//std::vector<cv::ocl::Info> oclinfo;
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
cv::CascadeClassifier cpucascade, cpunestedCascade;
// Mat img;
double scale;
int index;
virtual void SetUp()
{
scale = 1.0;
index = 0;
string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
{
cout << "ERROR: Could not load classifier cascade" << endl;
cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
" [--scale[=<image scale>\n"
" [filename|camera_index]\n" << endl ;
return;
}
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums>0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//cv::ocl::setBinpath("E:\\");
}
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
struct Haar : HaarTestBase {};
TEST_F(Haar, FaceDetect)
{
string imgName = "../../../samples/c/lena.jpg";
Mat img = imread( imgName, 1 );
if(img.empty())
{
std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
return ;
}
int i = 0;
double t = 0;
vector<Rect> faces, oclfaces;
const static Scalar colors[] = { CV_RGB(0,0,255),
CV_RGB(0,128,255),
CV_RGB(0,255,255),
CV_RGB(0,255,0),
CV_RGB(255,128,0),
CV_RGB(255,255,0),
CV_RGB(255,0,0),
CV_RGB(255,0,255)} ;
Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0));
cvtColor( img, gray, CV_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
t = (double)cvGetTickCount();
for(int k= 0; k<LOOP_TIMES; k++)
{
cpucascade.detectMultiScale( smallImg, faces, 1.1,
3, 0
|CV_HAAR_SCALE_IMAGE
, Size(30,30), Size(0, 0) );
}
t = (double)cvGetTickCount() - t ;
printf( "cpudetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) );
cv::ocl::oclMat image;
CvSeq* _objects;
t = (double)cvGetTickCount();
for(int k= 0; k<LOOP_TIMES; k++)
{
image.upload(smallImg);
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
3, 0
|CV_HAAR_SCALE_IMAGE
, Size(30,30), Size(0, 0) );
}
t = (double)cvGetTickCount() - t ;
printf( "ocldetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) );
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
//for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
//{
// Mat smallImgROI;
// Point center;
// Scalar color = colors[i%8];
// int radius;
// center.x = cvRound((r->x + r->width*0.5)*scale);
// center.y = cvRound((r->y + r->height*0.5)*scale);
// radius = cvRound((r->width + r->height)*0.25*scale);
// circle( img, center, radius, color, 3, 8, 0 );
//}
//namedWindow("result");
//imshow("result",img);
//waitKey(0);
//destroyAllWindows();
TEST_F(Haar, FaceDetect)
{
string imgName = "../../../samples/c/lena.jpg";
Mat img = imread( imgName, 1 );
if(img.empty())
{
std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
return ;
}
int i = 0;
double t = 0;
vector<Rect> faces, oclfaces;
const static Scalar colors[] = { CV_RGB(0, 0, 255),
CV_RGB(0, 128, 255),
CV_RGB(0, 255, 255),
CV_RGB(0, 255, 0),
CV_RGB(255, 128, 0),
CV_RGB(255, 255, 0),
CV_RGB(255, 0, 0),
CV_RGB(255, 0, 255)
} ;
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0));
cvtColor( img, gray, CV_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
t = (double)cvGetTickCount();
for(int k = 0; k < LOOP_TIMES; k++)
{
cpucascade.detectMultiScale( smallImg, faces, 1.1,
3, 0
| CV_HAAR_SCALE_IMAGE
, Size(30, 30), Size(0, 0) );
}
t = (double)cvGetTickCount() - t ;
printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
cv::ocl::oclMat image;
CvSeq *_objects;
t = (double)cvGetTickCount();
for(int k = 0; k < LOOP_TIMES; k++)
{
image.upload(smallImg);
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
3, 0
| CV_HAAR_SCALE_IMAGE
, Size(30, 30), Size(0, 0) );
}
t = (double)cvGetTickCount() - t ;
printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
//for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
//{
// Mat smallImgROI;
// Point center;
// Scalar color = colors[i%8];
// int radius;
// center.x = cvRound((r->x + r->width*0.5)*scale);
// center.y = cvRound((r->y + r->height*0.5)*scale);
// radius = cvRound((r->width + r->height)*0.25*scale);
// circle( img, center, radius, color, 3, 8, 0 );
//}
//namedWindow("result");
//imshow("result",img);
//waitKey(0);
//destroyAllWindows();
}
#endif // HAVE_OPENCL
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -42,4 +42,3 @@
#include "precomp.hpp"
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -63,10 +63,10 @@ namespace cv
DISABLE
};
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
}//namespace ocl
}//namespace cv
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment