Commit 79ba160c authored by Alexey Spizhevoy

added more GPU perf. tests, refactored

parent 11579324
...@@ -9,11 +9,14 @@ void TestSystem::run() ...@@ -9,11 +9,14 @@ void TestSystem::run()
// Run initializers // Run initializers
vector<Runnable*>::iterator it = inits_.begin(); vector<Runnable*>::iterator it = inits_.begin();
for (; it != inits_.end(); ++it) for (; it != inits_.end(); ++it)
{
(*it)->run(); (*it)->run();
}
cout << setiosflags(ios_base::left); cout << setiosflags(ios_base::left);
cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
<< setw(10) << "SPEEDUP" << "DESCRIPTION\n"; << setw(10) << "SPEEDUP"
<< "DESCRIPTION\n";
cout << resetiosflags(ios_base::left); cout << resetiosflags(ios_base::left);
// Run tests // Run tests
...@@ -24,30 +27,23 @@ void TestSystem::run() ...@@ -24,30 +27,23 @@ void TestSystem::run()
try try
{ {
(*it)->run(); (*it)->run();
flush_subtest_data(); flushSubtestData();
} }
catch (const cv::Exception& e) catch (const cv::Exception&)
{
cout << TAB << "error";
switch (e.code)
{ {
case CV_StsNoMem: cout << ": out of memory"; break; resetSubtestData();
}
if (!description_.str().empty())
cout << " [" << description_.str() << "]";
cout << endl;
reset_subtest_data();
} }
} }
cout << setiosflags(ios_base::fixed | ios_base::left); cout << setiosflags(ios_base::fixed);
cout << "\naverage GPU speedup: x" << setprecision(3) cout << "\naverage GPU speedup: x"
<< speedup_total_ / num_subtests_called_ << endl; << setprecision(3) << speedup_total_ / num_subtests_called_
cout << resetiosflags(ios_base::fixed | ios_base::left); << endl;
cout << resetiosflags(ios_base::fixed);
} }
void TestSystem::flush_subtest_data() void TestSystem::flushSubtestData()
{ {
if (!can_flush_) if (!can_flush_)
return; return;
...@@ -58,9 +54,10 @@ void TestSystem::flush_subtest_data() ...@@ -58,9 +54,10 @@ void TestSystem::flush_subtest_data()
double speedup = static_cast<double>(cpu_time) / std::max(1, gpu_time); double speedup = static_cast<double>(cpu_time) / std::max(1, gpu_time);
speedup_total_ += speedup; speedup_total_ += speedup;
cout << TAB << setiosflags(ios_base::fixed | ios_base::left); cout << TAB << setiosflags(ios_base::left);
stringstream stream; stringstream stream;
stream << cpu_time; stream << cpu_time;
cout << setw(10) << stream.str(); cout << setw(10) << stream.str();
...@@ -73,11 +70,10 @@ void TestSystem::flush_subtest_data() ...@@ -73,11 +70,10 @@ void TestSystem::flush_subtest_data()
cout << setw(10) << stream.str(); cout << setw(10) << stream.str();
cout << description_.str(); cout << description_.str();
cout << resetiosflags(ios_base::left) << endl;
cout << resetiosflags(ios_base::fixed | ios_base::left) << endl;
num_subtests_called_++; num_subtests_called_++;
reset_subtest_data(); resetSubtestData();
} }
......
...@@ -38,6 +38,13 @@ public: ...@@ -38,6 +38,13 @@ public:
void run(); void run();
// Ends current subtest and starts new one
std::stringstream& subtest()
{
flushSubtestData();
return description_;
}
void cpuOn() { cpu_started_ = cv::getTickCount(); } void cpuOn() { cpu_started_ = cv::getTickCount(); }
void cpuOff() void cpuOff()
...@@ -56,20 +63,13 @@ public: ...@@ -56,20 +63,13 @@ public:
can_flush_ = true; can_flush_ = true;
} }
// Ends current subtest and starts new one
std::stringstream& subtest()
{
flush_subtest_data();
return description_;
}
private: private:
TestSystem(): can_flush_(false), cpu_elapsed_(0), gpu_elapsed_(0), TestSystem(): can_flush_(false), cpu_elapsed_(0), gpu_elapsed_(0),
speedup_total_(0.0), num_subtests_called_(0) {}; speedup_total_(0.0), num_subtests_called_(0) {};
void flush_subtest_data(); void flushSubtestData();
void reset_subtest_data() void resetSubtestData()
{ {
cpu_elapsed_ = 0; cpu_elapsed_ = 0;
gpu_elapsed_ = 0; gpu_elapsed_ = 0;
...@@ -93,17 +93,6 @@ private: ...@@ -93,17 +93,6 @@ private:
}; };
#define TEST(name) \
struct name##_test: Runnable \
{ \
name##_test(): Runnable(#name) { \
TestSystem::instance()->addTest(this); \
} \
void run(); \
} name##_test_instance; \
void name##_test::run()
#define INIT(name) \ #define INIT(name) \
struct name##_init: Runnable \ struct name##_init: Runnable \
{ \ { \
...@@ -115,12 +104,22 @@ private: ...@@ -115,12 +104,22 @@ private:
void name##_init::run() void name##_init::run()
#define TEST(name) \
struct name##_test: Runnable \
{ \
name##_test(): Runnable(#name) { \
TestSystem::instance()->addTest(this); \
} \
void run(); \
} name##_test_instance; \
void name##_test::run()
#define SUBTEST TestSystem::instance()->subtest()
#define DESCRIPTION TestSystem::instance()->subtest()
#define CPU_ON TestSystem::instance()->cpuOn() #define CPU_ON TestSystem::instance()->cpuOn()
#define GPU_ON TestSystem::instance()->gpuOn() #define GPU_ON TestSystem::instance()->gpuOn()
#define CPU_OFF TestSystem::instance()->cpuOff() #define CPU_OFF TestSystem::instance()->cpuOff()
#define GPU_OFF TestSystem::instance()->gpuOff() #define GPU_OFF TestSystem::instance()->gpuOff()
#define SUBTEST TestSystem::instance()->subtest()
#define DESCRIPTION TestSystem::instance()->subtest()
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low, void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
cv::Scalar high); cv::Scalar high);
......
...@@ -6,42 +6,40 @@ ...@@ -6,42 +6,40 @@
using namespace std; using namespace std;
using namespace cv; using namespace cv;
// This code calls CUFFT DFT and initializes that lib INIT(matchTemplate)
INIT(CUFFT_library)
{ {
Mat src, templ; Mat src; gen(src, 500, 500, CV_32F, 0, 1);
gen(src, 500, 500, CV_32F, 0, 1); Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
gen(templ, 500, 500, CV_32F, 0, 1);
gpu::GpuMat d_src(src); gpu::GpuMat d_src(src), d_templ(templ), d_dst;
gpu::GpuMat d_templ(templ);
gpu::GpuMat d_result;
gpu::matchTemplate(d_src, d_templ, d_result, CV_TM_CCORR); gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
} }
TEST(matchTemplate) TEST(matchTemplate)
{ {
Mat src, templ, result; Mat src, templ, dst;
gen(src, 3000, 3000, CV_32F, 0, 1); gen(src, 3000, 3000, CV_32F, 0, 1);
gpu::GpuMat d_image(src), d_templ, d_result; gpu::GpuMat d_src(src), d_templ, d_dst;
for (int templ_size = 5; templ_size <= 1000; templ_size *= 2) for (int templ_size = 5; templ_size < 200; templ_size *= 5)
{ {
SUBTEST << "src " << src.rows << ", templ " << templ_size << ", 32F, CCORR"; SUBTEST << "src " << src.rows << ", templ " << templ_size << ", 32F, CCORR";
gen(templ, templ_size, templ_size, CV_32F, 0, 1); gen(templ, templ_size, templ_size, CV_32F, 0, 1);
dst.create(src.rows - templ.rows + 1, src.cols - templ.cols + 1, CV_32F);
CPU_ON; CPU_ON;
matchTemplate(src, templ, result, CV_TM_CCORR); matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_OFF; CPU_OFF;
d_templ = templ; d_templ = templ;
d_dst.create(d_src.rows - d_templ.rows + 1, d_src.cols - d_templ.cols + 1, CV_32F);
GPU_ON; GPU_ON;
gpu::matchTemplate(d_image, d_templ, d_result, CV_TM_CCORR); gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
GPU_OFF; GPU_OFF;
} }
} }
...@@ -86,6 +84,7 @@ TEST(remap) ...@@ -86,6 +84,7 @@ TEST(remap)
gen(src, size, size, CV_8UC1, 0, 256); gen(src, size, size, CV_8UC1, 0, 256);
gen(xmap, size, size, CV_32F, 0, size); gen(xmap, size, size, CV_32F, 0, size);
gen(ymap, size, size, CV_32F, 0, size); gen(ymap, size, size, CV_32F, 0, size);
dst.create(xmap.size(), src.type());
CPU_ON; CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR); remap(src, dst, xmap, ymap, INTER_LINEAR);
...@@ -94,6 +93,7 @@ TEST(remap) ...@@ -94,6 +93,7 @@ TEST(remap)
d_src = src; d_src = src;
d_xmap = xmap; d_xmap = xmap;
d_ymap = ymap; d_ymap = ymap;
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON; GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap); gpu::remap(d_src, d_dst, d_xmap, d_ymap);
...@@ -107,17 +107,19 @@ TEST(dft) ...@@ -107,17 +107,19 @@ TEST(dft)
Mat src, dst; Mat src, dst;
gpu::GpuMat d_src, d_dst; gpu::GpuMat d_src, d_dst;
for (int size = 1000; size <= 8000; size *= 2) for (int size = 1000; size <= 4000; size *= 2)
{ {
SUBTEST << "size " << size << ", 32FC2, complex-to-complex"; SUBTEST << "size " << size << ", 32FC2, complex-to-complex";
gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1)); gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
dst.create(src.size(), src.type());
CPU_ON; CPU_ON;
dft(src, dst); dft(src, dst);
CPU_OFF; CPU_OFF;
d_src = src; d_src = src;
d_dst.create(d_src.size(), d_src.type());
GPU_ON; GPU_ON;
gpu::dft(d_src, d_dst, Size(size, size)); gpu::dft(d_src, d_dst, Size(size, size));
...@@ -136,12 +138,14 @@ TEST(cornerHarris) ...@@ -136,12 +138,14 @@ TEST(cornerHarris)
SUBTEST << "size " << size << ", 32FC1"; SUBTEST << "size " << size << ", 32FC1";
gen(src, size, size, CV_32F, 0, 1); gen(src, size, size, CV_32F, 0, 1);
dst.create(src.size(), src.type());
CPU_ON; CPU_ON;
cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101); cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
CPU_OFF; CPU_OFF;
d_src = src; d_src = src;
d_dst.create(src.size(), src.type());
GPU_ON; GPU_ON;
gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1); gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1);
...@@ -150,22 +154,51 @@ TEST(cornerHarris) ...@@ -150,22 +154,51 @@ TEST(cornerHarris)
} }
TEST(memoryAllocation) TEST(integral)
{ {
Mat mat; Mat src, sum;
gpu::GpuMat d_mat; gpu::GpuMat d_src, d_sum;
int begin = 100, end = 8000, step = 100; for (int size = 1000; size <= 8000; size *= 2)
{
SUBTEST << "size " << size << ", 8U";
DESCRIPTION << "32F matrices from " << begin << " to " << end; gen(src, size, size, CV_8U, 0, 256);
sum.create(size + 1, size + 1, CV_32S);
CPU_ON; CPU_ON;
for (int size = begin; size <= end; size += step) integral(src, sum);
mat.create(size, size, CV_32FC1);
CPU_OFF; CPU_OFF;
d_src = src;
d_sum.create(size + 1, size + 1, CV_32S);
GPU_ON; GPU_ON;
for (int size = begin; size <= end; size += step) gpu::integral(d_src, d_sum);
d_mat.create(size, size, CV_32FC1);
GPU_OFF; GPU_OFF;
}
}
TEST(norm)
{
Mat src;
gpu::GpuMat d_src;
for (int size = 1000; size <= 8000; size *= 2)
{
SUBTEST << "size " << size << ", 8U";
gen(src, size, size, CV_8U, 0, 256);
CPU_ON;
norm(src);
CPU_OFF;
d_src = src;
GPU_ON;
gpu::norm(d_src);
GPU_OFF;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment