Commit b6d45b97 authored by Vadim Pisarevsky

Merge pull request #11244 from alalek:cuda_samples_drop_performance

parents 569011a1 dd243067
...@@ -60,4 +60,3 @@ foreach(sample_filename ${all_samples}) ...@@ -60,4 +60,3 @@ foreach(sample_filename ${all_samples})
ocv_target_link_libraries(${tgt} opencv_cudacodec) ocv_target_link_libraries(${tgt} opencv_cudacodec)
endif() endif()
endforeach() endforeach()
# Include the performance sample's CMake file if it is present; OPTIONAL means
# a missing file is not an error.
include("performance/CMakeLists.txt" OPTIONAL)
# Build description of the CUDA performance sample executable.
set(the_target "example_gpu_performance")
# Every .cpp/.h file under performance/ becomes part of the target.
file(GLOB sources "performance/*.cpp")
file(GLOB headers "performance/*.h")
# Optional modules: only referenced when the corresponding HAVE_* variable is set.
# (ocv_include_modules_recurse is a project helper; presumably it adds the
# module's include directories - confirm against the OpenCV CMake utilities.)
if(HAVE_opencv_xfeatures2d)
ocv_include_modules_recurse(opencv_xfeatures2d)
endif()
if(HAVE_opencv_bgsegm)
ocv_include_modules_recurse(opencv_bgsegm)
endif()
add_executable(${the_target} ${sources} ${headers})
# Link against the common linker libs and the modules required by all CUDA samples.
ocv_target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})
# Link the optional modules under the same conditions used for the includes above.
if(HAVE_opencv_xfeatures2d)
ocv_target_link_libraries(${the_target} opencv_xfeatures2d)
endif()
if(HAVE_opencv_bgsegm)
ocv_target_link_libraries(${the_target} opencv_bgsegm)
endif()
# The binary is named "performance_gpu"; PROJECT_LABEL is the name shown in IDEs.
set_target_properties(${the_target} PROPERTIES
OUTPUT_NAME "performance_gpu"
PROJECT_LABEL "(EXAMPLE_CUDA) performance")
# Group the target under a solution folder when solution folders are enabled.
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "samples//gpu")
endif()
# The executable itself is only installed on Windows.
if(WIN32)
install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/gpu" COMPONENT samples)
endif()
# Hand the sample sources to the project's example-source install helper.
ocv_install_example_src("gpu/performance" performance/*.cpp performance/*.h)
#include <iomanip>
#include <stdexcept>
#include <string>
#include "performance.h"
#include "opencv2/core/cuda.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
void TestSystem::run()
{
if (is_list_mode_)
{
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
cout << (*it)->name() << endl;
return;
}
// Run test initializers
for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
(*it)->run();
}
printHeading();
// Run tests
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
try
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
cout << endl << (*it)->name() << ":\n";
(*it)->run();
finishCurrentSubtest();
}
}
catch (const Exception&)
{
// Message is printed via callback
resetCurrentSubtest();
}
catch (const runtime_error& e)
{
printError(e.what());
resetCurrentSubtest();
}
}
printSummary();
}
// Closes the current subtest: converts the accumulated tick counts to
// milliseconds, prints one metrics row, updates the running speedup total and
// resets the per-subtest state. Does nothing when no timing was recorded.
void TestSystem::finishCurrentSubtest()
{
    if (!cur_subtest_is_empty_)
    {
        const double ticks_per_second = getTickFrequency();
        const double cpu_ms = cpu_elapsed_ / ticks_per_second * 1000.0;
        const double gpu_ms = gpu_elapsed_ / ticks_per_second * 1000.0;

        // The denominator is clamped to at least one tick to avoid dividing by zero.
        const double ratio = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
        speedup_total_ += ratio;

        printMetrics(cpu_ms, gpu_ms, ratio);
        ++num_subtests_called_;
        resetCurrentSubtest();
    }
}
double TestSystem::meanTime(const vector<int64> &samples)
{
double sum = accumulate(samples.begin(), samples.end(), 0.);
if (samples.size() > 1)
return (sum - samples[0]) / (samples.size() - 1);
return sum;
}
// Prints the left-aligned column headings of the result table.
void TestSystem::printHeading()
{
    cout << endl;

    cout.setf(ios_base::left);
    cout << TAB;
    cout << setw(10) << "CPU, ms";
    cout << setw(10) << "GPU, ms";
    cout << setw(14) << "SPEEDUP";
    cout << "DESCRIPTION\n";
    // Restore the alignment flag so later output is unaffected.
    cout.unsetf(ios_base::left);
}
// Prints the average speedup over all finished subtests in fixed notation
// with three digits of precision.
void TestSystem::printSummary()
{
    // At least 1 so that an empty run does not divide by zero.
    const int subtest_count = std::max(1, num_subtests_called_);

    cout.setf(ios_base::fixed);
    cout << "\naverage GPU speedup: x";
    cout << setprecision(3) << speedup_total_ / subtest_count << endl;
    cout.unsetf(ios_base::fixed);
}
void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup)
{
cout << TAB << setiosflags(ios_base::left);
stringstream stream;
stream << cpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << gpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << speedup;
cout << setw(14) << stream.str();
cout << cur_subtest_description_.str();
cout << resetiosflags(ios_base::left) << endl;
}
void TestSystem::printError(const std::string& msg)
{
cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
}
// (Re)allocates `mat` as rows x cols of the given type and fills it with
// uniformly distributed values, passing `low` and `high` to RNG::fill as the
// range. The generator is always seeded with 0, so the output is
// reproducible between calls.
void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
{
    RNG fixed_seed_rng(0);
    mat.create(rows, cols, type);
    fixed_seed_rng.fill(mat, RNG::UNIFORM, low, high);
}
// Prefixes `relpath` with the working directory configured in TestSystem.
string abspath(const string& relpath)
{
    const string& base_dir = TestSystem::instance().workingDir();
    return base_dir + relpath;
}
static int cvErrorCallback(int /*status*/, const char* /*func_name*/,
const char* err_msg, const char* /*file_name*/,
int /*line*/, void* /*userdata*/)
{
TestSystem::instance().printError(err_msg);
return 0;
}
int main(int argc, const char* argv[])
{
int num_devices = getCudaEnabledDeviceCount();
if (num_devices == 0)
{
cerr << "No GPU found or the library was compiled without CUDA support";
return -1;
}
redirectError(cvErrorCallback);
const char* keys =
"{ h help | | print help message }"
"{ f filter | | filter for test }"
"{ w workdir | | set working directory }"
"{ l list | | show all tests }"
"{ d device | 0 | device id }"
"{ i iters | 10 | iteration count }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help") || !cmd.check())
{
cmd.printMessage();
cmd.printErrors();
return 0;
}
int device = cmd.get<int>("device");
if (device < 0 || device >= num_devices)
{
cerr << "Invalid device ID" << endl;
return -1;
}
DeviceInfo dev_info(device);
if (!dev_info.isCompatible())
{
cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
return -1;
}
setDevice(device);
printShortCudaDeviceInfo(device);
string filter = cmd.get<string>("filter");
string workdir = cmd.get<string>("workdir");
bool list = cmd.has("list");
int iters = cmd.get<int>("iters");
if (!filter.empty())
TestSystem::instance().setTestFilter(filter);
if (!workdir.empty())
{
if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
workdir += '/';
TestSystem::instance().setWorkingDir(workdir);
}
if (list)
TestSystem::instance().setListMode(true);
TestSystem::instance().setNumIters(iters);
cout << "\nNote: the timings for GPU don't include data transfer" << endl;
TestSystem::instance().run();
return 0;
}
#ifndef OPENCV_CUDA_SAMPLE_PERFORMANCE_H_
#define OPENCV_CUDA_SAMPLE_PERFORMANCE_H_
#include <iostream>
#include <cstdio>
#include <vector>
#include <numeric>
#include <string>
#include <opencv2/core/utility.hpp>
#define TAB " "
// Abstract base of everything the test system can execute: a named object
// with a run() method implemented by the derived class.
class Runnable
{
    std::string name_;

public:
    // Stores a copy of the given name.
    explicit Runnable(const std::string& nameStr)
        : name_(nameStr)
    {
    }

    virtual ~Runnable()
    {
    }

    // Name given at construction time.
    const std::string& name() const
    {
        return name_;
    }

    // Work performed by the concrete initializer or test.
    virtual void run() = 0;
};
class TestSystem
{
public:
static TestSystem& instance()
{
static TestSystem me;
return me;
}
void setWorkingDir(const std::string& val) { working_dir_ = val; }
const std::string& workingDir() const { return working_dir_; }
void setTestFilter(const std::string& val) { test_filter_ = val; }
const std::string& testFilter() const { return test_filter_; }
void setNumIters(int num_iters) { num_iters_ = num_iters; }
void addInit(Runnable* init) { inits_.push_back(init); }
void addTest(Runnable* test) { tests_.push_back(test); }
void run();
// It's public because OpenCV callback uses it
void printError(const std::string& msg);
std::stringstream& startNewSubtest()
{
finishCurrentSubtest();
return cur_subtest_description_;
}
bool stop() const { return cur_iter_idx_ >= num_iters_; }
void cpuOn() { cpu_started_ = cv::getTickCount(); }
void cpuOff()
{
int64 delta = cv::getTickCount() - cpu_started_;
cpu_times_.push_back(delta);
++cur_iter_idx_;
}
void cpuComplete()
{
cpu_elapsed_ += meanTime(cpu_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
void gpuOn() { gpu_started_ = cv::getTickCount(); }
void gpuOff()
{
int64 delta = cv::getTickCount() - gpu_started_;
gpu_times_.push_back(delta);
++cur_iter_idx_;
}
void gpuComplete()
{
gpu_elapsed_ += meanTime(gpu_times_);
cur_subtest_is_empty_ = false;
cur_iter_idx_ = 0;
}
bool isListMode() const { return is_list_mode_; }
void setListMode(bool value) { is_list_mode_ = value; }
private:
TestSystem():
cur_subtest_is_empty_(true), cpu_elapsed_(0),
gpu_elapsed_(0), speedup_total_(0.0),
num_subtests_called_(0), is_list_mode_(false),
num_iters_(10), cur_iter_idx_(0)
{
cpu_times_.reserve(num_iters_);
gpu_times_.reserve(num_iters_);
}
void finishCurrentSubtest();
void resetCurrentSubtest()
{
cpu_elapsed_ = 0;
gpu_elapsed_ = 0;
cur_subtest_description_.str("");
cur_subtest_is_empty_ = true;
cur_iter_idx_ = 0;
cpu_times_.clear();
gpu_times_.clear();
}
double meanTime(const std::vector<int64> &samples);
void printHeading();
void printSummary();
void printMetrics(double cpu_time, double gpu_time, double speedup);
std::string working_dir_;
std::string test_filter_;
std::vector<Runnable*> inits_;
std::vector<Runnable*> tests_;
std::stringstream cur_subtest_description_;
bool cur_subtest_is_empty_;
int64 cpu_started_;
int64 gpu_started_;
double cpu_elapsed_;
double gpu_elapsed_;
double speedup_total_;
int num_subtests_called_;
bool is_list_mode_;
int num_iters_;
int cur_iter_idx_;
std::vector<int64> cpu_times_;
std::vector<int64> gpu_times_;
};
// Defines a global initializer called `name`.
// Expands to a Runnable-derived struct `name_init` whose constructor registers
// the object with TestSystem::addInit, one global instance of that struct, and
// the opening of the out-of-class run() definition - so the macro invocation
// must be followed directly by the `{ ... }` body of run().
#define GLOBAL_INIT(name) \
struct name##_init: Runnable { \
name##_init(): Runnable(#name) { \
TestSystem::instance().addInit(this); \
} \
void run(); \
} name##_init_instance; \
void name##_init::run()
// Defines a test called `name`.
// Expands to a Runnable-derived struct `name_test` whose constructor registers
// the object with TestSystem::addTest, one global instance of that struct, and
// the opening of the out-of-class run() definition - so the macro invocation
// must be followed directly by the `{ ... }` body of the test.
#define TEST(name) \
struct name##_test: Runnable { \
name##_test(): Runnable(#name) { \
TestSystem::instance().addTest(this); \
} \
void run(); \
} name##_test_instance; \
void name##_test::run()
// Starts a new subtest and yields the stream for its description,
// e.g. `SUBTEST << size << 'x' << size;`.
#define SUBTEST TestSystem::instance().startNewSubtest()
// CPU_ON / CPU_OFF bracket the code that is timed on the CPU. CPU_ON opens a
// `while` loop (note the unbalanced `{`) that repeats until
// TestSystem::stop() is true; CPU_OFF records the iteration, closes the loop
// and folds the measurements into the subtest. They must be used as a pair.
#define CPU_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().cpuOn()
#define CPU_OFF \
TestSystem::instance().cpuOff(); \
} TestSystem::instance().cpuComplete()
// CUDA_ON / CUDA_OFF: same pairing as CPU_ON / CPU_OFF, but the measurements
// go to the GPU counters.
#define CUDA_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpuOn()
#define CUDA_OFF \
TestSystem::instance().gpuOff(); \
} TestSystem::instance().gpuComplete()
// Generates a rows x cols matrix of the given type in `mat`, filled with
// uniformly distributed random values; `low` and `high` are the range
// arguments handed to the random generator.
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
cv::Scalar high);
// Returns `relpath` prefixed with the working directory of the test system
// (plain string concatenation, no path normalisation).
std::string abspath(const std::string& relpath);
#endif // OPENCV_CUDA_SAMPLE_PERFORMANCE_H_
#include <stdexcept>
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/video.hpp"
#include "opencv2/cudalegacy.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/cudafeatures2d.hpp"
#include "opencv2/cudafilters.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudabgsegm.hpp"
#include "performance.h"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_XFEATURES2D
#include "opencv2/xfeatures2d/cuda.hpp"
#include "opencv2/xfeatures2d/nonfree.hpp"
#endif
#ifdef HAVE_OPENCV_BGSEGM
#include "opencv2/bgsegm.hpp"
#endif
using namespace std;
using namespace cv;
// Benchmark: TM_CCORR template matching of a 3000x3000 32F image against
// square templates of side 5, 25 and 125, CPU vs CUDA.
TEST(matchTemplate)
{
Mat src, templ, dst;
gen(src, 3000, 3000, CV_32F, 0, 1);
cuda::GpuMat d_src(src), d_templ, d_dst;
Ptr<cuda::TemplateMatching> alg = cuda::createTemplateMatching(src.type(), TM_CCORR);
for (int templ_size = 5; templ_size < 200; templ_size *= 5)
{
SUBTEST << src.cols << 'x' << src.rows << ", 32FC1" << ", templ " << templ_size << 'x' << templ_size << ", CCORR";
gen(templ, templ_size, templ_size, CV_32F, 0, 1);
// Untimed call before the timed CPU loop.
matchTemplate(src, templ, dst, TM_CCORR);
CPU_ON;
matchTemplate(src, templ, dst, TM_CCORR);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_templ.upload(templ);
// Untimed call before the timed CUDA loop.
alg->match(d_src, d_templ, d_dst);
CUDA_ON;
alg->match(d_src, d_templ, d_dst);
CUDA_OFF;
}
}
// Benchmark: minMaxLoc on square 32F matrices of side 2000, 4000 and 8000,
// CPU vs CUDA. Unlike most other tests there is no untimed call before the
// timed loops.
TEST(minMaxLoc)
{
Mat src;
cuda::GpuMat d_src;
double min_val, max_val;
Point min_loc, max_loc;
for (int size = 2000; size <= 8000; size *= 2)
{
SUBTEST << size << 'x' << size << ", 32F";
gen(src, size, size, CV_32F, 0, 1);
CPU_ON;
minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
CUDA_ON;
cuda::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
CUDA_OFF;
}
}
// Benchmark: remap of 8UC4 images of side 1000, 2000 and 4000 with
// INTER_LINEAR / BORDER_REPLICATE, CPU vs CUDA.
TEST(remap)
{
Mat src, dst, xmap, ymap;
cuda::GpuMat d_src, d_dst, d_xmap, d_ymap;
int interpolation = INTER_LINEAR;
int borderMode = BORDER_REPLICATE;
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << size << 'x' << size << ", 8UC4, INTER_LINEAR, BORDER_REPLICATE";
gen(src, size, size, CV_8UC4, 0, 256);
xmap.create(size, size, CV_32F);
ymap.create(size, size, CV_32F);
// Build the maps: each destination pixel reads the source at its own
// coordinate scaled by 0.75 about the image centre.
for (int i = 0; i < size; ++i)
{
float* xmap_row = xmap.ptr<float>(i);
float* ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < size; ++j)
{
xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
}
}
// Untimed call before the timed CPU loop.
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_ON;
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_OFF;
// Uploads happen outside the timed CUDA loop.
d_src.upload(src);
d_xmap.upload(xmap);
d_ymap.upload(ymap);
// Untimed call before the timed CUDA loop.
cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
CUDA_ON;
cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
CUDA_OFF;
}
}
// Benchmark: DFT of 32FC2 matrices of side 1000, 2000 and 4000, CPU vs CUDA.
TEST(dft)
{
Mat src, dst;
cuda::GpuMat d_src, d_dst;
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << size << 'x' << size << ", 32FC2, complex-to-complex";
gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
// Untimed call before the timed CPU loop.
dft(src, dst);
CPU_ON;
dft(src, dst);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::dft(d_src, d_dst, Size(size, size));
CUDA_ON;
cuda::dft(d_src, d_dst, Size(size, size));
CUDA_OFF;
}
}
// Benchmark: Harris corner response on 32F images of side 1000, 2000 and
// 4000 (block size 5, aperture 7, k 0.1, BORDER_REFLECT101), CPU vs CUDA.
TEST(cornerHarris)
{
Mat src, dst;
cuda::GpuMat d_src, d_dst;
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << size << 'x' << size << ", 32FC1, BORDER_REFLECT101";
gen(src, size, size, CV_32F, 0, 1);
// Untimed call before the timed CPU loop.
cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
CPU_ON;
cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
CPU_OFF;
// Upload and algorithm construction happen outside the timed CUDA loop.
d_src.upload(src);
Ptr<cuda::CornernessCriteria> harris = cuda::createHarrisCorner(src.type(), 5, 7, 0.1, BORDER_REFLECT101);
// Untimed call before the timed CUDA loop.
harris->compute(d_src, d_dst);
CUDA_ON;
harris->compute(d_src, d_dst);
CUDA_OFF;
}
}
// Benchmark: integral image of 8UC1 images of side 1000, 2000 and 4000,
// CPU vs CUDA.
TEST(integral)
{
Mat src, sum;
cuda::GpuMat d_src, d_sum;
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << size << 'x' << size << ", 8UC1";
gen(src, size, size, CV_8U, 0, 256);
// Untimed call before the timed CPU loop.
integral(src, sum);
CPU_ON;
integral(src, sum);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::integral(d_src, d_sum);
CUDA_ON;
cuda::integral(d_src, d_sum);
CUDA_OFF;
}
}
// Benchmark: NORM_INF of 32FC4 matrices of side 2000, 3000 and 4000,
// CPU vs CUDA.
TEST(norm)
{
Mat src;
cuda::GpuMat d_src, d_buf;
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 32FC4, NORM_INF";
gen(src, size, size, CV_32FC4, Scalar::all(0), Scalar::all(1));
// Untimed call before the timed CPU loop; the result is discarded.
norm(src, NORM_INF);
CPU_ON;
norm(src, NORM_INF);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::norm(d_src, NORM_INF, d_buf);
CUDA_ON;
cuda::norm(d_src, NORM_INF, d_buf);
CUDA_OFF;
}
}
// Benchmark: mean-shift filtering on images of side 400 and 800. The CPU side
// runs pyrMeanShiftFiltering on an 8UC3 image, the CUDA side runs
// meanShiftFiltering on a freshly generated 8UC4 image (hence "8UC3 vs 8UC4").
TEST(meanShift)
{
int sp = 10, sr = 10;
Mat src, dst;
cuda::GpuMat d_src, d_dst;
for (int size = 400; size <= 800; size *= 2)
{
SUBTEST << size << 'x' << size << ", 8UC3 vs 8UC4";
gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256));
// Untimed call before the timed CPU loop.
pyrMeanShiftFiltering(src, dst, sp, sr);
CPU_ON;
pyrMeanShiftFiltering(src, dst, sp, sr);
CPU_OFF;
// Regenerate the input with four channels for the CUDA implementation.
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
CUDA_ON;
cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
CUDA_OFF;
}
}
// Only compiled when the xfeatures2d module is available.
#ifdef HAVE_OPENCV_XFEATURES2D
// Benchmark: SURF detection + description on the grayscale aloeL.jpg image,
// CPU vs CUDA. Throws std::runtime_error when the image cannot be loaded.
TEST(SURF)
{
Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
Ptr<Feature2D> surf = xfeatures2d::SURF::create();
vector<KeyPoint> keypoints;
Mat descriptors;
// Untimed call before the timed CPU loop.
surf->detectAndCompute(src, Mat(), keypoints, descriptors);
CPU_ON;
surf->detectAndCompute(src, Mat(), keypoints, descriptors);
CPU_OFF;
cuda::SURF_CUDA d_surf;
cuda::GpuMat d_src(src);
cuda::GpuMat d_keypoints;
cuda::GpuMat d_descriptors;
// Untimed call before the timed CUDA loop.
d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
CUDA_ON;
d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
CUDA_OFF;
}
#endif
// Benchmark: FAST keypoint detection (threshold 20) on the grayscale
// aloeL.jpg image, CPU vs CUDA. Throws std::runtime_error when the image
// cannot be loaded.
TEST(FAST)
{
Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
vector<KeyPoint> keypoints;
// Untimed call before the timed CPU loop.
FAST(src, keypoints, 20);
CPU_ON;
FAST(src, keypoints, 20);
CPU_OFF;
cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);
cuda::GpuMat d_src(src);
cuda::GpuMat d_keypoints;
// Untimed call before the timed CUDA loop.
d_FAST->detectAsync(d_src, d_keypoints);
CUDA_ON;
d_FAST->detectAsync(d_src, d_keypoints);
CUDA_OFF;
}
// Benchmark: ORB detection + description on the grayscale aloeL.jpg image,
// CPU vs CUDA. Throws std::runtime_error when the image cannot be loaded.
TEST(ORB)
{
    Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
    if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");

    // Both detectors are created with the same feature budget so that the
    // CPU and CUDA timings measure the same workload (the CUDA detector was
    // previously left at its default feature count).
    const int num_features = 4000;

    Ptr<ORB> orb = ORB::create(num_features);
    vector<KeyPoint> keypoints;
    Mat descriptors;

    // Untimed call before the timed CPU loop.
    orb->detectAndCompute(src, Mat(), keypoints, descriptors);

    CPU_ON;
    orb->detectAndCompute(src, Mat(), keypoints, descriptors);
    CPU_OFF;

    Ptr<cuda::ORB> d_orb = cuda::ORB::create(num_features);
    cuda::GpuMat d_src(src);
    cuda::GpuMat d_keypoints;
    cuda::GpuMat d_descriptors;

    // Untimed call before the timed CUDA loop.
    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);

    CUDA_ON;
    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
    CUDA_OFF;
}
// Benchmark: brute-force L2 descriptor matching of 3000 query against 3000
// train descriptors of length 64, CPU vs CUDA, in three subtests: match,
// knnMatch (k = 2) and radiusMatch (max distance 2.0).
TEST(BruteForceMatcher)
{
// Init CPU matcher
int desc_len = 64;
BFMatcher matcher(NORM_L2);
Mat query;
gen(query, 3000, desc_len, CV_32F, 0, 1);
Mat train;
gen(train, 3000, desc_len, CV_32F, 0, 1);
// Init CUDA matcher
Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);
cuda::GpuMat d_query(query);
cuda::GpuMat d_train(train);
// Output
vector< vector<DMatch> > matches(2);
cuda::GpuMat d_matches;
// Each subtest below issues one untimed call before its timed loop.
SUBTEST << "match";
matcher.match(query, train, matches[0]);
CPU_ON;
matcher.match(query, train, matches[0]);
CPU_OFF;
d_matcher->matchAsync(d_query, d_train, d_matches);
CUDA_ON;
d_matcher->matchAsync(d_query, d_train, d_matches);
CUDA_OFF;
SUBTEST << "knnMatch";
matcher.knnMatch(query, train, matches, 2);
CPU_ON;
matcher.knnMatch(query, train, matches, 2);
CPU_OFF;
d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
CUDA_ON;
d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
CUDA_OFF;
SUBTEST << "radiusMatch";
float max_distance = 2.0f;
matcher.radiusMatch(query, train, matches, max_distance);
CPU_ON;
matcher.radiusMatch(query, train, matches, max_distance);
CPU_OFF;
d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
CUDA_ON;
d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
CUDA_OFF;
}
// Benchmark: per-element magnitude of two 32F matrices of side 2000, 3000
// and 4000, CPU vs CUDA.
TEST(magnitude)
{
Mat x, y, mag;
cuda::GpuMat d_x, d_y, d_mag;
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 32FC1";
gen(x, size, size, CV_32F, 0, 1);
gen(y, size, size, CV_32F, 0, 1);
// Untimed call before the timed CPU loop.
magnitude(x, y, mag);
CPU_ON;
magnitude(x, y, mag);
CPU_OFF;
// Uploads happen outside the timed CUDA loop.
d_x.upload(x);
d_y.upload(y);
// Untimed call before the timed CUDA loop.
cuda::magnitude(d_x, d_y, d_mag);
CUDA_ON;
cuda::magnitude(d_x, d_y, d_mag);
CUDA_OFF;
}
}
// Benchmark: element-wise addition of two 32F matrices of side 2000, 3000
// and 4000, CPU vs CUDA.
TEST(add)
{
Mat src1, src2, dst;
cuda::GpuMat d_src1, d_src2, d_dst;
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 32FC1";
gen(src1, size, size, CV_32F, 0, 1);
gen(src2, size, size, CV_32F, 0, 1);
// Untimed call before the timed CPU loop.
add(src1, src2, dst);
CPU_ON;
add(src1, src2, dst);
CPU_OFF;
// Uploads happen outside the timed CUDA loop.
d_src1.upload(src1);
d_src2.upload(src2);
// Untimed call before the timed CUDA loop.
cuda::add(d_src1, d_src2, d_dst);
CUDA_ON;
cuda::add(d_src1, d_src2, d_dst);
CUDA_OFF;
}
}
// Benchmark: element-wise natural logarithm of 32F matrices of side 2000,
// 3000 and 4000 (inputs generated with bounds 1 and 10), CPU vs CUDA.
TEST(log)
{
Mat src, dst;
cuda::GpuMat d_src, d_dst;
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 32F";
gen(src, size, size, CV_32F, 1, 10);
// Untimed call before the timed CPU loop.
log(src, dst);
CPU_ON;
log(src, dst);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::log(d_src, d_dst);
CUDA_ON;
cuda::log(d_src, d_dst);
CUDA_OFF;
}
}
// Benchmark: mulSpectrums (flags 0, conjugate second operand) on 32FC2
// matrices of side 2000, 3000 and 4000, CPU vs CUDA.
TEST(mulSpectrums)
{
Mat src1, src2, dst;
cuda::GpuMat d_src1, d_src2, d_dst;
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size;
gen(src1, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
gen(src2, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
// Untimed call before the timed CPU loop.
mulSpectrums(src1, src2, dst, 0, true);
CPU_ON;
mulSpectrums(src1, src2, dst, 0, true);
CPU_OFF;
// Uploads happen outside the timed CUDA loop.
d_src1.upload(src1);
d_src2.upload(src2);
// Untimed call before the timed CUDA loop.
cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
CUDA_ON;
cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
CUDA_OFF;
}
}
// Benchmark: resize of 8UC4 images of side 1000, 2000 and 3000, first
// scaling up by 2.0 and then down by 0.5 in both directions, CPU vs CUDA.
TEST(resize)
{
Mat src, dst;
cuda::GpuMat d_src, d_dst;
// Upscaling subtests.
for (int size = 1000; size <= 3000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 8UC4, up";
gen(src, size, size, CV_8UC4, 0, 256);
// Untimed call before the timed CPU loop.
resize(src, dst, Size(), 2.0, 2.0);
CPU_ON;
resize(src, dst, Size(), 2.0, 2.0);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
CUDA_ON;
cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
CUDA_OFF;
}
// Downscaling subtests.
for (int size = 1000; size <= 3000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 8UC4, down";
gen(src, size, size, CV_8UC4, 0, 256);
// Untimed call before the timed CPU loop.
resize(src, dst, Size(), 0.5, 0.5);
CPU_ON;
resize(src, dst, Size(), 0.5, 0.5);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
CUDA_ON;
cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
CUDA_OFF;
}
}
// Benchmark: a chain of colour conversions on a 4000x4000 image, CPU vs CUDA.
// After every subtest the source and destination buffers are swapped (on
// both host and device), so each conversion consumes the output of the
// previous one. Every subtest issues one untimed call before each timed loop.
TEST(cvtColor)
{
Mat src, dst;
cuda::GpuMat d_src, d_dst;
gen(src, 4000, 4000, CV_8UC1, 0, 255);
d_src.upload(src);
SUBTEST << "4000x4000, 8UC1, COLOR_GRAY2BGRA";
cvtColor(src, dst, COLOR_GRAY2BGRA, 4);
CPU_ON;
cvtColor(src, dst, COLOR_GRAY2BGRA, 4);
CPU_OFF;
cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
CUDA_ON;
cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
CUDA_OFF;
// The output of this step becomes the input of the next one.
cv::swap(src, dst);
d_src.swap(d_dst);
// The CPU call uses the default destination channel count here, the CUDA
// call requests 4 channels.
SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2YCrCb";
cvtColor(src, dst, COLOR_BGR2YCrCb);
CPU_ON;
cvtColor(src, dst, COLOR_BGR2YCrCb);
CPU_OFF;
cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
CUDA_ON;
cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
CUDA_OFF;
cv::swap(src, dst);
d_src.swap(d_dst);
SUBTEST << "4000x4000, 8UC4, COLOR_YCrCb2BGR";
cvtColor(src, dst, COLOR_YCrCb2BGR, 4);
CPU_ON;
cvtColor(src, dst, COLOR_YCrCb2BGR, 4);
CPU_OFF;
cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
CUDA_ON;
cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
CUDA_OFF;
cv::swap(src, dst);
d_src.swap(d_dst);
SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2XYZ";
cvtColor(src, dst, COLOR_BGR2XYZ);
CPU_ON;
cvtColor(src, dst, COLOR_BGR2XYZ);
CPU_OFF;
cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
CUDA_ON;
cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
CUDA_OFF;
cv::swap(src, dst);
d_src.swap(d_dst);
SUBTEST << "4000x4000, 8UC4, COLOR_XYZ2BGR";
cvtColor(src, dst, COLOR_XYZ2BGR, 4);
CPU_ON;
cvtColor(src, dst, COLOR_XYZ2BGR, 4);
CPU_OFF;
cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
CUDA_ON;
cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
CUDA_OFF;
cv::swap(src, dst);
d_src.swap(d_dst);
SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2HSV";
cvtColor(src, dst, COLOR_BGR2HSV);
CPU_ON;
cvtColor(src, dst, COLOR_BGR2HSV);
CPU_OFF;
cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
CUDA_ON;
cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
CUDA_OFF;
cv::swap(src, dst);
d_src.swap(d_dst);
SUBTEST << "4000x4000, 8UC4, COLOR_HSV2BGR";
cvtColor(src, dst, COLOR_HSV2BGR, 4);
CPU_ON;
cvtColor(src, dst, COLOR_HSV2BGR, 4);
CPU_OFF;
cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
CUDA_ON;
cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
CUDA_OFF;
// Final swap; the buffers are not used again after this point.
cv::swap(src, dst);
d_src.swap(d_dst);
}
// Benchmark: erosion with a 3x3 rectangular kernel on 8UC4 images of side
// 2000, 3000 and 4000, CPU vs CUDA.
TEST(erode)
{
Mat src, dst, ker;
cuda::GpuMat d_src, d_buf, d_dst;
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size;
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
ker = getStructuringElement(MORPH_RECT, Size(3, 3));
// Untimed call before the timed CPU loop.
erode(src, dst, ker);
CPU_ON;
erode(src, dst, ker);
CPU_OFF;
d_src.upload(src);
// From here on the local `erode` hides the erode() function, so it has to
// be declared after the CPU calls above.
Ptr<cuda::Filter> erode = cuda::createMorphologyFilter(MORPH_ERODE, d_src.type(), ker);
// Untimed call before the timed CUDA loop.
erode->apply(d_src, d_dst);
CUDA_ON;
erode->apply(d_src, d_dst);
CUDA_OFF;
}
}
// Benchmark: thresholding at 50.0 on matrices of side 2000, 3000 and 4000,
// CPU vs CUDA: THRESH_BINARY on 8UC1 data, then THRESH_TRUNC on 32FC1 data.
TEST(threshold)
{
Mat src, dst;
cuda::GpuMat d_src, d_dst;
// 8-bit THRESH_BINARY subtests.
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 8UC1, THRESH_BINARY";
gen(src, size, size, CV_8U, 0, 100);
// Untimed call before the timed CPU loop.
threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
CPU_ON;
threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
CUDA_ON;
cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
CUDA_OFF;
}
// 32-bit float THRESH_TRUNC subtests.
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 32FC1, THRESH_TRUNC [NPP]";
gen(src, size, size, CV_32FC1, 0, 100);
// Untimed call before the timed CPU loop.
threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
CPU_ON;
threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
CUDA_ON;
cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
CUDA_OFF;
}
}
// Benchmark: element-wise power with exponent -2.0 on 32F matrices of side
// 1000, 2000, 3000 and 4000, CPU vs CUDA.
TEST(pow)
{
Mat src, dst;
cuda::GpuMat d_src, d_dst;
for (int size = 1000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 32F";
gen(src, size, size, CV_32F, 0, 100);
// Untimed call before the timed CPU loop.
pow(src, -2.0, dst);
CPU_ON;
pow(src, -2.0, dst);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::pow(d_src, -2.0, d_dst);
CUDA_ON;
cuda::pow(d_src, -2.0, d_dst);
CUDA_OFF;
}
}
// Benchmark: projection of 3D points (1 x size, 32FC3), CPU vs CUDA. The
// point count starts at 1e6 and is divided by 1.4 per subtest, for at most
// five subtests and only while it stays >= 1e5.
TEST(projectPoints)
{
Mat src;
vector<Point2f> dst;
cuda::GpuMat d_src, d_dst;
Mat rvec; gen(rvec, 1, 3, CV_32F, 0, 1);
Mat tvec; gen(tvec, 1, 3, CV_32F, 0, 1);
// Random 3x3 camera matrix with the off-diagonal entries (0,1), (1,0),
// (2,0) and (2,1) forced to zero.
Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
for (int size = (int)1e6, count = 0; size >= 1e5 && count < 5; size = int(size / 1.4), count++)
{
SUBTEST << size;
gen(src, 1, size, CV_32FC3, Scalar::all(0), Scalar::all(10));
// Untimed call before the timed CPU loop. The CPU call is given a 1x8
// all-zero distortion vector, the CUDA call below an empty Mat.
projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst);
CPU_ON;
projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst);
CPU_OFF;
// The upload happens outside the timed CUDA loop.
d_src.upload(src);
// Untimed call before the timed CUDA loop.
cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
CUDA_ON;
cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
CUDA_OFF;
}
}
static void InitSolvePnpRansac()
{
Mat object; gen(object, 1, 4, CV_32FC3, Scalar::all(0), Scalar::all(100));
Mat image; gen(image, 1, 4, CV_32FC2, Scalar::all(0), Scalar::all(100));
Mat rvec, tvec;
cuda::solvePnPRansac(object, image, Mat::eye(3, 3, CV_32F), Mat(), rvec, tvec);
}
// Benchmark: RANSAC-based pose estimation (200 iterations, reprojection
// threshold 2.0) on random 3D/2D correspondences, CPU vs CUDA. The number of
// points starts at 5000 and is multiplied by 3.76 per subtest while it stays
// <= 300000.
TEST(solvePnPRansac)
{
    // One untimed CUDA call on a tiny problem before any measurement.
    InitSolvePnpRansac();

    for (int num_points = 5000; num_points <= 300000; num_points = int(num_points * 3.76))
    {
        SUBTEST << num_points;

        Mat object; gen(object, 1, num_points, CV_32FC3, Scalar::all(10), Scalar::all(100));
        Mat image; gen(image, 1, num_points, CV_32FC2, Scalar::all(10), Scalar::all(100));

        // Random 3x3 camera matrix with the off-diagonal entries (0,1),
        // (1,0), (2,0) and (2,1) forced to zero.
        Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0.5, 1);
        camera_mat.at<float>(0, 1) = 0.f;
        camera_mat.at<float>(1, 0) = 0.f;
        camera_mat.at<float>(2, 0) = 0.f;
        camera_mat.at<float>(2, 1) = 0.f;

        Mat rvec, tvec;
        const int num_iters = 200;
        const float max_dist = 2.0f;
        // cv::solvePnPRansac takes a confidence (a probability) in this
        // position, not a minimum inlier count like the CUDA overload; the
        // library's documented default is used.
        const double confidence = 0.99;
        vector<int> inliers_cpu, inliers_gpu;

        CPU_ON;
        solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
                       max_dist, confidence, inliers_cpu);
        CPU_OFF;

        CUDA_ON;
        cuda::solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
                             max_dist, int(num_points * 0.05), &inliers_gpu);
        CUDA_OFF;
    }
}
// Benchmark: 3x3 Gaussian blur (sigma 1) on 8UC4 images of side 1000, 2000,
// 3000 and 4000, CPU vs CUDA.
TEST(GaussianBlur)
{
for (int size = 1000; size <= 4000; size += 1000)
{
SUBTEST << size << 'x' << size << ", 8UC4";
Mat src, dst;
gen(src, size, size, CV_8UC4, 0, 256);
// Untimed call before the timed CPU loop.
GaussianBlur(src, dst, Size(3, 3), 1);
CPU_ON;
GaussianBlur(src, dst, Size(3, 3), 1);
CPU_OFF;
cuda::GpuMat d_src(src);
cuda::GpuMat d_dst(src.size(), src.type());
// d_buf is declared but not used by the calls below.
cuda::GpuMat d_buf;
cv::Ptr<cv::cuda::Filter> gauss = cv::cuda::createGaussianFilter(d_src.type(), -1, cv::Size(3, 3), 1);
// Untimed call before the timed CUDA loop.
gauss->apply(d_src, d_dst);
CUDA_ON;
gauss->apply(d_src, d_dst);
CUDA_OFF;
}
}
// Benchmark: 2D linear filtering of 8UC4 images of side 512, 1024 and 2048
// with random 32FC1 kernels of odd size 3..15, CPU vs CUDA.
TEST(filter2D)
{
for (int size = 512; size <= 2048; size *= 2)
{
Mat src;
gen(src, size, size, CV_8UC4, 0, 256);
for (int ksize = 3; ksize <= 16; ksize += 2)
{
SUBTEST << "ksize = " << ksize << ", " << size << 'x' << size << ", 8UC4";
Mat kernel;
gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
Mat dst;
// Untimed call before the timed CPU loop.
cv::filter2D(src, dst, -1, kernel);
CPU_ON;
cv::filter2D(src, dst, -1, kernel);
CPU_OFF;
cuda::GpuMat d_src(src);
cuda::GpuMat d_dst;
// The local filter object shares its name with the CPU function, which is
// why the CPU calls above are written with the cv:: qualifier.
Ptr<cuda::Filter> filter2D = cuda::createLinearFilter(d_src.type(), -1, kernel);
// Untimed call before the timed CUDA loop.
filter2D->apply(d_src, d_dst);
CUDA_ON;
filter2D->apply(d_src, d_dst);
CUDA_OFF;
}
}
}
// Benchmark: pyrDown on 8UC4 images of side 4000, 3000, 2000 and 1000,
// CPU vs CUDA.
TEST(pyrDown)
{
for (int size = 4000; size >= 1000; size -= 1000)
{
SUBTEST << size << 'x' << size << ", 8UC4";
Mat src, dst;
gen(src, size, size, CV_8UC4, 0, 256);
// Untimed call before the timed CPU loop.
pyrDown(src, dst);
CPU_ON;
pyrDown(src, dst);
CPU_OFF;
cuda::GpuMat d_src(src);
cuda::GpuMat d_dst;
// Untimed call before the timed CUDA loop.
cuda::pyrDown(d_src, d_dst);
CUDA_ON;
cuda::pyrDown(d_src, d_dst);
CUDA_OFF;
}
}
// Benchmark: pyrUp on 8UC4 images of side 2000 and 1000, CPU vs CUDA.
TEST(pyrUp)
{
for (int size = 2000; size >= 1000; size -= 1000)
{
SUBTEST << size << 'x' << size << ", 8UC4";
Mat src, dst;
gen(src, size, size, CV_8UC4, 0, 256);
// Untimed call before the timed CPU loop.
pyrUp(src, dst);
CPU_ON;
pyrUp(src, dst);
CPU_OFF;
cuda::GpuMat d_src(src);
cuda::GpuMat d_dst;
// Untimed call before the timed CUDA loop.
cuda::pyrUp(d_src, d_dst);
CUDA_ON;
cuda::pyrUp(d_src, d_dst);
CUDA_OFF;
}
}
// Benchmark: histogram equalisation of 8UC1 images of side 1000, 2000 and
// 3000 (the loop bound is exclusive of 4000), CPU vs CUDA.
TEST(equalizeHist)
{
for (int size = 1000; size < 4000; size += 1000)
{
SUBTEST << size << 'x' << size;
Mat src, dst;
gen(src, size, size, CV_8UC1, 0, 256);
// Untimed call before the timed CPU loop.
equalizeHist(src, dst);
CPU_ON;
equalizeHist(src, dst);
CPU_OFF;
cuda::GpuMat d_src(src);
cuda::GpuMat d_dst;
// Untimed call before the timed CUDA loop.
cuda::equalizeHist(d_src, d_dst);
CUDA_ON;
cuda::equalizeHist(d_src, d_dst);
CUDA_OFF;
}
}
// Benchmark: Canny edge detection (thresholds 50 and 100) on the grayscale
// aloeL.jpg image, CPU vs CUDA. Throws std::runtime_error when the image
// cannot be loaded.
TEST(Canny)
{
Mat img = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
if (img.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
Mat edges(img.size(), CV_8UC1);
// Note: no untimed call precedes the timed CPU loop in this test.
CPU_ON;
Canny(img, edges, 50.0, 100.0);
CPU_OFF;
cuda::GpuMat d_img(img);
cuda::GpuMat d_edges;
Ptr<cuda::CannyEdgeDetector> canny = cuda::createCannyEdgeDetector(50.0, 100.0);
// Untimed call before the timed CUDA loop.
canny->detect(d_img, d_edges);
CUDA_ON;
canny->detect(d_img, d_edges);
CUDA_OFF;
}
// Benchmark: REDUCE_MIN reduction of 32F matrices of side 1000, 2000 and
// 3000 along dimension 0 and then dimension 1, CPU vs CUDA.
TEST(reduce)
{
for (int size = 1000; size < 4000; size += 1000)
{
Mat src;
gen(src, size, size, CV_32F, 0, 255);
Mat dst0;
Mat dst1;
cuda::GpuMat d_src(src);
cuda::GpuMat d_dst0;
cuda::GpuMat d_dst1;
// Each subtest issues one untimed call before each timed loop.
SUBTEST << size << 'x' << size << ", dim = 0";
reduce(src, dst0, 0, REDUCE_MIN);
CPU_ON;
reduce(src, dst0, 0, REDUCE_MIN);
CPU_OFF;
cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
CUDA_ON;
cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
CUDA_OFF;
SUBTEST << size << 'x' << size << ", dim = 1";
reduce(src, dst1, 1, REDUCE_MIN);
CPU_ON;
reduce(src, dst1, 1, REDUCE_MIN);
CPU_OFF;
cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
CUDA_ON;
cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
CUDA_OFF;
}
}
// Benchmark: generalised matrix multiplication src1*src2*1.0 + src3*1.0 on
// 32FC1 matrices of side 512 and 1024, CPU vs CUDA.
TEST(gemm)
{
Mat src1, src2, src3, dst;
cuda::GpuMat d_src1, d_src2, d_src3, d_dst;
for (int size = 512; size <= 1024; size *= 2)
{
SUBTEST << size << 'x' << size;
gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
// Untimed call before the timed CPU loop.
gemm(src1, src2, 1.0, src3, 1.0, dst);
CPU_ON;
gemm(src1, src2, 1.0, src3, 1.0, dst);
CPU_OFF;
// Uploads happen outside the timed CUDA loop.
d_src1.upload(src1);
d_src2.upload(src2);
d_src3.upload(src3);
// Untimed call before the timed CUDA loop.
cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
CUDA_ON;
cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
CUDA_OFF;
}
}
// Benchmark: good-features-to-track corner detection (at most 8000 corners,
// quality level 0.01, minimum distance 0.0) on the grayscale aloeL.jpg
// image, CPU vs CUDA. Throws std::runtime_error when the image cannot be
// loaded.
TEST(GoodFeaturesToTrack)
{
Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
vector<Point2f> pts;
// Untimed call before the timed CPU loop.
goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
CPU_ON;
goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
CPU_OFF;
Ptr<cuda::CornersDetector> detector = cuda::createGoodFeaturesToTrackDetector(src.type(), 8000, 0.01, 0.0);
cuda::GpuMat d_src(src);
cuda::GpuMat d_pts;
// Untimed call before the timed CUDA loop.
detector->detect(d_src, d_pts);
CUDA_ON;
detector->detect(d_src, d_pts);
CUDA_OFF;
}
// Only compiled when the bgsegm module is available.
#ifdef HAVE_OPENCV_BGSEGM
// Benchmark: MOG background subtraction (learning rate 0.01) on consecutive
// frames of vtest.avi, CPU vs CUDA. The timing loops are written out by hand
// instead of using CPU_ON/CUDA_ON so that reading (and uploading) a frame
// stays outside the timed region. Throws std::runtime_error when the video
// cannot be opened.
TEST(MOG)
{
const std::string inputFile = abspath("../data/vtest.avi");
cv::VideoCapture cap(inputFile);
if (!cap.isOpened()) throw runtime_error("can't open ../data/vtest.avi");
cv::Mat frame;
cap >> frame;
cv::Ptr<cv::BackgroundSubtractor> mog = cv::bgsegm::createBackgroundSubtractorMOG();
cv::Mat foreground;
// Untimed first frame.
mog->apply(frame, foreground, 0.01);
while (!TestSystem::instance().stop())
{
cap >> frame;
TestSystem::instance().cpuOn();
mog->apply(frame, foreground, 0.01);
TestSystem::instance().cpuOff();
}
TestSystem::instance().cpuComplete();
// Reopen the video so the CUDA run starts from the first frame again.
cap.open(inputFile);
cap >> frame;
cv::cuda::GpuMat d_frame(frame);
cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::cuda::createBackgroundSubtractorMOG();
cv::cuda::GpuMat d_foreground;
// Untimed first frame.
d_mog->apply(d_frame, d_foreground, 0.01);
while (!TestSystem::instance().stop())
{
cap >> frame;
d_frame.upload(frame);
TestSystem::instance().gpuOn();
d_mog->apply(d_frame, d_foreground, 0.01);
TestSystem::instance().gpuOff();
}
TestSystem::instance().gpuComplete();
}
#endif
// Benchmark: MOG2 background subtraction plus background image retrieval on
// consecutive frames of 768x576.avi, CPU vs CUDA. The timing loops are
// written out by hand instead of using CPU_ON/CUDA_ON so that reading (and
// uploading) a frame stays outside the timed region. Throws
// std::runtime_error when the video cannot be opened.
TEST(MOG2)
{
const std::string inputFile = abspath("../data/768x576.avi");
cv::VideoCapture cap(inputFile);
if (!cap.isOpened()) throw runtime_error("can't open ../data/768x576.avi");
cv::Mat frame;
cap >> frame;
cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
cv::Mat foreground;
cv::Mat background;
// Untimed first frame.
mog2->apply(frame, foreground);
mog2->getBackgroundImage(background);
while (!TestSystem::instance().stop())
{
cap >> frame;
TestSystem::instance().cpuOn();
mog2->apply(frame, foreground);
mog2->getBackgroundImage(background);
TestSystem::instance().cpuOff();
}
TestSystem::instance().cpuComplete();
// Reopen the video so the CUDA run starts from the first frame again.
cap.open(inputFile);
cap >> frame;
cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::cuda::createBackgroundSubtractorMOG2();
cv::cuda::GpuMat d_frame(frame);
cv::cuda::GpuMat d_foreground;
cv::cuda::GpuMat d_background;
// Untimed first frame.
d_mog2->apply(d_frame, d_foreground);
d_mog2->getBackgroundImage(d_background);
while (!TestSystem::instance().stop())
{
cap >> frame;
d_frame.upload(frame);
TestSystem::instance().gpuOn();
d_mog2->apply(d_frame, d_foreground);
d_mog2->getBackgroundImage(d_background);
TestSystem::instance().gpuOff();
}
TestSystem::instance().gpuComplete();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment