Commit 8a799aa8 authored by Alexey Spizhevoy

Updated optimal block size estimation for the convolve() function

parent bee68e51
...@@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()), ...@@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()),
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
PERF_TEST_P(DevInfo_Size, convolve, testing::Combine(testing::ValuesIn(devices()), PERF_TEST_P(DevInfo_Int_Int, convolve, testing::Combine(testing::ValuesIn(devices()),
testing::Values(GPU_TYPICAL_MAT_SIZES))) testing::Values(512, 1024, 1536, 2048, 2560, 3072, 3584),
testing::Values(27, 32, 64)))
{ {
DeviceInfo devInfo = std::tr1::get<0>(GetParam()); DeviceInfo devInfo = std::tr1::get<0>(GetParam());
Size size = std::tr1::get<1>(GetParam()); int image_size = std::tr1::get<1>(GetParam());
int templ_size = std::tr1::get<2>(GetParam());
setDevice(devInfo.deviceID()); setDevice(devInfo.deviceID());
Mat image_host(size, CV_32FC1); Mat image_host(image_size, image_size, CV_32FC1);
Mat templ_host(size, CV_32FC1); Mat templ_host(templ_size, templ_size, CV_32FC1);
declare.in(image_host, templ_host, WARMUP_RNG); declare.in(image_host, templ_host, WARMUP_RNG);
......
...@@ -32,6 +32,7 @@ struct CvtColorInfo ...@@ -32,6 +32,7 @@ struct CvtColorInfo
typedef TestBaseWithParam<DeviceInfo> DevInfo; typedef TestBaseWithParam<DeviceInfo> DevInfo;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_Int_Int;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType;
......
...@@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size) ...@@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size) Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)
{ {
int scale = 40; int scale = 40;
Size bsize_min(1024, 1024); Size bsize_min(512, 512);
// Check whether we use Fermi generation or newer GPU // Check whether we use Fermi generation or newer GPU
if (DeviceInfo().majorVersion() >= 2) if (DeviceInfo().majorVersion() >= 2)
{ {
bsize_min.width = 2048; bsize_min.width = 1024;
bsize_min.height = 2048; bsize_min.height = 1024;
} }
Size bsize(std::max(templ_size.width * scale, bsize_min.width), Size bsize(std::max(templ_size.width * scale, bsize_min.width),
std::max(templ_size.height * scale, bsize_min.height)); std::max(templ_size.height * scale, bsize_min.height));
int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width;
int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height;
bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;
bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;
bsize.width = std::min(bsize.width, result_size.width); bsize.width = std::min(bsize.width, result_size.width);
bsize.height = std::min(bsize.height, result_size.height); bsize.height = std::min(bsize.height, result_size.height);
return bsize; return bsize;
......
...@@ -8,9 +8,15 @@ using namespace cv; ...@@ -8,9 +8,15 @@ using namespace cv;
void TestSystem::run() void TestSystem::run()
{ {
// Run test initializers if (is_list_mode_)
vector<Runnable*>::iterator it = inits_.begin(); {
for (; it != inits_.end(); ++it) for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
cout << (*it)->name() << endl;
return;
}
// Run test initializers
for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
{ {
if ((*it)->name().find(test_filter_, 0) != string::npos) if ((*it)->name().find(test_filter_, 0) != string::npos)
(*it)->run(); (*it)->run();
...@@ -19,8 +25,7 @@ void TestSystem::run() ...@@ -19,8 +25,7 @@ void TestSystem::run()
printHeading(); printHeading();
// Run tests // Run tests
it = tests_.begin(); for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
for (; it != tests_.end(); ++it)
{ {
try try
{ {
...@@ -145,13 +150,15 @@ int main(int argc, char** argv) ...@@ -145,13 +150,15 @@ int main(int argc, char** argv)
string key = argv[i]; string key = argv[i];
if (key == "--help") if (key == "--help")
{ {
cout << "Usage: performance_gpu [--filter <test_filter>] [--working-dir <working_dir_with_slash>]\n"; cout << "Usage: performance_gpu [--ls] [--filter <test_filter>] [--workdir <working_dir_with_slash>]\n";
return 0; return 0;
} }
if (key == "--filter" && i + 1 < argc) if (key == "--filter" && i + 1 < argc)
TestSystem::instance().setTestFilter(argv[++i]); TestSystem::instance().setTestFilter(argv[++i]);
else if (key == "--working-dir" && i + 1 < argc) else if (key == "--workdir" && i + 1 < argc)
TestSystem::instance().setWorkingDir(argv[++i]); TestSystem::instance().setWorkingDir(argv[++i]);
else if (key == "--ls")
TestSystem::instance().setListMode(true);
else else
{ {
cout << "Unknown parameter: '" << key << "'" << endl; cout << "Unknown parameter: '" << key << "'" << endl;
......
...@@ -68,10 +68,14 @@ public: ...@@ -68,10 +68,14 @@ public:
cur_subtest_is_empty_ = false; cur_subtest_is_empty_ = false;
} }
bool isListMode() const { return is_list_mode_; }
void setListMode(bool value) { is_list_mode_ = value; }
private: private:
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0), TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),
gpu_elapsed_(0), speedup_total_(0.0), gpu_elapsed_(0), speedup_total_(0.0),
num_subtests_called_(0) {} num_subtests_called_(0),
is_list_mode_(false) {}
void finishCurrentSubtest(); void finishCurrentSubtest();
void resetCurrentSubtest() void resetCurrentSubtest()
...@@ -100,6 +104,8 @@ private: ...@@ -100,6 +104,8 @@ private:
double speedup_total_; double speedup_total_;
int num_subtests_called_; int num_subtests_called_;
bool is_list_mode_;
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment