Commit 48183f10 authored by Alexey Spizhevoy

Optimized memory requirements for gpu::minMax's buffers; added support for compute capability 1.0.

parent c4654620
......@@ -490,44 +490,64 @@ Scalar cv::gpu::sum(const GpuMat& src)
////////////////////////////////////////////////////////////////////////
// minMax
// Forward declarations of the helpers that cv::gpu::minMax calls; their
// definitions are not part of this hunk.
// NOTE(review): the hunk is rendered without +/- markers, so removed and added
// lines are interleaved. The plain `mathfunc` opener, the three-argument
// min_max_caller and the `}}}` closer look like the removed text; the
// `mathfunc::minmax` opener, the buffer-taking declarations and `}}}}` look
// like the added text. Confirm against the raw diff.
namespace cv { namespace gpu { namespace mathfunc {
namespace cv { namespace gpu { namespace mathfunc { namespace minmax {
// Reports two buffer sizes (columns/rows each) through the out-parameters for
// an element of elem_size bytes; minMax uses them to allocate b1 and b2.
void get_buf_size_required(int elem_size, int& b1cols, int& b1rows,
int& b2cols, int& b2rows);
template <typename T>
void min_max_caller(const DevMem2D src, double* minval, double* maxval);
}}}
// Variant that additionally receives the two caller-allocated byte buffers.
void min_max_caller(const DevMem2D src, double* minval, double* maxval,
unsigned char* minval_buf, unsigned char* maxval_buf);
// Same parameter list; minMax selects this one when its compute-capability
// check fails.
template <typename T>
void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval,
unsigned char* minval_buf, unsigned char* maxval_buf);
}}}}
// Determines the minimum and maximum of src and reports them through
// minVal / maxVal by dispatching on the element depth to the
// mathfunc::minmax callers.
// NOTE(review): this hunk is shown without +/- markers, so removed and added
// lines are interleaved (two `GpuMat src_` declarations, two null checks, two
// runs of case labels in the first switch). The three-argument
// `mathfunc::min_max_caller` calls look like the removed text — confirm
// against the raw diff.
// NOTE(review): in the visible lines only maxVal gets a local fallback when it
// is null; minVal is forwarded exactly as passed.
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
{
GpuMat src_ = src.reshape(1);
using namespace mathfunc::minmax;
// Scratch target so the callers always receive a non-null maxVal pointer.
double maxVal_;
if (!maxVal)
maxVal = &maxVal_;
if (!maxVal) maxVal = &maxVal_;
// reshape(1): presumably views the data as single-channel so every channel
// takes part in the search — TODO confirm against GpuMat::reshape.
GpuMat src_ = src.reshape(1);
// Allocate GPU buffers
// Buffer sizes are derived from src.elemSize() (the original matrix, not the
// reshaped src_) and both buffers are allocated as CV_8U.
Size b1size, b2size;
get_buf_size_required(src.elemSize(), b1size.width, b1size.height, b2size.width, b2size.height);
GpuMat b1(b1size, CV_8U), b2(b2size, CV_8U);
int major, minor;
getComputeCapability(getDevice(), major, minor);
// NOTE(review): this condition is false whenever minor == 0, whatever the
// major version is, so a 2.0 device also takes the 2-step branch below, which
// has no CV_64F case. If the intent is "capability >= 1.1", the test would be
// `major > 1 || (major == 1 && minor >= 1)` — confirm intent.
if (major >= 1 && minor >= 1)
{
switch (src_.type())
{
case CV_8U:
mathfunc::min_max_caller<unsigned char>(src_, minVal, maxVal);
break;
case CV_8S:
mathfunc::min_max_caller<signed char>(src_, minVal, maxVal);
break;
case CV_16U:
mathfunc::min_max_caller<unsigned short>(src_, minVal, maxVal);
break;
case CV_16S:
mathfunc::min_max_caller<signed short>(src_, minVal, maxVal);
break;
case CV_32S:
mathfunc::min_max_caller<int>(src_, minVal, maxVal);
break;
case CV_32F:
mathfunc::min_max_caller<float>(src_, minVal, maxVal);
break;
case CV_64F:
mathfunc::min_max_caller<double>(src_, minVal, maxVal);
break;
default:
CV_Error(CV_StsBadArg, "Unsupported type");
case CV_8U: min_max_caller<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_8S: min_max_caller<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_16U: min_max_caller<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_16S: min_max_caller<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_32S: min_max_caller<int>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_32F: min_max_caller<float>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_64F: min_max_caller<double>(src_, minVal, maxVal, b1.data, b2.data); break;
default: CV_Error(CV_StsBadArg, "Unsupported type");
}
}
else
{
// Fallback path: same dispatch through min_max_caller_2steps. There is no
// CV_64F case here, so double input reaches the "Unsupported type" error.
switch (src_.type())
{
case CV_8U: min_max_caller_2steps<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_8S: min_max_caller_2steps<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_16U: min_max_caller_2steps<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_16S: min_max_caller_2steps<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_32S: min_max_caller_2steps<int>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_32F: min_max_caller_2steps<float>(src_, minVal, maxVal, b1.data, b2.data); break;
default: CV_Error(CV_StsBadArg, "Unsupported type");
}
}
}
......@@ -535,14 +555,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
////////////////////////////////////////////////////////////////////////
// minMaxLoc
// Forward declaration of the helper that cv::gpu::minMaxLoc dispatches to; the
// definition is not part of this hunk.
// NOTE(review): the hunk has no +/- markers, so the declaration and the
// namespace opener/closer each appear twice (plain `mathfunc` vs.
// `mathfunc::minmaxloc`); the two declarations differ only in line wrapping
// and enclosing namespace.
namespace cv { namespace gpu { namespace mathfunc {
namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
// Outputs: min/max values through minval/maxval and their positions through
// the four int pointers (x and y of each location).
template <typename T>
void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval, int* minlocx, int* minlocy,
int* maxlocx, int* maxlocy);
}}}
void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval,
int* minlocx, int* minlocy, int* maxlocx, int* maxlocy);
}}}}
// Reports the minimum/maximum of src and their locations through the four
// output pointers. Only the opening lines of the function are visible in this
// hunk; the remainder is elided by the diff.
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc)
{
// Lets the switch further down call min_max_loc_caller without qualification.
using namespace mathfunc::minmaxloc;
// Multi-channel input is rejected (minMax, by contrast, reshapes to one channel).
CV_Assert(src.channels() == 1);
double maxVal_;
......@@ -557,25 +581,25 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
// Dispatch on the element type to the matching min_max_loc_caller
// instantiation.
// NOTE(review): without +/- markers each case shows two calls — the
// `mathfunc::`-qualified one looks like the removed line and the unqualified
// one like its replacement. Confirm against the raw diff.
// NOTE(review): every call dereferences minLoc and maxLoc (`&minLoc->x`, ...);
// any null handling would sit in the lines elided above this hunk — verify.
switch (src.type())
{
case CV_8U:
mathfunc::min_max_loc_caller<unsigned char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
min_max_loc_caller<unsigned char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break;
case CV_8S:
mathfunc::min_max_loc_caller<signed char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
min_max_loc_caller<signed char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break;
case CV_16U:
mathfunc::min_max_loc_caller<unsigned short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
min_max_loc_caller<unsigned short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break;
case CV_16S:
mathfunc::min_max_loc_caller<signed short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
min_max_loc_caller<signed short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break;
case CV_32S:
mathfunc::min_max_loc_caller<int>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
min_max_loc_caller<int>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break;
case CV_32F:
mathfunc::min_max_loc_caller<float>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
min_max_loc_caller<float>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break;
case CV_64F:
mathfunc::min_max_loc_caller<double>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
min_max_loc_caller<double>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break;
default:
CV_Error(CV_StsBadArg, "Unsupported type");
......
This diff is collapsed.
......@@ -678,8 +678,14 @@ struct CV_GpuMinMaxTest: public CvTest
// Test driver: runs test() for 1 to 4 channels and for each depth from CV_8U
// up to depth_end. Only the start of the function is visible in this hunk.
// NOTE(review): without +/- markers the depth loop header appears twice; the
// `depth <= CV_64F` line looks like the removed text.
void run(int)
{
int depth_end;
int major, minor;
cv::gpu::getComputeCapability(getDevice(), major, minor);
// NOTE(review): this assignment discards the value just queried, so the check
// on the next line is always false and depth_end is always CV_32F — CV_64F is
// never exercised on any device. Looks like a leftover debugging override.
minor = 0;
// Only `minor` is inspected here; `major` is queried but not used.
if (minor >= 1) depth_end = CV_64F; else depth_end = CV_32F;
for (int cn = 1; cn <= 4; ++cn)
for (int depth = CV_8U; depth <= CV_64F; ++depth)
for (int depth = CV_8U; depth <= depth_end; ++depth)
{
int rows = 1, cols = 3;
test(rows, cols, cn, depth);
......@@ -703,10 +709,11 @@ struct CV_GpuMinMaxTest: public CvTest
}
// CPU reference: min/max of the source viewed as single-channel, computed with
// cv::minMaxLoc for every depth except CV_8S (that branch is elided below).
// NOTE(review): without +/- markers `cv::Point minLoc, maxLoc;` appears twice —
// once at function scope and once inside the `if` block; presumably the
// declaration was moved into the block.
double minVal, maxVal;
cv::Point minLoc, maxLoc;
Mat src_ = src.reshape(1);
if (depth != CV_8S)
{
cv::Point minLoc, maxLoc;
cv::minMaxLoc(src_, &minVal, &maxVal, &minLoc, &maxLoc);
}
else
......@@ -727,8 +734,16 @@ struct CV_GpuMinMaxTest: public CvTest
// Runs the GPU implementation on an uploaded copy of src and compares its
// results with the CPU reference values minVal / maxVal.
// NOTE(review): without +/- markers both the exact-equality CHECK lines and
// the tolerance-based checks are shown; the CHECK lines look like the removed
// text.
// NOTE(review): minLoc_ / maxLoc_ are not used in the visible lines.
cv::Point minLoc_, maxLoc_;
cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_);
CHECK(minVal == minVal_, CvTS::FAIL_INVALID_OUTPUT);
CHECK(maxVal == maxVal_, CvTS::FAIL_INVALID_OUTPUT);
// NOTE(review): `abs` is unqualified and applied to a double difference. If it
// resolves to the C `int abs(int)` overload, the difference is truncated and
// errors below 1.0 pass unnoticed — confirm a floating-point overload
// (std::abs / fabs) is the one selected.
if (abs(minVal - minVal_) > 1e-3f)
{
ts->printf(CvTS::CONSOLE, "\nfail: minVal=%f minVal_=%f rows=%d cols=%d depth=%d cn=%d\n", minVal, minVal_, rows, cols, depth, cn);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
}
// Same tolerance check for the maximum.
if (abs(maxVal - maxVal_) > 1e-3f)
{
ts->printf(CvTS::CONSOLE, "\nfail: maxVal=%f maxVal_=%f rows=%d cols=%d depth=%d cn=%d\n", maxVal, maxVal_, rows, cols, depth, cn);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
}
}
};
......@@ -742,7 +757,11 @@ struct CV_GpuMinMaxLocTest: public CvTest
// Test driver: runs test() for each depth from CV_8U up to depth_end. Only the
// start of the function is visible in this hunk.
// NOTE(review): without +/- markers the depth loop header appears twice; the
// `depth <= CV_64F` line looks like the removed text.
void run(int)
{
for (int depth = CV_8U; depth <= CV_64F; ++depth)
int depth_end;
int major, minor;
cv::gpu::getComputeCapability(getDevice(), major, minor);
// NOTE(review): only `minor` is inspected, so any x.0 device (2.0 included)
// gets depth_end = CV_32F and CV_64F is skipped — confirm this is intended.
if (minor >= 1) depth_end = CV_64F; else depth_end = CV_32F;
for (int depth = CV_8U; depth <= depth_end; ++depth)
{
int rows = 1, cols = 3;
test(rows, cols, depth);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment