Commit 48183f10 authored by Alexey Spizhevoy

Optimized memory requirements for gpu::minMax's buffers; added support for compute capability 1.0

parent c4654620
...@@ -490,44 +490,64 @@ Scalar cv::gpu::sum(const GpuMat& src) ...@@ -490,44 +490,64 @@ Scalar cv::gpu::sum(const GpuMat& src)
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// minMax // minMax
// Forward declarations of the min/max device-side entry points.
// This span is a side-by-side diff rendering: on every line the left part is
// the pre-commit text and the right part is the post-commit text.
// Post-commit, the declarations move into a nested namespace `minmax` and the
// callers take two extra byte-buffer pointers (minval_buf / maxval_buf).
namespace cv { namespace gpu { namespace mathfunc { namespace cv { namespace gpu { namespace mathfunc { namespace minmax {
// Reports, through the four reference out-parameters, the column/row counts
// of two buffers for a given element size; cv::gpu::minMax uses these values
// to allocate its two CV_8U GpuMat scratch buffers.
void get_buf_size_required(int elem_size, int& b1cols, int& b1rows,
int& b2cols, int& b2rows);
template <typename T> template <typename T>
// Writes the results through minval / maxval.
// NOTE(review): whether a null minval is tolerated is not visible here.
void min_max_caller(const DevMem2D src, double* minval, double* maxval); void min_max_caller(const DevMem2D src, double* minval, double* maxval,
}}} unsigned char* minval_buf, unsigned char* maxval_buf);
// Variant with the same signature as min_max_caller; cv::gpu::minMax calls it
// when its compute-capability check fails (post-commit only).
template <typename T>
void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval,
unsigned char* minval_buf, unsigned char* maxval_buf);
}}}}
// cv::gpu::minMax: finds the minimum and maximum of a GpuMat.
// Side-by-side diff rendering: the left part of each line is the pre-commit
// body, the right part is the post-commit body.
//
// Post-commit flow (right column):
//   1. substitute a local for maxVal when the caller passed null;
//   2. reshape src to a single channel (src_);
//   3. allocate two CV_8U scratch buffers sized by get_buf_size_required;
//   4. query the device compute capability and dispatch on src_.type() to
//      either min_max_caller<T> or min_max_caller_2steps<T>.
// An unhandled type raises CV_Error(CV_StsBadArg, "Unsupported type").
//
// NOTE(review): only maxVal gets a null fallback in either column; minVal is
// forwarded exactly as passed. Confirm the callers tolerate a null minVal.
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal) void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
{ {
GpuMat src_ = src.reshape(1); using namespace mathfunc::minmax;
double maxVal_; double maxVal_;
if (!maxVal) if (!maxVal) maxVal = &maxVal_;
maxVal = &maxVal_;
GpuMat src_ = src.reshape(1);
// Allocate GPU buffers
// NOTE(review): the size query uses src.elemSize() (the original, possibly
// multi-channel matrix) while the kernels below receive the reshaped src_.
// Presumably this only over-allocates; confirm against the kernel's per-block
// storage requirement.
Size b1size, b2size;
get_buf_size_required(src.elemSize(), b1size.width, b1size.height, b2size.width, b2size.height);
GpuMat b1(b1size, CV_8U), b2(b2size, CV_8U);
int major, minor;
getComputeCapability(getDevice(), major, minor);
// NOTE(review): `major >= 1 && minor >= 1` is false for every device whose
// minor version is 0 (e.g. 2.0), so such devices take the 2-steps fallback
// below, which has no CV_64F case. If the intent is "capability >= 1.1", the
// test would be `major > 1 || (major == 1 && minor >= 1)`. TODO confirm.
switch (src_.type()) if (major >= 1 && minor >= 1)
{ {
case CV_8U: switch (src_.type())
mathfunc::min_max_caller<unsigned char>(src_, minVal, maxVal); {
break; case CV_8U: min_max_caller<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_8S: case CV_8S: min_max_caller<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;
mathfunc::min_max_caller<signed char>(src_, minVal, maxVal); case CV_16U: min_max_caller<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;
break; case CV_16S: min_max_caller<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_16U: case CV_32S: min_max_caller<int>(src_, minVal, maxVal, b1.data, b2.data); break;
mathfunc::min_max_caller<unsigned short>(src_, minVal, maxVal); case CV_32F: min_max_caller<float>(src_, minVal, maxVal, b1.data, b2.data); break;
break; case CV_64F: min_max_caller<double>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_16S: default: CV_Error(CV_StsBadArg, "Unsupported type");
mathfunc::min_max_caller<signed short>(src_, minVal, maxVal); }
break; }
// Fallback path (right column): same dispatch through min_max_caller_2steps,
// but without a CV_64F case, so double input ends in "Unsupported type".
case CV_32S: else
mathfunc::min_max_caller<int>(src_, minVal, maxVal); {
break; switch (src_.type())
case CV_32F: {
mathfunc::min_max_caller<float>(src_, minVal, maxVal); case CV_8U: min_max_caller_2steps<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;
break; case CV_8S: min_max_caller_2steps<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;
case CV_64F: case CV_16U: min_max_caller_2steps<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;
mathfunc::min_max_caller<double>(src_, minVal, maxVal); case CV_16S: min_max_caller_2steps<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;
break; case CV_32S: min_max_caller_2steps<int>(src_, minVal, maxVal, b1.data, b2.data); break;
default: case CV_32F: min_max_caller_2steps<float>(src_, minVal, maxVal, b1.data, b2.data); break;
CV_Error(CV_StsBadArg, "Unsupported type"); default: CV_Error(CV_StsBadArg, "Unsupported type");
}
} }
} }
...@@ -535,14 +555,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal) ...@@ -535,14 +555,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// minMaxLoc // minMaxLoc
// Forward declaration of the min/max-with-location device-side entry point.
// Side-by-side diff rendering: left part of each line is pre-commit, right
// part is post-commit. The commit only moves the declaration into a nested
// namespace `minmaxloc` and re-wraps the parameter list; the parameters
// themselves are the same in both columns.
namespace cv { namespace gpu { namespace mathfunc { namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
template <typename T> template <typename T>
// Results are written through minval / maxval and the four int location
// pointers (x and y of the minimum, x and y of the maximum).
void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval, int* minlocx, int* minlocy, void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval,
int* maxlocx, int* maxlocy); int* minlocx, int* minlocy, int* maxlocx, int* maxlocy);
}}}
}}}}
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc) void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc)
{ {
using namespace mathfunc::minmaxloc;
CV_Assert(src.channels() == 1); CV_Assert(src.channels() == 1);
double maxVal_; double maxVal_;
...@@ -557,25 +581,25 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ...@@ -557,25 +581,25 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
switch (src.type()) switch (src.type())
{ {
case CV_8U: case CV_8U:
mathfunc::min_max_loc_caller<unsigned char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y); min_max_loc_caller<unsigned char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break; break;
case CV_8S: case CV_8S:
mathfunc::min_max_loc_caller<signed char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y); min_max_loc_caller<signed char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break; break;
case CV_16U: case CV_16U:
mathfunc::min_max_loc_caller<unsigned short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y); min_max_loc_caller<unsigned short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break; break;
case CV_16S: case CV_16S:
mathfunc::min_max_loc_caller<signed short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y); min_max_loc_caller<signed short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break; break;
case CV_32S: case CV_32S:
mathfunc::min_max_loc_caller<int>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y); min_max_loc_caller<int>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break; break;
case CV_32F: case CV_32F:
mathfunc::min_max_loc_caller<float>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y); min_max_loc_caller<float>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break; break;
case CV_64F: case CV_64F:
mathfunc::min_max_loc_caller<double>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y); min_max_loc_caller<double>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
break; break;
default: default:
CV_Error(CV_StsBadArg, "Unsupported type"); CV_Error(CV_StsBadArg, "Unsupported type");
......
This diff is collapsed.
...@@ -678,8 +678,14 @@ struct CV_GpuMinMaxTest: public CvTest ...@@ -678,8 +678,14 @@ struct CV_GpuMinMaxTest: public CvTest
void run(int) void run(int)
{ {
int depth_end;
int major, minor;
cv::gpu::getComputeCapability(getDevice(), major, minor);
minor = 0;
if (minor >= 1) depth_end = CV_64F; else depth_end = CV_32F;
for (int cn = 1; cn <= 4; ++cn) for (int cn = 1; cn <= 4; ++cn)
for (int depth = CV_8U; depth <= CV_64F; ++depth) for (int depth = CV_8U; depth <= depth_end; ++depth)
{ {
int rows = 1, cols = 3; int rows = 1, cols = 3;
test(rows, cols, cn, depth); test(rows, cols, cn, depth);
...@@ -703,10 +709,11 @@ struct CV_GpuMinMaxTest: public CvTest ...@@ -703,10 +709,11 @@ struct CV_GpuMinMaxTest: public CvTest
} }
double minVal, maxVal; double minVal, maxVal;
cv::Point minLoc, maxLoc;
Mat src_ = src.reshape(1); Mat src_ = src.reshape(1);
if (depth != CV_8S) if (depth != CV_8S)
{ {
cv::Point minLoc, maxLoc;
cv::minMaxLoc(src_, &minVal, &maxVal, &minLoc, &maxLoc); cv::minMaxLoc(src_, &minVal, &maxVal, &minLoc, &maxLoc);
} }
else else
...@@ -727,8 +734,16 @@ struct CV_GpuMinMaxTest: public CvTest ...@@ -727,8 +734,16 @@ struct CV_GpuMinMaxTest: public CvTest
cv::Point minLoc_, maxLoc_; cv::Point minLoc_, maxLoc_;
cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_); cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_);
CHECK(minVal == minVal_, CvTS::FAIL_INVALID_OUTPUT); if (abs(minVal - minVal_) > 1e-3f)
CHECK(maxVal == maxVal_, CvTS::FAIL_INVALID_OUTPUT); {
ts->printf(CvTS::CONSOLE, "\nfail: minVal=%f minVal_=%f rows=%d cols=%d depth=%d cn=%d\n", minVal, minVal_, rows, cols, depth, cn);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
}
if (abs(maxVal - maxVal_) > 1e-3f)
{
ts->printf(CvTS::CONSOLE, "\nfail: maxVal=%f maxVal_=%f rows=%d cols=%d depth=%d cn=%d\n", maxVal, maxVal_, rows, cols, depth, cn);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
}
} }
}; };
...@@ -742,7 +757,11 @@ struct CV_GpuMinMaxLocTest: public CvTest ...@@ -742,7 +757,11 @@ struct CV_GpuMinMaxLocTest: public CvTest
void run(int) void run(int)
{ {
for (int depth = CV_8U; depth <= CV_64F; ++depth) int depth_end;
int major, minor;
cv::gpu::getComputeCapability(getDevice(), major, minor);
if (minor >= 1) depth_end = CV_64F; else depth_end = CV_32F;
for (int depth = CV_8U; depth <= depth_end; ++depth)
{ {
int rows = 1, cols = 3; int rows = 1, cols = 3;
test(rows, cols, depth); test(rows, cols, depth);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment