Commit d8a7ff1e authored by Alexey Spizhevoy

refactored gpu module, added vec math operators for uint, added support of 2…

refactored gpu module, added vec math operators for uint, added support of 2 channel images into gpu::sum (removed support of double)
parent e5eec31b
......@@ -486,10 +486,10 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
namespace cv { namespace gpu { namespace mathfunc
{
template <typename T>
void sum_caller(const DevMem2D src, PtrStep buf, double* sum);
void sum_caller(const DevMem2D src, PtrStep buf, double* sum, int cn);
template <typename T>
void sum_multipass_caller(const DevMem2D src, PtrStep buf, double* sum);
void sum_multipass_caller(const DevMem2D src, PtrStep buf, double* sum, int cn);
template <typename T>
void sqsum_caller(const DevMem2D src, PtrStep buf, double* sum);
......@@ -499,7 +499,7 @@ namespace cv { namespace gpu { namespace mathfunc
namespace sum
{
void get_buf_size_required(int cols, int rows, int& bufcols, int& bufrows);
void get_buf_size_required(int cols, int rows, int cn, int& bufcols, int& bufrows);
}
}}}
......@@ -512,27 +512,26 @@ Scalar cv::gpu::sum(const GpuMat& src)
// Sums the elements of src on the GPU and returns the result as a Scalar.
// buf is (re)created as a CV_8U matrix whose size comes from
// sum::get_buf_size_required, and is then handed to the selected caller.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so
// several statements appear twice in slightly different forms (two Caller
// typedefs, two get_buf_size_required calls, two `caller` declarations, two
// `result` declarations). Presumably the first of each pair is the
// pre-change line and the second its replacement; it is not compilable as shown.
Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
{
using namespace mathfunc;
// Single-channel restriction; presumably the pre-change line, since the
// 4-argument caller below is passed src.channels() -- confirm.
CV_Assert(src.channels() == 1);
// Function-pointer type of the per-depth implementations
// (3-argument form, then the form with an extra int argument).
typedef void (*Caller)(const DevMem2D, PtrStep, double*);
typedef void (*Caller)(const DevMem2D, PtrStep, double*, int);
// Dispatch table: the row is selected by hasAtomicsSupport(getDevice())
// (row 0 = multipass callers, row 1 = sum_caller), the column by the source
// type/depth index. A 0 entry marks an unsupported combination.
static const Caller callers[2][7] =
{ { sum_multipass_caller<unsigned char>, sum_multipass_caller<char>,
sum_multipass_caller<unsigned short>, sum_multipass_caller<short>,
sum_multipass_caller<int>, sum_multipass_caller<float>, 0 },
{ sum_caller<unsigned char>, sum_caller<char>,
sum_caller<unsigned short>, sum_caller<short>,
// The next two lines are alternative endings of the table: one with a
// double entry, one with 0 in that slot.
sum_caller<int>, sum_caller<float>, sum_caller<double> } };
sum_caller<int>, sum_caller<float>, 0 } };
// Query the required scratch-buffer size (form without, then with, the
// channel count) and allocate it as raw bytes.
Size bufSize;
sum::get_buf_size_required(src.cols, src.rows, bufSize.width, bufSize.height);
sum::get_buf_size_required(src.cols, src.rows, src.channels(), bufSize.width, bufSize.height);
buf.create(bufSize, CV_8U);
// Column index: src.type() in the first form, src.depth() in the second.
Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
Caller caller = callers[hasAtomicsSupport(getDevice())][src.depth()];
// A null table entry means no implementation exists for this element type.
if (!caller) CV_Error(CV_StsBadArg, "sum: unsupported type");
// Scalar-result form: one double is written by the caller and returned.
double result;
caller(src, buf, &result);
return result;
// Per-channel form: up to four sums are packed into the returned Scalar.
// NOTE(review): result[] is not initialized here; whether the caller writes
// all four entries regardless of src.channels() is not visible -- confirm.
double result[4];
caller(src, buf, result, src.channels());
return Scalar(result[0], result[1], result[2], result[3]);
}
Scalar cv::gpu::sqrSum(const GpuMat& src)
......@@ -553,10 +552,10 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
sqsum_multipass_caller<int>, sqsum_multipass_caller<float>, 0 },
{ sqsum_caller<unsigned char>, sqsum_caller<char>,
sqsum_caller<unsigned short>, sqsum_caller<short>,
sqsum_caller<int>, sqsum_caller<float>, sqsum_caller<double> } };
sqsum_caller<int>, sqsum_caller<float>, 0 } };
Size bufSize;
sum::get_buf_size_required(src.cols, src.rows, bufSize.width, bufSize.height);
sum::get_buf_size_required(src.cols, src.rows, 1, bufSize.width, bufSize.height);
buf.create(bufSize, CV_8U);
Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
......
This diff is collapsed.
......@@ -866,6 +866,91 @@ namespace cv
return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}
// Component-wise sum of two uint1 values.
static __device__ uint1 operator+(const uint1& a, const uint1& b)
{
    uint1 r;
    r.x = a.x + b.x;
    return r;
}
// Component-wise difference a - b of two uint1 values.
static __device__ uint1 operator-(const uint1& a, const uint1& b)
{
    uint1 r;
    r.x = a.x - b.x;
    return r;
}
// Component-wise product of two uint1 values.
static __device__ uint1 operator*(const uint1& a, const uint1& b)
{
    uint1 r;
    r.x = a.x * b.x;
    return r;
}
// Component-wise quotient a / b of two uint1 values.
// No zero check is performed on b.x.
static __device__ uint1 operator/(const uint1& a, const uint1& b)
{
    uint1 r;
    r.x = a.x / b.x;
    return r;
}
// Scales a uint1 by the float factor s; the result is a float1.
static __device__ float1 operator*(const uint1& a, float s)
{
    float1 r;
    r.x = a.x * s;
    return r;
}
// Component-wise sum of two uint2 values.
static __device__ uint2 operator+(const uint2& a, const uint2& b)
{
    uint2 r;
    r.x = a.x + b.x;
    r.y = a.y + b.y;
    return r;
}
// Component-wise difference a - b of two uint2 values.
static __device__ uint2 operator-(const uint2& a, const uint2& b)
{
    uint2 r;
    r.x = a.x - b.x;
    r.y = a.y - b.y;
    return r;
}
// Component-wise product of two uint2 values.
static __device__ uint2 operator*(const uint2& a, const uint2& b)
{
    uint2 r;
    r.x = a.x * b.x;
    r.y = a.y * b.y;
    return r;
}
// Component-wise quotient a / b of two uint2 values.
// No zero check is performed on the components of b.
static __device__ uint2 operator/(const uint2& a, const uint2& b)
{
    uint2 r;
    r.x = a.x / b.x;
    r.y = a.y / b.y;
    return r;
}
// Scales each component of a uint2 by the float factor s; the result is a float2.
static __device__ float2 operator*(const uint2& a, float s)
{
    float2 r;
    r.x = a.x * s;
    r.y = a.y * s;
    return r;
}
// Component-wise sum of two uint3 values.
static __device__ uint3 operator+(const uint3& a, const uint3& b)
{
    uint3 r;
    r.x = a.x + b.x;
    r.y = a.y + b.y;
    r.z = a.z + b.z;
    return r;
}
// Component-wise difference a - b of two uint3 values.
static __device__ uint3 operator-(const uint3& a, const uint3& b)
{
    uint3 r;
    r.x = a.x - b.x;
    r.y = a.y - b.y;
    r.z = a.z - b.z;
    return r;
}
// Component-wise product of two uint3 values.
static __device__ uint3 operator*(const uint3& a, const uint3& b)
{
    uint3 r;
    r.x = a.x * b.x;
    r.y = a.y * b.y;
    r.z = a.z * b.z;
    return r;
}
// Component-wise quotient a / b of two uint3 values.
// No zero check is performed on the components of b.
static __device__ uint3 operator/(const uint3& a, const uint3& b)
{
    uint3 r;
    r.x = a.x / b.x;
    r.y = a.y / b.y;
    r.z = a.z / b.z;
    return r;
}
// Scales each component of a uint3 by the float factor s; the result is a float3.
static __device__ float3 operator*(const uint3& a, float s)
{
    float3 r;
    r.x = a.x * s;
    r.y = a.y * s;
    r.z = a.z * s;
    return r;
}
// Component-wise sum of two uint4 values.
static __device__ uint4 operator+(const uint4& a, const uint4& b)
{
    uint4 r;
    r.x = a.x + b.x;
    r.y = a.y + b.y;
    r.z = a.z + b.z;
    r.w = a.w + b.w;
    return r;
}
// Component-wise difference a - b of two uint4 values.
static __device__ uint4 operator-(const uint4& a, const uint4& b)
{
    uint4 r;
    r.x = a.x - b.x;
    r.y = a.y - b.y;
    r.z = a.z - b.z;
    r.w = a.w - b.w;
    return r;
}
// Component-wise product of two uint4 values.
static __device__ uint4 operator*(const uint4& a, const uint4& b)
{
    uint4 r;
    r.x = a.x * b.x;
    r.y = a.y * b.y;
    r.z = a.z * b.z;
    r.w = a.w * b.w;
    return r;
}
// Component-wise quotient a / b of two uint4 values.
// No zero check is performed on the components of b.
static __device__ uint4 operator/(const uint4& a, const uint4& b)
{
    uint4 r;
    r.x = a.x / b.x;
    r.y = a.y / b.y;
    r.z = a.z / b.z;
    r.w = a.w / b.w;
    return r;
}
// Scales each component of a uint4 by the float factor s; the result is a float4.
static __device__ float4 operator*(const uint4& a, float s)
{
    float4 r;
    r.x = a.x * s;
    r.y = a.y * s;
    r.z = a.z * s;
    r.w = a.w * s;
    return r;
}
static __device__ float1 operator+(const float1& a, const float1& b)
{
return make_float1(a.x + b.x);
......
......@@ -942,9 +942,18 @@ struct CV_GpuSumTest: CvTest
Scalar a, b;
double max_err = 1e-5;
int typemax = hasNativeDoubleSupport(getDevice()) ? CV_64F : CV_32F;
int typemax = CV_32F;
for (int type = CV_8U; type <= typemax; ++type)
{
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
a = sum(src);
b = sum(GpuMat(src));
if (abs(a[0] - b[0]) + abs(a[1] - b[1]) > src.size().area() * max_err)
{
ts->printf(CvTS::CONSOLE, "cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[0], b[0]);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
return;
}
gen(1 + rand() % 500, 1 + rand() % 500, type, src);
a = sum(src);
b = sum(GpuMat(src));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment