Commit 5f819886 authored by Ilya Lavrenov's avatar Ilya Lavrenov

refactored arithm binary operations in order to make them more scalable

parent 8224f984
...@@ -89,11 +89,11 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const ...@@ -89,11 +89,11 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
size_t localThreads[3] = { 16, 16, 1 }; size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
std::string kernelName = op_type == ABS_DIFF ? "arithm_absdiff" : "arithm_binary_op"; std::string kernelName = "arithm_binary_op";
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const WTypeMap[] = { "short", "short", "int", "int", "int", "float", "double" }; const char * const WTypeMap[] = { "short", "short", "int", "int", "int", "float", "double" };
const char operationsMap[] = { '+', '-', '*', '/', '-' }; const char * const funcMap[] = { "FUNC_ADD", "FUNC_SUB", "FUNC_MUL", "FUNC_DIV", "FUNC_ABS_DIFF" };
const char * const channelMap[] = { "", "", "2", "4", "4" }; const char * const channelMap[] = { "", "", "2", "4", "4" };
bool haveScalar = use_scalar || src2.empty(); bool haveScalar = use_scalar || src2.empty();
...@@ -105,12 +105,12 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const ...@@ -105,12 +105,12 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
else if (op_type == MUL) else if (op_type == MUL)
WDepth = hasDouble && (depth == CV_32S || depth == CV_64F) ? CV_64F : CV_32F; WDepth = hasDouble && (depth == CV_32S || depth == CV_64F) ? CV_64F : CV_32F;
std::string buildOptions = format("-D T=%s%s -D WT=%s%s -D convertToT=convert_%s%s%s -D Operation=%c" std::string buildOptions = format("-D T=%s%s -D WT=%s%s -D convertToT=convert_%s%s%s -D %s "
" -D convertToWT=convert_%s%s", "-D convertToWT=convert_%s%s",
typeMap[depth], channelMap[oclChannels], typeMap[depth], channelMap[oclChannels],
WTypeMap[WDepth], channelMap[oclChannels], WTypeMap[WDepth], channelMap[oclChannels],
typeMap[depth], channelMap[oclChannels], (depth >= CV_32F ? "" : (depth == CV_32S ? "_rte" : "_sat_rte")), typeMap[depth], channelMap[oclChannels], (depth >= CV_32F ? "" : (depth == CV_32S ? "_rte" : "_sat_rte")),
operationsMap[op_type], WTypeMap[WDepth], channelMap[oclChannels]); funcMap[op_type], WTypeMap[WDepth], channelMap[oclChannels]);
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
...@@ -124,6 +124,9 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const ...@@ -124,6 +124,9 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
kernelName += "_mat"; kernelName += "_mat";
if (haveScalar)
buildOptions += " -D HAVE_SCALAR";
} }
if (haveScalar) if (haveScalar)
...@@ -146,9 +149,6 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const ...@@ -146,9 +149,6 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
kernelName += "_mask"; kernelName += "_mask";
} }
if (op_type == DIV)
kernelName += "_div";
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
......
...@@ -366,23 +366,23 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri ...@@ -366,23 +366,23 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri
#ifdef CL_VERSION_1_2 #ifdef CL_VERSION_1_2
// this enables backwards portability to // this enables backwards portability to
// run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && // if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
dst.offset == 0 && dst.cols == dst.wholecols) // dst.offset == 0 && dst.cols == dst.wholecols)
{ // {
const int sizeofMap[][7] = // const int sizeofMap[][7] =
{ // {
{ sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, // { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) },
{ sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, // { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) },
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 }, // { 0 , 0 , 0 , 0 , 0 , 0 , 0 },
{ sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, // { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) },
}; // };
int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; // int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];
clEnqueueFillBuffer(getClCommandQueue(dst.clCxt), // clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
(cl_mem)dst.data, (void*)mat.data, sizeofGeneric, // (cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
0, dst.step * dst.rows, 0, NULL, NULL); // 0, dst.step * dst.rows, 0, NULL, NULL);
} // }
else // else
#endif #endif
{ {
oclMat m(mat); oclMat m(mat);
......
...@@ -52,48 +52,47 @@ ...@@ -52,48 +52,47 @@
#endif #endif
#endif #endif
////////////////////////////////////////////////////////////////////////////////////////////////////// #if defined (FUNC_ADD)
///////////////////////////////////////////// ADD //////////////////////////////////////////////////// #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index]));
////////////////////////////////////////////////////////////////////////////////////////////////////// #endif
__kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_offset, #if defined (FUNC_SUB)
__global T *src2, int src2_step, int src2_offset, #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]));
__global T *dst, int dst_step, int dst_offset, #endif
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows) #if defined (FUNC_MUL)
{ #if defined (HAVE_SCALAR)
int src1_index = mad24(y, src1_step, x + src1_offset); #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0] * convertToWT(src2[src2_index]));
int src2_index = mad24(y, src2_step, x + src2_offset); #else
int dst_index = mad24(y, dst_step, x + dst_offset); #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index]));
#endif
#endif
dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation convertToWT(src2[src2_index])); #if defined (FUNC_DIV)
} #if defined (HAVE_SCALAR)
} #define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src2[src2_index] == zero ? zero : \
convertToT(convertToWT(src1[src1_index]) * scalar[0] / convertToWT(src2[src2_index]));
#else
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src2[src2_index] == zero ? zero : \
convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index]));
#endif
#endif
__kernel void arithm_binary_op_mat_div(__global T *src1, int src1_step, int src1_offset, #if defined (FUNC_ABS_DIFF)
__global T *src2, int src2_step, int src2_offset, #define EXPRESSION WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); \
__global T *dst, int dst_step, int dst_offset, value = value > (WT)(0) ? value : -value; \
int cols, int rows) dst[dst_index] = convertToT(value);
{ #endif
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows) //////////////////////////////////////////////////////////////////////////////////////////////////////
{ ///////////////////////////////////////////// ADD ////////////////////////////////////////////////////
int src1_index = mad24(y, src1_step, x + src1_offset); //////////////////////////////////////////////////////////////////////////////////////////////////////
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
T zero = (T)(0); #ifndef HAVE_SCALAR
dst[dst_index] = src2[src2_index] == zero ? zero : convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index]));
}
}
__kernel void arithm_absdiff_mat(__global T *src1, int src1_step, int src1_offset, __kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_offset,
__global T *src2, int src2_step, int src2_offset, __global T *src2, int src2_step, int src2_offset,
__global T *dst, int dst_step, int dst_offset, __global T *dst, int dst_step, int dst_offset,
int cols, int rows) int cols, int rows)
...@@ -107,13 +106,13 @@ __kernel void arithm_absdiff_mat(__global T *src1, int src1_step, int src1_offse ...@@ -107,13 +106,13 @@ __kernel void arithm_absdiff_mat(__global T *src1, int src1_step, int src1_offse
int src2_index = mad24(y, src2_step, x + src2_offset); int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset); int dst_index = mad24(y, dst_step, x + dst_offset);
WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); EXPRESSION
value = value > (WT)(0) ? value : -value;
dst[dst_index] = convertToT(value);
} }
} }
// add mat with scale for multiply #else
// add mat with scale
__kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int src1_offset, __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int src1_offset,
__global T *src2, int src2_step, int src2_offset, __global T *src2, int src2_step, int src2_offset,
__global WT *scalar, __global WT *scalar,
...@@ -129,28 +128,8 @@ __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int s ...@@ -129,28 +128,8 @@ __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int s
int src2_index = mad24(y, src2_step, x + src2_offset); int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset); int dst_index = mad24(y, dst_step, x + dst_offset);
dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0] * convertToWT(src2[src2_index])); EXPRESSION
} }
} }
// add mat with scale for divide #endif
__kernel void arithm_binary_op_mat_scalar_div(__global T *src1, int src1_step, int src1_offset,
__global T *src2, int src2_step, int src2_offset,
__global WT *scalar,
__global T *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
T zero = (T)(0);
dst[dst_index] = src2[src2_index] == zero ? zero :
convertToT(convertToWT(src1[src1_index]) * scalar[0] / convertToWT(src2[src2_index]));
}
}
...@@ -51,6 +51,24 @@ ...@@ -51,6 +51,24 @@
#endif #endif
#endif #endif
#if defined (FUNC_ADD)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index]));
#endif
#if defined (FUNC_SUB)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]));
#endif
#if defined (FUNC_MUL)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index]));
#endif
#if defined (FUNC_DIV)
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src2[src2_index] == zero ? zero : \
convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index]));
#endif
////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////
///////////////////////////////// add with mask ////////////////////////////////// ///////////////////////////////// add with mask //////////////////////////////////
////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////
...@@ -73,7 +91,7 @@ __kernel void arithm_binary_op_mat_mask(__global T * src1, int src1_step, int sr ...@@ -73,7 +91,7 @@ __kernel void arithm_binary_op_mat_mask(__global T * src1, int src1_step, int sr
int src2_index = mad24(y, src2_step, x + src2_offset); int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, dst_offset + x); int dst_index = mad24(y, dst_step, dst_offset + x);
dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation convertToWT(src2[src2_index])); EXPRESSION
} }
} }
} }
...@@ -51,48 +51,34 @@ ...@@ -51,48 +51,34 @@
#endif #endif
#endif #endif
/////////////////////////////////////////////////////////////////////////////////// #if defined (FUNC_ADD)
///////////////////////////////// Add with scalar ///////////////////////////////// #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]);
/////////////////////////////////////////////////////////////////////////////////// #endif
__kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1_offset,
__global WT *scalar,
__global T *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation scalar[0]); #if defined (FUNC_SUB)
} #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]);
} #endif
__kernel void arithm_absdiff_scalar(__global T *src1, int src1_step, int src1_offset, #if defined (FUNC_MUL)
__global WT *src2, #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]);
__global T *dst, int dst_step, int dst_offset, #endif
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows) #if defined (FUNC_DIV)
{ #define EXPRESSION T zero = (T)(0); \
int src1_index = mad24(y, src1_step, x + src1_offset); dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar[0] / convertToWT(src1[src1_index]));
int dst_index = mad24(y, dst_step, x + dst_offset); #endif
WT value = convertToWT(src1[src1_index]) - src2[0]; #if defined (FUNC_ABS_DIFF)
value = value > (WT)(0) ? value : -value; #define EXPRESSION WT value = convertToWT(src1[src1_index]) - scalar[0]; \
value = value > (WT)(0) ? value : -value; \
dst[dst_index] = convertToT(value); dst[dst_index] = convertToT(value);
} #endif
}
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////// Add with scalar /////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
// scalar divide to matrix __kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1_offset,
__kernel void arithm_binary_op_scalar_div(__global T *src1, int src1_step, int src1_offset,
__global WT *scalar, __global WT *scalar,
__global T *dst, int dst_step, int dst_offset, __global T *dst, int dst_step, int dst_offset,
int cols, int rows) int cols, int rows)
...@@ -105,7 +91,6 @@ __kernel void arithm_binary_op_scalar_div(__global T *src1, int src1_step, int s ...@@ -105,7 +91,6 @@ __kernel void arithm_binary_op_scalar_div(__global T *src1, int src1_step, int s
int src1_index = mad24(y, src1_step, x + src1_offset); int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset); int dst_index = mad24(y, dst_step, x + dst_offset);
T zero = (T)(0); EXPRESSION
dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar[0] / convertToWT(src1[src1_index]));
} }
} }
...@@ -51,6 +51,24 @@ ...@@ -51,6 +51,24 @@
#endif #endif
#endif #endif
#if defined (FUNC_ADD)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]);
#endif
#if defined (FUNC_SUB)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]);
#endif
#if defined (FUNC_MUL)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]);
#endif
#if defined (FUNC_DIV)
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src2[src2_index] == zero ? zero : \
convertToT(convertToWT(src1[src1_index]) / scalar[0]);
#endif
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Add with scalar with mask //////////////////////////// //////////////////////////// Add with scalar with mask ////////////////////////////
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
...@@ -72,7 +90,7 @@ __kernel void arithm_binary_op_scalar_mask(__global T *src1, int src1_step, int ...@@ -72,7 +90,7 @@ __kernel void arithm_binary_op_scalar_mask(__global T *src1, int src1_step, int
int src1_index = mad24(y, src1_step, x + src1_offset); int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, dst_offset + x); int dst_index = mad24(y, dst_step, dst_offset + x);
dst[dst_index] = convertToT(convertToWT(src1[src1_index]) Operation scalar[0]); EXPRESSION
} }
} }
} }
...@@ -535,7 +535,7 @@ TEST_P(Absdiff, Mat) ...@@ -535,7 +535,7 @@ TEST_P(Absdiff, Mat)
} }
} }
TEST_P(Absdiff, Mat_Scalar) TEST_P(Absdiff, Scalar)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment