Commit 06738468 authored by Alexander Alekhin's avatar Alexander Alekhin Committed by Andrey Pavlenko

TAPI: stiching: add custom OpenCL kernels for MultiBandBlender

parent c22d92c1
...@@ -598,6 +598,8 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr ...@@ -598,6 +598,8 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
class CV_EXPORTS Image2D class CV_EXPORTS Image2D
{ {
public: public:
......
...@@ -495,6 +495,11 @@ template<> inline std::string CommandLineParser::get<std::string>(const String& ...@@ -495,6 +495,11 @@ template<> inline std::string CommandLineParser::get<std::string>(const String&
} }
#endif // OPENCV_NOSTL #endif // OPENCV_NOSTL
#if !defined(OPENCV_SKIP_SUPPRESS_WARNING) || !OPENCV_SKIP_SUPPRESS_WARNING
// Use this to bypass "warning C4127: conditional expression is constant"
template <typename T> T SuppressWarning(T v) { return v; }
#endif
} //namespace cv } //namespace cv
#endif //__OPENCV_CORE_UTILITY_H__ #endif //__OPENCV_CORE_UTILITY_H__
...@@ -4404,7 +4404,24 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, ...@@ -4404,7 +4404,24 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
#undef PROCESS_SRC #undef PROCESS_SRC
/////////////////////////////////////////// Image2D ////////////////////////////////////////////////////
// TODO Make this as a method of OpenCL "BuildOptions" class
void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
{
if (!buildOptions.empty())
buildOptions += " ";
int type = _m.type(), depth = CV_MAT_DEPTH(type);
buildOptions += format(
"-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
name.c_str(), ocl::typeToStr(type),
name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
name.c_str(), (int)CV_MAT_CN(type),
name.c_str(), (int)CV_ELEM_SIZE(type),
name.c_str(), (int)CV_ELEM_SIZE1(type),
name.c_str(), (int)depth
);
}
struct Image2D::Impl struct Image2D::Impl
{ {
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include "opencl_kernels.hpp"
namespace cv { namespace cv {
namespace detail { namespace detail {
...@@ -245,6 +246,31 @@ void MultiBandBlender::prepare(Rect dst_roi) ...@@ -245,6 +246,31 @@ void MultiBandBlender::prepare(Rect dst_roi)
} }
} }
#ifdef HAVE_OPENCL
static bool ocl_MultiBandBlender_feed(InputArray _src, InputArray _weight,
InputOutputArray _dst, InputOutputArray _dst_weight)
{
String buildOptions = "-D DEFINE_feed";
ocl::buildOptionsAddMatrixDescription(buildOptions, "src", _src);
ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
ocl::buildOptionsAddMatrixDescription(buildOptions, "dst", _dst);
ocl::buildOptionsAddMatrixDescription(buildOptions, "dstWeight", _dst_weight);
ocl::Kernel k("feed", ocl::stitching::multibandblend_oclsrc, buildOptions);
if (k.empty())
return false;
UMat src = _src.getUMat();
k.args(ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::ReadOnly(_weight.getUMat()),
ocl::KernelArg::ReadWrite(_dst.getUMat()),
ocl::KernelArg::ReadWrite(_dst_weight.getUMat())
);
size_t globalsize[2] = {src.cols, src.rows };
return k.run(2, globalsize, NULL, false);
}
#endif
void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
{ {
...@@ -338,64 +364,62 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -338,64 +364,62 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
int x_br = br_new.x - dst_roi_.x; int x_br = br_new.x - dst_roi_.x;
// Add weighted layer of the source image to the final Laplacian pyramid layer // Add weighted layer of the source image to the final Laplacian pyramid layer
if(weight_type_ == CV_32F)
{
for (int i = 0; i <= num_bands_; ++i) for (int i = 0; i <= num_bands_; ++i)
{
Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
CV_OPENCL_RUN(SuppressWarning(true),
ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i],
dst_pyr_laplace_[i](rc),
dst_band_weights_[i](rc)),
goto next_band;)
{ {
Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ); Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ);
Mat _dst_pyr_laplace = dst_pyr_laplace_[i].getMat(ACCESS_RW); Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW);
Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ); Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ);
Mat _dst_band_weights = dst_band_weights_[i].getMat(ACCESS_RW); Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW);
for (int y = y_tl; y < y_br; ++y) if(weight_type_ == CV_32F)
{
for (int y = 0; y < rc.height; ++y)
{ {
int y_ = y - y_tl; const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y_);
Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y); Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
const float* weight_row = _weight_pyr_gauss.ptr<float>(y_); const float* weight_row = _weight_pyr_gauss.ptr<float>(y);
float* dst_weight_row = _dst_band_weights.ptr<float>(y); float* dst_weight_row = _dst_band_weights.ptr<float>(y);
for (int x = x_tl; x < x_br; ++x) for (int x = 0; x < rc.width; ++x)
{ {
int x_ = x - x_tl; dst_row[x].x += static_cast<short>(src_row[x].x * weight_row[x]);
dst_row[x].x += static_cast<short>(src_row[x_].x * weight_row[x_]); dst_row[x].y += static_cast<short>(src_row[x].y * weight_row[x]);
dst_row[x].y += static_cast<short>(src_row[x_].y * weight_row[x_]); dst_row[x].z += static_cast<short>(src_row[x].z * weight_row[x]);
dst_row[x].z += static_cast<short>(src_row[x_].z * weight_row[x_]); dst_weight_row[x] += weight_row[x];
dst_weight_row[x] += weight_row[x_];
} }
} }
x_tl /= 2; y_tl /= 2;
x_br /= 2; y_br /= 2;
}
} }
else // weight_type_ == CV_16S else // weight_type_ == CV_16S
{ {
for (int i = 0; i <= num_bands_; ++i) for (int y = 0; y < y_br - y_tl; ++y)
{
Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ);
Mat _dst_pyr_laplace = dst_pyr_laplace_[i].getMat(ACCESS_RW);
Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ);
Mat _dst_band_weights = dst_band_weights_[i].getMat(ACCESS_RW);
for (int y = y_tl; y < y_br; ++y)
{ {
int y_ = y - y_tl; const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y_);
Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y); Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
const short* weight_row = _weight_pyr_gauss.ptr<short>(y_); const short* weight_row = _weight_pyr_gauss.ptr<short>(y);
short* dst_weight_row = _dst_band_weights.ptr<short>(y); short* dst_weight_row = _dst_band_weights.ptr<short>(y);
for (int x = x_tl; x < x_br; ++x) for (int x = 0; x < x_br - x_tl; ++x)
{ {
int x_ = x - x_tl; dst_row[x].x += short((src_row[x].x * weight_row[x]) >> 8);
dst_row[x].x += short((src_row[x_].x * weight_row[x_]) >> 8); dst_row[x].y += short((src_row[x].y * weight_row[x]) >> 8);
dst_row[x].y += short((src_row[x_].y * weight_row[x_]) >> 8); dst_row[x].z += short((src_row[x].z * weight_row[x]) >> 8);
dst_row[x].z += short((src_row[x_].z * weight_row[x_]) >> 8); dst_weight_row[x] += weight_row[x];
dst_weight_row[x] += weight_row[x_];
} }
} }
}
}
#ifdef HAVE_OPENCL
next_band:
#endif
x_tl /= 2; y_tl /= 2; x_tl /= 2; y_tl /= 2;
x_br /= 2; y_br /= 2; x_br /= 2; y_br /= 2;
} }
}
LOGLN(" Add weighted layer of the source image to the final Laplacian pyramid layer, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); LOGLN(" Add weighted layer of the source image to the final Laplacian pyramid layer, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
} }
...@@ -411,10 +435,10 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) ...@@ -411,10 +435,10 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
else else
restoreImageFromLaplacePyr(dst_pyr_laplace_); restoreImageFromLaplacePyr(dst_pyr_laplace_);
dst_ = dst_pyr_laplace_[0]; Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
dst_ = dst_(Range(0, dst_roi_final_.height), Range(0, dst_roi_final_.width)); dst_ = dst_pyr_laplace_[0](dst_rc);
UMat _dst_mask; UMat _dst_mask;
compare(dst_band_weights_[0](Range(0, dst_roi_final_.height), Range(0, dst_roi_final_.width)), WEIGHT_EPS, dst_mask_, CMP_GT); compare(dst_band_weights_[0](dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
dst_pyr_laplace_.clear(); dst_pyr_laplace_.clear();
dst_band_weights_.clear(); dst_band_weights_.clear();
...@@ -425,12 +449,38 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) ...@@ -425,12 +449,38 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// Auxiliary functions // Auxiliary functions
#ifdef HAVE_OPENCL
static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _mat)
{
String buildOptions = "-D DEFINE_normalizeUsingWeightMap";
ocl::buildOptionsAddMatrixDescription(buildOptions, "mat", _mat);
ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
ocl::Kernel k("normalizeUsingWeightMap", ocl::stitching::multibandblend_oclsrc, buildOptions);
if (k.empty())
return false;
UMat mat = _mat.getUMat();
k.args(ocl::KernelArg::ReadWrite(mat),
ocl::KernelArg::ReadOnly(_weight.getUMat())
);
size_t globalsize[2] = {mat.cols, mat.rows };
return k.run(2, globalsize, NULL, false);
}
#endif
void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src) void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
{ {
#ifdef HAVE_TEGRA_OPTIMIZATION #ifdef HAVE_TEGRA_OPTIMIZATION
if(tegra::normalizeUsingWeightMap(weight, src)) if(tegra::normalizeUsingWeightMap(weight, src))
return; return;
#endif #endif
CV_OPENCL_RUN(SuppressWarning(true),
ocl_normalizeUsingWeightMap(_weight, _src),
return;)
{
Mat weight = _weight.getMat(); Mat weight = _weight.getMat();
Mat src = _src.getMat(); Mat src = _src.getMat();
...@@ -469,6 +519,7 @@ void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src) ...@@ -469,6 +519,7 @@ void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
} }
} }
} }
}
} }
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2014, Itseez, Inc, all rights reserved.
//
// Common preprocessors macro
//
//
// TODO: Move this common code into "header" file
//
#ifndef NL // New Line: for preprocessor debugging
#define NL
#endif
#define REF(x) x
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
//
// All matrixes are come with this description ("name" is a name of matrix):
// * name_CN - number of channels (1,2,3,4)
// * name_DEPTH - numeric value of CV_MAT_DEPTH(type). See CV_8U, CV_32S, etc macro below.
//
// Currently we also pass these attributes (to reduce this macro block):
// * name_T - datatype (int, float, uchar4, float4)
// * name_T1 - datatype for one channel (int, float, uchar).
// It is equal to result of "T1(name_T)" macro
// * name_TSIZE - CV_ELEM_SIZE(type).
// We can't use sizeof(name_T) here, because sizeof(float3) is usually equal to 8, not 6.
// * name_T1SIZE - CV_ELEM_SIZE1(type)
//
//
// Usage sample:
//
// #define workType TYPE(float, src_CN)
// #define convertToWorkType CONVERT_TO(workType)
// #define convertWorkTypeToDstType CONVERT(workType, dst_T)
//
// __kernel void kernelFn(DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(dst))
// {
// const int x = get_global_id(0);
// const int y = get_global_id(1);
//
// if (x < srcWidth && y < srcHeight)
// {
// int src_byteOffset = MAT_BYTE_OFFSET(src, x, y);
// int dst_byteOffset = MAT_BYTE_OFFSET(dst, x, y);
// workType value = convertToWorkType(LOAD_MAT_AT(src, src_byteOffset));
//
// ... value processing ...
//
// STORE_MAT_AT(dst, dst_byteOffset, convertWorkTypeToDstType(value));
// }
// }
//
#define DECLARE_MAT_ARG(name) \
__global uchar* restrict name ## Ptr, \
int name ## StepBytes, \
int name ## Offset, \
int name ## Height, \
int name ## Width NL
#define MAT_BYTE_OFFSET(name, x, y) mad24((y)/* + name ## OffsetY*/, name ## StepBytes, ((x)/* + name ## OffsetX*/) * (int)(name ## _TSIZE) + name ## Offset)
#define MAT_RELATIVE_BYTE_OFFSET(name, x, y) mad24(y, name ## StepBytes, (x) * (int)(name ## _TSIZE))
#define __LOAD_MAT_AT(name, byteOffset) *((const __global name ## _T*)(name ## Ptr + (byteOffset)))
#define __vload_CN__(name_cn) vload ## name_cn
#define __vload_CN_(name_cn) __vload_CN__(name_cn)
#define __vload_CN(name) __vload_CN_(name ## _CN)
#define __LOAD_MAT_AT_vload(name, byteOffset) __vload_CN(name)(0, ((const __global name ## _T1*)(name ## Ptr + (byteOffset))))
#define __LOAD_MAT_AT_1 __LOAD_MAT_AT
#define __LOAD_MAT_AT_2 __LOAD_MAT_AT
#define __LOAD_MAT_AT_3 __LOAD_MAT_AT_vload
#define __LOAD_MAT_AT_4 __LOAD_MAT_AT
#define __LOAD_MAT_AT_CN__(name_cn) __LOAD_MAT_AT_ ## name_cn
#define __LOAD_MAT_AT_CN_(name_cn) __LOAD_MAT_AT_CN__(name_cn)
#define __LOAD_MAT_AT_CN(name) __LOAD_MAT_AT_CN_(name ## _CN)
#define LOAD_MAT_AT(name, byteOffset) __LOAD_MAT_AT_CN(name)(name, byteOffset)
#define __STORE_MAT_AT(name, byteOffset, v) *((__global name ## _T*)(name ## Ptr + (byteOffset))) = v
#define __vstore_CN__(name_cn) vstore ## name_cn
#define __vstore_CN_(name_cn) __vstore_CN__(name_cn)
#define __vstore_CN(name) __vstore_CN_(name ## _CN)
#define __STORE_MAT_AT_vstore(name, byteOffset, v) __vstore_CN(name)(v, 0, ((__global name ## _T1*)(name ## Ptr + (byteOffset))))
#define __STORE_MAT_AT_1 __STORE_MAT_AT
#define __STORE_MAT_AT_2 __STORE_MAT_AT
#define __STORE_MAT_AT_3 __STORE_MAT_AT_vstore
#define __STORE_MAT_AT_4 __STORE_MAT_AT
#define __STORE_MAT_AT_CN__(name_cn) __STORE_MAT_AT_ ## name_cn
#define __STORE_MAT_AT_CN_(name_cn) __STORE_MAT_AT_CN__(name_cn)
#define __STORE_MAT_AT_CN(name) __STORE_MAT_AT_CN_(name ## _CN)
#define STORE_MAT_AT(name, byteOffset, v) __STORE_MAT_AT_CN(name)(name, byteOffset, v)
#define T1_uchar uchar
#define T1_uchar2 uchar
#define T1_uchar3 uchar
#define T1_uchar4 uchar
#define T1_char char
#define T1_char2 char
#define T1_char3 char
#define T1_char4 char
#define T1_ushort ushort
#define T1_ushort2 ushort
#define T1_ushort3 ushort
#define T1_ushort4 ushort
#define T1_short short
#define T1_short2 short
#define T1_short3 short
#define T1_short4 short
#define T1_int int
#define T1_int2 int
#define T1_int3 int
#define T1_int4 int
#define T1_float float
#define T1_float2 float
#define T1_float3 float
#define T1_float4 float
#define T1_double double
#define T1_double2 double
#define T1_double3 double
#define T1_double4 double
#define T1(type) REF(CAT(T1_, REF(type)))
#define uchar1 uchar
#define char1 char
#define short1 short
#define ushort1 ushort
#define int1 int
#define float1 float
#define double1 double
#define TYPE(type, cn) REF(CAT(REF(type), REF(cn)))
#define __CONVERT_MODE_uchar_uchar __NO_CONVERT
#define __CONVERT_MODE_uchar_char __CONVERT_sat
#define __CONVERT_MODE_uchar_ushort __CONVERT
#define __CONVERT_MODE_uchar_short __CONVERT
#define __CONVERT_MODE_uchar_int __CONVERT
#define __CONVERT_MODE_uchar_float __CONVERT
#define __CONVERT_MODE_uchar_double __CONVERT
#define __CONVERT_MODE_char_uchar __CONVERT_sat
#define __CONVERT_MODE_char_char __NO_CONVERT
#define __CONVERT_MODE_char_ushort __CONVERT_sat
#define __CONVERT_MODE_char_short __CONVERT
#define __CONVERT_MODE_char_int __CONVERT
#define __CONVERT_MODE_char_float __CONVERT
#define __CONVERT_MODE_char_double __CONVERT
#define __CONVERT_MODE_ushort_uchar __CONVERT_sat
#define __CONVERT_MODE_ushort_char __CONVERT_sat
#define __CONVERT_MODE_ushort_ushort __NO_CONVERT
#define __CONVERT_MODE_ushort_short __CONVERT_sat
#define __CONVERT_MODE_ushort_int __CONVERT
#define __CONVERT_MODE_ushort_float __CONVERT
#define __CONVERT_MODE_ushort_double __CONVERT
#define __CONVERT_MODE_short_uchar __CONVERT_sat
#define __CONVERT_MODE_short_char __CONVERT_sat
#define __CONVERT_MODE_short_ushort __CONVERT_sat
#define __CONVERT_MODE_short_short __NO_CONVERT
#define __CONVERT_MODE_short_int __CONVERT
#define __CONVERT_MODE_short_float __CONVERT
#define __CONVERT_MODE_short_double __CONVERT
#define __CONVERT_MODE_int_uchar __CONVERT_sat
#define __CONVERT_MODE_int_char __CONVERT_sat
#define __CONVERT_MODE_int_ushort __CONVERT_sat
#define __CONVERT_MODE_int_short __CONVERT_sat
#define __CONVERT_MODE_int_int __NO_CONVERT
#define __CONVERT_MODE_int_float __CONVERT
#define __CONVERT_MODE_int_double __CONVERT
#define __CONVERT_MODE_float_uchar __CONVERT_sat_rte
#define __CONVERT_MODE_float_char __CONVERT_sat_rte
#define __CONVERT_MODE_float_ushort __CONVERT_sat_rte
#define __CONVERT_MODE_float_short __CONVERT_sat_rte
#define __CONVERT_MODE_float_int __CONVERT_rte
#define __CONVERT_MODE_float_float __NO_CONVERT
#define __CONVERT_MODE_float_double __CONVERT
#define __CONVERT_MODE_double_uchar __CONVERT_sat_rte
#define __CONVERT_MODE_double_char __CONVERT_sat_rte
#define __CONVERT_MODE_double_ushort __CONVERT_sat_rte
#define __CONVERT_MODE_double_short __CONVERT_sat_rte
#define __CONVERT_MODE_double_int __CONVERT_rte
#define __CONVERT_MODE_double_float __CONVERT
#define __CONVERT_MODE_double_double __NO_CONVERT
#define __CONVERT_MODE(srcType, dstType) CAT(__CONVERT_MODE_, CAT(REF(T1(srcType)), CAT(_, REF(T1(dstType)))))
#define __ROUND_MODE__NO_CONVERT
#define __ROUND_MODE__CONVERT // nothing
#define __ROUND_MODE__CONVERT_rte _rte
#define __ROUND_MODE__CONVERT_sat _sat
#define __ROUND_MODE__CONVERT_sat_rte _sat_rte
#define ROUND_MODE(srcType, dstType) CAT(__ROUND_MODE_, __CONVERT_MODE(srcType, dstType))
#define __CONVERT_ROUND(dstType, roundMode) CAT(CAT(convert_, REF(dstType)), roundMode)
#define __NO_CONVERT(dstType) // nothing
#define __CONVERT(dstType) __CONVERT_ROUND(dstType,)
#define __CONVERT_rte(dstType) __CONVERT_ROUND(dstType,_rte)
#define __CONVERT_sat(dstType) __CONVERT_ROUND(dstType,_sat)
#define __CONVERT_sat_rte(dstType) __CONVERT_ROUND(dstType,_sat_rte)
#define CONVERT(srcType, dstType) REF(__CONVERT_MODE(srcType,dstType))(dstType)
#define CONVERT_TO(dstType) __CONVERT_ROUND(dstType,)
// OpenCV depths
#define CV_8U 0
#define CV_8S 1
#define CV_16U 2
#define CV_16S 3
#define CV_32S 4
#define CV_32F 5
#define CV_64F 6
//
// End of common preprocessors macro
//
#if defined(DEFINE_feed)
#define workType TYPE(weight_T1, src_CN)
#define convertSrcToWorkType CONVERT_TO(workType)
#define convertWorkTypeToDstType CONVERT(workType, dst_T)
__kernel void feed(
DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(weight),
DECLARE_MAT_ARG(dst), DECLARE_MAT_ARG(dstWeight)
)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if (x < srcWidth && y < srcHeight)
{
int src_byteOffset = MAT_BYTE_OFFSET(src, x, y);
int weight_byteOffset = MAT_BYTE_OFFSET(weight, x, y);
int dst_byteOffset = MAT_BYTE_OFFSET(dst, x, y);
int dstWeight_byteOffset = MAT_BYTE_OFFSET(dstWeight, x, y);
weight_T w = LOAD_MAT_AT(weight, weight_byteOffset);
workType src_value = convertSrcToWorkType(LOAD_MAT_AT(src, src_byteOffset));
STORE_MAT_AT(dst, dst_byteOffset, LOAD_MAT_AT(dst, dst_byteOffset) + convertWorkTypeToDstType(src_value * w));
STORE_MAT_AT(dstWeight, dstWeight_byteOffset, LOAD_MAT_AT(dstWeight, dstWeight_byteOffset) + w);
}
}
#endif
#if defined(DEFINE_normalizeUsingWeightMap)
#define workType TYPE(weight_T1, mat_CN)
#define convertSrcToWorkType CONVERT_TO(workType)
#define convertWorkTypeToDstType CONVERT(workType, mat_T)
#if weight_DEPTH >= CV_32F
#define WEIGHT_EPS 1e-5f
#else
#define WEIGHT_EPS 0
#endif
__kernel void normalizeUsingWeightMap(
DECLARE_MAT_ARG(mat), DECLARE_MAT_ARG(weight)
)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if (x < matWidth && y < matHeight)
{
int mat_byteOffset = MAT_BYTE_OFFSET(mat, x, y);
int weight_byteOffset = MAT_BYTE_OFFSET(weight, x, y);
weight_T w = LOAD_MAT_AT(weight, weight_byteOffset);
workType value = convertSrcToWorkType(LOAD_MAT_AT(mat, mat_byteOffset));
value = value / (w + WEIGHT_EPS);
STORE_MAT_AT(mat, mat_byteOffset, convertWorkTypeToDstType(value));
}
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment