Commit 3a4d4080 authored by Ilya Lavrenov

Fixed overflow in cv::norm for NORM_L2

parent 1c3bfae2
...@@ -651,13 +651,13 @@ OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity, ...@@ -651,13 +651,13 @@ OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity,
typedef Size_MatType MeanStdDevFixture; typedef Size_MatType MeanStdDevFixture;
OCL_PERF_TEST_P(MeanStdDevFixture, DISABLED_MeanStdDev, OCL_PERF_TEST_P(MeanStdDevFixture, MeanStdDev,
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES)) ::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES))
{ {
const Size_MatType_t params = GetParam(); const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params); const Size srcSize = get<0>(params);
const int type = get<1>(params); const int type = get<1>(params);
const double eps = 1e-5; const double eps = 2e-5;
checkDeviceMaxMemoryAllocSize(srcSize, type); checkDeviceMaxMemoryAllocSize(srcSize, type);
...@@ -687,7 +687,7 @@ CV_ENUM(NormType, NORM_INF, NORM_L1, NORM_L2) ...@@ -687,7 +687,7 @@ CV_ENUM(NormType, NORM_INF, NORM_L1, NORM_L2)
typedef std::tr1::tuple<Size, MatType, NormType> NormParams; typedef std::tr1::tuple<Size, MatType, NormType> NormParams;
typedef TestBaseWithParam<NormParams> NormFixture; typedef TestBaseWithParam<NormParams> NormFixture;
OCL_PERF_TEST_P(NormFixture, DISABLED_Norm, OCL_PERF_TEST_P(NormFixture, Norm,
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES, NormType::all())) ::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES, NormType::all()))
{ {
const NormParams params = GetParam(); const NormParams params = GetParam();
...@@ -703,7 +703,7 @@ OCL_PERF_TEST_P(NormFixture, DISABLED_Norm, ...@@ -703,7 +703,7 @@ OCL_PERF_TEST_P(NormFixture, DISABLED_Norm,
OCL_TEST_CYCLE() res = cv::norm(src1, src2, normType); OCL_TEST_CYCLE() res = cv::norm(src1, src2, normType);
SANITY_CHECK(res, 1e-6, ERROR_RELATIVE); SANITY_CHECK(res, 1e-5, ERROR_RELATIVE);
} }
///////////// Repeat //////////////////////// ///////////// Repeat ////////////////////////
......
...@@ -88,7 +88,7 @@ ...@@ -88,7 +88,7 @@
#define REDUCE_GLOBAL \ #define REDUCE_GLOBAL \
accumulator += src[0] == zero ? zero : one accumulator += src[0] == zero ? zero : one
#define SET_LOCAL_1 \ #define SET_LOCAL_1 \
localmem[lid] = accumulator localmem[lid] = accumulator
#define REDUCE_LOCAL_1 \ #define REDUCE_LOCAL_1 \
localmem[lid - WGS2_ALIGNED] += accumulator localmem[lid - WGS2_ALIGNED] += accumulator
#define REDUCE_LOCAL_2 \ #define REDUCE_LOCAL_2 \
......
...@@ -479,7 +479,8 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) ...@@ -479,7 +479,8 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op )
int dbsize = ocl::Device::getDefault().maxComputeUnits(); int dbsize = ocl::Device::getDefault().maxComputeUnits();
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
int ddepth = std::max(CV_32S, depth), dtype = CV_MAKE_TYPE(ddepth, cn); int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
dtype = CV_MAKE_TYPE(ddepth, cn);
int wgs2_aligned = 1; int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs) while (wgs2_aligned < (int)wgs)
...@@ -501,7 +502,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) ...@@ -501,7 +502,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op )
dbsize, ocl::KernelArg::PtrWriteOnly(db)); dbsize, ocl::KernelArg::PtrWriteOnly(db));
size_t globalsize = dbsize * wgs; size_t globalsize = dbsize * wgs;
if (k.run(1, &globalsize, &wgs, true)) if (k.run(1, &globalsize, &wgs, false))
{ {
typedef Scalar (*part_sum)(Mat m); typedef Scalar (*part_sum)(Mat m);
part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> }, part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
...@@ -1927,8 +1928,9 @@ static bool ocl_norm( InputArray _src, int normType, double & result ) ...@@ -1927,8 +1928,9 @@ static bool ocl_norm( InputArray _src, int normType, double & result )
Scalar s; Scalar s;
bool unstype = depth == CV_8U || depth == CV_16U; bool unstype = depth == CV_8U || depth == CV_16U;
ocl_sum(src.reshape(1), s, normType == NORM_L2 ? if ( !ocl_sum(src.reshape(1), s, normType == NORM_L2 ?
OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS) ); OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS)) )
return false;
result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]); result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]);
} }
......
...@@ -1355,7 +1355,7 @@ OCL_TEST_P(ScaleAdd, Mat) ...@@ -1355,7 +1355,7 @@ OCL_TEST_P(ScaleAdd, Mat)
OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi)); OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi));
OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi)); OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi));
Near(depth <= CV_32S ? 1 : 1e-6); Near(depth <= CV_32S ? 1 : 1e-3);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment