Commit cafcfc4d, authored by Roman Donchenko and committed by OpenCV Buildbot

Merge pull request #2749 from jet47:fix-bug-3678

parents f13e05cd f1e44fa5
...@@ -850,7 +850,7 @@ CUDA_TEST_P(Integral, Accuracy) ...@@ -850,7 +850,7 @@ CUDA_TEST_P(Integral, Accuracy)
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine( INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(768, 1066)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -439,8 +439,6 @@ namespace integral_detail ...@@ -439,8 +439,6 @@ namespace integral_detail
T sum = (tidx < cols) && (y < rows) ? *p : 0; T sum = (tidx < cols) && (y < rows) ? *p : 0;
y += blockDim.y;
sums[threadIdx.x][threadIdx.y] = sum; sums[threadIdx.x][threadIdx.y] = sum;
__syncthreads(); __syncthreads();
...@@ -467,14 +465,17 @@ namespace integral_detail ...@@ -467,14 +465,17 @@ namespace integral_detail
if (threadIdx.y > 0) if (threadIdx.y > 0)
sum += sums[threadIdx.x][threadIdx.y - 1]; sum += sums[threadIdx.x][threadIdx.y - 1];
if (tidx < cols)
{
sum += stepSum; sum += stepSum;
stepSum += sums[threadIdx.x][blockDim.y - 1]; stepSum += sums[threadIdx.x][blockDim.y - 1];
__syncthreads();
if ((tidx < cols) && (y < rows))
{
*p = sum; *p = sum;
} }
__syncthreads(); y += blockDim.y;
} }
#else #else
__shared__ T smem[32][32]; __shared__ T smem[32][32];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment