Commit dfaa8af6 authored by Vladislav Vinogradov

fixed #1279

parent ed801d3e
...@@ -566,9 +566,6 @@ namespace cv { namespace gpu { namespace surf ...@@ -566,9 +566,6 @@ namespace cv { namespace gpu { namespace surf
float* s_sum_row = s_sum + threadIdx.y * 32; float* s_sum_row = s_sum + threadIdx.y * 32;
//reduceSum32(s_sum_row, sumx);
//reduceSum32(s_sum_row, sumy);
warpReduce32(s_sum_row, sumx, threadIdx.x, plus<volatile float>()); warpReduce32(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
warpReduce32(s_sum_row, sumy, threadIdx.x, plus<volatile float>()); warpReduce32(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
......
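Both calls feed a 32-float strip of shared memory owned by one thread-block row, so each row is reduced entirely within a single warp and no __syncthreads() is needed. A minimal sketch of the functor being passed (an assumption; the real plus<> lives in the device utility headers and is not part of this diff):

    // Hypothetical stand-in for the library's plus<> functor. Instantiating
    // it as plus<volatile float> lets its operands bind to elements of the
    // volatile shared-memory strip without dropping the qualifier.
    template <typename T> struct plus
    {
        __device__ __forceinline__ T operator()(T a, T b) const { return a + b; }
    };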
@@ -73,7 +73,7 @@ namespace cv { namespace gpu { namespace device
     }
 
     // warp-synchronous 32 elements reduction
-    template <typename T, typename Op> __device__ __forceinline__ void warpReduce32(volatile T* data, volatile T& partial_reduction, int tid, Op op)
+    template <typename T, typename Op> __device__ __forceinline__ void warpReduce32(volatile T* data, T& partial_reduction, int tid, Op op)
     {
         data[tid] = partial_reduction;
@@ -88,7 +88,7 @@ namespace cv { namespace gpu { namespace device
     }
 
     // warp-synchronous 16 elements reduction
-    template <typename T, typename Op> __device__ __forceinline__ void warpReduce16(volatile T* data, volatile T& partial_reduction, int tid, Op op)
+    template <typename T, typename Op> __device__ __forceinline__ void warpReduce16(volatile T* data, T& partial_reduction, int tid, Op op)
     {
         data[tid] = partial_reduction;
@@ -102,7 +102,7 @@ namespace cv { namespace gpu { namespace device
     }
 
     // warp-synchronous reduction
-    template <int n, typename T, typename Op> __device__ __forceinline__ void warpReduce(volatile T* data, volatile T& partial_reduction, int tid, Op op)
+    template <int n, typename T, typename Op> __device__ __forceinline__ void warpReduce(volatile T* data, T& partial_reduction, int tid, Op op)
     {
         if (tid < n)
             data[tid] = partial_reduction;
...
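Only the partial_reduction reference loses its volatile qualifier; the shared-memory pointer keeps it. The running partial is thread-private and can live in a register, while the volatile stores and loads through data are what keep each lane's intermediate results visible to the rest of the warp between the unrolled steps. A self-contained sketch of the 32-element variant and a driver for it (the unrolled body, the kernel, and the host code below are illustrative assumptions; only the signatures above come from this commit, and the implicit lock-step execution it relies on holds on pre-Volta GPUs):

    #include <cstdio>
    #include <cuda_runtime.h>

    // Illustrative functor (assumption; stands in for the library's plus<>).
    template <typename T> struct plus
    {
        __device__ __forceinline__ T operator()(T a, T b) const { return a + b; }
    };

    // Presumed body of the 32-element reduction, matching the new signature:
    // data stays volatile so every strided store/load really goes through
    // shared memory, while partial_reduction may now sit in a register.
    template <typename T, typename Op>
    __device__ __forceinline__ void warpReduce32(volatile T* data, T& partial_reduction, int tid, Op op)
    {
        data[tid] = partial_reduction;

        if (tid < 16)   // a single warp: no __syncthreads() between steps
        {
            data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid +  8]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid +  4]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid +  2]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid +  1]);
        }
    }

    __global__ void sum32(const float* src, float* dst)
    {
        __shared__ float s_row[32];

        float v = src[threadIdx.x];
        warpReduce32(s_row, v, threadIdx.x, plus<volatile float>());

        if (threadIdx.x == 0)
            *dst = v;   // thread 0's register now holds the sum of all 32 inputs
    }

    int main()
    {
        float h_src[32], h_dst = 0.f;
        for (int i = 0; i < 32; ++i) h_src[i] = 1.f;

        float *d_src, *d_dst;
        cudaMalloc(&d_src, 32 * sizeof(float));
        cudaMalloc(&d_dst, sizeof(float));
        cudaMemcpy(d_src, h_src, 32 * sizeof(float), cudaMemcpyHostToDevice);

        sum32<<<1, 32>>>(d_src, d_dst);

        cudaMemcpy(&h_dst, d_dst, sizeof(float), cudaMemcpyDeviceToHost);
        printf("sum = %f\n", h_dst);   // expect 32.0

        cudaFree(d_src);
        cudaFree(d_dst);
        return 0;
    }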
@@ -109,9 +109,11 @@ int main(int argc, char** argv)
     cvtest::TS::ptr()->init("gpu");
     testing::InitGoogleTest(&argc, argv);
 
-    //cv::CommandLineParser parser(argc, (const char**)argv);
-    std::string outputLevel = "none";//parser.get<std::string>("nvtest_output_level", "none");
+    const char* keys = "{ nvtest_output_level | nvtest_output_level | none | NVidia test verbosity level }";
+    cv::CommandLineParser parser(argc, (const char**)argv, keys);
+    std::string outputLevel = parser.get<std::string>("nvtest_output_level", "none");
 
     if (outputLevel == "none")
         nvidiaTestOutputLevel = OutputLevelNone;
...
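With the parser restored, the NVIDIA test verbosity is read from the command line instead of being hard-coded to "none", using the 2.x keys format "{ short | long | default | help }". A run would presumably look like this (the binary name is an assumption, and the diff shows only the "none" branch):

    ./opencv_test_gpu --nvtest_output_level=none

with the elided else-if branches mapping the remaining level names onto the other OutputLevel constants.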