Commit 7928cec6 authored by Vladislav Vinogradov

added linesAccumGlobal kernel

parent 7ae94c57
...@@ -112,6 +112,8 @@ namespace cv { namespace gpu ...@@ -112,6 +112,8 @@ namespace cv { namespace gpu
int multiProcessorCount() const { return multi_processor_count_; } int multiProcessorCount() const { return multi_processor_count_; }
size_t sharedMemPerBlock() const { return sharedMemPerBlock_; }
size_t freeMemory() const; size_t freeMemory() const;
size_t totalMemory() const; size_t totalMemory() const;
...@@ -133,6 +135,7 @@ namespace cv { namespace gpu ...@@ -133,6 +135,7 @@ namespace cv { namespace gpu
int multi_processor_count_; int multi_processor_count_;
int majorVersion_; int majorVersion_;
int minorVersion_; int minorVersion_;
size_t sharedMemPerBlock_;
}; };
CV_EXPORTS void printCudaDeviceInfo(int device); CV_EXPORTS void printCudaDeviceInfo(int device);
......
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencv2/core/gpumat.hpp" #include "opencv2/core/gpumat.hpp"
#include <iostream> #include <iostream>
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
...@@ -301,6 +300,7 @@ void cv::gpu::DeviceInfo::query() ...@@ -301,6 +300,7 @@ void cv::gpu::DeviceInfo::query()
multi_processor_count_ = prop.multiProcessorCount; multi_processor_count_ = prop.multiProcessorCount;
majorVersion_ = prop.major; majorVersion_ = prop.major;
minorVersion_ = prop.minor; minorVersion_ = prop.minor;
sharedMemPerBlock_ = prop.sharedMemPerBlock;
} }
void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const
......
This diff is collapsed.
...@@ -56,9 +56,9 @@ namespace cv { namespace gpu { namespace device ...@@ -56,9 +56,9 @@ namespace cv { namespace gpu { namespace device
{ {
namespace hough namespace hough
{ {
unsigned int buildPointList_gpu(DevMem2Db src, unsigned int* list); int buildPointList_gpu(DevMem2Db src, unsigned int* list);
void linesAccum_gpu(const unsigned int* list, unsigned int count, DevMem2D_<unsigned int> accum, float rho, float theta); void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock);
unsigned int linesGetResult_gpu(DevMem2D_<uint> accum, float2* out, int* voices, unsigned int maxSize, float rho, float theta, float threshold, bool doSort); int linesGetResult_gpu(DevMem2Di accum, float2* out, int* voices, int maxSize, float rho, float theta, float threshold, bool doSort);
} }
}}} }}}
...@@ -71,16 +71,21 @@ void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, ...@@ -71,16 +71,21 @@ void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf,
CV_Assert(src.rows < std::numeric_limits<unsigned short>::max()); CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf); ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf);
unsigned int count = buildPointList_gpu(src, buf.ptr<unsigned int>());
const int count = buildPointList_gpu(src, buf.ptr<unsigned int>());
const int numangle = cvRound(CV_PI / theta); const int numangle = cvRound(CV_PI / theta);
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
CV_Assert(numangle > 0 && numrho > 0);
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum); ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum);
accum.setTo(cv::Scalar::all(0)); accum.setTo(cv::Scalar::all(0));
cv::gpu::DeviceInfo devInfo;
if (count > 0) if (count > 0)
linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta); linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta, devInfo.sharedMemPerBlock());
} }
void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines) void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
...@@ -90,7 +95,8 @@ void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float ...@@ -90,7 +95,8 @@ void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float
CV_Assert(accum.type() == CV_32SC1); CV_Assert(accum.type() == CV_32SC1);
ensureSizeIsEnough(2, maxLines, CV_32FC2, lines); ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
unsigned int count = hough::linesGetResult_gpu(accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
int count = hough::linesGetResult_gpu(accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
if (count > 0) if (count > 0)
lines.cols = count; lines.cols = count;
......
...@@ -99,7 +99,7 @@ namespace cv { namespace gpu { namespace device ...@@ -99,7 +99,7 @@ namespace cv { namespace gpu { namespace device
} }
template<typename T> template<typename T>
static __device__ __forceinline__ void atomicAdd(T* address, T val) static __device__ __forceinline__ T atomicAdd(T* address, T val)
{ {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count; T count;
...@@ -110,8 +110,10 @@ namespace cv { namespace gpu { namespace device ...@@ -110,8 +110,10 @@ namespace cv { namespace gpu { namespace device
count = tag | (count + val); count = tag | (count + val);
*address = count; *address = count;
} while (*address != count); } while (*address != count);
return (count & TAG_MASK) - val;
#else #else
::atomicAdd(address, val); return ::atomicAdd(address, val);
#endif #endif
} }
...@@ -134,4 +136,4 @@ namespace cv { namespace gpu { namespace device ...@@ -134,4 +136,4 @@ namespace cv { namespace gpu { namespace device
}; };
}}} // namespace cv { namespace gpu { namespace device }}} // namespace cv { namespace gpu { namespace device
#endif /* OPENCV_GPU_EMULATION_HPP_ */ #endif /* OPENCV_GPU_EMULATION_HPP_ */
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment