Commit 90ae1e3a authored by Alexey Spizhevoy

refactored gpu module

parent 8503f752
...@@ -69,22 +69,33 @@ Returns true, if the specified GPU has atomics support, otherwise false. ...@@ -69,22 +69,33 @@ Returns true, if the specified GPU has atomics support, otherwise false.
\end{description} \end{description}
\cvCppFunc{gpu::checkPtxVersion} \cvCppFunc{gpu::ptxVersionIs}
Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false. Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false.
\cvdefCpp{template $<$unsigned int cmp\_op$>$\newline \cvdefCpp{bool ptxVersionIs(int major, int minor);}
bool checkPtxVersion(int major, int minor);}
\begin{description} \begin{description}
\cvarg{cmp\_op}{Comparison operation: \cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
\cvCppFunc{gpu::ptxVersionIsLessOrEqual}
Returns true, if the GPU module was built with PTX support of the given compute capability or less, otherwise false.
\cvdefCpp{bool ptxVersionIsLessOrEqual(int major, int minor);}
\begin{description} \begin{description}
\cvarg{CMP\_EQ}{Return true, if at least one of GPU module PTX versions matches the given one, otherwise false} \cvarg{major}{Major compute capability version.}
\cvarg{CMP\_LT}{Return true, if at least one of GPU module PTX versions is less than the given one, otherwise false} \cvarg{minor}{Minor compute capability version.}
\cvarg{CMP\_LE}{Return true, if at least one of GPU module PTX versions is less or equal to the given one, otherwise false} \end{description}
\cvarg{CMP\_GT}{Return true, if at least one of GPU module PTX versions is greater than the given one, otherwise false}
\cvarg{CMP\_GE}{Return true, if at least one of GPU module PTX versions is greater or equal to the given one, otherwise false}
\end{description}} \cvCppFunc{gpu::ptxVersionIsGreaterOrEqual}
\cvarg{major}{Major CC version.} Returns true, if the GPU module was built with PTX support of the given compute capability or greater, otherwise false.
\cvarg{minor}{Minor CC version.}
\cvdefCpp{bool ptxVersionIsGreaterOrEqual(int major, int minor);}
\begin{description}
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description} \end{description}
......
...@@ -72,8 +72,9 @@ namespace cv ...@@ -72,8 +72,9 @@ namespace cv
CV_EXPORTS bool hasNativeDoubleSupport(int device); CV_EXPORTS bool hasNativeDoubleSupport(int device);
CV_EXPORTS bool hasAtomicsSupport(int device); CV_EXPORTS bool hasAtomicsSupport(int device);
template <unsigned int cmp_op> CV_EXPORTS bool ptxVersionIs(int major, int minor);
CV_EXPORTS bool checkPtxVersion(int major, int minor); CV_EXPORTS bool ptxVersionIsLessOrEqual(int major, int minor);
CV_EXPORTS bool ptxVersionIsGreaterOrEqual(int major, int minor);
//! Checks if the GPU module is PTX compatible with the given NVIDIA device //! Checks if the GPU module is PTX compatible with the given NVIDIA device
CV_EXPORTS bool isCompatibleWith(int device); CV_EXPORTS bool isCompatibleWith(int device);
......
...@@ -719,7 +719,7 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -719,7 +719,7 @@ namespace cv { namespace gpu { namespace imgproc
////////////////////////////// Column Sum ////////////////////////////////////// ////////////////////////////// Column Sum //////////////////////////////////////
__global__ void column_sum_kernel_32F(int cols, int rows, const PtrStep src, const PtrStep dst) __global__ void column_sumKernel_32F(int cols, int rows, const PtrStep src, const PtrStep dst)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -745,7 +745,7 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -745,7 +745,7 @@ namespace cv { namespace gpu { namespace imgproc
dim3 threads(256); dim3 threads(256);
dim3 grid(divUp(src.cols, threads.x)); dim3 grid(divUp(src.cols, threads.x));
column_sum_kernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst); column_sumKernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
......
This diff is collapsed.
...@@ -133,85 +133,81 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device) ...@@ -133,85 +133,81 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
namespace namespace
{ {
template <unsigned int cmp_op> struct ComparerEqual
bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2);
template <>
bool comparePairs<CMP_EQ>(int lhs1, int lhs2, int rhs1, int rhs2)
{ {
return lhs1 == rhs1 && lhs2 == rhs2; bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
} {
return lhs1 == rhs1 && lhs2 == rhs2;
}
};
template <>
bool comparePairs<CMP_GT>(int lhs1, int lhs2, int rhs1, int rhs2)
{
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 > rhs2);
}
template <> struct ComparerLessOrEqual
bool comparePairs<CMP_GE>(int lhs1, int lhs2, int rhs1, int rhs2)
{ {
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2); bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
} {
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
}
};
template <>
bool comparePairs<CMP_LT>(int lhs1, int lhs2, int rhs1, int rhs2)
{
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 < rhs2);
}
struct ComparerGreaterOrEqual
template <>
bool comparePairs<CMP_LE>(int lhs1, int lhs2, int rhs1, int rhs2)
{
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
}
template <>
bool comparePairs<CMP_NE>(int lhs1, int lhs2, int rhs1, int rhs2)
{ {
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2); bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
} {
} return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
}
};
template <unsigned int cmp_op> template <typename Comparer>
CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor) bool checkPtxVersion(int major, int minor, Comparer cmp)
{ {
#ifdef OPENCV_GPU_CUDA_ARCH_10 #ifdef OPENCV_GPU_CUDA_ARCH_10
if (comparePairs<cmp_op>(1, 0, major, minor)) return true; if (cmp(1, 0, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_11 #ifdef OPENCV_GPU_CUDA_ARCH_11
if (comparePairs<cmp_op>(1, 1, major, minor)) return true; if (cmp(1, 1, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_12 #ifdef OPENCV_GPU_CUDA_ARCH_12
if (comparePairs<cmp_op>(1, 2, major, minor)) return true; if (cmp(1, 2, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_13 #ifdef OPENCV_GPU_CUDA_ARCH_13
if (comparePairs<cmp_op>(1, 3, major, minor)) return true; if (cmp(1, 3, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_20 #ifdef OPENCV_GPU_CUDA_ARCH_20
if (comparePairs<cmp_op>(2, 0, major, minor)) return true; if (cmp(2, 0, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_21 #ifdef OPENCV_GPU_CUDA_ARCH_21
if (comparePairs<cmp_op>(2, 1, major, minor)) return true; if (cmp(2, 1, major, minor)) return true;
#endif #endif
return false; return false;
}
}
CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor)
{
return checkPtxVersion(major, minor, ComparerEqual());
} }
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_EQ>(int major, int minor); CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor)
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GT>(int major, int minor); {
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GE>(int major, int minor); return checkPtxVersion(major, minor, ComparerLessOrEqual());
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LT>(int major, int minor); }
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LE>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_NE>(int major, int minor);
CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor)
{
return checkPtxVersion(major, minor, ComparerGreaterOrEqual());
}
CV_EXPORTS bool isCompatibleWith(int device) CV_EXPORTS bool isCompatibleWith(int device)
...@@ -223,7 +219,7 @@ CV_EXPORTS bool isCompatibleWith(int device) ...@@ -223,7 +219,7 @@ CV_EXPORTS bool isCompatibleWith(int device)
int major, minor; int major, minor;
getComputeCapability(device, major, minor); getComputeCapability(device, major, minor);
return checkPtxVersion<CMP_LE>(major, minor); return ptxVersionIsLessOrEqual(major, minor);
} }
#endif #endif
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment