Commit 90ae1e3a authored by Alexey Spizhevoy's avatar Alexey Spizhevoy

refactored gpu module

parent 8503f752
......@@ -69,22 +69,33 @@ Returns true, if the specified GPU has atomics support, otherwise false.
\end{description}
\cvCppFunc{gpu::checkPtxVersion}
\cvCppFunc{gpu::ptxVersionIs}
Returns true if the GPU module was built with PTX support for exactly the given compute capability; otherwise returns false.
\cvdefCpp{template $<$unsigned int cmp\_op$>$\newline
bool checkPtxVersion(int major, int minor);}
\cvdefCpp{bool ptxVersionIs(int major, int minor);}
\begin{description}
\cvarg{cmp\_op}{Comparison operation:
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
\cvCppFunc{gpu::ptxVersionIsLessOrEqual}
Returns true if the GPU module was built with PTX support for the given compute capability or a lower one; otherwise returns false.
\cvdefCpp{bool ptxVersionIsLessOrEqual(int major, int minor);}
\begin{description}
\cvarg{CMP\_EQ}{Return true, if at least one of GPU module PTX versions matches the given one, otherwise false}
\cvarg{CMP\_LT}{Return true, if at least one of GPU module PTX versions is less than the given one, otherwise false}
\cvarg{CMP\_LE}{Return true, if at least one of GPU module PTX versions is less or equal to the given one, otherwise false}
\cvarg{CMP\_GT}{Return true, if at least one of GPU module PTX versions is greater than the given one, otherwise false}
\cvarg{CMP\_GE}{Return true, if at least one of GPU module PTX versions is greater or equal to the given one, otherwise false}
\end{description}}
\cvarg{major}{Major CC version.}
\cvarg{minor}{Minor CC version.}
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
\cvCppFunc{gpu::ptxVersionIsGreaterOrEqual}
Returns true if the GPU module was built with PTX support for the given compute capability or a higher one; otherwise returns false.
\cvdefCpp{bool ptxVersionIsGreaterOrEqual(int major, int minor);}
\begin{description}
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
......
......@@ -72,8 +72,9 @@ namespace cv
CV_EXPORTS bool hasNativeDoubleSupport(int device);
CV_EXPORTS bool hasAtomicsSupport(int device);
template <unsigned int cmp_op>
CV_EXPORTS bool checkPtxVersion(int major, int minor);
//! Returns true if the GPU module was built with PTX support for exactly the given compute capability (major.minor)
CV_EXPORTS bool ptxVersionIs(int major, int minor);
//! Returns true if the GPU module was built with PTX support for the given compute capability or a lower one
CV_EXPORTS bool ptxVersionIsLessOrEqual(int major, int minor);
//! Returns true if the GPU module was built with PTX support for the given compute capability or a higher one
CV_EXPORTS bool ptxVersionIsGreaterOrEqual(int major, int minor);
//! Checks if the GPU module is PTX compatible with the given NVIDIA device
CV_EXPORTS bool isCompatibleWith(int device);
......
......@@ -719,7 +719,7 @@ namespace cv { namespace gpu { namespace imgproc
////////////////////////////// Column Sum //////////////////////////////////////
__global__ void column_sum_kernel_32F(int cols, int rows, const PtrStep src, const PtrStep dst)
__global__ void column_sumKernel_32F(int cols, int rows, const PtrStep src, const PtrStep dst)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -745,7 +745,7 @@ namespace cv { namespace gpu { namespace imgproc
dim3 threads(256);
dim3 grid(divUp(src.cols, threads.x));
column_sum_kernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
column_sumKernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
cudaSafeCall(cudaThreadSynchronize());
}
......
This diff is collapsed.
......@@ -133,85 +133,81 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
namespace
{
template <unsigned int cmp_op>
bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2);
template <>
bool comparePairs<CMP_EQ>(int lhs1, int lhs2, int rhs1, int rhs2)
struct ComparerEqual
{
return lhs1 == rhs1 && lhs2 == rhs2;
}
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
{
return lhs1 == rhs1 && lhs2 == rhs2;
}
};
template <>
bool comparePairs<CMP_GT>(int lhs1, int lhs2, int rhs1, int rhs2)
{
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 > rhs2);
}
template <>
bool comparePairs<CMP_GE>(int lhs1, int lhs2, int rhs1, int rhs2)
struct ComparerLessOrEqual
{
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
}
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
{
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
}
};
template <>
bool comparePairs<CMP_LT>(int lhs1, int lhs2, int rhs1, int rhs2)
{
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 < rhs2);
}
template <>
bool comparePairs<CMP_LE>(int lhs1, int lhs2, int rhs1, int rhs2)
{
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
}
template <>
bool comparePairs<CMP_NE>(int lhs1, int lhs2, int rhs1, int rhs2)
struct ComparerGreaterOrEqual
{
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
}
}
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
{
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
}
};
template <unsigned int cmp_op>
CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor)
{
template <typename Comparer>
bool checkPtxVersion(int major, int minor, Comparer cmp)
{
#ifdef OPENCV_GPU_CUDA_ARCH_10
if (comparePairs<cmp_op>(1, 0, major, minor)) return true;
if (cmp(1, 0, major, minor)) return true;
#endif
#ifdef OPENCV_GPU_CUDA_ARCH_11
if (comparePairs<cmp_op>(1, 1, major, minor)) return true;
if (cmp(1, 1, major, minor)) return true;
#endif
#ifdef OPENCV_GPU_CUDA_ARCH_12
if (comparePairs<cmp_op>(1, 2, major, minor)) return true;
if (cmp(1, 2, major, minor)) return true;
#endif
#ifdef OPENCV_GPU_CUDA_ARCH_13
if (comparePairs<cmp_op>(1, 3, major, minor)) return true;
if (cmp(1, 3, major, minor)) return true;
#endif
#ifdef OPENCV_GPU_CUDA_ARCH_20
if (comparePairs<cmp_op>(2, 0, major, minor)) return true;
if (cmp(2, 0, major, minor)) return true;
#endif
#ifdef OPENCV_GPU_CUDA_ARCH_21
if (comparePairs<cmp_op>(2, 1, major, minor)) return true;
if (cmp(2, 1, major, minor)) return true;
#endif
return false;
return false;
}
}
// Exact-match query: true when at least one compute capability enabled through
// the OPENCV_GPU_CUDA_ARCH_* build macros equals major.minor, false otherwise.
//   major - major compute capability version to look for
//   minor - minor compute capability version to look for
CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor)
{
    // The comparer is stateless; it only selects the "==" relation.
    ComparerEqual sameVersion;
    const bool found = checkPtxVersion(major, minor, sameVersion);
    return found;
}
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_EQ>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GT>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GE>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LT>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LE>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_NE>(int major, int minor);
// Upper-bound query: true when at least one compute capability enabled through
// the OPENCV_GPU_CUDA_ARCH_* build macros is less than or equal to major.minor
// (major compared first, then minor), false otherwise.
//   major - major compute capability version used as the upper bound
//   minor - minor compute capability version used as the upper bound
CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor)
{
    // The comparer is stateless; it only selects the "<=" relation.
    ComparerLessOrEqual notNewer;
    const bool found = checkPtxVersion(major, minor, notNewer);
    return found;
}
// Lower-bound query: true when at least one compute capability enabled through
// the OPENCV_GPU_CUDA_ARCH_* build macros is greater than or equal to major.minor
// (major compared first, then minor), false otherwise.
//   major - major compute capability version used as the lower bound
//   minor - minor compute capability version used as the lower bound
CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor)
{
    // The comparer is stateless; it only selects the ">=" relation.
    ComparerGreaterOrEqual notOlder;
    const bool found = checkPtxVersion(major, minor, notOlder);
    return found;
}
CV_EXPORTS bool isCompatibleWith(int device)
......@@ -223,7 +219,7 @@ CV_EXPORTS bool isCompatibleWith(int device)
int major, minor;
getComputeCapability(device, major, minor);
return checkPtxVersion<CMP_LE>(major, minor);
return ptxVersionIsLessOrEqual(major, minor);
}
#endif
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment