Commit dd8e442b authored by Vladislav Vinogradov

replaced DeviceInfo().supports with deviceSupports

parent 1a1f4542
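In outline: every call site that previously paired TargetArchs::builtWith(...) (was the module compiled with code for this feature?) with DeviceInfo().supports(...) (does the active device expose it?) now makes a single call to cv::gpu::deviceSupports(...), which folds both checks together. A minimal before/after sketch, with both idioms taken verbatim from the call sites below (the include and the wrapper function are assumptions for illustration):

#include <opencv2/gpu/gpu.hpp>

using namespace cv::gpu;

void requireNativeDouble()
{
    // Before: two separate queries, easy to get only half right.
    //   if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
    //       CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");

    // After: one combined query against the build targets and the current device.
    if (!deviceSupports(NATIVE_DOUBLE))
        CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}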
@@ -1963,8 +1963,6 @@ private:
     GpuMat uPyr_[2];
     GpuMat vPyr_[2];
-
-    bool isDeviceArch11_;
 };
@@ -1981,7 +1979,6 @@ public:
         polyN = 5;
         polySigma = 1.1;
         flags = 0;
-        isDeviceArch11_ = !DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
     }

     int numLevels;
@@ -2029,8 +2026,6 @@ private:
     GpuMat frames_[2];
     GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
     std::vector<GpuMat> pyramid0_, pyramid1_;
-
-    bool isDeviceArch11_;
 };
...
@@ -68,11 +68,16 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool,
 void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags, Stream& stream)
 {
 #ifndef HAVE_CUBLAS
-    (void)src1; (void)src2; (void)alpha; (void)src3; (void)beta; (void)dst; (void)flags; (void)stream;
+    (void)src1;
+    (void)src2;
+    (void)alpha;
+    (void)src3;
+    (void)beta;
+    (void)dst;
+    (void)flags;
+    (void)stream;
     CV_Error(CV_StsNotImplemented, "The library was build without CUBLAS");
 #else
     // CUBLAS works with column-major matrices
     CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2);
@@ -80,7 +85,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
     if (src1.depth() == CV_64F)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
     }
@@ -188,7 +193,6 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
     }

     cublasSafeCall( cublasDestroy_v2(handle) );
 #endif
 }
@@ -227,7 +231,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
     }
     else // if (src.elemSize() == 8)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");

         NppStStreamHandler h(stream);
...
This diff is collapsed.
@@ -198,9 +198,9 @@ namespace bgfg
     void calcDiffHistogram_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame,
                                unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
                                unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
-                               int cc, cudaStream_t stream)
+                               bool cc20, cudaStream_t stream)
     {
-        const int HISTOGRAM_WARP_COUNT = cc < 20 ? 4 : 6;
+        const int HISTOGRAM_WARP_COUNT = cc20 ? 6 : 4;
         const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;

         calcPartialHistogram<PT, CT><<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_THREADBLOCK_SIZE, 0, stream>>>(
@@ -214,10 +214,10 @@ namespace bgfg
         cudaSafeCall( cudaDeviceSynchronize() );
     }

-    template void calcDiffHistogram_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
-    template void calcDiffHistogram_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
-    template void calcDiffHistogram_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
-    template void calcDiffHistogram_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
+    template void calcDiffHistogram_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
+    template void calcDiffHistogram_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
+    template void calcDiffHistogram_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
+    template void calcDiffHistogram_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);

     /////////////////////////////////////////////////////////////////////////
     // calcDiffThreshMask
...
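The hunk above also changes an encoding, not just a name: instead of threading the raw compute capability (major * 10 + minor) through the call chain and comparing cc < 20 at the bottom, the caller now passes a single precomputed boolean. A sketch of how the flag is derived and consumed, written standalone for illustration (WARP_SIZE is 32 on all CUDA devices to date; the constant names come from the diff, the wrapper function does not):

#include <opencv2/gpu/gpu.hpp>

void pickHistogramBlockSize()
{
    // One boolean computed once at the call site, instead of an int cc everywhere.
    const bool cc20 = cv::gpu::deviceSupports(cv::gpu::FEATURE_SET_COMPUTE_20);

    const int WARP_SIZE = 32;
    const int HISTOGRAM_WARP_COUNT = cc20 ? 6 : 4;  // wider blocks on Fermi and newer
    const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;  // 192 vs. 128 threads
    (void)HISTOGRAM_THREADBLOCK_SIZE;  // fed to the <<<...>>> launch in the real kernel call
}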
@@ -125,7 +125,7 @@ namespace bgfg
     void calcDiffHistogram_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
                                unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
                                unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
-                               int cc, cudaStream_t stream);
+                               bool cc20, cudaStream_t stream);

     template <typename PT, typename CT>
     void calcDiffThreshMask_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, uchar3 bestThres, cv::gpu::PtrStepSzb changeMask, cudaStream_t stream);
...
This diff is collapsed.
@@ -125,9 +125,6 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma
     CV_Assert(img.type() == CV_8UC1);
     CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));

-    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
-        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
-
     int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());

     ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);
@@ -148,9 +145,6 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
 {
     using namespace cv::gpu::device::fast;

-    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
-        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
-
     if (count_ == 0)
         return 0;
...
@@ -336,7 +336,7 @@ namespace
 {
     void calcDiffHistogram(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::gpu::GpuMat& hist, cv::gpu::GpuMat& histBuf)
     {
-        typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
+        typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
         static const func_t funcs[4][4] =
         {
             {0,0,0,0},
@@ -348,14 +348,11 @@ namespace
         hist.create(3, 256, CV_32SC1);
         histBuf.create(3, bgfg::PARTIAL_HISTOGRAM_COUNT * bgfg::HISTOGRAM_BIN_COUNT, CV_32SC1);

-        cv::gpu::DeviceInfo devInfo;
-        int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
-
         funcs[prevFrame.channels() - 1][curFrame.channels() - 1](
             prevFrame, curFrame,
             hist.ptr<unsigned int>(0), hist.ptr<unsigned int>(1), hist.ptr<unsigned int>(2),
             histBuf.ptr<unsigned int>(0), histBuf.ptr<unsigned int>(1), histBuf.ptr<unsigned int>(2),
-            cc, 0);
+            cv::gpu::deviceSupports(cv::gpu::FEATURE_SET_COMPUTE_20), 0);
     }

     void calcRelativeVariance(unsigned int hist[3 * 256], double relativeVariance[3][bgfg::HISTOGRAM_BIN_COUNT])
...
@@ -68,9 +68,6 @@ void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image,
     CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
     CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));

-    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
-        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
-
     ensureSizeIsEnough(image.size(), CV_32F, eig_);

     if (useHarrisDetector)
...
@@ -343,9 +343,8 @@ void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf&
     ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);

-    DeviceInfo devInfo;
     const int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, circles.ptr<float3>(), maxCircles,
-                                                    dp, minRadius, maxRadius, votesThreshold, devInfo.supports(FEATURE_SET_COMPUTE_20));
+                                                    dp, minRadius, maxRadius, votesThreshold, deviceSupports(FEATURE_SET_COMPUTE_20));

     if (circlesCount > 0)
         circles.cols = circlesCount;
...
@@ -544,13 +544,12 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
     cudaStream_t stream = StreamAccessor::getStream(s);

-    DeviceInfo info;
     cv::Size whole;
     cv::Point offset;
     src.locateROI(whole, offset);

-    if (info.supports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
+    if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
         && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
     {
         ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
@@ -1489,7 +1488,7 @@ void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_th
     CV_Assert(src.type() == CV_8UC1);

-    if (!TargetArchs::builtWith(SHARED_ATOMICS) || !DeviceInfo().supports(SHARED_ATOMICS))
+    if (!deviceSupports(SHARED_ATOMICS))
         CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics");

     if( low_thresh > high_thresh )
...
@@ -118,7 +118,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat
 {
     CV_Assert(src.type() == CV_8UC1);

-    if (!TargetArchs::builtWith(FEATURE_SET_COMPUTE_13) || !DeviceInfo().supports(FEATURE_SET_COMPUTE_13))
+    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
         CV_Error(CV_StsNotImplemented, "Not sufficient compute capebility");

     NppiSize sz;
@@ -240,7 +240,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
     if (src.depth() == CV_64F)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
     }
@@ -277,6 +277,12 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
         {0, ::sum::runAbs<double, 1>, ::sum::runAbs<double, 2>, ::sum::runAbs<double, 3>, ::sum::runAbs<double, 4>}
     };

+    if (src.depth() == CV_64F)
+    {
+        if (!deviceSupports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     Size buf_size;
     ::sum::getBufSize(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
     ensureSizeIsEnough(buf_size, CV_8U, buf);
@@ -310,6 +316,12 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
         {0, ::sum::runSqr<double, 1>, ::sum::runSqr<double, 2>, ::sum::runSqr<double, 3>, ::sum::runSqr<double, 4>}
     };

+    if (src.depth() == CV_64F)
+    {
+        if (!deviceSupports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     Size buf_size;
     ::sum::getBufSize(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
     ensureSizeIsEnough(buf_size, CV_8U, buf);
@@ -359,7 +371,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
     if (src.depth() == CV_64F)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
     }
@@ -410,7 +422,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
     if (src.depth() == CV_64F)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
     }
@@ -461,7 +473,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
     if (src.depth() == CV_64F)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
     }
...
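Note that the absSum and sqrSum hunks above are additions rather than rewrites: those two functions previously had no CV_64F guard at all, so the commit also fixes a missing check while migrating the idiom. The added guard, shown standalone (the helper wrapper is hypothetical; the commit inlines this block in each function):

#include <opencv2/gpu/gpu.hpp>

using namespace cv::gpu;

static void requireDoubleSupportFor(const GpuMat& src)
{
    // Reject double-precision input when neither the build targets nor the
    // active device can execute native double arithmetic.
    if (src.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }
}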
@@ -172,7 +172,7 @@ void cv::gpu::FarnebackOpticalFlow::updateFlow_boxFilter(
         const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
         GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
 {
-    if (!isDeviceArch11_)
+    if (deviceSupports(FEATURE_SET_COMPUTE_12))
         device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, S(streams[0]));
     else
         device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, S(streams[0]));
@@ -191,7 +191,7 @@ void cv::gpu::FarnebackOpticalFlow::updateFlow_gaussianBlur(
         const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
         GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
 {
-    if (!isDeviceArch11_)
+    if (deviceSupports(FEATURE_SET_COMPUTE_12))
         device::optflow_farneback::gaussianBlur5Gpu(
             M, blockSize/2, bufM, BORDER_REPLICATE_GPU, S(streams[0]));
     else
...
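Why the cached flag can be dropped in the Farneback hunks above and the PyrLK hunks below: isDeviceArch11_ was initialized to !DeviceInfo().supports(FEATURE_SET_COMPUTE_12), so !isDeviceArch11_ and deviceSupports(FEATURE_SET_COMPUTE_12) select the same branch. A sketch of the equivalence (struct names are illustrative; this assumes deviceSupports is cheap to call repeatedly, e.g. because it caches the device's feature set):

#include <opencv2/gpu/gpu.hpp>

using namespace cv::gpu;

// Before: the flag was captured once, at construction time.
struct Before
{
    bool isDeviceArch11_;
    Before() : isDeviceArch11_(!DeviceInfo().supports(FEATURE_SET_COMPUTE_12)) {}
    bool useWideKernels() const { return !isDeviceArch11_; }
};

// After: queried at the call site; equivalent, and it also stays correct
// if the active CUDA device changes between construction and use.
struct After
{
    bool useWideKernels() const { return deviceSupports(FEATURE_SET_COMPUTE_12); }
};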
@@ -74,22 +74,21 @@ cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow()
     maxLevel = 3;
     iters = 30;
     useInitialFlow = false;
-    isDeviceArch11_ = !DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
 }

 namespace
 {
-    void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch, bool isDeviceArch11)
+    void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch)
     {
         if (winSize.width > 32 && winSize.width > 2 * winSize.height)
         {
-            block.x = isDeviceArch11 ? 16 : 32;
+            block.x = deviceSupports(FEATURE_SET_COMPUTE_12) ? 32 : 16;
             block.y = 8;
         }
         else
         {
             block.x = 16;
-            block.y = isDeviceArch11 ? 8 : 16;
+            block.y = deviceSupports(FEATURE_SET_COMPUTE_12) ? 16 : 8;
         }

         patch.x = (winSize.width + block.x - 1) / block.x;
@@ -110,7 +109,7 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
     }

     dim3 block, patch;
-    calcPatchSize(winSize, block, patch, isDeviceArch11_);
+    calcPatchSize(winSize, block, patch);

     CV_Assert(prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4);
     CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
...
@@ -54,7 +54,7 @@ namespace cv { namespace gpu { namespace device
 {
     template <typename T>
     void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
-                   int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+                   int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
 }
 }}}
@@ -63,7 +63,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
     using namespace cv::gpu::device::imgproc;

     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
-                           int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+                           int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

     static const func_t funcs[6][4] =
     {
@@ -91,15 +91,12 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
     Scalar_<float> borderValueFloat;
     borderValueFloat = borderValue;

-    DeviceInfo info;
-    int cc = info.majorVersion() * 10 + info.minorVersion();
-
     Size wholeSize;
     Point ofs;
     src.locateROI(wholeSize, ofs);

     func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
-        dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
+        dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
 }

 #endif // HAVE_CUDA
@@ -78,7 +78,7 @@ namespace
     if (depth == CV_64F)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
     }
@@ -122,7 +122,7 @@ namespace
     if (depth == CV_64F)
     {
-        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+        if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
     }
...
@@ -121,9 +121,6 @@ namespace
     CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
     CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

-    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
-        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
-
     const int min_size = calcSize(surf_.nOctaves - 1, 0);
     CV_Assert(img_rows - min_size >= 0);
     CV_Assert(img_cols - min_size >= 0);
...
@@ -61,13 +61,13 @@ namespace cv { namespace gpu { namespace device
     template <typename T>
     void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
-                        int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+                        int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

     void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);

     template <typename T>
     void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
-                             int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+                             int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
 }
 }}}
@@ -166,7 +166,7 @@ namespace
     cv::gpu::NppStreamHandler h(stream);

     nppSafeCall( func(src.ptr<npp_t>(), srcsz, static_cast<int>(src.step), srcroi,
                       dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi,
                       coeffs, npp_inter[interpolation]) );

     if (stream == 0)
@@ -277,7 +277,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
     using namespace cv::gpu::device::imgproc;

     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
-                           int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+                           int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

     static const func_t funcs[6][4] =
     {
@@ -310,11 +310,8 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
     Scalar_<float> borderValueFloat;
     borderValueFloat = borderValue;

-    DeviceInfo info;
-    int cc = info.majorVersion() * 10 + info.minorVersion();
-
     func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
-         dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), cc);
+         dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
     }
 }
@@ -420,7 +417,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
     using namespace cv::gpu::device::imgproc;

     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
-                           int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+                           int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

     static const func_t funcs[6][4] =
     {
@@ -453,11 +450,8 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
     Scalar_<float> borderValueFloat;
     borderValueFloat = borderValue;

-    DeviceInfo info;
-    int cc = info.majorVersion() * 10 + info.minorVersion();
-
     func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
-         dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), cc);
+         dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
     }
 }
...