Commit 0771fd82 authored by cuda-geek, committed by OpenCV Buildbot

Merge pull request #557 from jet47:gpu-sanity

parents 2be893a2 c9868fc0
@@ -3,8 +3,7 @@
 using namespace std;
 using namespace testing;
 
-#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::szXGA, perf::sz720p, perf::sz1080p)
+#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p)
 
 //////////////////////////////////////////////////////////////////////
 // BilateralFilter
@@ -12,96 +11,86 @@ using namespace testing;
 DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
 
 PERF_TEST_P(Sz_Depth_Cn_KernelSz, Denoising_BilateralFilter,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values(CV_8U, CV_32F), GPU_CHANNELS_1_3, Values(3, 5, 9)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values(CV_8U, CV_32F),
+                    GPU_CHANNELS_1_3,
+                    Values(3, 5, 9)))
 {
     declare.time(60.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int kernel_size = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int kernel_size = GET_PARAM(3);
 
-    float sigma_color = 7;
-    float sigma_spatial = 5;
-    int borderMode = cv::BORDER_REFLECT101;
+    const float sigma_color = 7;
+    const float sigma_spatial = 5;
+    const int borderMode = cv::BORDER_REFLECT101;
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-        }
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        GPU_SANITY_CHECK(d_dst);
+        TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
    {
         cv::Mat dst;
 
-        cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-        }
+        TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
 
         CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // nonLocalMeans
 
 DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
 
 PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(5, 7)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values<MatDepth>(CV_8U),
+                    GPU_CHANNELS_1_3,
+                    Values(21),
+                    Values(5)))
 {
     declare.time(60.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int search_widow_size = GET_PARAM(3);
-    int block_size = GET_PARAM(4);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int search_widow_size = GET_PARAM(3);
+    const int block_size = GET_PARAM(4);
 
-    float h = 10;
-    int borderMode = cv::BORDER_REFLECT101;
+    const float h = 10;
+    const int borderMode = cv::BORDER_REFLECT101;
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
-        }
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        GPU_SANITY_CHECK(d_dst);
+        TEST_CYCLE() cv::gpu::nonLocalMeans(d_src, dst, h, search_widow_size, block_size, borderMode);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
@@ -112,46 +101,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
 DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
 
 PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(7)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values<MatDepth>(CV_8U),
+                    GPU_CHANNELS_1_3,
+                    Values(21),
+                    Values(7)))
 {
-    declare.time(150.0);
+    declare.time(60.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int search_widow_size = GET_PARAM(2);
-    int block_size = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int search_widow_size = GET_PARAM(2);
+    const int block_size = GET_PARAM(3);
 
-    float h = 10;
-    int type = CV_MAKE_TYPE(depth, 1);
+    const float h = 10;
+    const int type = CV_MAKE_TYPE(depth, 1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
         cv::gpu::FastNonLocalMeansDenoising fnlmd;
 
-        fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
-
-        TEST_CYCLE()
-        {
-            fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
-        }
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        GPU_SANITY_CHECK(d_dst);
+        TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
    {
         cv::Mat dst;
 
-        cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
-
-        TEST_CYCLE()
-        {
-            cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
-        }
+        TEST_CYCLE() cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -163,47 +147,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
 DEF_PARAM_TEST(Sz_Depth_WinSz_BlockSz, cv::Size, MatDepth, int, int);
 
 PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), Values(21), Values(7)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values<MatDepth>(CV_8U),
+                    Values(21),
+                    Values(7)))
 {
-    declare.time(350.0);
+    declare.time(60.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int search_widow_size = GET_PARAM(2);
-    int block_size = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int search_widow_size = GET_PARAM(2);
+    const int block_size = GET_PARAM(3);
 
-    float h = 10;
-    int type = CV_MAKE_TYPE(depth, 3);
+    const float h = 10;
+    const int type = CV_MAKE_TYPE(depth, 3);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
         cv::gpu::FastNonLocalMeansDenoising fnlmd;
 
-        fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
-
-        TEST_CYCLE()
-        {
-            fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
-        }
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        GPU_SANITY_CHECK(d_dst);
+        TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
+
+        GPU_SANITY_CHECK(dst);
    }
    else
    {
        cv::Mat dst;
 
-        cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
-
-        TEST_CYCLE()
-        {
-            cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
-        }
+        TEST_CYCLE() cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
 
         CPU_SANITY_CHECK(dst);
    }
 }
\ No newline at end of file
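The same shape repeats in every perf test touched by this merge: the input is warmed up with declare.in(..., WARMUP_RNG) instead of the removed fillRandom() helper, the timed call sits on a single TEST_CYCLE() statement, and each branch ends in GPU_SANITY_CHECK / CPU_SANITY_CHECK, or FAIL_NO_CPU() when no CPU counterpart exists. A minimal sketch of that shape (the Example_Filter test name and cv::gpu::someFilter call are hypothetical stand-ins, not real APIs; the fixture and macros are the ones used throughout this module):

    // Sketch of the refactored perf-test shape used throughout this merge.
    PERF_TEST_P(Sz_Depth_Cn, Example_Filter,
                Combine(GPU_TYPICAL_MAT_SIZES,
                        Values(CV_8U, CV_32F),
                        GPU_CHANNELS_1_3))
    {
        const cv::Size size = GET_PARAM(0);
        const int depth = GET_PARAM(1);
        const int channels = GET_PARAM(2);

        cv::Mat src(size, CV_MAKE_TYPE(depth, channels));
        declare.in(src, WARMUP_RNG);                    // random warm-up input (replaces fillRandom)

        if (PERF_RUN_GPU())
        {
            const cv::gpu::GpuMat d_src(src);
            cv::gpu::GpuMat dst;

            TEST_CYCLE() cv::gpu::someFilter(d_src, dst);   // hypothetical GPU call, timed

            GPU_SANITY_CHECK(dst);                      // downloads dst and regression-checks it
        }
        else
        {
            FAIL_NO_CPU();                              // used where no CPU counterpart exists
        }
    }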
@@ -3,8 +3,6 @@
 using namespace std;
 using namespace testing;
 
-namespace {
-
 DEF_PARAM_TEST_1(Image, string);
 
 struct GreedyLabeling
@@ -100,28 +98,45 @@ struct GreedyLabeling
     dot* stack;
 };
 
-PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/aloe-disp.png"))
+PERF_TEST_P(Image, Labeling_ConnectivityMask,
+            Values<string>("gpu/labeling/aloe-disp.png"))
 {
     declare.time(1.0);
 
-    cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
 
     if (PERF_RUN_GPU())
     {
+        cv::gpu::GpuMat d_image(image);
         cv::gpu::GpuMat mask;
-        mask.create(image.rows, image.cols, CV_8UC1);
 
-        cv::gpu::GpuMat components;
-        components.create(image.rows, image.cols, CV_32SC1);
+        TEST_CYCLE() cv::gpu::connectivityMask(d_image, mask, cv::Scalar::all(0), cv::Scalar::all(2));
 
-        cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
+        GPU_SANITY_CHECK(mask);
+    }
+    else
+    {
+        FAIL_NO_CPU();
+    }
+}
 
-        ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
+PERF_TEST_P(Image, Labeling_ConnectedComponents,
+            Values<string>("gpu/labeling/aloe-disp.png"))
+{
+    declare.time(1.0);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::labelComponents(mask, components);
-        }
+    const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_mask;
+        cv::gpu::connectivityMask(cv::gpu::GpuMat(image), d_mask, cv::Scalar::all(0), cv::Scalar::all(2));
+
+        cv::gpu::GpuMat components;
+
+        TEST_CYCLE() cv::gpu::labelComponents(d_mask, components);
 
         GPU_SANITY_CHECK(components);
     }
@@ -129,17 +144,9 @@ PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/al
     {
         GreedyLabeling host(image);
 
-        host(host._labels);
-
-        declare.time(1.0);
-
-        TEST_CYCLE()
-        {
-            host(host._labels);
-        }
+        TEST_CYCLE() host(host._labels);
 
-        CPU_SANITY_CHECK(host._labels);
+        cv::Mat components = host._labels;
+        CPU_SANITY_CHECK(components);
     }
 }
-
-} // namespace
 #include "perf_precomp.hpp"
 
-namespace{
-
 static void printOsInfo()
 {
 #if defined _WIN32
@@ -69,6 +67,4 @@ static void printCudaInfo()
 #endif
 }
 
-}
-
-CV_PERF_TEST_MAIN(gpu, printCudaInfo())
+CV_PERF_TEST_MAIN(gpu, printCudaInfo())
\ No newline at end of file
@@ -3,137 +3,112 @@
 using namespace std;
 using namespace testing;
 
-namespace {
-
 //////////////////////////////////////////////////////////////////////
 // SetTo
 
-PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
-    cv::Scalar val(1, 2, 3, 4);
+    const cv::Scalar val(1, 2, 3, 4);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(size, type);
-
-        d_src.setTo(val);
-
-        TEST_CYCLE()
-        {
-            d_src.setTo(val);
-        }
+        cv::gpu::GpuMat dst(size, type);
 
-        GPU_SANITY_CHECK(d_src);
+        TEST_CYCLE() dst.setTo(val);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
    {
-        cv::Mat src(size, type);
-
-        src.setTo(val);
-
-        TEST_CYCLE()
-        {
-            src.setTo(val);
-        }
+        cv::Mat dst(size, type);
 
-        CPU_SANITY_CHECK(src);
+        TEST_CYCLE() dst.setTo(val);
+
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // SetToMasked
 
-PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
-
     cv::Mat mask(size, CV_8UC1);
-    fillRandom(mask, 0, 2);
+    declare.in(src, mask, WARMUP_RNG);
 
-    cv::Scalar val(1, 2, 3, 4);
+    const cv::Scalar val(1, 2, 3, 4);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_mask(mask);
-
-        d_src.setTo(val, d_mask);
-
-        TEST_CYCLE()
-        {
-            d_src.setTo(val, d_mask);
-        }
+        cv::gpu::GpuMat dst(src);
+        const cv::gpu::GpuMat d_mask(mask);
 
-        GPU_SANITY_CHECK(d_src);
+        TEST_CYCLE() dst.setTo(val, d_mask);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
    {
-        src.setTo(val, mask);
-
-        TEST_CYCLE()
-        {
-            src.setTo(val, mask);
-        }
+        cv::Mat dst = src;
 
-        CPU_SANITY_CHECK(src);
+        TEST_CYCLE() dst.setTo(val, mask);
+
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // CopyToMasked
 
-PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
-
     cv::Mat mask(size, CV_8UC1);
-    fillRandom(mask, 0, 2);
+    declare.in(src, mask, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_mask(mask);
-        cv::gpu::GpuMat d_dst;
-
-        d_src.copyTo(d_dst, d_mask);
-
-        TEST_CYCLE()
-        {
-            d_src.copyTo(d_dst, d_mask);
-        }
+        const cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_mask(mask);
+        cv::gpu::GpuMat dst(d_src.size(), d_src.type(), cv::Scalar::all(0));
 
-        GPU_SANITY_CHECK(d_dst);
+        TEST_CYCLE() d_src.copyTo(dst, d_mask);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
    {
-        cv::Mat dst;
-
-        src.copyTo(dst, mask);
-
-        TEST_CYCLE()
-        {
-            src.copyTo(dst, mask);
-        }
+        cv::Mat dst(src.size(), src.type(), cv::Scalar::all(0));
 
+        TEST_CYCLE() src.copyTo(dst, mask);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -144,42 +119,36 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Valu
 DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
 
-PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth1 = GET_PARAM(1);
-    int depth2 = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth1 = GET_PARAM(1);
+    const int depth2 = GET_PARAM(2);
 
     cv::Mat src(size, depth1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
+
+    const double a = 0.5;
+    const double b = 1.0;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        d_src.convertTo(d_dst, depth2, 0.5, 1.0);
-
-        TEST_CYCLE()
-        {
-            d_src.convertTo(d_dst, depth2, 0.5, 1.0);
-        }
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        GPU_SANITY_CHECK(d_dst);
+        TEST_CYCLE() d_src.convertTo(dst, depth2, a, b);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
    {
         cv::Mat dst;
 
-        src.convertTo(dst, depth2, 0.5, 1.0);
-
-        TEST_CYCLE()
-        {
-            src.convertTo(dst, depth2, 0.5, 1.0);
-        }
+        TEST_CYCLE() src.convertTo(dst, depth2, a, b);
 
         CPU_SANITY_CHECK(dst);
     }
 }
-
-} // namespace
@@ -3,90 +3,47 @@
 using namespace std;
 using namespace testing;
 
-namespace {
-
 ///////////////////////////////////////////////////////////////
 // HOG
 
 DEF_PARAM_TEST_1(Image, string);
 
-PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
+PERF_TEST_P(Image, ObjDetect_HOG,
+            Values<string>("gpu/hog/road.png",
+                           "gpu/caltech/image_00000009_0.png",
+                           "gpu/caltech/image_00000032_0.png",
+                           "gpu/caltech/image_00000165_0.png",
+                           "gpu/caltech/image_00000261_0.png",
+                           "gpu/caltech/image_00000469_0.png",
+                           "gpu/caltech/image_00000527_0.png",
+                           "gpu/caltech/image_00000574_0.png"))
 {
-    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
-    std::vector<cv::Rect> found_locations;
-
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_img(img);
+        const cv::gpu::GpuMat d_img(img);
+        std::vector<cv::Rect> gpu_found_locations;
 
         cv::gpu::HOGDescriptor d_hog;
         d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
 
-        d_hog.detectMultiScale(d_img, found_locations);
-
-        TEST_CYCLE()
-        {
-            d_hog.detectMultiScale(d_img, found_locations);
-        }
-    }
-    else
-    {
-        cv::HOGDescriptor hog;
-        hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
-
-        hog.detectMultiScale(img, found_locations);
-
-        TEST_CYCLE()
-        {
-            hog.detectMultiScale(img, found_locations);
-        }
-    }
-
-    SANITY_CHECK(found_locations);
-}
-
-//===========test for CalTech data =============//
-
-DEF_PARAM_TEST_1(HOG, string);
-
-PERF_TEST_P(HOG, CalTech, Values<string>("gpu/caltech/image_00000009_0.png", "gpu/caltech/image_00000032_0.png",
-    "gpu/caltech/image_00000165_0.png", "gpu/caltech/image_00000261_0.png", "gpu/caltech/image_00000469_0.png",
-    "gpu/caltech/image_00000527_0.png", "gpu/caltech/image_00000574_0.png"))
-{
-    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    std::vector<cv::Rect> found_locations;
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_img(img);
-
-        cv::gpu::HOGDescriptor d_hog;
-        d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
-
-        d_hog.detectMultiScale(d_img, found_locations);
+        TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
 
-        TEST_CYCLE()
-        {
-            d_hog.detectMultiScale(d_img, found_locations);
-        }
+        SANITY_CHECK(gpu_found_locations);
     }
     else
     {
+        std::vector<cv::Rect> cpu_found_locations;
+
         cv::HOGDescriptor hog;
         hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
 
-        hog.detectMultiScale(img, found_locations);
-
-        TEST_CYCLE()
-        {
-            hog.detectMultiScale(img, found_locations);
-        }
-    }
+        TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
 
-    SANITY_CHECK(found_locations);
+        SANITY_CHECK(cpu_found_locations);
+    }
 }
 
 ///////////////////////////////////////////////////////////////
@@ -96,9 +53,9 @@ typedef pair<string, string> pair_string;
 DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
 
 PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
     Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
 {
-    cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
     if (PERF_RUN_GPU())
@@ -106,33 +63,28 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
         cv::gpu::CascadeClassifier_GPU d_cascade;
         ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
 
-        cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_objects_buffer;
-
-        d_cascade.detectMultiScale(d_img, d_objects_buffer);
-
-        TEST_CYCLE()
-        {
-            d_cascade.detectMultiScale(d_img, d_objects_buffer);
-        }
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat objects_buffer;
+        int detections_num = 0;
 
-        GPU_SANITY_CHECK(d_objects_buffer);
+        TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
+
+        std::vector<cv::Rect> gpu_rects(detections_num);
+        cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
+        objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
+        cv::groupRectangles(gpu_rects, 3, 0.2);
+        SANITY_CHECK(gpu_rects);
     }
     else
    {
         cv::CascadeClassifier cascade;
         ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
 
-        std::vector<cv::Rect> rects;
-
-        cascade.detectMultiScale(img, rects);
-
-        TEST_CYCLE()
-        {
-            cascade.detectMultiScale(img, rects);
-        }
+        std::vector<cv::Rect> cpu_rects;
 
-        CPU_SANITY_CHECK(rects);
+        TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
+
+        SANITY_CHECK(cpu_rects);
     }
 }
@@ -140,9 +92,9 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
 // LBP cascade
 
 PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
     Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
 {
-    cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
     if (PERF_RUN_GPU())
@@ -150,34 +102,27 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
         cv::gpu::CascadeClassifier_GPU d_cascade;
         ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
 
-        cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_gpu_rects;
-
-        d_cascade.detectMultiScale(d_img, d_gpu_rects);
-
-        TEST_CYCLE()
-        {
-            d_cascade.detectMultiScale(d_img, d_gpu_rects);
-        }
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat objects_buffer;
+        int detections_num = 0;
 
-        GPU_SANITY_CHECK(d_gpu_rects);
+        TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
+
+        std::vector<cv::Rect> gpu_rects(detections_num);
+        cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
+        objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
+        cv::groupRectangles(gpu_rects, 3, 0.2);
+        SANITY_CHECK(gpu_rects);
     }
     else
    {
         cv::CascadeClassifier cascade;
         ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
 
-        std::vector<cv::Rect> rects;
-
-        cascade.detectMultiScale(img, rects);
-
-        TEST_CYCLE()
-        {
-            cascade.detectMultiScale(img, rects);
-        }
+        std::vector<cv::Rect> cpu_rects;
 
-        CPU_SANITY_CHECK(rects);
+        TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
+
+        SANITY_CHECK(cpu_rects);
     }
 }
-
-} // namespace
\ No newline at end of file
@@ -2,13 +2,6 @@
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
-
-void fillRandom(Mat& m, double a, double b)
-{
-    RNG rng(123456789);
-    rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
-}
 
 Mat readImage(const string& fileName, int flags)
 {
@@ -188,4 +181,4 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
     };
 
     *os << str[info.code];
 }
\ No newline at end of file
@@ -2,11 +2,9 @@
 #define __OPENCV_PERF_GPU_UTILITY_HPP__
 
 #include "opencv2/core/core.hpp"
-#include "opencv2/core/gpumat.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/ts/ts_perf.hpp"
 
-void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
 cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
 
 using perf::MatType;
@@ -17,12 +15,13 @@ CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONS
 CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
 #define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
 
 CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX)
 
-const int Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4;
+enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 };
 CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
-#define GPU_CHANNELS_1_3_4 testing::Values(Gray, BGR, BGRA)
-#define GPU_CHANNELS_1_3 testing::Values(Gray, BGR)
+#define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
+#define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
 
 struct CvtColorInfo
 {
@@ -30,7 +29,8 @@ struct CvtColorInfo
     int dcn;
     int code;
 
-    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
+    CvtColorInfo() {}
+    explicit CvtColorInfo(int scn_, int dcn_, int code_) : scn(scn_), dcn(dcn_), code(code_) {}
 };
 
 void PrintTo(const CvtColorInfo& info, std::ostream* os);
@@ -46,39 +46,18 @@ DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn);
 #define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)
 
-#define GPU_SANITY_CHECK(dmat, ...) \
-    do{ \
-        cv::Mat d##dmat(dmat); \
-        SANITY_CHECK(d##dmat, ## __VA_ARGS__); \
-    } while(0)
-
-#define CPU_SANITY_CHECK(cmat, ...) \
-    do{ \
-        SANITY_CHECK(cmat, ## __VA_ARGS__); \
-    } while(0)
-
-#define GPU_SANITY_CHECK_KEYPOINTS(alg, dmat, ...) \
-    do{ \
-        cv::Mat d##dmat(dmat); \
-        cv::Mat __pt_x = d##dmat.row(cv::gpu::alg##_GPU::X_ROW); \
-        cv::Mat __pt_y = d##dmat.row(cv::gpu::alg##_GPU::Y_ROW); \
-        cv::Mat __angle = d##dmat.row(cv::gpu::alg##_GPU::ANGLE_ROW); \
-        cv::Mat __octave = d##dmat.row(cv::gpu::alg##_GPU::OCTAVE_ROW); \
-        cv::Mat __size = d##dmat.row(cv::gpu::alg##_GPU::SIZE_ROW); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-pt-x-row", __pt_x, ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-pt-y-row", __pt_y, ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-angle-row", __angle, ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "octave-row", __octave, ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-pt-size-row", __size, ## __VA_ARGS__); \
-    } while(0)
-
-#define GPU_SANITY_CHECK_RESPONSE(alg, dmat, ...) \
-    do{ \
-        cv::Mat d##dmat(dmat); \
-        cv::Mat __response = d##dmat.row(cv::gpu::alg##_GPU::RESPONSE_ROW); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-response-row", __response, ## __VA_ARGS__); \
-    } while(0)
-
-#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
+#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
+
+#define GPU_SANITY_CHECK(mat, ...) \
+    do{ \
+        cv::Mat gpu_##mat(mat); \
+        SANITY_CHECK(gpu_##mat, ## __VA_ARGS__); \
+    } while(0)
+
+#define CPU_SANITY_CHECK(mat, ...) \
+    do{ \
+        cv::Mat cpu_##mat(mat); \
+        SANITY_CHECK(cpu_##mat, ## __VA_ARGS__); \
+    } while(0)
 
 #endif // __OPENCV_PERF_GPU_UTILITY_HPP__
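For reference, the hand-expanded form of the new sanity macros at a call site such as GPU_SANITY_CHECK(dst) (derived from the definitions above; dst is just an example variable name):

    // GPU_SANITY_CHECK(dst) expands roughly to:
    do {
        cv::Mat gpu_dst(dst);      // download the GpuMat result into a host cv::Mat
        SANITY_CHECK(gpu_dst);     // record/compare it against stored perf regression data
    } while(0);

    // CPU_SANITY_CHECK(dst) expands roughly to:
    do {
        cv::Mat cpu_dst(dst);      // cheap header copy of the host result
        SANITY_CHECK(cpu_dst);
    } while(0);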
@@ -2284,15 +2284,18 @@ namespace arithm
     template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
     template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarAnd<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
 
     template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
     template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarOr<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
 
     template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
     template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarXor<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -2280,11 +2280,11 @@
 {
     typedef void (*bit_scalar_func_t)(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream);
 
-    template <bit_scalar_func_t func> struct BitScalar
+    template <typename T, bit_scalar_func_t func> struct BitScalar
     {
         static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
         {
-            func(src, static_cast<unsigned int>(sc.val[0]), dst, stream);
+            func(src, saturate_cast<T>(sc.val[0]), dst, stream);
        }
    };
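The switch from static_cast to saturate_cast is what makes out-of-range scalar values behave here: saturate_cast clamps to the destination type's range instead of performing a plain conversion. A small self-contained illustration (the example values are my own, not from the patch):

    #include <opencv2/core/core.hpp>
    #include <cassert>

    int main()
    {
        assert(cv::saturate_cast<unsigned char>(300.0)  == 255);    // clamped to CV_8U max
        assert(cv::saturate_cast<unsigned char>(-5.0)   == 0);      // clamped to CV_8U min
        assert(cv::saturate_cast<unsigned short>(70000) == 65535);  // clamped to CV_16U max
        return 0;
    }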
@@ -2292,14 +2292,12 @@
     {
         static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
         {
-            Scalar_<unsigned int> isc = sc;
-
             unsigned int packedVal = 0;
 
-            packedVal |= (isc.val[0] & 0xffff);
-            packedVal |= (isc.val[1] & 0xffff) << 8;
-            packedVal |= (isc.val[2] & 0xffff) << 16;
-            packedVal |= (isc.val[3] & 0xffff) << 24;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[0]) & 0xffff);
+            packedVal |= (saturate_cast<unsigned char>(sc.val[1]) & 0xffff) << 8;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[2]) & 0xffff) << 16;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[3]) & 0xffff) << 24;
 
             func(src, packedVal, dst, stream);
        }
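As an illustration of the packing above (my own worked example, not part of the patch): for a CV_8UC4 scalar sc = cv::Scalar(1, 2, 3, 4) the chain of ORs produces

    // packedVal == (1) | (2 << 8) | (3 << 16) | (4 << 24) == 0x04030201
    // i.e. the four saturated 8-bit channel values packed little-endian into one
    // 32-bit word, which BitScalar4 then hands to the single-channel 32-bit kernel.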
@@ -2330,7 +2328,7 @@
             oSizeROI.width = src.cols;
             oSizeROI.height = src.rows;
 
-            const npp_t pConstants[] = {static_cast<npp_t>(sc.val[0]), static_cast<npp_t>(sc.val[1]), static_cast<npp_t>(sc.val[2]), static_cast<npp_t>(sc.val[3])};
+            const npp_t pConstants[] = {saturate_cast<npp_t>(sc.val[0]), saturate_cast<npp_t>(sc.val[1]), saturate_cast<npp_t>(sc.val[2]), saturate_cast<npp_t>(sc.val[3])};
 
             nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
@@ -2350,7 +2348,7 @@
             oSizeROI.width = src.cols;
             oSizeROI.height = src.rows;
 
-            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
+            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
 
             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -2365,11 +2363,11 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
     {
-        {BitScalar< bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
         {0,0,0,0},
-        {BitScalar< bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
         {0,0,0,0},
-        {BitScalar< bitScalarAnd<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
+        {BitScalar<int, bitScalarAnd<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
     };
 
     const int depth = src.depth();
@@ -2390,11 +2388,11 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
     {
-        {BitScalar< bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
         {0,0,0,0},
-        {BitScalar< bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
         {0,0,0,0},
-        {BitScalar< bitScalarOr<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
+        {BitScalar<int, bitScalarOr<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
     };
 
     const int depth = src.depth();
@@ -2415,11 +2413,11 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
     {
-        {BitScalar< bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
         {0,0,0,0},
-        {BitScalar< bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
         {0,0,0,0},
-        {BitScalar< bitScalarXor<unsigned int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
+        {BitScalar<int, bitScalarXor<int> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
     };
 
     const int depth = src.depth();
@@ -104,12 +104,12 @@ void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scal
 void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
 {
-    if (!TargetArchs::builtWith(SHARED_ATOMICS) || !DeviceInfo().supports(SHARED_ATOMICS))
-        CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
-
     CV_Assert(!mask.empty() && mask.type() == CV_8U);
 
-    if (mask.size() != components.size() || components.type() != CV_32SC1)
-        components.create(mask.size(), CV_32SC1);
+    if (!deviceSupports(SHARED_ATOMICS))
+        CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
+
+    components.create(mask.size(), CV_32SC1);
 
     cudaStream_t stream = StreamAccessor::getStream(s);
     device::ccl::labelComponents(mask, components, flags, stream);
@@ -522,6 +522,7 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
 
     dst.create(dsize, src.type());
+    dst.setTo(Scalar::all(0));
 
     funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
 }
@@ -382,6 +382,7 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
             dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
             dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
             dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
+            dstcol[3] = 255;
         }
     }
 }
@@ -209,6 +209,8 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
+    uPyr_[0].setTo(Scalar::all(0));
+    vPyr_[0].setTo(Scalar::all(0));
     uPyr_[1].setTo(Scalar::all(0));
     vPyr_[1].setTo(Scalar::all(0));
@@ -1873,7 +1873,7 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel
         cv::gpu::setDevice(devInfo.deviceID());
 
         src = randomMat(size, CV_MAKE_TYPE(depth, channels));
-        cv::Scalar_<int> ival = randomScalar(0.0, 255.0);
+        cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
         val = ival;
     }
 };
@@ -252,6 +252,8 @@ PARAM_TEST_CASE(WarpAffineNPP, cv::gpu::DeviceInfo, MatType, Inverse, Interpolat
 GPU_TEST_P(WarpAffineNPP, Accuracy)
 {
     cv::Mat src = readImageType("stereobp/aloe-L.png", type);
+    ASSERT_FALSE(src.empty());
+
     cv::Mat M = createTransfomMatrix(src.size(), CV_PI / 4);
     int flags = interpolation;
     if (inverse)