Commit 2582464e authored by Vladislav Vinogradov

fixed several problems with CUDA 5.0

* gpu::LUT, uses device memory instead of host memory
* gpu::multiply, rounding mode for CV_8U depth
parent 71625ad4
...@@ -320,12 +320,23 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) ...@@ -320,12 +320,23 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
const Npp32s* pLevels3[3]; const Npp32s* pLevels3[3];
int nValues3[3]; int nValues3[3];
#if (CUDA_VERSION > 4020)
GpuMat d_pLevels;
#endif
LevelsInit() LevelsInit()
{ {
nValues3[0] = nValues3[1] = nValues3[2] = 256; nValues3[0] = nValues3[1] = nValues3[2] = 256;
for (int i = 0; i < 256; ++i) for (int i = 0; i < 256; ++i)
pLevels[i] = i; pLevels[i] = i;
#if (CUDA_VERSION <= 4020)
pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
#else
d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
#endif
} }
}; };
static LevelsInit lvls; static LevelsInit lvls;
...@@ -350,22 +361,48 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) ...@@ -350,22 +361,48 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
if (src.type() == CV_8UC1) if (src.type() == CV_8UC1)
{ {
#if (CUDA_VERSION <= 4020)
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) ); dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
#else
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), lvls.d_pLevels.ptr<Npp32s>(), 256) );
#endif
} }
else else
{ {
Mat nppLut3[3];
const Npp32s* pValues3[3]; const Npp32s* pValues3[3];
Mat nppLut3[3];
if (nppLut.channels() == 1) if (nppLut.channels() == 1)
{
#if (CUDA_VERSION <= 4020)
pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>(); pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
#else
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr<Npp32s>();
#endif
}
else else
{ {
cv::split(nppLut, nppLut3); cv::split(nppLut, nppLut3);
#if (CUDA_VERSION <= 4020)
pValues3[0] = nppLut3[0].ptr<Npp32s>(); pValues3[0] = nppLut3[0].ptr<Npp32s>();
pValues3[1] = nppLut3[1].ptr<Npp32s>(); pValues3[1] = nppLut3[1].ptr<Npp32s>();
pValues3[2] = nppLut3[2].ptr<Npp32s>(); pValues3[2] = nppLut3[2].ptr<Npp32s>();
#else
GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data));
GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data));
GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data));
pValues3[0] = d_nppLut0.ptr<Npp32s>();
pValues3[1] = d_nppLut1.ptr<Npp32s>();
pValues3[2] = d_nppLut2.ptr<Npp32s>();
#endif
} }
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step), nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) ); dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) );
} }
......
...@@ -658,7 +658,11 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub ...@@ -658,7 +658,11 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
#if (CUDA_VERSION <= 4020)
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F) if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
#else
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F && src1.depth() > CV_8U)
#endif
{ {
npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream); npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
return; return;
......
...@@ -1189,18 +1189,18 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, AbsDiff, testing::Combine( ...@@ -1189,18 +1189,18 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, AbsDiff, testing::Combine(
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Abs // Abs
PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
int type; int depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
{ {
devInfo = GET_PARAM(0); devInfo = GET_PARAM(0);
size = GET_PARAM(1); size = GET_PARAM(1);
type = GET_PARAM(2); depth = GET_PARAM(2);
useRoi = GET_PARAM(3); useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
...@@ -1209,9 +1209,9 @@ PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) ...@@ -1209,9 +1209,9 @@ PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Abs, Accuracy) TEST_P(Abs, Accuracy)
{ {
cv::Mat src = randomMat(size, type); cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, type, useRoi); cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::abs(loadMat(src, useRoi), dst); cv::gpu::abs(loadMat(src, useRoi), dst);
cv::Mat dst_gold = cv::abs(src); cv::Mat dst_gold = cv::abs(src);
...@@ -1222,24 +1222,24 @@ TEST_P(Abs, Accuracy) ...@@ -1222,24 +1222,24 @@ TEST_P(Abs, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, DIFFERENT_SIZES,
testing::Values(MatType(CV_16SC1), MatType(CV_32FC1)), testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Sqr // Sqr
PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
int type; int depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
{ {
devInfo = GET_PARAM(0); devInfo = GET_PARAM(0);
size = GET_PARAM(1); size = GET_PARAM(1);
type = GET_PARAM(2); depth = GET_PARAM(2);
useRoi = GET_PARAM(3); useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
...@@ -1248,9 +1248,9 @@ PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) ...@@ -1248,9 +1248,9 @@ PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Sqr, Accuracy) TEST_P(Sqr, Accuracy)
{ {
cv::Mat src = randomMat(size, type); cv::Mat src = randomMat(size, depth, 0, depth == CV_8U ? 16 : 255);
cv::gpu::GpuMat dst = createMat(size, type, useRoi); cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::sqr(loadMat(src, useRoi), dst); cv::gpu::sqr(loadMat(src, useRoi), dst);
cv::Mat dst_gold; cv::Mat dst_gold;
...@@ -1262,10 +1262,10 @@ TEST_P(Sqr, Accuracy) ...@@ -1262,10 +1262,10 @@ TEST_P(Sqr, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), testing::Values(MatDepth(CV_8U),
MatType(CV_16UC1), MatDepth(CV_16U),
MatType(CV_16SC1), MatDepth(CV_16S),
MatType(CV_32FC1)), MatDepth(CV_32F)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -1295,18 +1295,18 @@ void sqrtGold(const cv::Mat& src, cv::Mat& dst) ...@@ -1295,18 +1295,18 @@ void sqrtGold(const cv::Mat& src, cv::Mat& dst)
funcs[src.depth()](src, dst); funcs[src.depth()](src, dst);
} }
PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
int type; int depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
{ {
devInfo = GET_PARAM(0); devInfo = GET_PARAM(0);
size = GET_PARAM(1); size = GET_PARAM(1);
type = GET_PARAM(2); depth = GET_PARAM(2);
useRoi = GET_PARAM(3); useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
...@@ -1315,24 +1315,24 @@ PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) ...@@ -1315,24 +1315,24 @@ PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Sqrt, Accuracy) TEST_P(Sqrt, Accuracy)
{ {
cv::Mat src = randomMat(size, type); cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, type, useRoi); cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::sqrt(loadMat(src, useRoi), dst); cv::gpu::sqrt(loadMat(src, useRoi), dst);
cv::Mat dst_gold; cv::Mat dst_gold;
sqrtGold(src, dst_gold); sqrtGold(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), testing::Values(MatDepth(CV_8U),
MatType(CV_16UC1), MatDepth(CV_16U),
MatType(CV_16SC1), MatDepth(CV_16S),
MatType(CV_32FC1)), MatDepth(CV_32F)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -1362,18 +1362,18 @@ void logGold(const cv::Mat& src, cv::Mat& dst) ...@@ -1362,18 +1362,18 @@ void logGold(const cv::Mat& src, cv::Mat& dst)
funcs[src.depth()](src, dst); funcs[src.depth()](src, dst);
} }
PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
int type; int depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
{ {
devInfo = GET_PARAM(0); devInfo = GET_PARAM(0);
size = GET_PARAM(1); size = GET_PARAM(1);
type = GET_PARAM(2); depth = GET_PARAM(2);
useRoi = GET_PARAM(3); useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
...@@ -1382,24 +1382,24 @@ PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) ...@@ -1382,24 +1382,24 @@ PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Log, Accuracy) TEST_P(Log, Accuracy)
{ {
cv::Mat src = randomMat(size, type, 1.0, 255.0); cv::Mat src = randomMat(size, depth, 1.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi); cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::log(loadMat(src, useRoi), dst); cv::gpu::log(loadMat(src, useRoi), dst);
cv::Mat dst_gold; cv::Mat dst_gold;
logGold(src, dst_gold); logGold(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-6); EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-6);
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), testing::Values(MatDepth(CV_8U),
MatType(CV_16UC1), MatDepth(CV_16U),
MatType(CV_16SC1), MatDepth(CV_16S),
MatType(CV_32FC1)), MatDepth(CV_32F)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -1439,18 +1439,18 @@ void expGold(const cv::Mat& src, cv::Mat& dst) ...@@ -1439,18 +1439,18 @@ void expGold(const cv::Mat& src, cv::Mat& dst)
funcs[src.depth()](src, dst); funcs[src.depth()](src, dst);
} }
PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
int type; int depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
{ {
devInfo = GET_PARAM(0); devInfo = GET_PARAM(0);
size = GET_PARAM(1); size = GET_PARAM(1);
type = GET_PARAM(2); depth = GET_PARAM(2);
useRoi = GET_PARAM(3); useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
...@@ -1459,24 +1459,24 @@ PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) ...@@ -1459,24 +1459,24 @@ PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Exp, Accuracy) TEST_P(Exp, Accuracy)
{ {
cv::Mat src = randomMat(size, type, 0.0, 10.0); cv::Mat src = randomMat(size, depth, 0.0, 10.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi); cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::exp(loadMat(src, useRoi), dst); cv::gpu::exp(loadMat(src, useRoi), dst);
cv::Mat dst_gold; cv::Mat dst_gold;
expGold(src, dst_gold); expGold(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-2); EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2);
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), testing::Values(MatDepth(CV_8U),
MatType(CV_16UC1), MatDepth(CV_16U),
MatType(CV_16SC1), MatDepth(CV_16S),
MatType(CV_32FC1)), MatDepth(CV_32F)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
......
...@@ -311,7 +311,7 @@ TEST_P(ConvertTo, WithScaling) ...@@ -311,7 +311,7 @@ TEST_P(ConvertTo, WithScaling)
cv::Mat dst_gold; cv::Mat dst_gold;
src.convertTo(dst_gold, depth2, a, b); src.convertTo(dst_gold, depth2, a, b);
EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 0.0 : 1e-4); EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 1.0 : 1e-4);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment