Commit fa3603a5 authored by Namgoo Lee

[moved from opencv] Add CV_16UC1 support for cuda::CLAHE

Due to the size limit of shared memory, the histogram is built in
global memory for the CV_16UC1 case.

The amount of memory needed to build the histogram is:

    65536 * 4 bytes = 256 KB

whereas the shared memory limit is typically 48 KB.

Added test cases for CV_16UC1 and various clip limits.
Added perf tests for CV_16UC1 on both CPU and CUDA code.

There was also a bug in the CV_8UC1 case when redistributing
"residual" clipped pixels. Adding a test case with a clip
limit of 5.0 exposes this bug.

original commit: https://github.com/opencv/opencv/commit/fb8e652c3f20d377e9f935faee370ed28fb60122
parent d6895a1b
......@@ -183,16 +183,18 @@ PERF_TEST_P(Sz, EqualizeHist,
//////////////////////////////////////////////////////////////////////
// CLAHE
DEF_PARAM_TEST(Sz_ClipLimit, cv::Size, double);
DEF_PARAM_TEST(Sz_ClipLimit, cv::Size, double, MatType);
PERF_TEST_P(Sz_ClipLimit, CLAHE,
Combine(CUDA_TYPICAL_MAT_SIZES,
Values(0.0, 40.0)))
Values(0.0, 40.0),
Values(MatType(CV_8UC1), MatType(CV_16UC1))))
{
const cv::Size size = GET_PARAM(0);
const double clipLimit = GET_PARAM(1);
const int type = GET_PARAM(2);
cv::Mat src(size, CV_8UC1);
cv::Mat src(size, type);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_CUDA())
......
This diff is collapsed.
......@@ -141,8 +141,9 @@ void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream)
namespace clahe
{
void calcLut(PtrStepSzb src, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, cudaStream_t stream);
void transform(PtrStepSzb src, PtrStepSzb dst, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, cudaStream_t stream);
void calcLut_8U(PtrStepSzb src, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, cudaStream_t stream);
void calcLut_16U(PtrStepSzus src, PtrStepus lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, PtrStepSzi hist, cudaStream_t stream);
template <typename T> void transform(PtrStepSz<T> src, PtrStepSz<T> dst, PtrStep<T> lut, int tilesX, int tilesY, int2 tileSize, cudaStream_t stream);
}
namespace
......@@ -170,6 +171,7 @@ namespace
GpuMat srcExt_;
GpuMat lut_;
GpuMat hist_; // histogram on global memory for CV_16UC1 case
};
CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
......@@ -186,14 +188,16 @@ namespace
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
const int type = src.type();
_dst.create( src.size(), src.type() );
CV_Assert( type == CV_8UC1 || type == CV_16UC1 );
_dst.create( src.size(), type );
GpuMat dst = _dst.getGpuMat();
const int histSize = 256;
const int histSize = type == CV_8UC1 ? 256 : 65536;
ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_8UC1, lut_);
ensureSizeIsEnough(tilesX_ * tilesY_, histSize, type, lut_);
cudaStream_t stream = StreamAccessor::getStream(s);
......@@ -227,9 +231,18 @@ namespace
clipLimit = std::max(clipLimit, 1);
}
clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), clipLimit, lutScale, stream);
if (type == CV_8UC1)
clahe::calcLut_8U(srcForLut, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), clipLimit, lutScale, stream);
else // type == CV_16UC1
{
ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_32SC1, hist_);
clahe::calcLut_16U(srcForLut, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), clipLimit, lutScale, hist_, stream);
}
clahe::transform(src, dst, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), stream);
if (type == CV_8UC1)
clahe::transform<uchar>(src, dst, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), stream);
else // type == CV_16UC1
clahe::transform<ushort>(src, dst, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), stream);
}
void CLAHE_Impl::setClipLimit(double clipLimit)
......
......@@ -236,17 +236,19 @@ namespace
IMPLEMENT_PARAM_CLASS(ClipLimit, double)
}
PARAM_TEST_CASE(CLAHE, cv::cuda::DeviceInfo, cv::Size, ClipLimit)
PARAM_TEST_CASE(CLAHE, cv::cuda::DeviceInfo, cv::Size, ClipLimit, MatType)
{
cv::cuda::DeviceInfo devInfo;
cv::Size size;
double clipLimit;
int type;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
clipLimit = GET_PARAM(2);
type = GET_PARAM(3);
cv::cuda::setDevice(devInfo.deviceID());
}
......@@ -254,7 +256,11 @@ PARAM_TEST_CASE(CLAHE, cv::cuda::DeviceInfo, cv::Size, ClipLimit)
CUDA_TEST_P(CLAHE, Accuracy)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::Mat src;
if (type == CV_8UC1)
src = randomMat(size, type);
else if (type == CV_16UC1)
src = randomMat(size, type, 0, 65535);
cv::Ptr<cv::cuda::CLAHE> clahe = cv::cuda::createCLAHE(clipLimit);
cv::cuda::GpuMat dst;
......@@ -270,7 +276,8 @@ CUDA_TEST_P(CLAHE, Accuracy)
INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CLAHE, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(0.0, 40.0)));
testing::Values(0.0, 5.0, 10.0, 20.0, 40.0),
testing::Values(MatType(CV_8UC1), MatType(CV_16UC1))));
}} // namespace
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment