Commit 6ef4d9b1 authored by Alexey Spizhevoy's avatar Alexey Spizhevoy

added sipport of BORDER_REPLICATE mode into gpu::corner* functions

parent bb9bf53e
...@@ -44,7 +44,7 @@ namespace cv { namespace gpu { ...@@ -44,7 +44,7 @@ namespace cv { namespace gpu {
struct BrdReflect101 struct BrdReflect101
{ {
BrdReflect101(int len) : last(len - 1) {} BrdReflect101(int len): last(len - 1) {}
__device__ int idx_low(int i) const __device__ int idx_low(int i) const
{ {
...@@ -73,7 +73,7 @@ namespace cv { namespace gpu { ...@@ -73,7 +73,7 @@ namespace cv { namespace gpu {
template <typename T> template <typename T>
struct BrdRowReflect101: BrdReflect101 struct BrdRowReflect101: BrdReflect101
{ {
BrdRowReflect101(int len) : BrdReflect101(len) {} BrdRowReflect101(int len): BrdReflect101(len) {}
__device__ float at_low(int i, const T* data) const __device__ float at_low(int i, const T* data) const
{ {
...@@ -90,7 +90,7 @@ namespace cv { namespace gpu { ...@@ -90,7 +90,7 @@ namespace cv { namespace gpu {
template <typename T> template <typename T>
struct BrdColReflect101: BrdReflect101 struct BrdColReflect101: BrdReflect101
{ {
BrdColReflect101(int len, int step) : BrdReflect101(len), step(step) {} BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {}
__device__ float at_low(int i, const T* data) const __device__ float at_low(int i, const T* data) const
{ {
...@@ -108,7 +108,7 @@ namespace cv { namespace gpu { ...@@ -108,7 +108,7 @@ namespace cv { namespace gpu {
struct BrdReplicate struct BrdReplicate
{ {
BrdReplicate(int len) : last(len - 1) {} BrdReplicate(int len): last(len - 1) {}
__device__ int idx_low(int i) const __device__ int idx_low(int i) const
{ {
...@@ -122,7 +122,7 @@ namespace cv { namespace gpu { ...@@ -122,7 +122,7 @@ namespace cv { namespace gpu {
__device__ int idx(int i) const __device__ int idx(int i) const
{ {
return min(max(i, last), 0); return max(min(i, last), 0);
} }
bool is_range_safe(int mini, int maxi) const bool is_range_safe(int mini, int maxi) const
...@@ -137,7 +137,7 @@ namespace cv { namespace gpu { ...@@ -137,7 +137,7 @@ namespace cv { namespace gpu {
template <typename T> template <typename T>
struct BrdRowReplicate: BrdReplicate struct BrdRowReplicate: BrdReplicate
{ {
BrdRowReplicate(int len) : BrdReplicate(len) {} BrdRowReplicate(int len): BrdReplicate(len) {}
__device__ float at_low(int i, const T* data) const __device__ float at_low(int i, const T* data) const
{ {
...@@ -154,7 +154,7 @@ namespace cv { namespace gpu { ...@@ -154,7 +154,7 @@ namespace cv { namespace gpu {
template <typename T> template <typename T>
struct BrdColReplicate: BrdReplicate struct BrdColReplicate: BrdReplicate
{ {
BrdColReplicate(int len, int step) : BrdReplicate(len), step(step) {} BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {}
__device__ float at_low(int i, const T* data) const __device__ float at_low(int i, const T* data) const
{ {
......
...@@ -99,9 +99,9 @@ namespace cv ...@@ -99,9 +99,9 @@ namespace cv
// border interpolation modes) // border interpolation modes)
enum enum
{ {
BORDER_REFLECT101 = 0 BORDER_REFLECT101 = 0,
BORDER_REPLICATE
}; };
} }
} }
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "cuda_shared.hpp" #include "cuda_shared.hpp"
#include "border_interpolate.hpp" #include "border_interpolate.hpp"
#include <stdio.h>
using namespace cv::gpu; using namespace cv::gpu;
...@@ -498,6 +499,39 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -498,6 +499,39 @@ namespace cv { namespace gpu { namespace imgproc
texture<float, 2> harrisDxTex; texture<float, 2> harrisDxTex;
texture<float, 2> harrisDyTex; texture<float, 2> harrisDyTex;
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
PtrStep dst)
{
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows)
{
float a = 0.f;
float b = 0.f;
float c = 0.f;
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (int i = ibegin; i < iend; ++i)
{
for (int j = jbegin; j < jend; ++j)
{
float dx = tex2D(harrisDxTex, j, i);
float dy = tex2D(harrisDyTex, j, i);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
((float*)dst.ptr(y))[x] = a * c - b * b - k * (a + c) * (a + c);
}
}
template <typename B> template <typename B>
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k, __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
PtrStep dst, B border_row, B border_col) PtrStep dst, B border_row, B border_col)
...@@ -555,6 +589,13 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -555,6 +589,13 @@ namespace cv { namespace gpu { namespace imgproc
cornerHarris_kernel<<<grid, threads>>>( cornerHarris_kernel<<<grid, threads>>>(
cols, rows, block_size, k, dst, BrdReflect101(cols), BrdReflect101(rows)); cols, rows, block_size, k, dst, BrdReflect101(cols), BrdReflect101(rows));
break; break;
case BORDER_REPLICATE:
harrisDxTex.addressMode[0] = cudaAddressModeClamp;
harrisDxTex.addressMode[1] = cudaAddressModeClamp;
harrisDyTex.addressMode[0] = cudaAddressModeClamp;
harrisDyTex.addressMode[1] = cudaAddressModeClamp;
cornerHarris_kernel<<<grid, threads>>>(cols, rows, block_size, k, dst);
break;
} }
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
...@@ -567,6 +608,42 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -567,6 +608,42 @@ namespace cv { namespace gpu { namespace imgproc
texture<float, 2> minEigenValDxTex; texture<float, 2> minEigenValDxTex;
texture<float, 2> minEigenValDyTex; texture<float, 2> minEigenValDyTex;
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
PtrStep dst)
{
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows)
{
float a = 0.f;
float b = 0.f;
float c = 0.f;
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (int i = ibegin; i < iend; ++i)
{
for (int j = jbegin; j < jend; ++j)
{
float dx = tex2D(minEigenValDxTex, j, i);
float dy = tex2D(minEigenValDyTex, j, i);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
a *= 0.5f;
c *= 0.5f;
((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
}
}
template <typename B> template <typename B>
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
PtrStep dst, B border_row, B border_col) PtrStep dst, B border_row, B border_col)
...@@ -624,8 +701,14 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -624,8 +701,14 @@ namespace cv { namespace gpu { namespace imgproc
{ {
case BORDER_REFLECT101: case BORDER_REFLECT101:
cornerMinEigenVal_kernel<<<grid, threads>>>( cornerMinEigenVal_kernel<<<grid, threads>>>(
cols, rows, block_size, dst, cols, rows, block_size, dst, BrdReflect101(cols), BrdReflect101(rows));
BrdReflect101(cols), BrdReflect101(rows)); break;
case BORDER_REPLICATE:
minEigenValDxTex.addressMode[0] = cudaAddressModeClamp;
minEigenValDxTex.addressMode[1] = cudaAddressModeClamp;
minEigenValDyTex.addressMode[0] = cudaAddressModeClamp;
minEigenValDyTex.addressMode[1] = cudaAddressModeClamp;
cornerMinEigenVal_kernel<<<grid, threads>>>(cols, rows, block_size, dst);
break; break;
} }
......
...@@ -150,7 +150,8 @@ void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, ...@@ -150,7 +150,8 @@ void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
static const Caller callers[] = static const Caller callers[] =
{ {
rowFilterCaller<T, BrdRowReflect101<T> > rowFilterCaller<T, BrdRowReflect101<T> >,
rowFilterCaller<T, BrdRowReplicate<T> >
}; };
callers[brd_interp](src, dst, anchor, kernel, ksize); callers[brd_interp](src, dst, anchor, kernel, ksize);
...@@ -251,7 +252,8 @@ void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, ...@@ -251,7 +252,8 @@ void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
static const Caller callers[] = static const Caller callers[] =
{ {
colFilterCaller<T, BrdColReflect101<T> > colFilterCaller<T, BrdColReflect101<T> >,
colFilterCaller<T, BrdColReplicate<T> >
}; };
callers[brd_interp](src, dst, anchor, kernel, ksize); callers[brd_interp](src, dst, anchor, kernel, ksize);
...@@ -261,4 +263,4 @@ void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, ...@@ -261,4 +263,4 @@ void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
template void colFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int); template void colFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
template void colFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int); template void colFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
}}} }}}
\ No newline at end of file
...@@ -944,10 +944,13 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ks ...@@ -944,10 +944,13 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ks
switch (borderType) switch (borderType)
{ {
case cv::BORDER_REFLECT101: case cv::BORDER_REFLECT101:
gpuBorderType = cv::gpu::BORDER_REFLECT101; gpuBorderType = cv::gpu::BORDER_REFLECT101;
break;
case cv::BORDER_REPLICATE:
gpuBorderType = cv::gpu::BORDER_REPLICATE;
break; break;
default: default:
CV_Error(CV_StsBadArg, "cornerHarris: unsupported border type"); CV_Error(CV_StsBadArg, "cornerHarris: unsupported border extrapolation mode");
} }
GpuMat Dx, Dy; GpuMat Dx, Dy;
...@@ -964,8 +967,11 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, i ...@@ -964,8 +967,11 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, i
case cv::BORDER_REFLECT101: case cv::BORDER_REFLECT101:
gpuBorderType = cv::gpu::BORDER_REFLECT101; gpuBorderType = cv::gpu::BORDER_REFLECT101;
break; break;
case cv::BORDER_REPLICATE:
gpuBorderType = cv::gpu::BORDER_REPLICATE;
break;
default: default:
CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border type"); CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border extrapolation mode");
} }
GpuMat Dx, Dy; GpuMat Dx, Dy;
......
...@@ -640,15 +640,37 @@ struct CV_GpuCornerHarrisTest: CvTest ...@@ -640,15 +640,37 @@ struct CV_GpuCornerHarrisTest: CvTest
rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256)); rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
double k = 0.1; double k = 0.1;
int borderType = BORDER_REFLECT101;
cv::Mat dst_gold; cv::Mat dst_gold;
cv::gpu::GpuMat dst;
cv::Mat dsth;
int borderType;
borderType = BORDER_REFLECT101;
cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);
cv::gpu::GpuMat dst; dsth = dst;
for (int i = 0; i < dst.rows; ++i)
{
for (int j = 0; j < dst.cols; ++j)
{
float a = dst_gold.at<float>(i, j);
float b = dsth.at<float>(i, j);
if (fabs(a - b) > 1e-3f)
{
ts->printf(CvTS::CONSOLE, "%d %d %f %f %d\n", i, j, a, b, apertureSize);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
return false;
};
}
}
borderType = BORDER_REPLICATE;
cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType); cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);
cv::Mat dsth = dst; dsth = dst;
for (int i = 0; i < dst.rows; ++i) for (int i = 0; i < dst.rows; ++i)
{ {
for (int j = 0; j < dst.cols; ++j) for (int j = 0; j < dst.cols; ++j)
...@@ -703,15 +725,37 @@ struct CV_GpuCornerMinEigenValTest: CvTest ...@@ -703,15 +725,37 @@ struct CV_GpuCornerMinEigenValTest: CvTest
else if (depth == CV_8U) else if (depth == CV_8U)
rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256)); rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
int borderType = BORDER_REFLECT101;
cv::Mat dst_gold; cv::Mat dst_gold;
cv::gpu::GpuMat dst;
cv::Mat dsth;
int borderType;
borderType = BORDER_REFLECT101;
cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);
cv::gpu::GpuMat dst; dsth = dst;
for (int i = 0; i < dst.rows; ++i)
{
for (int j = 0; j < dst.cols; ++j)
{
float a = dst_gold.at<float>(i, j);
float b = dsth.at<float>(i, j);
if (fabs(a - b) > 1e-2f)
{
ts->printf(CvTS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
return false;
};
}
}
borderType = BORDER_REPLICATE;
cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType); cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);
cv::Mat dsth = dst; dsth = dst;
for (int i = 0; i < dst.rows; ++i) for (int i = 0; i < dst.rows; ++i)
{ {
for (int j = 0; j < dst.cols; ++j) for (int j = 0; j < dst.cols; ++j)
...@@ -726,6 +770,7 @@ struct CV_GpuCornerMinEigenValTest: CvTest ...@@ -726,6 +770,7 @@ struct CV_GpuCornerMinEigenValTest: CvTest
}; };
} }
} }
return true; return true;
} }
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment