Commit 9ee15946 authored by Anatoly Baksheev's avatar Anatoly Baksheev

meanShiftFiltering added (by masha)

get free memory function
parent 37f47c9f
...@@ -64,6 +64,8 @@ namespace cv ...@@ -64,6 +64,8 @@ namespace cv
CV_EXPORTS void getComputeCapability(int device, int* major, int* minor); CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);
CV_EXPORTS int getNumberOfSMs(int device); CV_EXPORTS int getNumberOfSMs(int device);
CV_EXPORTS void getGpuMemInfo(size_t *free, size_t* total);
//////////////////////////////// GpuMat //////////////////////////////// //////////////////////////////// GpuMat ////////////////////////////////
class CudaStream; class CudaStream;
class MatPL; class MatPL;
...@@ -328,7 +330,10 @@ namespace cv ...@@ -328,7 +330,10 @@ namespace cv
////////////////////////////// Image processing ////////////////////////////// ////////////////////////////// Image processing //////////////////////////////
void CV_EXPORTS remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst); CV_EXPORTS void remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst);
CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, float sp, float sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
//////////////////////////////// StereoBM_GPU //////////////////////////////// //////////////////////////////// StereoBM_GPU ////////////////////////////////
......
...@@ -148,10 +148,10 @@ namespace beliefpropagation_gpu ...@@ -148,10 +148,10 @@ namespace beliefpropagation_gpu
} }
namespace cv { namespace gpu { namespace impl { namespace cv { namespace gpu { namespace impl {
typedef void (*CompDataFunc)(const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream); typedef void (*CompDataFunc)(const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream);
template<typename T> template<typename T>
void comp_data_(const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream) void comp_data_(const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -162,29 +162,29 @@ namespace cv { namespace gpu { namespace impl { ...@@ -162,29 +162,29 @@ namespace cv { namespace gpu { namespace impl {
if (channels == 1) if (channels == 1)
beliefpropagation_gpu::comp_data_gray<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows); beliefpropagation_gpu::comp_data_gray<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
else else
beliefpropagation_gpu::comp_data_bgr<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows); beliefpropagation_gpu::comp_data_bgr<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void comp_data(int msg_type, const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream) void comp_data(int msg_type, const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream)
{ {
static CompDataFunc tab[8] = static CompDataFunc tab[8] =
{ {
0, // uchar 0, // uchar
0, // schar 0, // schar
0, // ushort 0, // ushort
comp_data_<short>, // short comp_data_<short>, // short
0, // int 0, // int
comp_data_<float>, // float comp_data_<float>, // float
0, // double 0, // double
0 // user type 0 // user type
}; };
CompDataFunc func = tab[msg_type]; CompDataFunc func = tab[msg_type];
if (func == 0) if (func == 0)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__); cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
func(l, r, channels, mdata, stream); func(l, r, channels, mdata, stream);
} }
}}} }}}
...@@ -220,10 +220,10 @@ namespace beliefpropagation_gpu ...@@ -220,10 +220,10 @@ namespace beliefpropagation_gpu
} }
namespace cv { namespace gpu { namespace impl { namespace cv { namespace gpu { namespace impl {
typedef void (*DataStepDownFunc)(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream); typedef void (*DataStepDownFunc)(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream);
template<typename T> template<typename T>
void data_step_down_(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream) void data_step_down_(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -231,29 +231,29 @@ namespace cv { namespace gpu { namespace impl { ...@@ -231,29 +231,29 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(dst_cols, threads.x); grid.x = divUp(dst_cols, threads.x);
grid.y = divUp(dst_rows, threads.y); grid.y = divUp(dst_rows, threads.y);
beliefpropagation_gpu::data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)src.ptr, src.step/sizeof(T), (T*)dst.ptr, dst.step/sizeof(T)); beliefpropagation_gpu::data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)src.ptr, src.step/sizeof(T), (T*)dst.ptr, dst.step/sizeof(T));
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void data_step_down(int dst_cols, int dst_rows, int src_rows, int msg_type, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream) void data_step_down(int dst_cols, int dst_rows, int src_rows, int msg_type, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream)
{ {
static DataStepDownFunc tab[8] = static DataStepDownFunc tab[8] =
{ {
0, // uchar 0, // uchar
0, // schar 0, // schar
0, // ushort 0, // ushort
data_step_down_<short>, // short data_step_down_<short>, // short
0, // int 0, // int
data_step_down_<float>, // float data_step_down_<float>, // float
0, // double 0, // double
0 // user type 0 // user type
}; };
DataStepDownFunc func = tab[msg_type]; DataStepDownFunc func = tab[msg_type];
if (func == 0) if (func == 0)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__); cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
func(dst_cols, dst_rows, src_rows, src, dst, stream); func(dst_cols, dst_rows, src_rows, src, dst, stream);
} }
}}} }}}
...@@ -285,10 +285,10 @@ namespace beliefpropagation_gpu ...@@ -285,10 +285,10 @@ namespace beliefpropagation_gpu
} }
namespace cv { namespace gpu { namespace impl { namespace cv { namespace gpu { namespace impl {
typedef void (*LevelUpMessagesFunc)(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream); typedef void (*LevelUpMessagesFunc)(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream);
template<typename T> template<typename T>
void level_up_messages_(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream) void level_up_messages_(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -301,29 +301,29 @@ namespace cv { namespace gpu { namespace impl { ...@@ -301,29 +301,29 @@ namespace cv { namespace gpu { namespace impl {
beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mus[src_idx].ptr, mus[src_idx].step/sizeof(T), (T*)mus[dst_idx].ptr, mus[dst_idx].step/sizeof(T)); beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mus[src_idx].ptr, mus[src_idx].step/sizeof(T), (T*)mus[dst_idx].ptr, mus[dst_idx].step/sizeof(T));
beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mds[src_idx].ptr, mds[src_idx].step/sizeof(T), (T*)mds[dst_idx].ptr, mds[dst_idx].step/sizeof(T)); beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mds[src_idx].ptr, mds[src_idx].step/sizeof(T), (T*)mds[dst_idx].ptr, mds[dst_idx].step/sizeof(T));
beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mls[src_idx].ptr, mls[src_idx].step/sizeof(T), (T*)mls[dst_idx].ptr, mls[dst_idx].step/sizeof(T)); beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mls[src_idx].ptr, mls[src_idx].step/sizeof(T), (T*)mls[dst_idx].ptr, mls[dst_idx].step/sizeof(T));
beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mrs[src_idx].ptr, mrs[src_idx].step/sizeof(T), (T*)mrs[dst_idx].ptr, mrs[dst_idx].step/sizeof(T)); beliefpropagation_gpu::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mrs[src_idx].ptr, mrs[src_idx].step/sizeof(T), (T*)mrs[dst_idx].ptr, mrs[dst_idx].step/sizeof(T));
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void level_up_messages(int dst_idx, int dst_cols, int dst_rows, int src_rows, int msg_type, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream) void level_up_messages(int dst_idx, int dst_cols, int dst_rows, int src_rows, int msg_type, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream)
{ {
static LevelUpMessagesFunc tab[8] = static LevelUpMessagesFunc tab[8] =
{ {
0, // uchar 0, // uchar
0, // schar 0, // schar
0, // ushort 0, // ushort
level_up_messages_<short>, // short level_up_messages_<short>, // short
0, // int 0, // int
level_up_messages_<float>, // float level_up_messages_<float>, // float
0, // double 0, // double
0 // user type 0 // user type
}; };
LevelUpMessagesFunc func = tab[msg_type]; LevelUpMessagesFunc func = tab[msg_type];
if (func == 0) if (func == 0)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__); cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
func(dst_idx, dst_cols, dst_rows, src_rows, mus, mds, mls, mrs, stream); func(dst_idx, dst_cols, dst_rows, src_rows, mus, mds, mls, mrs, stream);
} }
}}} }}}
...@@ -430,10 +430,10 @@ namespace beliefpropagation_gpu ...@@ -430,10 +430,10 @@ namespace beliefpropagation_gpu
} }
namespace cv { namespace gpu { namespace impl { namespace cv { namespace gpu { namespace impl {
typedef void (*CalcAllIterationFunc)(int cols, int rows, int iters, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream); typedef void (*CalcAllIterationFunc)(int cols, int rows, int iters, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream);
template<typename T> template<typename T>
void calc_all_iterations_(int cols, int rows, int iters, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream) void calc_all_iterations_(int cols, int rows, int iters, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -443,30 +443,30 @@ namespace cv { namespace gpu { namespace impl { ...@@ -443,30 +443,30 @@ namespace cv { namespace gpu { namespace impl {
for(int t = 0; t < iters; ++t) for(int t = 0; t < iters; ++t)
{ {
beliefpropagation_gpu::one_iteration<T><<<grid, threads, 0, stream>>>(t, (T*)u.ptr, (T*)d.ptr, (T*)l.ptr, (T*)r.ptr, u.step/sizeof(T), (const T*)data.ptr, data.step/sizeof(T), cols, rows); beliefpropagation_gpu::one_iteration<T><<<grid, threads, 0, stream>>>(t, (T*)u.ptr, (T*)d.ptr, (T*)l.ptr, (T*)r.ptr, u.step/sizeof(T), (const T*)data.ptr, data.step/sizeof(T), cols, rows);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
} }
void calc_all_iterations(int cols, int rows, int iters, int msg_type, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream) void calc_all_iterations(int cols, int rows, int iters, int msg_type, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream)
{ {
static CalcAllIterationFunc tab[8] = static CalcAllIterationFunc tab[8] =
{ {
0, // uchar 0, // uchar
0, // schar 0, // schar
0, // ushort 0, // ushort
calc_all_iterations_<short>, // short calc_all_iterations_<short>, // short
0, // int 0, // int
calc_all_iterations_<float>, // float calc_all_iterations_<float>, // float
0, // double 0, // double
0 // user type 0 // user type
}; };
CalcAllIterationFunc func = tab[msg_type]; CalcAllIterationFunc func = tab[msg_type];
if (func == 0) if (func == 0)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__); cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
func(cols, rows, iters, u, d, l, r, data, stream); func(cols, rows, iters, u, d, l, r, data, stream);
} }
}}} }}}
...@@ -516,10 +516,10 @@ namespace beliefpropagation_gpu ...@@ -516,10 +516,10 @@ namespace beliefpropagation_gpu
} }
namespace cv { namespace gpu { namespace impl { namespace cv { namespace gpu { namespace impl {
typedef void (*OutputFunc)(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream); typedef void (*OutputFunc)(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream);
template<typename T> template<typename T>
void output_(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream) void output_(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -527,29 +527,29 @@ namespace cv { namespace gpu { namespace impl { ...@@ -527,29 +527,29 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(disp.cols, threads.x); grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y); grid.y = divUp(disp.rows, threads.y);
beliefpropagation_gpu::output<T><<<grid, threads, 0, stream>>>(disp.cols, disp.rows, (const T*)u.ptr, (const T*)d.ptr, (const T*)l.ptr, (const T*)r.ptr, (const T*)data.ptr, u.step/sizeof(T), (short*)disp.ptr, disp.step/sizeof(short)); beliefpropagation_gpu::output<T><<<grid, threads, 0, stream>>>(disp.cols, disp.rows, (const T*)u.ptr, (const T*)d.ptr, (const T*)l.ptr, (const T*)r.ptr, (const T*)data.ptr, u.step/sizeof(T), (short*)disp.ptr, disp.step/sizeof(short));
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void output(int msg_type, const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream) void output(int msg_type, const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream)
{ {
static OutputFunc tab[8] = static OutputFunc tab[8] =
{ {
0, // uchar 0, // uchar
0, // schar 0, // schar
0, // ushort 0, // ushort
output_<short>, // short output_<short>, // short
0, // int 0, // int
output_<float>, // float output_<float>, // float
0, // double 0, // double
0 // user type 0 // user type
}; };
OutputFunc func = tab[msg_type]; OutputFunc func = tab[msg_type];
if (func == 0) if (func == 0)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__); cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
func(u, d, l, r, data, disp, stream); func(u, d, l, r, data, disp, stream);
} }
}}} }}}
\ No newline at end of file
...@@ -46,7 +46,7 @@ using namespace cv::gpu; ...@@ -46,7 +46,7 @@ using namespace cv::gpu;
namespace imgproc namespace imgproc
{ {
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex; texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex1;
__global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height) __global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height)
{ {
...@@ -59,7 +59,78 @@ namespace imgproc ...@@ -59,7 +59,78 @@ namespace imgproc
float xcoo = mapx[idx]; float xcoo = mapx[idx];
float ycoo = mapy[idx]; float ycoo = mapy[idx];
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex, xcoo, ycoo)); out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex1, xcoo, ycoo));
}
}
texture< uchar4, 2, cudaReadModeElementType > tex_meanshift;
// Mean-shift filtering kernel: one thread per output pixel.
// The source image is read through tex_meanshift (uchar4 texels, bound by the
// host wrapper); the filtered color is written as a packed 3-byte triple into
// 'out'. sp = spatial window radius, sr = color window radius; maxIter/eps
// are the termination criteria of the iterative mean-shift procedure.
extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
{
    int x0 = blockIdx.x * blockDim.x + threadIdx.x;
    int y0 = blockIdx.y * blockDim.y + threadIdx.y;

    if( x0 < cols && y0 < rows )
    {
        int isr2 = sr*sr;
        uchar4 c = tex2D( tex_meanshift, x0, y0 );

        // iterate meanshift procedure
        for( int iter = 0; iter < maxIter; iter++ )
        {
            int count = 0;
            int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;

            // mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp);
            // out-of-range coordinates are handled by the texture unit's
            // addressing mode, so no explicit clamping is needed here
            int minx = x0-sp;
            int miny = y0-sp;
            int maxx = x0+sp;
            int maxy = y0+sp;

            for( int y = miny; y <= maxy; y++)
            {
                int rowCount = 0;
                for( int x = minx; x <= maxx; x++ )
                {
                    uchar4 t = tex2D( tex_meanshift, x, y );

                    int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
                    if( norm2 <= isr2 )
                    {
                        s0 += t.x; s1 += t.y; s2 += t.z;
                        sx += x; rowCount++;
                    }
                }
                count += rowCount;
                sy += y*rowCount;
            }

            if( count == 0 )
                break;

            // Fix: use single-precision arithmetic. The original
            // 'icount = 1./count' and bare 'floor' promoted to double,
            // which is slow/emulated on older GPUs and unnecessary here.
            float icount = 1.f/count;
            int x1 = (int)floorf(sx*icount);
            int y1 = (int)floorf(sy*icount);
            s0 = (int)floorf(s0*icount);
            s1 = (int)floorf(s1*icount);
            s2 = (int)floorf(s2*icount);

            int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);
            bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1-x0) + abs(y1-y0) + norm2 <= eps);

            x0 = x1; y0 = y1;
            c.x = s0; c.y = s1; c.z = s2;

            if( stopFlag )
                break;
        }

        // x0/y0 were mutated by the iteration above, so recompute this
        // thread's output coordinates from the block/thread indices
        int base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step + (blockIdx.x * blockDim.x + threadIdx.x) * 3 * sizeof(uchar);
        out[base+0] = c.x;
        out[base+1] = c.y;
        out[base+2] = c.z;
    }
}
} }
...@@ -75,14 +146,31 @@ namespace cv { namespace gpu { namespace impl ...@@ -75,14 +146,31 @@ namespace cv { namespace gpu { namespace impl
grid.x = divUp(dst.cols, block.x); grid.x = divUp(dst.cols, block.x);
grid.y = divUp(dst.rows, block.y); grid.y = divUp(dst.rows, block.y);
tex.filterMode = cudaFilterModeLinear; tex1.filterMode = cudaFilterModeLinear;
tex.addressMode[0] = tex.addressMode[1] = cudaAddressModeWrap; tex1.addressMode[0] = tex1.addressMode[1] = cudaAddressModeWrap;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>(); cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D(0, tex, src.ptr, desc, dst.cols, dst.rows, src.step) ); cudaSafeCall( cudaBindTexture2D(0, tex1, src.ptr, desc, dst.cols, dst.rows, src.step) );
kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows); kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture(tex) ); cudaSafeCall( cudaUnbindTexture(tex1) );
} }
}}}
// Host wrapper: binds the source image to tex_meanshift, launches
// meanshift_kernel over the whole image, then unbinds the texture.
// The host-side caller guarantees src is 8-bit 4-channel and dst is an
// 8-bit 3-channel image of the same size.
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps)
{
    dim3 threads(32, 16, 1);
    dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

    cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
    // NOTE(review): width is passed as src.cols * 4 (bytes of a uchar4 row) —
    // confirm this matches the width units cudaBindTexture2D expects here
    cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.ptr, desc, src.cols * 4, src.rows, src.step ) );

    meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
    // Fix: check for launch-configuration errors, which are not reported by
    // the synchronize call on all driver versions.
    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaThreadSynchronize() );

    cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
}
}}}
...@@ -48,6 +48,7 @@ using namespace cv::gpu; ...@@ -48,6 +48,7 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA) #if !defined (HAVE_CUDA)
void cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); } void cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::meanShiftFiltering_GPU(const GpuMat&, GpuMat&, float, float, TermCriteria ) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
...@@ -56,6 +57,8 @@ namespace cv { namespace gpu ...@@ -56,6 +57,8 @@ namespace cv { namespace gpu
namespace impl namespace impl
{ {
extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst); extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst);
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps);
} }
}} }}
...@@ -70,4 +73,30 @@ void cv::gpu::remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, G ...@@ -70,4 +73,30 @@ void cv::gpu::remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, G
impl::remap_gpu(src, xmap, ymap, dst); impl::remap_gpu(src, xmap, ymap, dst);
} }
// Performs mean-shift filtering on the GPU.
// src      - input image, must be 8-bit 4-channel (CV_8UC4).
// dst      - output image, created as CV_8UC3 of the same size.
// sp, sr   - spatial and color window radii.
// criteria - termination criteria; maxCount is clamped to [1, 100],
//            missing fields fall back to defaults (5 iterations, eps = 1).
void cv::gpu::meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, float sp, float sr, TermCriteria criteria)
{
    if( src.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );

    if( src.depth() != CV_8U || src.channels() != 4 )
        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );

    dst.create( src.size(), CV_8UC3 );

    if( !(criteria.type & TermCriteria::MAX_ITER) )
        criteria.maxCount = 5;

    int maxIter = std::min(std::max(criteria.maxCount, 1), 100);

    // Fix: the default eps must only be used when the EPS flag is absent.
    // The original assigned 'eps = 1.f' and then unconditionally overwrote
    // it with criteria.epsilon, making the default dead code.
    float eps;
    if( !(criteria.type & TermCriteria::EPS) )
        eps = 1.f;
    else
        eps = (float)std::max(criteria.epsilon, 0.0);

    impl::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
}
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
...@@ -54,6 +54,7 @@ CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); } ...@@ -54,6 +54,7 @@ CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); }
CV_EXPORTS int cv::gpu::getDevice() { throw_nogpu(); return 0; } CV_EXPORTS int cv::gpu::getDevice() { throw_nogpu(); return 0; }
CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { throw_nogpu(); } CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { throw_nogpu(); }
CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0; } CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0; }
CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t* /*free*/, size_t* /*total*/) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
...@@ -99,5 +100,11 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int device) ...@@ -99,5 +100,11 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)
return prop.multiProcessorCount; return prop.multiProcessorCount;
} }
// Queries the amount of free and total memory (in bytes) on the CUDA device
// currently selected for the calling host thread, via cudaMemGetInfo.
CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t *free, size_t* total)
{
    cudaSafeCall( cudaMemGetInfo( free, total ) );
}
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment