Commit b29cde55 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #5325 from jet47:gpu-stereobp-fix

parents cba401fe 3ef067cc
...@@ -255,7 +255,7 @@ namespace cv { namespace gpu { namespace device ...@@ -255,7 +255,7 @@ namespace cv { namespace gpu { namespace device
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
template <typename T> template <typename T>
__global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst) __global__ void data_step_down(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
...@@ -264,10 +264,10 @@ namespace cv { namespace gpu { namespace device ...@@ -264,10 +264,10 @@ namespace cv { namespace gpu { namespace device
{ {
for (int d = 0; d < cndisp; ++d) for (int d = 0; d < cndisp; ++d)
{ {
float dst_reg = src.ptr(d * src_rows + (2*y+0))[(2*x+0)]; float dst_reg = src.ptr(d * src_rows + ::min(2*y+0, src_rows-1))[::min(2*x+0, src_cols-1)];
dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+0)]; dst_reg += src.ptr(d * src_rows + ::min(2*y+1, src_rows-1))[::min(2*x+0, src_cols-1)];
dst_reg += src.ptr(d * src_rows + (2*y+0))[(2*x+1)]; dst_reg += src.ptr(d * src_rows + ::min(2*y+0, src_rows-1))[::min(2*x+1, src_cols-1)];
dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+1)]; dst_reg += src.ptr(d * src_rows + ::min(2*y+1, src_rows-1))[::min(2*x+1, src_cols-1)];
dst.ptr(d * dst_rows + y)[x] = saturate_cast<T>(dst_reg); dst.ptr(d * dst_rows + y)[x] = saturate_cast<T>(dst_reg);
} }
...@@ -275,7 +275,7 @@ namespace cv { namespace gpu { namespace device ...@@ -275,7 +275,7 @@ namespace cv { namespace gpu { namespace device
} }
template<typename T> template<typename T>
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -283,15 +283,15 @@ namespace cv { namespace gpu { namespace device ...@@ -283,15 +283,15 @@ namespace cv { namespace gpu { namespace device
grid.x = divUp(dst_cols, threads.x); grid.x = divUp(dst_cols, threads.x);
grid.y = divUp(dst_rows, threads.y); grid.y = divUp(dst_rows, threads.y);
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst); data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_cols, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
/////////////////// level up messages //////////////////////// /////////////////// level up messages ////////////////////////
......
...@@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device ...@@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device
template<typename T, typename D> template<typename T, typename D>
void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream); void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
template<typename T> template<typename T>
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
template <typename T> template <typename T>
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream); void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
template <typename T> template <typename T>
...@@ -158,7 +158,7 @@ namespace ...@@ -158,7 +158,7 @@ namespace
init(stream); init(stream);
datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type); createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, datas[0]);
comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream)); comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream));
...@@ -187,10 +187,10 @@ namespace ...@@ -187,10 +187,10 @@ namespace
private: private:
void init(Stream& stream) void init(Stream& stream)
{ {
u.create(rows * rthis.ndisp, cols, rthis.msg_type); createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, u);
d.create(rows * rthis.ndisp, cols, rthis.msg_type); createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, d);
l.create(rows * rthis.ndisp, cols, rthis.msg_type); createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, l);
r.create(rows * rthis.ndisp, cols, rthis.msg_type); createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, r);
if (rthis.levels & 1) if (rthis.levels & 1)
{ {
...@@ -213,13 +213,13 @@ namespace ...@@ -213,13 +213,13 @@ namespace
if (rthis.levels > 1) if (rthis.levels > 1)
{ {
int less_rows = rows / 2; int less_rows = (rows + 1) / 2;
int less_cols = cols / 2; int less_cols = (cols + 1) / 2;
u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, u2);
d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, d2);
l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, l2);
r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, r2);
if ((rthis.levels & 1) == 0) if ((rthis.levels & 1) == 0)
{ {
...@@ -253,7 +253,7 @@ namespace ...@@ -253,7 +253,7 @@ namespace
void calcBP(GpuMat& disp, Stream& stream) void calcBP(GpuMat& disp, Stream& stream)
{ {
typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
static const data_step_down_t data_step_down_callers[2] = static const data_step_down_t data_step_down_callers[2] =
{ {
data_step_down_gpu<short>, data_step_down_gpu<float> data_step_down_gpu<short>, data_step_down_gpu<float>
...@@ -283,12 +283,12 @@ namespace ...@@ -283,12 +283,12 @@ namespace
for (int i = 1; i < rthis.levels; ++i) for (int i = 1; i < rthis.levels; ++i)
{ {
cols_all[i] = cols_all[i-1] / 2; cols_all[i] = (cols_all[i-1] + 1) / 2;
rows_all[i] = rows_all[i-1] / 2; rows_all[i] = (rows_all[i-1] + 1) / 2;
datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type); createContinuous(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type, datas[i]);
data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream); data_step_down_callers[funcIdx](cols_all[i], rows_all[i], cols_all[i-1], rows_all[i-1], datas[i-1], datas[i], cudaStream);
} }
PtrStepSzb mus[] = {u, u2}; PtrStepSzb mus[] = {u, u2};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment