Commit 34567877 authored Jan 04, 2016 by Alexander Alekhin
Merge pull request #5771 from dtmoodie:pyrlk
parents 430e2f20 66738d74

Showing 7 changed files with 802 additions and 191 deletions (+802 -191)
modules/cudaoptflow/perf/perf_optflow.cpp   +12  -3
modules/cudaoptflow/src/cuda/pyrlk.cu       +644 -91
modules/cudaoptflow/src/precomp.hpp         +1   -1
modules/cudaoptflow/src/pyrlk.cpp           +102 -72
modules/cudaoptflow/test/test_optflow.cpp   +35  -18
modules/cudawarping/src/cuda/pyr_down.cu    +5   -4
modules/cudawarping/src/pyramids.cpp        +3   -2
modules/cudaoptflow/perf/perf_optflow.cpp
@@ -116,10 +116,10 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, PyrLKOpticalFlowSparse,
     const int levels = GET_PARAM(4);
     const int iters = GET_PARAM(5);

-    const cv::Mat frame0 = readImage(imagePair.first, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    cv::Mat frame0 = readImage(imagePair.first, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame0.empty());

-    const cv::Mat frame1 = readImage(imagePair.second, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    cv::Mat frame1 = readImage(imagePair.second, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame1.empty());

     cv::Mat gray_frame;

@@ -131,6 +131,14 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, PyrLKOpticalFlowSparse,
     cv::Mat pts;
     cv::goodFeaturesToTrack(gray_frame, pts, points, 0.01, 0.0);

+    frame0.convertTo(frame0, CV_32F);
+    frame1.convertTo(frame1, CV_32F);
+    if (!useGray)
+    {
+        cv::cvtColor(frame0, frame0, cv::COLOR_BGR2BGRA);
+        cv::cvtColor(frame1, frame1, cv::COLOR_BGR2BGRA);
+    }
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_pts(pts.reshape(2, 1));

@@ -318,4 +326,4 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
     CPU_SANITY_CHECK(flow);
 }
-}
+}
\ No newline at end of file
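The conversions added to this perf test exercise the new multi-type GPU path. Outside the perf harness, the same input preparation looks roughly like this (a sketch only, assuming an OpenCV CUDA build; the file names are placeholders):

    cv::Mat f0 = cv::imread("frame0.png"), f1 = cv::imread("frame1.png");
    cv::cvtColor(f0, f0, cv::COLOR_BGR2BGRA);   // the GPU kernels take 1-, 3- or 4-channel input
    cv::cvtColor(f1, f1, cv::COLOR_BGR2BGRA);
    f0.convertTo(f0, CV_32F);                   // float is one of the supported depths
    f1.convertTo(f1, CV_32F);
    cv::cuda::GpuMat d0(f0), d1(f1);            // upload, then pass to SparsePyrLKOpticalFlow::calc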
modules/cudaoptflow/src/cuda/pyrlk.cu
@@ -48,6 +48,8 @@
 #include "opencv2/core/cuda/limits.hpp"
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
+#include "opencv2/core/cuda/filters.hpp"
+#include "opencv2/core/cuda/border_interpolate.hpp"
using namespace cv::cuda;
using namespace cv::cuda::device;
@@ -60,53 +62,240 @@ namespace pyrlk
__constant__ int c_halfWin_y;
__constant__ int c_iters;
texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_I8U(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<uchar4, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_I8UC4(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<ushort4, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_I16UC4(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_If(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_If4(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_Ib(false, cudaFilterModePoint, cudaAddressModeClamp);
texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_J8U(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<uchar4, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_J8UC4(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<ushort4, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_J16UC4(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_Jf(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_Jf4(false, cudaFilterModeLinear, cudaAddressModeClamp);
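The dozen file-scope texture references above exist because legacy CUDA texture references cannot be templated, passed as arguments, or made class members, so the code declares one per supported element type and lets the Tex_I/Tex_J trait structs below pick the right one at compile time. A minimal sketch of the idiom (hypothetical names, not part of the commit):

    texture<float, cudaTextureType2D, cudaReadModeElementType> tex_f;   // must be file-scope

    template <typename T> struct TexTraits;           // primary template: no texture available
    template <> struct TexTraits<float>
    {
        // each specialization hard-wires the one global texture for its type
        static __device__ float read(float x, float y) { return tex2D(tex_f, x, y); }
    };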
-    template <int cn> struct Tex_I;
-    template <> struct Tex_I<1>
+    template <int cn, typename T> struct Tex_I
{
static __host__ __forceinline__ void bindTexture_(PtrStepSz<typename TypeVec<T, cn>::vec_type> I)
{
(void)I;
}
};
template <> struct Tex_I<1, uchar>
{
static __device__ __forceinline__ float read(float x, float y)
{
return tex2D(tex_I8U, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<uchar>& I)
{
bindTexture(&tex_I8U, I);
}
};
template <> struct Tex_I<1, ushort>
{
static __device__ __forceinline__ float read(float x, float y)
{
return 0.0;
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<ushort>& I)
{
(void)I;
}
};
template <> struct Tex_I<1, int>
{
static __device__ __forceinline__ float read(float x, float y)
{
return 0.0;
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<int>& I)
{
(void)I;
}
};
template <> struct Tex_I<1, float>
{
static __device__ __forceinline__ float read(float x, float y)
{
return tex2D(tex_If, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<float>& I)
{
bindTexture(&tex_If, I);
}
};
-    template <> struct Tex_I<4>
+    // ****************** 3 channel specializations ************************
+    template <> struct Tex_I<3, uchar>
{
static __device__ __forceinline__ float3 read(float x, float y)
{
return make_float3(0,0,0);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<uchar3> I)
{
(void)I;
}
};
template <> struct Tex_I<3, ushort>
{
static __device__ __forceinline__ float3 read(float x, float y)
{
return make_float3(0, 0, 0);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<ushort3> I)
{
(void)I;
}
};
template <> struct Tex_I<3, int>
{
static __device__ __forceinline__ float3 read(float x, float y)
{
return make_float3(0, 0, 0);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<int3> I)
{
(void)I;
}
};
template <> struct Tex_I<3, float>
{
static __device__ __forceinline__ float3 read(float x, float y)
{
return make_float3(0, 0, 0);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<float3> I)
{
(void)I;
}
};
// ****************** 4 channel specializations ************************
template <> struct Tex_I<4, uchar>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_I8UC4, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<uchar4>& I)
{
bindTexture(&tex_I8UC4, I);
}
};
template <> struct Tex_I<4, ushort>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_I16UC4, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<ushort4>& I)
{
bindTexture(&tex_I16UC4, I);
}
};
template <> struct Tex_I<4, float>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_If4, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<float4>& I)
{
bindTexture(&tex_If4, I);
}
};
-    template <int cn> struct Tex_J;
-    template <> struct Tex_J<1>
+    // ************* J ***************
+    template <int cn, typename T> struct Tex_J
{
static __host__ __forceinline__ void bindTexture_(PtrStepSz<typename TypeVec<T,cn>::vec_type>& J)
{
(void)J;
}
};
template <> struct Tex_J<1, uchar>
{
static __device__ __forceinline__ float read(float x, float y)
{
return tex2D(tex_J8U, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<uchar>& J)
{
bindTexture(&tex_J8U, J);
}
};
template <> struct Tex_J<1, float>
{
static __device__ __forceinline__ float read(float x, float y)
{
return tex2D(tex_Jf, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<float>& J)
{
bindTexture(&tex_Jf, J);
}
};
-    template <> struct Tex_J<4>
+    // ************* 4 channel specializations ***************
+    template <> struct Tex_J<4, uchar>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_J8UC4, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<uchar4>& J)
{
bindTexture(&tex_J8UC4, J);
}
};
template <> struct Tex_J<4, ushort>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_J16UC4, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<ushort4>& J)
{
bindTexture(&tex_J16UC4, J);
}
};
template <> struct Tex_J<4, float>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_Jf4, x, y);
}
static __host__ __forceinline__ void bindTexture_(PtrStepSz<float4>& J)
{
bindTexture(&tex_Jf4, J);
}
};
-    __device__ __forceinline__ void accum(float& dst, float val)
+    __device__ __forceinline__ void accum(float& dst, const float& val)
{
dst += val;
}
-    __device__ __forceinline__ void accum(float& dst, const float4& val)
+    __device__ __forceinline__ void accum(float& dst, const float2& val)
{
dst += val.x + val.y;
}
__device__ __forceinline__ void accum(float& dst, const float3& val)
{
dst += val.x + val.y + val.z;
}
__device__ __forceinline__ void accum(float& dst, const float4& val)
{
dst += val.x + val.y + val.z + val.w;
}
__device__ __forceinline__ float abs_(float a)
{
@@ -116,8 +305,46 @@ namespace pyrlk
{
return abs(a);
}
__device__ __forceinline__ float2 abs_(const float2& a)
{
return abs(a);
}
__device__ __forceinline__ float3 abs_(const float3& a)
{
return abs(a);
}
-    template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
template<typename T> __device__ __forceinline__ typename TypeVec<float, 1>::vec_type ToFloat(const typename TypeVec<T, 1>::vec_type& other)
{
return other;
}
template<typename T> __device__ __forceinline__ typename TypeVec<float, 2>::vec_type ToFloat(const typename TypeVec<T, 2>::vec_type& other)
{
typename TypeVec<float, 2>::vec_type ret;
ret.x = other.x;
ret.y = other.y;
return ret;
}
template<typename T> __device__ __forceinline__ typename TypeVec<float, 3>::vec_type ToFloat(const typename TypeVec<T, 3>::vec_type& other)
{
typename TypeVec<float, 3>::vec_type ret;
ret.x = other.x;
ret.y = other.y;
ret.z = other.z;
return ret;
}
template<typename T> __device__ __forceinline__ typename TypeVec<float, 4>::vec_type ToFloat(const typename TypeVec<T, 4>::vec_type& other)
{
typename TypeVec<float, 4>::vec_type ret;
ret.x = other.x;
ret.y = other.y;
ret.z = other.z;
ret.w = other.w;
return ret;
}
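Together, the ToFloat helpers and the accum overloads above are what let a single kernel body serve every channel count: ToFloat widens whatever the reader returns to a float vector, and accum folds a per-channel product back into the scalar A11/A12/A22 and b1/b2 accumulators. Illustratively, for three channels (element-wise operator* comes from vec_math.hpp; values are made up):

    float A11 = 0.f;
    float3 dIdx = make_float3(1.f, 2.f, 3.f);
    accum(A11, dIdx * dIdx);   // A11 += 1 + 4 + 9: channels summed into one scalar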
+    template <int cn, int PATCH_X, int PATCH_Y, bool calcErr, typename T>
__global__ void sparseKernel(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
{
#if __CUDA_ARCH__ <= 110
@@ -166,15 +393,15 @@ namespace pyrlk
float x = prevPt.x + xBase + 0.5f;
float y = prevPt.y + yBase + 0.5f;
-                I_patch[i][j] = Tex_I<cn>::read(x, y);
+                I_patch[i][j] = Tex_I<cn, T>::read(x, y);

                 // Scharr deriv
-                work_type dIdx = 3.0f * Tex_I<cn>::read(x+1, y-1) + 10.0f * Tex_I<cn>::read(x+1, y) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
-                                 (3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x-1, y) + 3.0f * Tex_I<cn>::read(x-1, y+1));
+                work_type dIdx = 3.0f * Tex_I<cn, T>::read(x+1, y-1) + 10.0f * Tex_I<cn, T>::read(x+1, y) + 3.0f * Tex_I<cn, T>::read(x+1, y+1) -
+                                 (3.0f * Tex_I<cn, T>::read(x-1, y-1) + 10.0f * Tex_I<cn, T>::read(x-1, y) + 3.0f * Tex_I<cn, T>::read(x-1, y+1));

-                work_type dIdy = 3.0f * Tex_I<cn>::read(x-1, y+1) + 10.0f * Tex_I<cn>::read(x, y+1) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
-                                 (3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x, y-1) + 3.0f * Tex_I<cn>::read(x+1, y-1));
+                work_type dIdy = 3.0f * Tex_I<cn, T>::read(x-1, y+1) + 10.0f * Tex_I<cn, T>::read(x, y+1) + 3.0f * Tex_I<cn, T>::read(x+1, y+1) -
+                                 (3.0f * Tex_I<cn, T>::read(x-1, y-1) + 10.0f * Tex_I<cn, T>::read(x, y-1) + 3.0f * Tex_I<cn, T>::read(x+1, y-1));
dIdx_patch[i][j] = dIdx;
dIdy_patch[i][j] = dIdy;
@@ -243,7 +470,7 @@ namespace pyrlk
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
{
work_type I_val = I_patch[i][j];
-                work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
+                work_type J_val = Tex_J<cn, T>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
work_type diff = (J_val - I_val) * 32.0f;
@@ -286,7 +513,7 @@ namespace pyrlk
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
{
work_type I_val = I_patch[i][j];
-                work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
+                work_type J_val = Tex_J<cn, T>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
work_type diff = J_val - I_val;
@@ -309,22 +536,352 @@ namespace pyrlk
}
}
-    template <int cn, int PATCH_X, int PATCH_Y>
-    void sparse_caller(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                       int level, dim3 block, cudaStream_t stream)
-    {
-        dim3 grid(ptcount);
-
-        if (level == 0 && err)
-            sparseKernel<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
-        else
-            sparseKernel<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
-
-        cudaSafeCall( cudaGetLastError() );
+    // Kernel, uses non texture fetches
+    template <int PATCH_X, int PATCH_Y, bool calcErr, int cn, typename T, typename Ptr2D>
+    __global__ void sparseKernel_(Ptr2D I, Ptr2D J, const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
+    {
+#if __CUDA_ARCH__ <= 110
+        const int BLOCK_SIZE = 128;
+#else
+        const int BLOCK_SIZE = 256;
+#endif
+        __shared__ float smem1[BLOCK_SIZE];
+        __shared__ float smem2[BLOCK_SIZE];
+        __shared__ float smem3[BLOCK_SIZE];
const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;
float2 prevPt = prevPts[blockIdx.x];
prevPt.x *= (1.0f / (1 << level));
prevPt.y *= (1.0f / (1 << level));
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
if (tid == 0 && level == 0)
status[blockIdx.x] = 0;
return;
}
prevPt.x -= c_halfWin_x;
prevPt.y -= c_halfWin_y;
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A12 = 0;
float A22 = 0;
typedef typename TypeVec<float, cn>::vec_type work_type;
work_type I_patch[PATCH_Y][PATCH_X];
work_type dIdx_patch[PATCH_Y][PATCH_X];
work_type dIdy_patch[PATCH_Y][PATCH_X];
for (int yBase = threadIdx.y, i = 0; yBase < c_winSize_y; yBase += blockDim.y, ++i)
{
for (int xBase = threadIdx.x, j = 0; xBase < c_winSize_x; xBase += blockDim.x, ++j)
{
float x = prevPt.x + xBase + 0.5f;
float y = prevPt.y + yBase + 0.5f;
I_patch[i][j] = ToFloat<T>(I(y, x));
                // Scharr deriv
work_type dIdx = 3.0f * I(y - 1, x + 1) + 10.0f * I(y, x + 1) + 3.0f * I(y + 1, x + 1) -
(3.0f * I(y - 1, x - 1) + 10.0f * I(y, x - 1) + 3.0f * I(y + 1 , x - 1));
work_type dIdy = 3.0f * I(y + 1, x - 1) + 10.0f * I(y + 1, x) + 3.0f * I(y+1, x + 1) -
(3.0f * I(y - 1, x - 1) + 10.0f * I(y-1, x) + 3.0f * I(y - 1, x + 1));
dIdx_patch[i][j] = dIdx;
dIdy_patch[i][j] = dIdy;
accum(A11, dIdx * dIdx);
accum(A12, dIdx * dIdy);
accum(A22, dIdy * dIdy);
}
}
reduce<BLOCK_SIZE>(smem_tuple(smem1, smem2, smem3), thrust::tie(A11, A12, A22), tid, thrust::make_tuple(plus<float>(), plus<float>(), plus<float>()));
#if __CUDA_ARCH__ >= 300
if (tid == 0)
{
smem1[0] = A11;
smem2[0] = A12;
smem3[0] = A22;
}
#endif
__syncthreads();
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
float D = A11 * A22 - A12 * A12;
if (D < numeric_limits<float>::epsilon())
{
if (tid == 0 && level == 0)
status[blockIdx.x] = 0;
return;
}
D = 1.f / D;
A11 *= D;
A12 *= D;
A22 *= D;
float2 nextPt = nextPts[blockIdx.x];
nextPt.x *= 2.f;
nextPt.y *= 2.f;
nextPt.x -= c_halfWin_x;
nextPt.y -= c_halfWin_y;
for (int k = 0; k < c_iters; ++k)
{
if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows)
{
if (tid == 0 && level == 0)
status[blockIdx.x] = 0;
return;
}
float b1 = 0;
float b2 = 0;
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
{
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
{
work_type I_val = I_patch[i][j];
work_type J_val = ToFloat<T>(J(nextPt.y + y + 0.5f, nextPt.x + x + 0.5f));
work_type diff = (J_val - I_val) * 32.0f;
accum(b1, diff * dIdx_patch[i][j]);
accum(b2, diff * dIdy_patch[i][j]);
}
}
reduce<BLOCK_SIZE>(smem_tuple(smem1, smem2), thrust::tie(b1, b2), tid, thrust::make_tuple(plus<float>(), plus<float>()));
#if __CUDA_ARCH__ >= 300
if (tid == 0)
{
smem1[0] = b1;
smem2[0] = b2;
}
#endif
__syncthreads();
b1 = smem1[0];
b2 = smem2[0];
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.y = A12 * b1 - A11 * b2;
nextPt.x += delta.x;
nextPt.y += delta.y;
if (::fabs(delta.x) < 0.01f && ::fabs(delta.y) < 0.01f)
break;
}
float errval = 0;
if (calcErr)
{
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
{
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
{
work_type I_val = I_patch[i][j];
work_type J_val = ToFloat<T>(J(nextPt.y + y + 0.5f, nextPt.x + x + 0.5f));
work_type diff = J_val - I_val;
accum(errval, abs_(diff));
}
}
reduce<BLOCK_SIZE>(smem1, errval, tid, plus<float>());
}
if (tid == 0)
{
nextPt.x += c_halfWin_x;
nextPt.y += c_halfWin_y;
nextPts[blockIdx.x] = nextPt;
if (calcErr)
err[blockIdx.x] = static_cast<float>(errval) / (3 * c_winSize_x * c_winSize_y);
}
} // __global__ void sparseKernel_
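For reference, each iteration of both sparse kernels solves the standard pyramidal Lucas-Kanade normal equations over the patch; in the notation of the accumulators above (up to the fixed-point scale factor 32.0f applied to diff):

    G = \begin{pmatrix} A_{11} & A_{12} \\ A_{12} & A_{22} \end{pmatrix}, \qquad
    b = \sum_{p \in patch} \big(J(p + d) - I(p)\big)\, \nabla I(p), \qquad
    \delta = -G^{-1} b

Since the code pre-multiplies A11, A12 and A22 by 1/det(G), this is exactly delta.x = A12*b2 - A22*b1 and delta.y = A12*b1 - A11*b2 in the iteration loop; the point is declared lost when det(G) falls below machine epsilon or the window leaves the image.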
template <int cn, int PATCH_X, int PATCH_Y, typename T> class sparse_caller
{
public:
static void call(PtrStepSz<typename TypeVec<T, cn>::vec_type> I, PtrStepSz<typename TypeVec<T, cn>::vec_type> J, int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream)
{
dim3 grid(ptcount);
(void)I;
(void)J;
if (level == 0 && err)
sparseKernel<cn, PATCH_X, PATCH_Y, true, T> <<<grid, block, 0, stream >>>(prevPts, nextPts, status, err, level, rows, cols);
else
sparseKernel<cn, PATCH_X, PATCH_Y, false, T> <<<grid, block, 0, stream >>>(prevPts, nextPts, status, err, level, rows, cols);
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
};
// Specialization to use non texture path because for some reason the texture path keeps failing accuracy tests
template<int PATCH_X, int PATCH_Y> class sparse_caller<1, PATCH_X, PATCH_Y, unsigned short>
{
public:
typedef typename TypeVec<unsigned short, 1>::vec_type work_type;
typedef PtrStepSz<work_type> Ptr2D;
typedef BrdConstant<work_type> BrdType;
typedef BorderReader<Ptr2D, BrdType> Reader;
typedef LinearFilter<Reader> Filter;
static void call(Ptr2D I, Ptr2D J, int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream)
{
dim3 grid(ptcount);
if (level == 0 && err)
{
sparseKernel_<PATCH_X, PATCH_Y, true, 1, unsigned short> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
else
{
sparseKernel_<PATCH_X, PATCH_Y, false, 1, unsigned short> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
};
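The ushort specialization above swaps tex2D for a software read path built from the headers added at the top of the file: BorderReader handles out-of-bounds coordinates through the BrdConstant policy (returning a constant, zero by default), and LinearFilter layers bilinear interpolation on top, so the kernel can call the functor with fractional coordinates exactly as it would a linearly-filtered texture. A sketch of the composition for a float image (illustrative only):

    typedef PtrStepSz<float> Ptr2D;
    typedef BrdConstant<float> BrdType;
    typedef BorderReader<Ptr2D, BrdType> Reader;
    typedef LinearFilter<Reader> Filter;

    // host side: wrap the device image and pass the functor to a kernel by value
    // Filter src(Reader(img, BrdType(img.rows, img.cols)));
    // myKernel<<<grid, block>>>(src, ...);   then inside: float v = src(y, x);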
// Specialization for int because the texture path keeps failing
template<int PATCH_X, int PATCH_Y> class sparse_caller<1, PATCH_X, PATCH_Y, int>
{
public:
typedef typename TypeVec<int, 1>::vec_type work_type;
typedef PtrStepSz<work_type> Ptr2D;
typedef BrdConstant<work_type> BrdType;
typedef BorderReader<Ptr2D, BrdType> Reader;
typedef LinearFilter<Reader> Filter;
static void call(Ptr2D I, Ptr2D J, int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream)
{
dim3 grid(ptcount);
if (level == 0 && err)
{
sparseKernel_<PATCH_X, PATCH_Y, true, 1, int> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
else
{
sparseKernel_<PATCH_X, PATCH_Y, false, 1, int> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
};
template<int PATCH_X, int PATCH_Y> class sparse_caller<4, PATCH_X, PATCH_Y, int>
{
public:
typedef typename TypeVec<int, 4>::vec_type work_type;
typedef PtrStepSz<work_type> Ptr2D;
typedef BrdConstant<work_type> BrdType;
typedef BorderReader<Ptr2D, BrdType> Reader;
typedef LinearFilter<Reader> Filter;
static void call(Ptr2D I, Ptr2D J, int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream)
{
dim3 grid(ptcount);
if (level == 0 && err)
{
sparseKernel_<PATCH_X, PATCH_Y, true, 4, int> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
else
{
sparseKernel_<PATCH_X, PATCH_Y, false, 4, int> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
};
using namespace cv::cuda::device;
template <int PATCH_X, int PATCH_Y, typename T> class sparse_caller<3, PATCH_X, PATCH_Y, T>
{
public:
typedef typename TypeVec<T, 3>::vec_type work_type;
typedef PtrStepSz<work_type> Ptr2D;
typedef BrdConstant<work_type> BrdType;
typedef BorderReader<Ptr2D, BrdType> Reader;
typedef LinearFilter<Reader> Filter;
static void call(Ptr2D I, Ptr2D J, int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream)
{
dim3 grid(ptcount);
if (level == 0 && err)
{
sparseKernel_<PATCH_X, PATCH_Y, true, 3, T> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
else
{
sparseKernel_<PATCH_X, PATCH_Y, false, 3, T> <<<grid, block, 0, stream >>>(
Filter(Reader(I, BrdType(rows, cols))),
Filter(Reader(J, BrdType(rows, cols))),
prevPts, nextPts, status, err, level, rows, cols);
}
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
};
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
template <bool calcErr>
__global__ void denseKernel(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
@@ -484,77 +1041,72 @@ namespace pyrlk
         cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
     }

-    void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                 int level, dim3 block, dim3 patch, cudaStream_t stream)
-    {
-        typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                               int level, dim3 block, cudaStream_t stream);
-
-        static const func_t funcs[5][5] =
-        {
-            {sparse_caller<1, 1, 1>, sparse_caller<1, 2, 1>, sparse_caller<1, 3, 1>, sparse_caller<1, 4, 1>, sparse_caller<1, 5, 1>},
-            {sparse_caller<1, 1, 2>, sparse_caller<1, 2, 2>, sparse_caller<1, 3, 2>, sparse_caller<1, 4, 2>, sparse_caller<1, 5, 2>},
-            {sparse_caller<1, 1, 3>, sparse_caller<1, 2, 3>, sparse_caller<1, 3, 3>, sparse_caller<1, 4, 3>, sparse_caller<1, 5, 3>},
-            {sparse_caller<1, 1, 4>, sparse_caller<1, 2, 4>, sparse_caller<1, 3, 4>, sparse_caller<1, 4, 4>, sparse_caller<1, 5, 4>},
-            {sparse_caller<1, 1, 5>, sparse_caller<1, 2, 5>, sparse_caller<1, 3, 5>, sparse_caller<1, 4, 5>, sparse_caller<1, 5, 5>}
-        };
-
-        bindTexture(&tex_If, I);
-        bindTexture(&tex_Jf, J);
-
-        funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount, level, block, stream);
-    }
-
-    void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                 int level, dim3 block, dim3 patch, cudaStream_t stream)
-    {
-        typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                               int level, dim3 block, cudaStream_t stream);
-
-        static const func_t funcs[5][5] =
-        {
-            {sparse_caller<4, 1, 1>, sparse_caller<4, 2, 1>, sparse_caller<4, 3, 1>, sparse_caller<4, 4, 1>, sparse_caller<4, 5, 1>},
-            {sparse_caller<4, 1, 2>, sparse_caller<4, 2, 2>, sparse_caller<4, 3, 2>, sparse_caller<4, 4, 2>, sparse_caller<4, 5, 2>},
-            {sparse_caller<4, 1, 3>, sparse_caller<4, 2, 3>, sparse_caller<4, 3, 3>, sparse_caller<4, 4, 3>, sparse_caller<4, 5, 3>},
-            {sparse_caller<4, 1, 4>, sparse_caller<4, 2, 4>, sparse_caller<4, 3, 4>, sparse_caller<4, 4, 4>, sparse_caller<4, 5, 4>},
-            {sparse_caller<4, 1, 5>, sparse_caller<4, 2, 5>, sparse_caller<4, 3, 5>, sparse_caller<4, 4, 5>, sparse_caller<4, 5, 5>}
-        };
-
-        bindTexture(&tex_If4, I);
-        bindTexture(&tex_Jf4, J);
-
-        funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount, level, block, stream);
-    }
+    template <typename T, int cn> struct pyrLK_caller
+    {
+        static void sparse(PtrStepSz<typename TypeVec<T, cn>::vec_type> I, PtrStepSz<typename TypeVec<T, cn>::vec_type> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                           int level, dim3 block, dim3 patch, cudaStream_t stream)
+        {
+            typedef void (*func_t)(PtrStepSz<typename TypeVec<T, cn>::vec_type> I, PtrStepSz<typename TypeVec<T, cn>::vec_type> J,
+                                   int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                                   int level, dim3 block, cudaStream_t stream);
+
+            static const func_t funcs[5][5] =
+            {
+                { sparse_caller<cn, 1, 1, T>::call, sparse_caller<cn, 2, 1, T>::call, sparse_caller<cn, 3, 1, T>::call, sparse_caller<cn, 4, 1, T>::call, sparse_caller<cn, 5, 1, T>::call },
+                { sparse_caller<cn, 1, 2, T>::call, sparse_caller<cn, 2, 2, T>::call, sparse_caller<cn, 3, 2, T>::call, sparse_caller<cn, 4, 2, T>::call, sparse_caller<cn, 5, 2, T>::call },
+                { sparse_caller<cn, 1, 3, T>::call, sparse_caller<cn, 2, 3, T>::call, sparse_caller<cn, 3, 3, T>::call, sparse_caller<cn, 4, 3, T>::call, sparse_caller<cn, 5, 3, T>::call },
+                { sparse_caller<cn, 1, 4, T>::call, sparse_caller<cn, 2, 4, T>::call, sparse_caller<cn, 3, 4, T>::call, sparse_caller<cn, 4, 4, T>::call, sparse_caller<cn, 5, 4, T>::call },
+                { sparse_caller<cn, 1, 5, T>::call, sparse_caller<cn, 2, 5, T>::call, sparse_caller<cn, 3, 5, T>::call, sparse_caller<cn, 4, 5, T>::call, sparse_caller<cn, 5, 5, T>::call }
+            };
+
+            Tex_I<cn, T>::bindTexture_(I);
+            Tex_J<cn, T>::bindTexture_(J);
+
+            funcs[patch.y - 1][patch.x - 1](I, J, I.rows, I.cols, prevPts, nextPts, status, err, ptcount, level, block, stream);
+        }
+
+        static void dense(PtrStepSzb I, PtrStepSz<T> J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream)
+        {
+            dim3 block(16, 16);
+            dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
+            Tex_I<1, uchar>::bindTexture_(I);
+            Tex_J<1, T>::bindTexture_(J);
+
+            int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
+            const int patchWidth  = block.x + 2 * halfWin.x;
+            const int patchHeight = block.y + 2 * halfWin.y;
+            size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
+
+            if (err.data)
+            {
+                denseKernel<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
+                cudaSafeCall(cudaGetLastError());
+            }
+            else
+            {
+                denseKernel<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
+                cudaSafeCall(cudaGetLastError());
+            }
+
+            if (stream == 0)
+                cudaSafeCall(cudaDeviceSynchronize());
+        }
+    };

-    void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream)
-    {
-        dim3 block(16, 16);
-        dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
-
-        bindTexture(&tex_Ib, I);
-        bindTexture(&tex_Jf, J);
-
-        int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
-        const int patchWidth  = block.x + 2 * halfWin.x;
-        const int patchHeight = block.y + 2 * halfWin.y;
-        size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
-
-        if (err.data)
-        {
-            denseKernel<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
-            cudaSafeCall( cudaGetLastError() );
-        }
-        else
-        {
-            denseKernel<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
-            cudaSafeCall( cudaGetLastError() );
-        }
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
+    template class pyrLK_caller<unsigned char, 1>;
+    template class pyrLK_caller<unsigned short, 1>;
+    template class pyrLK_caller<int, 1>;
+    template class pyrLK_caller<float, 1>;
+
+    template class pyrLK_caller<unsigned char, 3>;
+    template class pyrLK_caller<unsigned short, 3>;
+    template class pyrLK_caller<int, 3>;
+    template class pyrLK_caller<float, 3>;
+
+    template class pyrLK_caller<unsigned char, 4>;
+    template class pyrLK_caller<unsigned short, 4>;
+    template class pyrLK_caller<int, 4>;
+    template class pyrLK_caller<float, 4>;
 }

-#endif /* CUDA_DISABLER */
+#endif /* CUDA_DISABLER */
\ No newline at end of file
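The explicit `template class pyrLK_caller<...>;` instantiations closing the file are what allow pyrlk.cpp to re-declare the same struct and link against these definitions without ever seeing the kernel code. The pattern in miniature (hypothetical names, a sketch only):

    // lib.cu: definition plus explicit instantiation, emitted into this object file
    template <typename T> struct Caller { static void run(T x); };
    template <typename T> void Caller<T>::run(T) { /* launch kernel */ }
    template struct Caller<float>;

    // lib.cpp: matching declaration only; the linker resolves Caller<float>::run
    template <typename T> struct Caller { static void run(T x); };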
modules/cudaoptflow/src/precomp.hpp
@@ -52,7 +52,7 @@
#include "opencv2/video.hpp"
#include "opencv2/core/private.cuda.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDALEGACY
modules/cudaoptflow/src/pyrlk.cpp

@@ -56,14 +56,20 @@ Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size, int, in
 namespace pyrlk
 {
     void loadConstants(int2 winSize, int iters, cudaStream_t stream);

+    template <typename T, int cn> struct pyrLK_caller
+    {
+        static void sparse(PtrStepSz<typename device::TypeVec<T, cn>::vec_type> I, PtrStepSz<typename device::TypeVec<T, cn>::vec_type> J,
+                           const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                           int level, dim3 block, dim3 patch, cudaStream_t stream);
-    void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                 int level, dim3 block, dim3 patch, cudaStream_t stream);
-    void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                 int level, dim3 block, dim3 patch, cudaStream_t stream);
+        static void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream);
+    };
-    void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream);
+
+    template <typename T, int cn> void dispatcher(GpuMat I, GpuMat J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                                                  int level, dim3 block, dim3 patch, cudaStream_t stream)
+    {
+        pyrLK_caller<T, cn>::sparse(I, J, prevPts, nextPts, status, err, ptcount, level, block, patch, stream);
+    }
 }
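Because every dispatcher<T, cn> instantiation shares a single signature, the implementation further down can store them in a plain function-pointer table indexed by runtime depth and channel count. The mechanism in miniature (hypothetical names, a sketch only):

    typedef void (*func_t)(int level);
    template <typename T, int cn> void dispatch(int level) { /* pyrLK_caller<T, cn>::sparse(...) */ }

    static const func_t funcs[2] = { dispatch<unsigned char, 1>, dispatch<float, 4> };
    // funcs[i](level);   // a runtime index selects a compile-time instantiation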
namespace
@@ -76,6 +82,9 @@ namespace
         void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream);
+        void sparse(std::vector<GpuMat>& prevPyr, std::vector<GpuMat>& nextPyr, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream);
         void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream);

     protected:
@@ -83,8 +92,9 @@ namespace
         int maxLevel_;
         int iters_;
         bool useInitialFlow_;

+        void buildImagePyramid(const GpuMat& prevImg, std::vector<GpuMat>& prevPyr, const GpuMat& nextImg, std::vector<GpuMat>& nextPyr, Stream stream);
     private:
         friend class SparsePyrLKOpticalFlowImpl;
         std::vector<GpuMat> prevPyr_;
         std::vector<GpuMat> nextPyr_;
     };
@@ -113,28 +123,34 @@ namespace
         block.z = patch.z = 1;
     }

-    void PyrLKOpticalFlowBase::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream)
+    void PyrLKOpticalFlowBase::buildImagePyramid(const GpuMat& prevImg, std::vector<GpuMat>& prevPyr, const GpuMat& nextImg, std::vector<GpuMat>& nextPyr, Stream stream)
     {
-        if (prevPts.empty())
-        {
-            nextPts.release();
-            status.release();
-            if (err)
-                err->release();
-            return;
-        }
+        prevPyr.resize(maxLevel_ + 1);
+        nextPyr.resize(maxLevel_ + 1);

-        dim3 block, patch;
-        calcPatchSize(winSize_, block, patch);
-
-        int cn = prevImg.channels();

         CV_Assert( prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4 );
         CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
-        CV_Assert( maxLevel_ >= 0 );
-        CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
-        CV_Assert( patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6 );
-        CV_Assert( prevPts.rows == 1 && prevPts.type() == CV_32FC2 );
-        CV_Assert( cn == 1 || cn == 3 || cn == 4 );

+        prevPyr[0] = prevImg;
+        nextPyr[0] = nextImg;
+        for (int level = 1; level <= maxLevel_; ++level)
+        {
+            cuda::pyrDown(prevPyr[level - 1], prevPyr[level], stream);
+            cuda::pyrDown(nextPyr[level - 1], nextPyr[level], stream);
+        }
     }

+    void PyrLKOpticalFlowBase::sparse(std::vector<GpuMat>& prevPyr, std::vector<GpuMat>& nextPyr, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream)
+    {
+        CV_Assert( prevPyr.size() && nextPyr.size() && "Pyramid needs to at least contain the original matrix as the first element" );
+        CV_Assert( prevPyr[0].size() == nextPyr[0].size() );
+        CV_Assert( prevPts.rows == 1 && prevPts.type() == CV_32FC2 );
+        CV_Assert( maxLevel_ >= 0 );
+        CV_Assert( winSize_.width > 2 && winSize_.height > 2 );

         if (useInitialFlow_)
             CV_Assert( nextPts.size() == prevPts.size() && nextPts.type() == prevPts.type() );
         else
             ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
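A note on the coordinate bookkeeping that the next hunk relies on: at pyramid level l the kernels scale the tracked point by 2^{-l} (prevPt.x *= 1.0f / (1 << level)), and every level, including the coarsest, begins by doubling the incoming estimate (nextPt.x *= 2.f). That is why the initial guess below is multiplied by 1.0 / (1 << maxLevel_) / 2.0: the extra factor of two cancels the doubling performed at the first, coarsest level, so that

    x_l = 2 * x_{l+1},   with  x_{maxLevel}  seeded at  x / 2^{maxLevel}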
@@ -142,66 +158,70 @@ namespace
         GpuMat temp2 = nextPts.reshape(1);
         cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel_) / 2.0), temp2, 1, -1, stream);

         ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
         status.setTo(Scalar::all(1), stream);

         if (err)
             ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);

+        // build the image pyramids.
+        if (prevPyr.size() != size_t(maxLevel_ + 1) || nextPyr.size() != size_t(maxLevel_ + 1))
+        {
+            buildImagePyramid(prevPyr[0], prevPyr, nextPyr[0], nextPyr, stream);
+        }
-        BufferPool pool(stream);

+        dim3 block, patch;
+        calcPatchSize(winSize_, block, patch);
+        CV_Assert( patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6 );
+        pyrlk::loadConstants(make_int2(winSize_.width, winSize_.height), iters_, StreamAccessor::getStream(stream));

-        prevPyr_.resize(maxLevel_ + 1);
-        nextPyr_.resize(maxLevel_ + 1);

+        const int cn = prevPyr[0].channels();
+        const int type = prevPyr[0].depth();
-        int cn = prevImg.channels();

+        typedef void (*func_t)(GpuMat I, GpuMat J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                               int level, dim3 block, dim3 patch, cudaStream_t stream);

-        if (cn == 1 || cn == 4)
-        {
-            prevImg.convertTo(prevPyr_[0], CV_32F, stream);
-            nextImg.convertTo(nextPyr_[0], CV_32F, stream);
-        }
-        else
-        {
-            GpuMat buf = pool.getBuffer(prevImg.size(), CV_MAKE_TYPE(prevImg.depth(), 4));
-            cuda::cvtColor(prevImg, buf, COLOR_BGR2BGRA, 0, stream);
-            buf.convertTo(prevPyr_[0], CV_32F, stream);
-
-            cuda::cvtColor(nextImg, buf, COLOR_BGR2BGRA, 0, stream);
-            buf.convertTo(nextPyr_[0], CV_32F, stream);
-        }

+        // Current int datatype is disabled due to pyrDown not implementing it
+        // while ushort does work, it has significantly worse performance, and thus doesn't pass accuracy tests.
+        static const func_t funcs[6][4] =
+        {
+            { pyrlk::dispatcher<uchar, 1>, /*pyrlk::dispatcher<uchar, 2>*/ 0, pyrlk::dispatcher<uchar, 3>, pyrlk::dispatcher<uchar, 4> },
+            { /*pyrlk::dispatcher<char, 1>*/ 0, /*pyrlk::dispatcher<char, 2>*/ 0, /*pyrlk::dispatcher<char, 3>*/ 0, /*pyrlk::dispatcher<char, 4>*/ 0 },
+            { pyrlk::dispatcher<ushort, 1>, /*pyrlk::dispatcher<ushort, 2>*/ 0, pyrlk::dispatcher<ushort, 3>, pyrlk::dispatcher<ushort, 4> },
+            { /*pyrlk::dispatcher<short, 1>*/ 0, /*pyrlk::dispatcher<short, 2>*/ 0, /*pyrlk::dispatcher<short, 3>*/ 0, /*pyrlk::dispatcher<short, 4>*/ 0 },
+            { pyrlk::dispatcher<int, 1>, /*pyrlk::dispatcher<int, 2>*/ 0, pyrlk::dispatcher<int, 3>, pyrlk::dispatcher<int, 4> },
+            { pyrlk::dispatcher<float, 1>, /*pyrlk::dispatcher<float, 2>*/ 0, pyrlk::dispatcher<float, 3>, pyrlk::dispatcher<float, 4> }
+        };
+
+        func_t func = funcs[type][cn - 1];
+        CV_Assert( func != NULL && "Datatype not implemented" );
+
+        for (int level = maxLevel_; level >= 0; level--)
+        {
+            func(prevPyr[level], nextPyr[level],
+                 prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
+                 level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
+                 level, block, patch, StreamAccessor::getStream(stream));
+        }
+    }

-        for (int level = 1; level <= maxLevel_; ++level)
-        {
-            cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream);
-            cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream);
-        }

+    void PyrLKOpticalFlowBase::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream)
+    {
+        if (prevPts.empty())
+        {
+            nextPts.release();
+            status.release();
+            if (err)
+                err->release();
+            return;
+        }
+
+        CV_Assert( prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4 );
+        CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );

-        pyrlk::loadConstants(make_int2(winSize_.width, winSize_.height), iters_, StreamAccessor::getStream(stream));

+        // build the image pyramids.
+        buildImagePyramid(prevImg, prevPyr_, nextImg, nextPyr_, stream);
+        sparse(prevPyr_, nextPyr_, prevPts, nextPts, status, err, stream);
+    }

-        for (int level = maxLevel_; level >= 0; level--)
-        {
-            if (cn == 1)
-            {
-                pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
-                               prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
-                               level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
-                               level, block, patch, StreamAccessor::getStream(stream));
-            }
-            else
-            {
-                pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
-                               prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
-                               level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
-                               level, block, patch, StreamAccessor::getStream(stream));
-            }
-        }
-    }

     void PyrLKOpticalFlowBase::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream)
@@ -250,7 +270,7 @@ namespace
             {
                 int idx2 = (idx + 1) & 1;

-                pyrlk::dense(prevPyr_[level], nextPyr_[level],
+                pyrlk::pyrLK_caller<float, 1>::dense(prevPyr_[level], nextPyr_[level],
                              uPyr[idx], vPyr[idx], uPyr[idx2], vPyr[idx2], PtrStepSzf(), winSize2i,
                              StreamAccessor::getStream(stream));
@@ -289,14 +309,23 @@ namespace
                           OutputArray _err, Stream& stream)
         {
-            const GpuMat prevImg = _prevImg.getGpuMat();
-            const GpuMat nextImg = _nextImg.getGpuMat();
             const GpuMat prevPts = _prevPts.getGpuMat();
             GpuMat& nextPts = _nextPts.getGpuMatRef();
             GpuMat& status = _status.getGpuMatRef();
             GpuMat* err = _err.needed() ? &(_err.getGpuMatRef()) : NULL;

-            sparse(prevImg, nextImg, prevPts, nextPts, status, err, stream);
+            if (_prevImg.kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT && _prevImg.kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT)
+            {
+                std::vector<GpuMat> prevPyr, nextPyr;
+                _prevImg.getGpuMatVector(prevPyr);
+                _nextImg.getGpuMatVector(nextPyr);
+                sparse(prevPyr, nextPyr, prevPts, nextPts, status, err, stream);
+            }
+            else
+            {
+                const GpuMat prevImg = _prevImg.getGpuMat();
+                const GpuMat nextImg = _nextImg.getGpuMat();
+                sparse(prevImg, nextImg, prevPts, nextPts, status, err, stream);
+            }
         }
     };
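With this calc() overload in place, callers can hand in prebuilt pyramids as std::vector<GpuMat> and skip per-call pyramid construction when tracking repeatedly against the same reference frame. A usage sketch (assumes a CUDA build; d_prevPts, d_nextPts and d_status are placeholder GpuMats):

    std::vector<cv::cuda::GpuMat> prevPyr, nextPyr;   // filled once, e.g. with cuda::pyrDown
    cv::Ptr<cv::cuda::SparsePyrLKOpticalFlow> lk = cv::cuda::SparsePyrLKOpticalFlow::create();
    lk->calc(prevPyr, nextPyr, d_prevPts, d_nextPts, d_status);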
@@ -347,4 +376,4 @@ Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size winSize,
     return makePtr<DensePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
 }

-#endif /* !defined (HAVE_CUDA) */
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
modules/cudaoptflow/test/test_optflow.cpp

@@ -167,33 +167,34 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, BroxOpticalFlow, ALL_DEVICES);
 namespace
 {
-    IMPLEMENT_PARAM_CLASS(UseGray, bool)
+    IMPLEMENT_PARAM_CLASS(Chan, int)
+    IMPLEMENT_PARAM_CLASS(DataType, int)
 }

-PARAM_TEST_CASE(PyrLKOpticalFlow, cv::cuda::DeviceInfo, UseGray)
+PARAM_TEST_CASE(PyrLKOpticalFlow, cv::cuda::DeviceInfo, Chan, DataType)
 {
     cv::cuda::DeviceInfo devInfo;
-    bool useGray;
+    int channels;
+    int dataType;
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        useGray = GET_PARAM(1);
+        channels = GET_PARAM(1);
+        dataType = GET_PARAM(2);
         cv::cuda::setDevice(devInfo.deviceID());
     }
 };

 CUDA_TEST_P(PyrLKOpticalFlow, Sparse)
 {
-    cv::Mat frame0 = readImage("opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    cv::Mat frame0 = readImage("opticalflow/frame0.png", channels == 1 ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame0.empty());

-    cv::Mat frame1 = readImage("opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    cv::Mat frame1 = readImage("opticalflow/frame1.png", channels == 1 ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame1.empty());

     cv::Mat gray_frame;
-    if (useGray)
+    if (channels == 1)
         gray_frame = frame0;
     else
         cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);
@@ -208,22 +209,32 @@ CUDA_TEST_P(PyrLKOpticalFlow, Sparse)
     cv::Ptr<cv::cuda::SparsePyrLKOpticalFlow> pyrLK = cv::cuda::SparsePyrLKOpticalFlow::create();

-    std::vector<cv::Point2f> nextPts_gold;
-    std::vector<unsigned char> status_gold;
-    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, cv::noArray());

     cv::cuda::GpuMat d_nextPts;
     cv::cuda::GpuMat d_status;
-    pyrLK->calc(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status);
+    cv::Mat converted0, converted1;
+    if (channels == 4)
+    {
+        cv::cvtColor(frame0, frame0, cv::COLOR_BGR2BGRA);
+        cv::cvtColor(frame1, frame1, cv::COLOR_BGR2BGRA);
+    }
+    frame0.convertTo(converted0, dataType);
+    frame1.convertTo(converted1, dataType);
+
+    pyrLK->calc(loadMat(converted0), loadMat(converted1), d_pts, d_nextPts, d_status);

     std::vector<cv::Point2f> nextPts(d_nextPts.cols);
     cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]);
     d_nextPts.download(nextPts_mat);

     std::vector<unsigned char> status(d_status.cols);
     cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]);
     d_status.download(status_mat);

+    std::vector<cv::Point2f> nextPts_gold;
+    std::vector<unsigned char> status_gold;
+    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, cv::noArray());

     ASSERT_EQ(nextPts_gold.size(), nextPts.size());
     ASSERT_EQ(status_gold.size(), status.size());
@@ -251,11 +262,16 @@ CUDA_TEST_P(PyrLKOpticalFlow, Sparse)
     double bad_ratio = static_cast<double>(mistmatch) / nextPts.size();

     ASSERT_LE(bad_ratio, 0.01);
 }

 INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, PyrLKOpticalFlow, testing::Combine(
     ALL_DEVICES,
-    testing::Values(UseGray(true), UseGray(false))));
+    testing::Values(Chan(1), Chan(3), Chan(4)),
+    testing::Values(DataType(CV_8U), DataType(CV_16U), DataType(CV_32S), DataType(CV_32F))));
//////////////////////////////////////////////////////
// FarnebackOpticalFlow
@@ -385,4 +401,4 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine(
     ALL_DEVICES,
     testing::Values(Gamma(0.0), Gamma(1.0))));

-#endif // HAVE_CUDA
+#endif // HAVE_CUDA
\ No newline at end of file
modules/cudawarping/src/cuda/pyr_down.cu
@@ -212,10 +212,10 @@ namespace cv { namespace cuda { namespace device
template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-//template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 //template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-//template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-//template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
...
@@ -225,4 +225,4 @@ namespace cv { namespace cuda { namespace device
}}} // namespace cv { namespace cuda { namespace cudev
-#endif /* CUDA_DISABLER */
+#endif /* CUDA_DISABLER */
\ No newline at end of file
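Enabling the int instantiations here is what makes CV_32S pyramids possible, which the CV_32S branch of the new PyrLK dispatcher depends on. A quick sketch of the newly valid call (illustrative; sizes are placeholders):

    cv::cuda::GpuMat src(480, 640, CV_32SC1), dst;
    cv::cuda::pyrDown(src, dst);   // previously this depth hit the unsupported-type assertion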
modules/cudawarping/src/pyramids.cpp
@@ -74,7 +74,7 @@ void cv::cuda::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
         {0 /*pyrDown_gpu<schar>*/, 0 /*pyrDown_gpu<schar2>*/ , 0 /*pyrDown_gpu<schar3>*/, 0 /*pyrDown_gpu<schar4>*/},
         {pyrDown_gpu<ushort>     , 0 /*pyrDown_gpu<ushort2>*/, pyrDown_gpu<ushort3>    , pyrDown_gpu<ushort4>    },
         {pyrDown_gpu<short>      , 0 /*pyrDown_gpu<short2>*/ , pyrDown_gpu<short3>     , pyrDown_gpu<short4>     },
-        {0 /*pyrDown_gpu<int>*/  , 0 /*pyrDown_gpu<int2>*/   , 0 /*pyrDown_gpu<int3>*/ , 0 /*pyrDown_gpu<int4>*/ },
+        {pyrDown_gpu<int>        , 0 /*pyrDown_gpu<int2>*/   , pyrDown_gpu<int3>       , pyrDown_gpu<int4>       },
         {pyrDown_gpu<float>      , 0 /*pyrDown_gpu<float2>*/ , pyrDown_gpu<float3>     , pyrDown_gpu<float4>     }
     };
@@ -131,4 +131,4 @@ void cv::cuda::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
     func(src, dst, StreamAccessor::getStream(stream));
 }

-#endif
+#endif
\ No newline at end of file