Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6a9f9342
Commit
6a9f9342
authored
Apr 28, 2012
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed bug #1759
parent
40d8d11f
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
72 additions
and
8 deletions
+72
-8
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+6
-0
optical_flow_farneback.cu
modules/gpu/src/cuda/optical_flow_farneback.cu
+37
-4
pyrlk.cu
modules/gpu/src/cuda/pyrlk.cu
+12
-0
optical_flow_farneback.cpp
modules/gpu/src/optical_flow_farneback.cpp
+12
-0
pyrlk.cpp
modules/gpu/src/pyrlk.cpp
+5
-4
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
6a9f9342
...
...
@@ -1750,6 +1750,7 @@ public:
useInitialFlow
=
false
;
minEigThreshold
=
1e-4
f
;
getMinEigenVals
=
false
;
isDeviceArch11_
=
!
DeviceInfo
().
supports
(
FEATURE_SET_COMPUTE_12
);
}
void
sparse
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
const
GpuMat
&
prevPts
,
GpuMat
&
nextPts
,
...
...
@@ -1796,6 +1797,8 @@ private:
vector
<
GpuMat
>
uPyr_
;
vector
<
GpuMat
>
vPyr_
;
bool
isDeviceArch11_
;
};
...
...
@@ -1812,6 +1815,7 @@ public:
polyN
=
5
;
polySigma
=
1.1
;
flags
=
0
;
isDeviceArch11_
=
!
DeviceInfo
().
supports
(
FEATURE_SET_COMPUTE_12
);
}
int
numLevels
;
...
...
@@ -1859,6 +1863,8 @@ private:
GpuMat
frames_
[
2
];
GpuMat
pyrLevel_
[
2
],
M_
,
bufM_
,
R_
[
2
],
blurredFrame_
[
2
];
std
::
vector
<
GpuMat
>
pyramid0_
,
pyramid1_
;
bool
isDeviceArch11_
;
};
...
...
modules/gpu/src/cuda/optical_flow_farneback.cu
View file @
6a9f9342
...
...
@@ -433,6 +433,25 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
void boxFilter5Gpu_CC11(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
{
int height = src.rows / 5;
int width = src.cols;
dim3 block(128);
dim3 grid(divUp(width, block.x), divUp(height, block.y));
int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float);
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
__constant__ float c_gKer[MAX_KSIZE_HALF + 1];
template <typename Border>
...
...
@@ -575,14 +594,14 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
template <typename Border>
template <typename Border
, int blockDimX
>
void gaussianBlur5Caller(
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
{
int height = src.rows / 5;
int width = src.cols;
dim3 block(
256
);
dim3 block(
blockDimX
);
dim3 grid(divUp(width, block.x), divUp(height, block.y));
int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float);
Border b(height, width);
...
...
@@ -603,8 +622,22 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
static const caller_t callers[] =
{
gaussianBlur5Caller<BrdReflect101<float> >,
gaussianBlur5Caller<BrdReplicate<float> >,
gaussianBlur5Caller<BrdReflect101<float>,256>,
gaussianBlur5Caller<BrdReplicate<float>,256>,
};
callers[borderMode](src, ksizeHalf, dst, stream);
}
void gaussianBlur5Gpu_CC11(
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);
static const caller_t callers[] =
{
gaussianBlur5Caller<BrdReflect101<float>,128>,
gaussianBlur5Caller<BrdReplicate<float>,128>,
};
callers[borderMode](src, ksizeHalf, dst, stream);
...
...
modules/gpu/src/cuda/pyrlk.cu
View file @
6a9f9342
...
...
@@ -181,6 +181,7 @@ namespace cv { namespace gpu { namespace device
smem3[tid] = val3;
__syncthreads();
#if __CUDA_ARCH__ > 110
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
...
...
@@ -188,6 +189,7 @@ namespace cv { namespace gpu { namespace device
smem3[tid] = val3 += smem3[tid + 128];
}
__syncthreads();
#endif
if (tid < 64)
{
...
...
@@ -235,12 +237,14 @@ namespace cv { namespace gpu { namespace device
smem2[tid] = val2;
__syncthreads();
#if __CUDA_ARCH__ > 110
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
smem2[tid] = val2 += smem2[tid + 128];
}
__syncthreads();
#endif
if (tid < 64)
{
...
...
@@ -279,11 +283,13 @@ namespace cv { namespace gpu { namespace device
smem1[tid] = val1;
__syncthreads();
#if __CUDA_ARCH__ > 110
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
}
__syncthreads();
#endif
if (tid < 64)
{
...
...
@@ -310,9 +316,15 @@ namespace cv { namespace gpu { namespace device
__global__ void lkSparse(const PtrStepb I, const PtrStepb J, const PtrStep<short> dIdx, const PtrStep<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
{
#if __CUDA_ARCH__ <= 110
__shared__ float smem1[128];
__shared__ float smem2[128];
__shared__ float smem3[128];
#else
__shared__ float smem1[256];
__shared__ float smem2[256];
__shared__ float smem3[256];
#endif
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
...
...
modules/gpu/src/optical_flow_farneback.cpp
View file @
6a9f9342
...
...
@@ -81,6 +81,8 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
void
boxFilter5Gpu
(
const
DevMem2Df
src
,
int
ksizeHalf
,
DevMem2Df
dst
,
cudaStream_t
stream
);
void
boxFilter5Gpu_CC11
(
const
DevMem2Df
src
,
int
ksizeHalf
,
DevMem2Df
dst
,
cudaStream_t
stream
);
void
setGaussianBlurKernel
(
const
float
*
gKer
,
int
ksizeHalf
);
void
gaussianBlurGpu
(
...
...
@@ -89,6 +91,9 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
void
gaussianBlur5Gpu
(
const
DevMem2Df
src
,
int
ksizeHalf
,
DevMem2Df
dst
,
int
borderType
,
cudaStream_t
stream
);
void
gaussianBlur5Gpu_CC11
(
const
DevMem2Df
src
,
int
ksizeHalf
,
DevMem2Df
dst
,
int
borderType
,
cudaStream_t
stream
);
}}}}
// namespace cv { namespace gpu { namespace device { namespace optflow_farneback
...
...
@@ -167,7 +172,10 @@ void cv::gpu::FarnebackOpticalFlow::updateFlow_boxFilter(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[])
{
if
(
!
isDeviceArch11_
)
device
::
optflow_farneback
::
boxFilter5Gpu
(
M
,
blockSize
/
2
,
bufM
,
S
(
streams
[
0
]));
else
device
::
optflow_farneback
::
boxFilter5Gpu_CC11
(
M
,
blockSize
/
2
,
bufM
,
S
(
streams
[
0
]));
swap
(
M
,
bufM
);
for
(
int
i
=
1
;
i
<
5
;
++
i
)
...
...
@@ -183,8 +191,12 @@ void cv::gpu::FarnebackOpticalFlow::updateFlow_gaussianBlur(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[])
{
if
(
!
isDeviceArch11_
)
device
::
optflow_farneback
::
gaussianBlur5Gpu
(
M
,
blockSize
/
2
,
bufM
,
BORDER_REPLICATE_GPU
,
S
(
streams
[
0
]));
else
device
::
optflow_farneback
::
gaussianBlur5Gpu_CC11
(
M
,
blockSize
/
2
,
bufM
,
BORDER_REPLICATE_GPU
,
S
(
streams
[
0
]));
swap
(
M
,
bufM
);
device
::
optflow_farneback
::
updateFlowGpu
(
M
,
flowx
,
flowy
,
S
(
streams
[
0
]));
...
...
modules/gpu/src/pyrlk.cpp
View file @
6a9f9342
...
...
@@ -126,18 +126,19 @@ void cv::gpu::PyrLKOpticalFlow::buildImagePyramid(const GpuMat& img0, vector<Gpu
namespace
{
void
calcPatchSize
(
cv
::
Size
winSize
,
int
cn
,
dim3
&
block
,
dim3
&
patch
)
void
calcPatchSize
(
cv
::
Size
winSize
,
int
cn
,
dim3
&
block
,
dim3
&
patch
,
bool
isDeviceArch11
)
{
winSize
.
width
*=
cn
;
if
(
winSize
.
width
>
32
&&
winSize
.
width
>
2
*
winSize
.
height
)
{
block
.
x
=
32
;
block
.
x
=
isDeviceArch11
?
16
:
32
;
block
.
y
=
8
;
}
else
{
block
.
x
=
block
.
y
=
16
;
block
.
x
=
16
;
block
.
y
=
isDeviceArch11
?
8
:
16
;
}
patch
.
x
=
(
winSize
.
width
+
block
.
x
-
1
)
/
block
.
x
;
...
...
@@ -166,7 +167,7 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
const
int
cn
=
prevImg
.
channels
();
dim3
block
,
patch
;
calcPatchSize
(
winSize
,
cn
,
block
,
patch
);
calcPatchSize
(
winSize
,
cn
,
block
,
patch
,
isDeviceArch11_
);
CV_Assert
(
derivLambda
>=
0
);
CV_Assert
(
maxLevel
>=
0
&&
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment