submodule / opencv · Commits · Commit 681ac9be

authored Feb 16, 2012 by Alexey Spizhevoy

    Added missing files

parent 5c459aa8

Showing 2 changed files with 1013 additions and 0 deletions
    modules/gpu/src/cuda/optical_flow_farneback.cu  +614 / -0
    modules/gpu/src/optical_flow_farneback.cpp      +399 / -0
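For context, a minimal host-side sketch of how the class these files implement is typically driven. It is not part of the commit: the frame file names are placeholders, and it assumes the OpenCV 2.x gpu module (a CUDA-capable device and the declarations from opencv2/gpu/gpu.hpp).

    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/highgui/highgui.hpp>

    int main()
    {
        // Two consecutive 8-bit grayscale frames of equal size, as the
        // operator() in this commit asserts. File names are placeholders.
        cv::Mat f0 = cv::imread("frame0.png", 0);
        cv::Mat f1 = cv::imread("frame1.png", 0);

        cv::gpu::GpuMat d_f0(f0), d_f1(f1), d_flowx, d_flowy;

        cv::gpu::FarnebackOpticalFlow flow;  // default pyrScale/numLevels/winSize/polyN
        flow(d_f0, d_f1, d_flowx, d_flowy);  // default null stream: runs synchronously

        cv::Mat flowx, flowy;                // per-pixel x/y displacements
        d_flowx.download(flowx);
        d_flowy.download(flowy);
        return 0;
    }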
modules/gpu/src/cuda/optical_flow_farneback.cu  (new file, 0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <stdio.h>
#include "internal_shared.hpp"
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#define tx threadIdx.x
#define ty threadIdx.y
#define bx blockIdx.x
#define by blockIdx.y
#define bdx blockDim.x
#define bdy blockDim.y
#define BORDER_SIZE 5
#define MAX_KSIZE_HALF 100
using namespace std;

namespace cv { namespace gpu { namespace device { namespace optflow_farneback
{
    __constant__ float c_g[8];
    __constant__ float c_xg[8];
    __constant__ float c_xxg[8];
    __constant__ float c_ig11, c_ig03, c_ig33, c_ig55;
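    // Per-pixel quadratic polynomial expansion (the first stage of Farneback's
    // method): c_g/c_xg/c_xxg hold the separable 1D applicability kernels and
    // c_ig* the needed entries of the inverted Gram matrix. The result dst is
    // stored as five stacked height x width coefficient planes, which is why
    // rows are addressed as k*height + y below.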
    template <int polyN>
    __global__ void polynomialExpansion(
            const int height, const int width, const PtrStepf src, PtrStepf dst)
    {
        const int y = by * bdy + ty;
        const int x = bx * (bdx - 2*polyN) + tx - polyN;

        if (y < height)
        {
            extern __shared__ float smem[];
            volatile float *row = smem + tx;
            int xWarped = ::min(::max(x, 0), width - 1);

            row[0] = src(y, xWarped) * c_g[0];
            row[bdx] = 0.f;
            row[2*bdx] = 0.f;

            for (int k = 1; k <= polyN; ++k)
            {
                float t0 = src(::max(y - k, 0), xWarped);
                float t1 = src(::min(y + k, height - 1), xWarped);

                row[0] += c_g[k] * (t0 + t1);
                row[bdx] += c_xg[k] * (t1 - t0);
                row[2*bdx] += c_xxg[k] * (t0 + t1);
            }

            __syncthreads();

            if (tx >= polyN && tx + polyN < bdx && x < width)
            {
                float b1 = c_g[0] * row[0];
                float b3 = c_g[0] * row[bdx];
                float b5 = c_g[0] * row[2*bdx];
                float b2 = 0, b4 = 0, b6 = 0;

                for (int k = 1; k <= polyN; ++k)
                {
                    b1 += (row[k] + row[-k]) * c_g[k];
                    b4 += (row[k] + row[-k]) * c_xxg[k];
                    b2 += (row[k] - row[-k]) * c_xg[k];
                    b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];
                    b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];
                    b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];
                }

                dst(y, xWarped) = b3*c_ig11;
                dst(height + y, xWarped) = b2*c_ig11;
                dst(2*height + y, xWarped) = b1*c_ig03 + b5*c_ig33;
                dst(3*height + y, xWarped) = b1*c_ig03 + b4*c_ig33;
                dst(4*height + y, xWarped) = b6*c_ig55;
            }
        }
    }
    void setPolinomialExpansionConsts(
            int polyN, const float *g, const float *xg, const float *xxg,
            float ig11, float ig03, float ig33, float ig55)
    {
        cudaSafeCall(cudaMemcpyToSymbol(c_g, g, (polyN + 1) * sizeof(*g)));
        cudaSafeCall(cudaMemcpyToSymbol(c_xg, xg, (polyN + 1) * sizeof(*xg)));
        cudaSafeCall(cudaMemcpyToSymbol(c_xxg, xxg, (polyN + 1) * sizeof(*xxg)));
        cudaSafeCall(cudaMemcpyToSymbol(c_ig11, &ig11, sizeof(ig11)));
        cudaSafeCall(cudaMemcpyToSymbol(c_ig03, &ig03, sizeof(ig03)));
        cudaSafeCall(cudaMemcpyToSymbol(c_ig33, &ig33, sizeof(ig33)));
        cudaSafeCall(cudaMemcpyToSymbol(c_ig55, &ig55, sizeof(ig55)));
    }

    void polynomialExpansionGpu(const DevMem2Df &src, int polyN, DevMem2Df dst, cudaStream_t stream)
    {
        dim3 block(256);
        dim3 grid(divUp(src.cols, block.x - 2*polyN), src.rows);
        int smem = 3 * block.x * sizeof(float);

        if (polyN == 5)
            polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
        else if (polyN == 7)
            polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);

        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaDeviceSynchronize());
    }
    __constant__ float c_border[BORDER_SIZE + 1];
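    // updateMatrices warps the second frame's polynomial coefficients (R1) by
    // the current flow with bilinear interpolation, averages them with R0, and
    // accumulates the per-pixel 2x2 system data into the five planes of M;
    // contributions near the image border are down-weighted by c_border.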
    __global__ void updateMatrices(
            const int height, const int width, const PtrStepf flowx, const PtrStepf flowy,
            const PtrStepf R0, const PtrStepf R1, PtrStepf M)
    {
        const int y = by * bdy + ty;
        const int x = bx * bdx + tx;

        if (y < height && x < width)
        {
            float dx = flowx(y, x);
            float dy = flowy(y, x);
            float fx = x + dx;
            float fy = y + dy;

            int x1 = floorf(fx);
            int y1 = floorf(fy);
            fx -= x1; fy -= y1;

            float r2, r3, r4, r5, r6;

            if (x1 >= 0 && y1 >= 0 && x1 < width - 1 && y1 < height - 1)
            {
                float a00 = (1.f - fx) * (1.f - fy);
                float a01 = fx * (1.f - fy);
                float a10 = (1.f - fx) * fy;
                float a11 = fx * fy;

                r2 = a00 * R1(y1, x1) +
                     a01 * R1(y1, x1 + 1) +
                     a10 * R1(y1 + 1, x1) +
                     a11 * R1(y1 + 1, x1 + 1);

                r3 = a00 * R1(height + y1, x1) +
                     a01 * R1(height + y1, x1 + 1) +
                     a10 * R1(height + y1 + 1, x1) +
                     a11 * R1(height + y1 + 1, x1 + 1);

                r4 = a00 * R1(2*height + y1, x1) +
                     a01 * R1(2*height + y1, x1 + 1) +
                     a10 * R1(2*height + y1 + 1, x1) +
                     a11 * R1(2*height + y1 + 1, x1 + 1);

                r5 = a00 * R1(3*height + y1, x1) +
                     a01 * R1(3*height + y1, x1 + 1) +
                     a10 * R1(3*height + y1 + 1, x1) +
                     a11 * R1(3*height + y1 + 1, x1 + 1);

                r6 = a00 * R1(4*height + y1, x1) +
                     a01 * R1(4*height + y1, x1 + 1) +
                     a10 * R1(4*height + y1 + 1, x1) +
                     a11 * R1(4*height + y1 + 1, x1 + 1);

                r4 = (R0(2*height + y, x) + r4) * 0.5f;
                r5 = (R0(3*height + y, x) + r5) * 0.5f;
                r6 = (R0(4*height + y, x) + r6) * 0.25f;
            }
            else
            {
                r2 = r3 = 0.f;
                r4 = R0(2*height + y, x);
                r5 = R0(3*height + y, x);
                r6 = R0(4*height + y, x) * 0.5f;
            }

            r2 = (R0(y, x) - r2) * 0.5f;
            r3 = (R0(height + y, x) - r3) * 0.5f;

            r2 += r4*dy + r6*dx;
            r3 += r6*dy + r5*dx;

            float scale =
                    c_border[::min(x, BORDER_SIZE)] *
                    c_border[::min(y, BORDER_SIZE)] *
                    c_border[::min(width - x - 1, BORDER_SIZE)] *
                    c_border[::min(height - y - 1, BORDER_SIZE)];

            r2 *= scale; r3 *= scale; r4 *= scale;
            r5 *= scale; r6 *= scale;

            M(y, x) = r4*r4 + r6*r6;
            M(height + y, x) = (r4 + r5)*r6;
            M(2*height + y, x) = r5*r5 + r6*r6;
            M(3*height + y, x) = r4*r2 + r6*r3;
            M(4*height + y, x) = r6*r2 + r5*r3;
        }
    }

    void setUpdateMatricesConsts()
    {
        static const float border[BORDER_SIZE + 1] = {0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f};
        cudaSafeCall(cudaMemcpyToSymbol(c_border, border, (BORDER_SIZE + 1) * sizeof(*border)));
    }

    void updateMatricesGpu(
            const DevMem2Df flowx, const DevMem2Df flowy, const DevMem2Df R0, const DevMem2Df R1,
            DevMem2Df M, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));

        updateMatrices<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, flowx, flowy, R0, R1, M);
        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaDeviceSynchronize());
    }
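    // updateFlow solves the per-pixel 2x2 system stored in M's five planes
    // (g11, g12, g22, h1, h2) by Cramer's rule; the +1e-3 term regularizes the
    // determinant where the system is near-singular.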
    __global__ void updateFlow(
            const int height, const int width, const PtrStepf M, PtrStepf flowx, PtrStepf flowy)
    {
        const int y = by * bdy + ty;
        const int x = bx * bdx + tx;

        if (y < height && x < width)
        {
            float g11 = M(y, x);
            float g12 = M(height + y, x);
            float g22 = M(2*height + y, x);
            float h1 = M(3*height + y, x);
            float h2 = M(4*height + y, x);

            float detInv = 1.f / (g11*g22 - g12*g12 + 1e-3f);

            flowx(y, x) = (g11*h2 - g12*h1) * detInv;
            flowy(y, x) = (g22*h1 - g12*h2) * detInv;
        }
    }

    void updateFlowGpu(const DevMem2Df M, DevMem2Df flowx, DevMem2Df flowy, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));

        updateFlow<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, M, flowx, flowy);
        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaDeviceSynchronize());
    }
    /*__global__ void boxFilter(
            const int height, const int width, const PtrStepf src,
            const int ksizeHalf, const float boxAreaInv, PtrStepf dst)
    {
        const int y = by * bdy + ty;
        const int x = bx * bdx + tx;

        extern __shared__ float smem[];
        volatile float *row = smem + ty * (bdx + 2*ksizeHalf);

        if (y < height)
        {
            // Vertical pass
            for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
            {
                int xExt = int(bx * bdx) + i - ksizeHalf;
                xExt = ::min(::max(xExt, 0), width - 1);

                row[i] = src(y, xExt);
                for (int j = 1; j <= ksizeHalf; ++j)
                    row[i] += src(::max(y - j, 0), xExt) + src(::min(y + j, height - 1), xExt);
            }

            if (x < width)
            {
                __syncthreads();

                // Horizontal pass
                row += tx + ksizeHalf;

                float res = row[0];
                for (int i = 1; i <= ksizeHalf; ++i)
                    res += row[-i] + row[i];

                dst(y, x) = res * boxAreaInv;
            }
        }
    }

    void boxFilterGpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
    {
        dim3 block(256);
        dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
        int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float);
        float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));

        boxFilter<<<grid, block, smem, stream>>>(src.rows, src.cols, src, ksizeHalf, boxAreaInv, dst);

        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaDeviceSynchronize());
    }*/
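    // boxFilter5 is the batched variant actually used: it smooths all five
    // planes of M in one launch, doing a separable (vertical, then horizontal)
    // sum in shared memory and scaling by the inverse box area.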
    __global__ void boxFilter5(
            const int height, const int width, const PtrStepf src,
            const int ksizeHalf, const float boxAreaInv, PtrStepf dst)
    {
        const int y = by * bdy + ty;
        const int x = bx * bdx + tx;

        extern __shared__ float smem[];
        const int smw = bdx + 2*ksizeHalf; // shared memory "width"
        volatile float *row = smem + 5 * ty * smw;

        if (y < height)
        {
            // Vertical pass
            for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
            {
                int xExt = int(bx * bdx) + i - ksizeHalf;
                xExt = ::min(::max(xExt, 0), width - 1);

                #pragma unroll
                for (int k = 0; k < 5; ++k)
                    row[k*smw + i] = src(k*height + y, xExt);

                for (int j = 1; j <= ksizeHalf; ++j)
                    #pragma unroll
                    for (int k = 0; k < 5; ++k)
                        row[k*smw + i] +=
                                src(k*height + ::max(y - j, 0), xExt) +
                                src(k*height + ::min(y + j, height - 1), xExt);
            }

            if (x < width)
            {
                __syncthreads();

                // Horizontal pass
                row += tx + ksizeHalf;

                float res[5];

                #pragma unroll
                for (int k = 0; k < 5; ++k)
                    res[k] = row[k*smw];

                for (int i = 1; i <= ksizeHalf; ++i)
                    #pragma unroll
                    for (int k = 0; k < 5; ++k)
                        res[k] += row[k*smw - i] + row[k*smw + i];

                #pragma unroll
                for (int k = 0; k < 5; ++k)
                    dst(k*height + y, x) = res[k] * boxAreaInv;
            }
        }
    }

    void boxFilter5Gpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
    {
        int height = src.rows / 5;
        int width = src.cols;

        dim3 block(256);
        dim3 grid(divUp(width, block.x), divUp(height, block.y));
        int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float);
        float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));

        boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);

        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaDeviceSynchronize());
    }
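    // The Gaussian variants below mirror the box filters but weight taps with
    // the kernel held in c_gKer and delegate out-of-range indexing to a Border
    // policy (reflect-101 or replicate), selected by borderMode at the call site.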
    __constant__ float c_gKer[MAX_KSIZE_HALF + 1];

    template <typename Border>
    __global__ void gaussianBlur(
            const int height, const int width, const PtrStepf src, const int ksizeHalf,
            const Border b, PtrStepf dst)
    {
        const int y = by * bdy + ty;
        const int x = bx * bdx + tx;

        extern __shared__ float smem[];
        volatile float *row = smem + ty * (bdx + 2*ksizeHalf);

        if (y < height)
        {
            // Vertical pass
            for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
            {
                int xExt = int(bx * bdx) + i - ksizeHalf;
                xExt = b.idx_col(xExt);

                row[i] = src(y, xExt) * c_gKer[0];
                for (int j = 1; j <= ksizeHalf; ++j)
                    row[i] +=
                            (src(b.idx_row_low(y - j), xExt) +
                             src(b.idx_row_high(y + j), xExt)) * c_gKer[j];
            }

            if (x < width)
            {
                __syncthreads();

                // Horizontal pass
                row += tx + ksizeHalf;

                float res = row[0] * c_gKer[0];
                for (int i = 1; i <= ksizeHalf; ++i)
                    res += (row[-i] + row[i]) * c_gKer[i];

                dst(y, x) = res;
            }
        }
    }

    void setGaussianBlurKernel(const float *gKer, int ksizeHalf)
    {
        cudaSafeCall(cudaMemcpyToSymbol(c_gKer, gKer, (ksizeHalf + 1) * sizeof(*gKer)));
    }

    template <typename Border>
    void gaussianBlurCaller(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
    {
        int height = src.rows;
        int width = src.cols;

        dim3 block(256);
        dim3 grid(divUp(width, block.x), divUp(height, block.y));
        int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float);
        Border b(height, width);

        gaussianBlur<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);

        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaDeviceSynchronize());
    }

    void gaussianBlurGpu(
            const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
    {
        typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);

        static const caller_t callers[] =
        {
            gaussianBlurCaller<BrdReflect101<float> >,
            gaussianBlurCaller<BrdReplicate<float> >,
        };

        callers[borderMode](src, ksizeHalf, dst, stream);
    }
    template <typename Border>
    __global__ void gaussianBlur5(
            const int height, const int width, const PtrStepf src, const int ksizeHalf,
            const Border b, PtrStepf dst)
    {
        const int y = by * bdy + ty;
        const int x = bx * bdx + tx;

        extern __shared__ float smem[];
        const int smw = bdx + 2*ksizeHalf; // shared memory "width"
        volatile float *row = smem + 5 * ty * smw;

        if (y < height)
        {
            // Vertical pass
            for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
            {
                int xExt = int(bx * bdx) + i - ksizeHalf;
                xExt = b.idx_col(xExt);

                #pragma unroll
                for (int k = 0; k < 5; ++k)
                    row[k*smw + i] = src(k*height + y, xExt) * c_gKer[0];

                for (int j = 1; j <= ksizeHalf; ++j)
                    #pragma unroll
                    for (int k = 0; k < 5; ++k)
                        row[k*smw + i] +=
                                (src(k*height + b.idx_row_low(y - j), xExt) +
                                 src(k*height + b.idx_row_high(y + j), xExt)) * c_gKer[j];
            }

            if (x < width)
            {
                __syncthreads();

                // Horizontal pass
                row += tx + ksizeHalf;

                float res[5];

                #pragma unroll
                for (int k = 0; k < 5; ++k)
                    res[k] = row[k*smw] * c_gKer[0];

                for (int i = 1; i <= ksizeHalf; ++i)
                    #pragma unroll
                    for (int k = 0; k < 5; ++k)
                        res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];

                #pragma unroll
                for (int k = 0; k < 5; ++k)
                    dst(k*height + y, x) = res[k];
            }
        }
    }

    template <typename Border>
    void gaussianBlur5Caller(
            const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
    {
        int height = src.rows / 5;
        int width = src.cols;

        dim3 block(256);
        dim3 grid(divUp(width, block.x), divUp(height, block.y));
        int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float);
        Border b(height, width);

        gaussianBlur5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);

        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaDeviceSynchronize());
    }

    void gaussianBlur5Gpu(
            const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
    {
        typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);

        static const caller_t callers[] =
        {
            gaussianBlur5Caller<BrdReflect101<float> >,
            gaussianBlur5Caller<BrdReplicate<float> >,
        };

        callers[borderMode](src, ksizeHalf, dst, stream);
    }

}}}} // namespace cv { namespace gpu { namespace device { namespace optflow_farneback

modules/gpu/src/optical_flow_farneback.cpp  (new file, 0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <opencv2/highgui/highgui.hpp>
#define MIN_SIZE 32
#define S(x) StreamAccessor::getStream(x)
// GPU resize() is fast, but it differs from the CPU analog. Disabling this flag
// leads to inefficient code and is for debug purposes only.
#define ENABLE_GPU_RESIZE 1
using namespace std;
using namespace cv;
using namespace cv::gpu;

#if !defined(HAVE_CUDA)

void cv::gpu::FarnebackOpticalFlow::operator ()(
        const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&)
{
    throw_nogpu();
}

#else

namespace cv { namespace gpu { namespace device { namespace optflow_farneback
{
    void setPolinomialExpansionConsts(
            int polyN, const float *g, const float *xg, const float *xxg,
            float ig11, float ig03, float ig33, float ig55);

    void polynomialExpansionGpu(const DevMem2Df &src, int polyN, DevMem2Df dst, cudaStream_t stream);

    void setUpdateMatricesConsts();

    void updateMatricesGpu(
            const DevMem2Df flowx, const DevMem2Df flowy, const DevMem2Df R0, const DevMem2Df R1,
            DevMem2Df M, cudaStream_t stream);

    void updateFlowGpu(const DevMem2Df M, DevMem2Df flowx, DevMem2Df flowy, cudaStream_t stream);

    /*void boxFilterGpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream);*/

    void boxFilter5Gpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream);

    void setGaussianBlurKernel(const float *gKer, int ksizeHalf);

    void gaussianBlurGpu(
            const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderType, cudaStream_t stream);

    void gaussianBlur5Gpu(
            const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderType, cudaStream_t stream);

}}}} // namespace cv { namespace gpu { namespace device { namespace optflow_farneback
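
// prepareGaussian computes the 1D Gaussian applicability g (plus x*g and
// x*x*g) and the 6x6 Gram matrix G of the quadratic basis {1, x, y, x^2,
// y^2, xy} under that weighting; by symmetry only four distinct entries of
// inv(G) survive (see the sketch in the code below), and those are what the
// device kernels consume.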
void cv::gpu::FarnebackOpticalFlow::prepareGaussian(
        int n, double sigma, float *g, float *xg, float *xxg,
        double &ig11, double &ig03, double &ig33, double &ig55)
{
    double s = 0.;

    for (int x = -n; x <= n; x++)
    {
        g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
        s += g[x];
    }

    s = 1./s;
    for (int x = -n; x <= n; x++)
    {
        g[x] = (float)(g[x]*s);
        xg[x] = (float)(x*g[x]);
        xxg[x] = (float)(x*x*g[x]);
    }

    Mat_<double> G(6, 6);
    G.setTo(0);

    for (int y = -n; y <= n; y++)
    {
        for (int x = -n; x <= n; x++)
        {
            G(0,0) += g[y]*g[x];
            G(1,1) += g[y]*g[x]*x*x;
            G(3,3) += g[y]*g[x]*x*x*x*x;
            G(5,5) += g[y]*g[x]*x*x*y*y;
        }
    }

    //G[0][0] = 1.;
    G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
    G(4,4) = G(3,3);
    G(3,4) = G(4,3) = G(5,5);

    // invG:
    // [ x        e  e    ]
    // [    y             ]
    // [       y          ]
    // [ e        z       ]
    // [ e           z    ]
    // [                u ]
    Mat_<double> invG = G.inv(DECOMP_CHOLESKY);

    ig11 = invG(1,1);
    ig03 = invG(0,3);
    ig33 = invG(3,3);
    ig55 = invG(5,5);
}

void cv::gpu::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
{
    vector<float> buf(n*6 + 3);
    float* g = &buf[0] + n;
    float* xg = g + n*2 + 1;
    float* xxg = xg + n*2 + 1;

    if (sigma < FLT_EPSILON)
        sigma = n*0.3;

    double ig11, ig03, ig33, ig55;
    prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);

    device::optflow_farneback::setPolinomialExpansionConsts(n, g, xg, xxg, ig11, ig03, ig33, ig55);
}

void cv::gpu::FarnebackOpticalFlow::updateFlow_boxFilter(
        const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
        GpuMat& M, GpuMat& bufM, int blockSize, bool updateMatrices, Stream streams[])
{
    device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, S(streams[0]));
    swap(M, bufM);

    for (int i = 1; i < 5; ++i)
        streams[i].waitForCompletion();

    device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));

    if (updateMatrices)
        device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
}

void cv::gpu::FarnebackOpticalFlow::updateFlow_gaussianBlur(
        const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
        GpuMat& M, GpuMat& bufM, int blockSize, bool updateMatrices, Stream streams[])
{
    device::optflow_farneback::gaussianBlur5Gpu(
            M, blockSize/2, bufM, BORDER_REPLICATE_GPU, S(streams[0]));
    swap(M, bufM);

    device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));

    if (updateMatrices)
        device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
}
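
// operator() below runs the standard coarse-to-fine scheme: crop the pyramid
// to useful levels, build it either with pyrDown() (fastPyramids) or by
// blurring and resizing the full-resolution frames, compute the polynomial
// expansions per level, then iterate updateMatrices / blur / updateFlow,
// upscaling the flow by 1/pyrScale between levels. Two CUDA streams overlap
// the per-frame work.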

void cv::gpu::FarnebackOpticalFlow::operator ()(
        const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s)
{
    CV_Assert(frame0.type() == CV_8U && frame1.type() == CV_8U);
    CV_Assert(frame0.size() == frame1.size());
    CV_Assert(polyN == 5 || polyN == 7);
    CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);

    Stream streams[5];
    if (S(s))
        streams[0] = s;

    Size size = frame0.size();
    GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;

    flowx.create(size, CV_32F);
    flowy.create(size, CV_32F);
    GpuMat flowx0 = flowx;
    GpuMat flowy0 = flowy;

    // Crop unnecessary levels
    double scale = 1;
    int numLevelsCropped = 0;
    for (; numLevelsCropped < numLevels; numLevelsCropped++)
    {
        scale *= pyrScale;
        if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
            break;
    }

    streams[0].enqueueConvert(frame0, frames_[0], CV_32F);
    streams[1].enqueueConvert(frame1, frames_[1], CV_32F);

    if (fastPyramids)
    {
        // Build Gaussian pyramids using pyrDown()
        pyramid0_.resize(numLevelsCropped + 1);
        pyramid1_.resize(numLevelsCropped + 1);
        pyramid0_[0] = frames_[0];
        pyramid1_[0] = frames_[1];
        for (int i = 1; i <= numLevelsCropped; ++i)
        {
            pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
            pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
        }
    }

    setPolynomialExpansionConsts(polyN, polySigma);
    device::optflow_farneback::setUpdateMatricesConsts();

    for (int k = numLevelsCropped; k >= 0; k--)
    {
        streams[0].waitForCompletion();

        scale = 1;
        for (int i = 0; i < k; i++)
            scale *= pyrScale;

        double sigma = (1./scale - 1) * 0.5;
        int smoothSize = cvRound(sigma*5) | 1;
        smoothSize = std::max(smoothSize, 3);

        int width = cvRound(size.width*scale);
        int height = cvRound(size.height*scale);

        if (fastPyramids)
        {
            width = pyramid0_[k].cols;
            height = pyramid0_[k].rows;
        }

        if (k > 0)
        {
            curFlowX.create(height, width, CV_32F);
            curFlowY.create(height, width, CV_32F);
        }
        else
        {
            curFlowX = flowx0;
            curFlowY = flowy0;
        }

        if (!prevFlowX.data)
        {
            if (flags & OPTFLOW_USE_INITIAL_FLOW)
            {
#if ENABLE_GPU_RESIZE
                resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
                resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
                streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), scale);
                streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), scale);
#else
                Mat tmp1, tmp2;
                flowx0.download(tmp1);
                resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
                tmp2 *= scale;
                curFlowX.upload(tmp2);
                flowy0.download(tmp1);
                resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
                tmp2 *= scale;
                curFlowY.upload(tmp2);
#endif
            }
            else
            {
                streams[0].enqueueMemSet(curFlowX, 0);
                streams[1].enqueueMemSet(curFlowY, 0);
            }
        }
        else
        {
#if ENABLE_GPU_RESIZE
            resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
            resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
            streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), 1./pyrScale);
            streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), 1./pyrScale);
#else
            Mat tmp1, tmp2;
            prevFlowX.download(tmp1);
            resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
            tmp2 *= 1./pyrScale;
            curFlowX.upload(tmp2);
            prevFlowY.download(tmp1);
            resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
            tmp2 *= 1./pyrScale;
            curFlowY.upload(tmp2);
#endif
        }

        GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
        GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
        GpuMat R[2] =
        {
            allocMatFromBuf(5*height, width, CV_32F, R_[0]),
            allocMatFromBuf(5*height, width, CV_32F, R_[1])
        };

        if (fastPyramids)
        {
            device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN, R[0], S(streams[0]));
            device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN, R[1], S(streams[1]));
        }
        else
        {
            GpuMat tmp[2] =
            {
                allocMatFromBuf(size.height, size.width, CV_32F, tmp_[0]),
                allocMatFromBuf(size.height, size.width, CV_32F, tmp_[1])
            };
            GpuMat I[2] =
            {
                allocMatFromBuf(height, width, CV_32F, I_[0]),
                allocMatFromBuf(height, width, CV_32F, I_[1])
            };

            Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
            device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);

            for (int i = 0; i < 2; i++)
            {
                device::optflow_farneback::gaussianBlurGpu(
                        frames_[i], smoothSize/2, tmp[i], BORDER_REFLECT101_GPU, S(streams[i]));
#if ENABLE_GPU_RESIZE
                resize(tmp[i], I[i], Size(width, height), INTER_LINEAR, streams[i]);
#else
                Mat tmp1, tmp2;
                tmp[i].download(tmp1);
                resize(tmp1, tmp2, Size(width, height), INTER_LINEAR);
                I[i].upload(tmp2);
#endif
                device::optflow_farneback::polynomialExpansionGpu(I[i], polyN, R[i], S(streams[i]));
            }
        }

        streams[1].waitForCompletion();
        device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, S(streams[0]));

        if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
        {
            Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
            device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
        }

        for (int i = 0; i < numIters; i++)
        {
            if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
                updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
            else
                updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
        }

        prevFlowX = curFlowX;
        prevFlowY = curFlowY;
    }

    flowx = curFlowX;
    flowy = curFlowY;

    if (!S(s))
        streams[0].waitForCompletion();
}

#endif