Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0e32d7cc
Commit
0e32d7cc
authored
Aug 27, 2013
by
Roman Donchenko
Committed by
OpenCV Buildbot
Aug 27, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1346 from jet47:gpu-tvl1-optimization
parents
525b6eca
bff0fad6
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
11 deletions
+26
-11
perf_video.cpp
modules/gpu/perf/perf_video.cpp
+2
-2
tvl1flow.cu
modules/gpu/src/cuda/tvl1flow.cu
+9
-6
tvl1flow.cpp
modules/gpu/src/tvl1flow.cpp
+15
-3
No files found.
modules/gpu/perf/perf_video.cpp
View file @
0e32d7cc
...
...
@@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
TEST_CYCLE
()
d_alg
(
d_frame0
,
d_frame1
,
u
,
v
);
GPU_SANITY_CHECK
(
u
,
1e-
2
);
GPU_SANITY_CHECK
(
v
,
1e-
2
);
GPU_SANITY_CHECK
(
u
,
1e-
1
);
GPU_SANITY_CHECK
(
v
,
1e-
1
);
}
else
{
...
...
modules/gpu/src/cuda/tvl1flow.cu
View file @
0e32d7cc
...
...
@@ -211,7 +211,7 @@ namespace tvl1flow
const PtrStepf grad, const PtrStepf rho_c,
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
PtrStepf u1, PtrStepf u2, PtrStepf error,
const float l_t, const float theta)
const float l_t, const float theta
, const bool calcError
)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
...
...
@@ -265,21 +265,24 @@ namespace tvl1flow
u1(y, x) = u1NewVal;
u2(y, x) = u2NewVal;
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error(y, x) = n1 + n2;
if (calcError)
{
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error(y, x) = n1 + n2;
}
}
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
float l_t, float theta)
float l_t, float theta
, bool calcError
)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta
, calcError
);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
...
...
modules/gpu/src/tvl1flow.cpp
View file @
0e32d7cc
...
...
@@ -173,7 +173,7 @@ namespace tvl1flow
PtrStepSzf
grad
,
PtrStepSzf
rho_c
,
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
error
,
float
l_t
,
float
theta
);
float
l_t
,
float
theta
,
bool
calcError
);
void
estimateDualVariables
(
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
float
taut
);
}
...
...
@@ -218,12 +218,24 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu
warpBackward
(
I0
,
I1
,
I1x
,
I1y
,
u1
,
u2
,
I1w
,
I1wx
,
I1wy
,
grad
,
rho_c
);
double
error
=
numeric_limits
<
double
>::
max
();
double
prevError
=
0.0
;
for
(
int
n
=
0
;
error
>
scaledEpsilon
&&
n
<
iterations
;
++
n
)
{
estimateU
(
I1wx
,
I1wy
,
grad
,
rho_c
,
p11
,
p12
,
p21
,
p22
,
u1
,
u2
,
diff
,
l_t
,
static_cast
<
float
>
(
theta
));
// some tweaks to make sum operation less frequently
bool
calcError
=
(
epsilon
>
0
)
&&
(
n
&
0x1
)
&&
(
prevError
<
scaledEpsilon
);
if
(
epsilon
>
0
)
estimateU
(
I1wx
,
I1wy
,
grad
,
rho_c
,
p11
,
p12
,
p21
,
p22
,
u1
,
u2
,
diff
,
l_t
,
static_cast
<
float
>
(
theta
),
calcError
);
if
(
calcError
)
{
error
=
gpu
::
sum
(
diff
,
norm_buf
)[
0
];
prevError
=
error
;
}
else
{
error
=
numeric_limits
<
double
>::
max
();
prevError
-=
scaledEpsilon
;
}
estimateDualVariables
(
u1
,
u2
,
p11
,
p12
,
p21
,
p22
,
taut
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment