Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
5623701a
Commit
5623701a
authored
Jul 28, 2014
by
Ernest Galbrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
performance issue for cuda TVL1 when gamma = 0
parent
df8f1a43
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
55 additions
and
34 deletions
+55
-34
tvl1flow.cu
modules/cudaoptflow/src/cuda/tvl1flow.cu
+24
-18
tvl1flow.cpp
modules/cudaoptflow/src/tvl1flow.cpp
+31
-16
No files found.
modules/cudaoptflow/src/cuda/tvl1flow.cu
View file @
5623701a
...
...
@@ -226,7 +226,7 @@ namespace tvl1flow
const float gradVal = grad(y, x);
const float u1OldVal = u1(y, x);
const float u2OldVal = u2(y, x);
const float u3OldVal =
u3(y, x)
;
const float u3OldVal =
gamma ? u3(y, x) : 0
;
const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal + gamma * u3OldVal);
...
...
@@ -240,20 +240,23 @@ namespace tvl1flow
{
d1 = l_t * I1wxVal;
d2 = l_t * I1wyVal;
d3 = l_t * gamma;
if (gamma)
d3 = l_t * gamma;
}
else if (rho > l_t * gradVal)
{
d1 = -l_t * I1wxVal;
d2 = -l_t * I1wyVal;
d3 = -l_t * gamma;
if (gamma)
d3 = -l_t * gamma;
}
else if (gradVal > numeric_limits<float>::epsilon())
{
const float fi = -rho / gradVal;
d1 = fi * I1wxVal;
d2 = fi * I1wyVal;
d3 = fi * gamma;
if (gamma)
d3 = fi * gamma;
}
const float v1 = u1OldVal + d1;
...
...
@@ -264,24 +267,24 @@ namespace tvl1flow
const float div_p1 = divergence(p11, p12, y, x);
const float div_p2 = divergence(p21, p22, y, x);
const float div_p3 =
divergence(p31, p32, y, x)
;
const float div_p3 =
gamma ? divergence(p31, p32, y, x) : 0
;
// estimate the values of the optical flow (u1, u2)
const float u1NewVal = v1 + theta * div_p1;
const float u2NewVal = v2 + theta * div_p2;
const float u3NewVal =
v3 + theta * div_p3
;
const float u3NewVal =
gamma ? v3 + theta * div_p3 : 0
;
u1(y, x) = u1NewVal;
u2(y, x) = u2NewVal;
u3(y, x) = u3NewVal;
if (gamma)
u3(y, x) = u3NewVal;
if (calcError)
{
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
const float n3 = 0;// (u3OldVal - u3NewVal) * (u3OldVal - u3NewVal);
error(y, x) = n1 + n2 + n3;
error(y, x) = n1 + n2;
}
}
...
...
@@ -307,7 +310,7 @@ namespace tvl1flow
namespace tvl1flow
{
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, const PtrStepSzf u3,
PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut)
PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut
, const float gamma
)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
...
...
@@ -321,31 +324,34 @@ namespace tvl1flow
const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x);
const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x);
const float u3x =
u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x)
;
const float u3y =
u3(::min(y + 1, u1.rows - 1), x) - u3(y, x)
;
const float u3x =
gamma ? u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x) : 0
;
const float u3y =
gamma ? u3(::min(y + 1, u1.rows - 1), x) - u3(y, x) : 0
;
const float g1 = ::hypotf(u1x, u1y);
const float g2 = ::hypotf(u2x, u2y);
const float g3 =
::hypotf(u3x, u3y)
;
const float g3 =
gamma ? ::hypotf(u3x, u3y) : 0
;
const float ng1 = 1.0f + taut * g1;
const float ng2 = 1.0f + taut * g2;
const float ng3 =
1.0f + taut * g3
;
const float ng3 =
gamma ? 1.0f + taut * g3 : 0
;
p11(y, x) = (p11(y, x) + taut * u1x) / ng1;
p12(y, x) = (p12(y, x) + taut * u1y) / ng1;
p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
p31(y, x) = (p31(y, x) + taut * u3x) / ng3;
p32(y, x) = (p32(y, x) + taut * u3y) / ng3;
if (gamma)
{
p31(y, x) = (p31(y, x) + taut * u3x) / ng3;
p32(y, x) = (p32(y, x) + taut * u3y) / ng3;
}
}
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut)
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut
, float gamma
)
{
const dim3 block(32, 8);
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut);
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut
, gamma
);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
...
...
modules/cudaoptflow/src/tvl1flow.cpp
View file @
5623701a
...
...
@@ -94,7 +94,8 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
u1s
[
0
]
=
flowx
;
u2s
[
0
]
=
flowy
;
u3s
[
0
].
create
(
I0
.
size
(),
CV_32FC1
);
if
(
gamma
)
u3s
[
0
].
create
(
I0
.
size
(),
CV_32FC1
);
I1x_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
I1y_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
...
...
@@ -110,9 +111,11 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
p12_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p21_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p22_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p31_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p32_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
if
(
gamma
)
{
p31_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p32_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
}
diff_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
// create the scales
...
...
@@ -140,7 +143,8 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
u1s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
u2s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
}
u3s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
if
(
gamma
)
u3s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
}
if
(
!
useInitialFlow
)
...
...
@@ -148,7 +152,8 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
u1s
[
nscales
-
1
].
setTo
(
Scalar
::
all
(
0
));
u2s
[
nscales
-
1
].
setTo
(
Scalar
::
all
(
0
));
}
u3s
[
nscales
-
1
].
setTo
(
Scalar
::
all
(
0
));
if
(
gamma
)
u3s
[
nscales
-
1
].
setTo
(
Scalar
::
all
(
0
));
// pyramidal structure for computing the optical flow
for
(
int
s
=
nscales
-
1
;
s
>=
0
;
--
s
)
...
...
@@ -165,7 +170,8 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
// zoom the optical flow for the next finer scale
cuda
::
resize
(
u1s
[
s
],
u1s
[
s
-
1
],
I0s
[
s
-
1
].
size
());
cuda
::
resize
(
u2s
[
s
],
u2s
[
s
-
1
],
I0s
[
s
-
1
].
size
());
cuda
::
resize
(
u3s
[
s
],
u3s
[
s
-
1
],
I0s
[
s
-
1
].
size
());
if
(
gamma
)
cuda
::
resize
(
u3s
[
s
],
u3s
[
s
-
1
],
I0s
[
s
-
1
].
size
());
// scale the optical flow with the appropriate zoom factor
cuda
::
multiply
(
u1s
[
s
-
1
],
Scalar
::
all
(
1
/
scaleStep
),
u1s
[
s
-
1
]);
...
...
@@ -182,7 +188,7 @@ namespace tvl1flow
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
PtrStepSzf
p31
,
PtrStepSzf
p32
,
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
u3
,
PtrStepSzf
error
,
float
l_t
,
float
theta
,
float
gamma
,
bool
calcError
);
void
estimateDualVariables
(
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
u3
,
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
PtrStepSzf
p31
,
PtrStepSzf
p32
,
float
taut
);
void
estimateDualVariables
(
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
u3
,
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
PtrStepSzf
p31
,
PtrStepSzf
p32
,
float
taut
,
const
float
gamma
);
}
void
cv
::
cuda
::
OpticalFlowDual_TVL1_CUDA
::
procOneScale
(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
u1
,
GpuMat
&
u2
,
GpuMat
&
u3
)
...
...
@@ -211,14 +217,21 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G
GpuMat
p12
=
p12_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p21
=
p21_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p22
=
p22_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p31
=
p31_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p32
=
p32_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p31
,
p32
;
if
(
gamma
)
{
p31
=
p31_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
p32
=
p32_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
}
p11
.
setTo
(
Scalar
::
all
(
0
));
p12
.
setTo
(
Scalar
::
all
(
0
));
p21
.
setTo
(
Scalar
::
all
(
0
));
p22
.
setTo
(
Scalar
::
all
(
0
));
p31
.
setTo
(
Scalar
::
all
(
0
));
p32
.
setTo
(
Scalar
::
all
(
0
));
if
(
gamma
)
{
p31
.
setTo
(
Scalar
::
all
(
0
));
p32
.
setTo
(
Scalar
::
all
(
0
));
}
GpuMat
diff
=
diff_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
...
...
@@ -248,7 +261,7 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G
prevError
-=
scaledEpsilon
;
}
estimateDualVariables
(
u1
,
u2
,
u3
,
p11
,
p12
,
p21
,
p22
,
p31
,
p32
,
taut
);
estimateDualVariables
(
u1
,
u2
,
u3
,
p11
,
p12
,
p21
,
p22
,
p31
,
p32
,
taut
,
gamma
);
}
}
}
...
...
@@ -275,9 +288,11 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage()
p12_buf
.
release
();
p21_buf
.
release
();
p22_buf
.
release
();
p31_buf
.
release
();
p32_buf
.
release
();
if
(
gamma
)
{
p31_buf
.
release
();
p32_buf
.
release
();
}
diff_buf
.
release
();
norm_buf
.
release
();
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment