Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
05eb9f35
Commit
05eb9f35
authored
Mar 15, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1062 from pengli:dis_optflow
parents
1ecbb8f0
7ed6f778
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
1120 additions
and
1 deletion
+1120
-1
perf_dis_optflow.cpp
modules/optflow/perf/opencl/perf_dis_optflow.cpp
+113
-0
dis_flow.cpp
modules/optflow/src/dis_flow.cpp
+383
-1
dis_flow.cl
modules/optflow/src/opencl/dis_flow.cl
+522
-0
precomp.hpp
modules/optflow/src/precomp.hpp
+2
-0
test_dis.cpp
modules/optflow/test/ocl/test_dis.cpp
+100
-0
No files found.
modules/optflow/perf/opencl/perf_dis_optflow.cpp
0 → 100644
View file @
05eb9f35
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
using
std
::
tr1
::
tuple
;
using
std
::
tr1
::
get
;
using
namespace
perf
;
using
namespace
testing
;
using
namespace
cv
;
using
namespace
cv
::
optflow
;
#ifdef HAVE_OPENCL
namespace
cvtest
{
namespace
ocl
{
void
MakeArtificialExample
(
UMat
&
dst_frame1
,
UMat
&
dst_frame2
);
typedef
tuple
<
String
,
Size
>
DISParams
;
typedef
TestBaseWithParam
<
DISParams
>
DenseOpticalFlow_DIS
;
OCL_PERF_TEST_P
(
DenseOpticalFlow_DIS
,
perf
,
Combine
(
Values
(
"PRESET_ULTRAFAST"
,
"PRESET_FAST"
,
"PRESET_MEDIUM"
),
Values
(
szVGA
,
sz720p
,
sz1080p
)))
{
DISParams
params
=
GetParam
();
// use strings to print preset names in the perf test results:
String
preset_string
=
get
<
0
>
(
params
);
int
preset
=
DISOpticalFlow
::
PRESET_FAST
;
if
(
preset_string
==
"PRESET_ULTRAFAST"
)
preset
=
DISOpticalFlow
::
PRESET_ULTRAFAST
;
else
if
(
preset_string
==
"PRESET_FAST"
)
preset
=
DISOpticalFlow
::
PRESET_FAST
;
else
if
(
preset_string
==
"PRESET_MEDIUM"
)
preset
=
DISOpticalFlow
::
PRESET_MEDIUM
;
Size
sz
=
get
<
1
>
(
params
);
UMat
frame1
(
sz
,
CV_8U
);
UMat
frame2
(
sz
,
CV_8U
);
UMat
flow
;
MakeArtificialExample
(
frame1
,
frame2
);
Ptr
<
DenseOpticalFlow
>
algo
=
createOptFlow_DIS
(
preset
);
OCL_TEST_CYCLE_N
(
10
)
{
algo
->
calc
(
frame1
,
frame2
,
flow
);
}
SANITY_CHECK_NOTHING
();
}
void
MakeArtificialExample
(
UMat
&
dst_frame1
,
UMat
&
dst_frame2
)
{
int
src_scale
=
2
;
int
OF_scale
=
6
;
double
sigma
=
dst_frame1
.
cols
/
300
;
UMat
tmp
(
Size
(
dst_frame1
.
cols
/
(
int
)
pow
(
2
,
src_scale
),
dst_frame1
.
rows
/
(
int
)
pow
(
2
,
src_scale
)),
CV_8U
);
randu
(
tmp
,
0
,
255
);
resize
(
tmp
,
dst_frame1
,
dst_frame1
.
size
(),
0.0
,
0.0
,
INTER_LINEAR
);
resize
(
tmp
,
dst_frame2
,
dst_frame2
.
size
(),
0.0
,
0.0
,
INTER_LINEAR
);
Mat
displacement_field
(
Size
(
dst_frame1
.
cols
/
(
int
)
pow
(
2
,
OF_scale
),
dst_frame1
.
rows
/
(
int
)
pow
(
2
,
OF_scale
)),
CV_32FC2
);
randn
(
displacement_field
,
0.0
,
sigma
);
resize
(
displacement_field
,
displacement_field
,
dst_frame2
.
size
(),
0.0
,
0.0
,
INTER_CUBIC
);
for
(
int
i
=
0
;
i
<
displacement_field
.
rows
;
i
++
)
for
(
int
j
=
0
;
j
<
displacement_field
.
cols
;
j
++
)
displacement_field
.
at
<
Vec2f
>
(
i
,
j
)
+=
Vec2f
((
float
)
j
,
(
float
)
i
);
remap
(
dst_frame2
,
dst_frame2
,
displacement_field
,
Mat
(),
INTER_LINEAR
,
BORDER_REPLICATE
);
}
}
}
// namespace cvtest::ocl
#endif // HAVE_OPENCL
modules/optflow/src/dis_flow.cpp
View file @
05eb9f35
...
...
@@ -42,6 +42,8 @@
#include "opencv2/core/hal/intrin.hpp"
#include "precomp.hpp"
#include "opencl_kernels_optflow.hpp"
using
namespace
std
;
#define EPS 0.001F
#define INF 1E+10F
...
...
@@ -166,6 +168,50 @@ class DISOpticalFlowImpl : public DISOpticalFlow
Mat
&
src_Sy
,
Mat
&
_I0
,
Mat
&
_I1
);
void
operator
()(
const
Range
&
range
)
const
;
};
#ifdef HAVE_OPENCL
vector
<
UMat
>
u_I0s
;
//!< Gaussian pyramid for the current frame
vector
<
UMat
>
u_I1s
;
//!< Gaussian pyramid for the next frame
vector
<
UMat
>
u_I1s_ext
;
//!< I1s with borders
vector
<
UMat
>
u_I0xs
;
//!< Gaussian pyramid for the x gradient of the current frame
vector
<
UMat
>
u_I0ys
;
//!< Gaussian pyramid for the y gradient of the current frame
vector
<
UMat
>
u_Ux
;
//!< x component of the flow vectors
vector
<
UMat
>
u_Uy
;
//!< y component of the flow vectors
vector
<
UMat
>
u_initial_Ux
;
//!< x component of the initial flow field, if one was passed as an input
vector
<
UMat
>
u_initial_Uy
;
//!< y component of the initial flow field, if one was passed as an input
UMat
u_U
;
//!< a buffer for the merged flow
UMat
u_Sx
;
//!< intermediate sparse flow representation (x component)
UMat
u_Sy
;
//!< intermediate sparse flow representation (y component)
/* Structure tensor components: */
UMat
u_I0xx_buf
;
//!< sum of squares of x gradient values
UMat
u_I0yy_buf
;
//!< sum of squares of y gradient values
UMat
u_I0xy_buf
;
//!< sum of x and y gradient products
/* Extra buffers that are useful if patch mean-normalization is used: */
UMat
u_I0x_buf
;
//!< sum of x gradient values
UMat
u_I0y_buf
;
//!< sum of y gradient values
/* Auxiliary buffers used in structure tensor computation: */
UMat
u_I0xx_buf_aux
;
UMat
u_I0yy_buf_aux
;
UMat
u_I0xy_buf_aux
;
UMat
u_I0x_buf_aux
;
UMat
u_I0y_buf_aux
;
bool
ocl_precomputeStructureTensor
(
UMat
&
dst_I0xx
,
UMat
&
dst_I0yy
,
UMat
&
dst_I0xy
,
UMat
&
dst_I0x
,
UMat
&
dst_I0y
,
UMat
&
I0x
,
UMat
&
I0y
);
void
ocl_prepareBuffers
(
UMat
&
I0
,
UMat
&
I1
,
UMat
&
flow
,
bool
use_flow
);
bool
ocl_calc
(
InputArray
I0
,
InputArray
I1
,
InputOutputArray
flow
);
bool
ocl_Densification
(
UMat
&
dst_Ux
,
UMat
&
dst_Uy
,
UMat
&
src_Sx
,
UMat
&
src_Sy
,
UMat
&
_I0
,
UMat
&
_I1
);
bool
ocl_PatchInverseSearch
(
UMat
&
src_Ux
,
UMat
&
src_Uy
,
UMat
&
I0
,
UMat
&
I1
,
UMat
&
I0x
,
UMat
&
I0y
,
int
num_iter
,
int
pyr_level
);
#endif
};
DISOpticalFlowImpl
::
DISOpticalFlowImpl
()
...
...
@@ -1004,6 +1050,319 @@ void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) c
#undef UPDATE_SPARSE_J_COORDINATES
}
#ifdef HAVE_OPENCL
bool
DISOpticalFlowImpl
::
ocl_PatchInverseSearch
(
UMat
&
src_Ux
,
UMat
&
src_Uy
,
UMat
&
I0
,
UMat
&
I1
,
UMat
&
I0x
,
UMat
&
I0y
,
int
num_iter
,
int
pyr_level
)
{
size_t
globalSize
[]
=
{(
size_t
)
ws
,
(
size_t
)
hs
};
size_t
localSize
[]
=
{
16
,
16
};
int
idx
;
int
num_inner_iter
=
(
int
)
floor
(
grad_descent_iter
/
(
float
)
num_iter
);
for
(
int
iter
=
0
;
iter
<
num_iter
;
iter
++
)
{
if
(
iter
==
0
)
{
ocl
::
Kernel
k1
(
"dis_patch_inverse_search_fwd_1"
,
ocl
::
optflow
::
dis_flow_oclsrc
);
size_t
global_sz
[]
=
{(
size_t
)
hs
*
8
};
size_t
local_sz
[]
=
{
8
};
idx
=
0
;
idx
=
k1
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
src_Ux
));
idx
=
k1
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
src_Uy
));
idx
=
k1
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0
));
idx
=
k1
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I1
));
idx
=
k1
.
set
(
idx
,
(
int
)
border_size
);
idx
=
k1
.
set
(
idx
,
(
int
)
patch_size
);
idx
=
k1
.
set
(
idx
,
(
int
)
patch_stride
);
idx
=
k1
.
set
(
idx
,
(
int
)
w
);
idx
=
k1
.
set
(
idx
,
(
int
)
h
);
idx
=
k1
.
set
(
idx
,
(
int
)
ws
);
idx
=
k1
.
set
(
idx
,
(
int
)
hs
);
idx
=
k1
.
set
(
idx
,
(
int
)
pyr_level
);
idx
=
k1
.
set
(
idx
,
ocl
::
KernelArg
::
PtrWriteOnly
(
u_Sx
));
idx
=
k1
.
set
(
idx
,
ocl
::
KernelArg
::
PtrWriteOnly
(
u_Sy
));
if
(
!
k1
.
run
(
1
,
global_sz
,
local_sz
,
false
))
return
false
;
ocl
::
Kernel
k2
(
"dis_patch_inverse_search_fwd_2"
,
ocl
::
optflow
::
dis_flow_oclsrc
);
idx
=
0
;
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
src_Ux
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
src_Uy
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I1
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0x
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0y
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0xx_buf
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0yy_buf
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0xy_buf
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0x_buf
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0y_buf
));
idx
=
k2
.
set
(
idx
,
(
int
)
border_size
);
idx
=
k2
.
set
(
idx
,
(
int
)
patch_size
);
idx
=
k2
.
set
(
idx
,
(
int
)
patch_stride
);
idx
=
k2
.
set
(
idx
,
(
int
)
w
);
idx
=
k2
.
set
(
idx
,
(
int
)
h
);
idx
=
k2
.
set
(
idx
,
(
int
)
ws
);
idx
=
k2
.
set
(
idx
,
(
int
)
hs
);
idx
=
k2
.
set
(
idx
,
(
int
)
num_inner_iter
);
idx
=
k2
.
set
(
idx
,
(
int
)
pyr_level
);
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadWrite
(
u_Sx
));
idx
=
k2
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadWrite
(
u_Sy
));
if
(
!
k2
.
run
(
2
,
globalSize
,
localSize
,
false
))
return
false
;
}
else
{
ocl
::
Kernel
k3
(
"dis_patch_inverse_search_bwd_1"
,
ocl
::
optflow
::
dis_flow_oclsrc
);
size_t
global_sz
[]
=
{(
size_t
)
hs
*
8
};
size_t
local_sz
[]
=
{
8
};
idx
=
0
;
idx
=
k3
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0
));
idx
=
k3
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I1
));
idx
=
k3
.
set
(
idx
,
(
int
)
border_size
);
idx
=
k3
.
set
(
idx
,
(
int
)
patch_size
);
idx
=
k3
.
set
(
idx
,
(
int
)
patch_stride
);
idx
=
k3
.
set
(
idx
,
(
int
)
w
);
idx
=
k3
.
set
(
idx
,
(
int
)
h
);
idx
=
k3
.
set
(
idx
,
(
int
)
ws
);
idx
=
k3
.
set
(
idx
,
(
int
)
hs
);
idx
=
k3
.
set
(
idx
,
(
int
)
pyr_level
);
idx
=
k3
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadWrite
(
u_Sx
));
idx
=
k3
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadWrite
(
u_Sy
));
if
(
!
k3
.
run
(
1
,
global_sz
,
local_sz
,
false
))
return
false
;
ocl
::
Kernel
k4
(
"dis_patch_inverse_search_bwd_2"
,
ocl
::
optflow
::
dis_flow_oclsrc
);
idx
=
0
;
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I1
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0x
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
I0y
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0xx_buf
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0yy_buf
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0xy_buf
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0x_buf
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0y_buf
));
idx
=
k4
.
set
(
idx
,
(
int
)
border_size
);
idx
=
k4
.
set
(
idx
,
(
int
)
patch_size
);
idx
=
k4
.
set
(
idx
,
(
int
)
patch_stride
);
idx
=
k4
.
set
(
idx
,
(
int
)
w
);
idx
=
k4
.
set
(
idx
,
(
int
)
h
);
idx
=
k4
.
set
(
idx
,
(
int
)
ws
);
idx
=
k4
.
set
(
idx
,
(
int
)
hs
);
idx
=
k4
.
set
(
idx
,
(
int
)
num_inner_iter
);
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadWrite
(
u_Sx
));
idx
=
k4
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadWrite
(
u_Sy
));
if
(
!
k4
.
run
(
2
,
globalSize
,
localSize
,
false
))
return
false
;
}
}
return
true
;
}
bool
DISOpticalFlowImpl
::
ocl_Densification
(
UMat
&
dst_Ux
,
UMat
&
dst_Uy
,
UMat
&
src_Sx
,
UMat
&
src_Sy
,
UMat
&
_I0
,
UMat
&
_I1
)
{
size_t
globalSize
[]
=
{(
size_t
)
w
,
(
size_t
)
h
};
size_t
localSize
[]
=
{
16
,
16
};
ocl
::
Kernel
kernel
(
"dis_densification"
,
ocl
::
optflow
::
dis_flow_oclsrc
);
kernel
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src_Sx
),
ocl
::
KernelArg
::
PtrReadOnly
(
src_Sy
),
ocl
::
KernelArg
::
PtrReadOnly
(
_I0
),
ocl
::
KernelArg
::
PtrReadOnly
(
_I1
),
(
int
)
patch_size
,
(
int
)
patch_stride
,
(
int
)
w
,
(
int
)
h
,
(
int
)
ws
,
ocl
::
KernelArg
::
PtrWriteOnly
(
dst_Ux
),
ocl
::
KernelArg
::
PtrWriteOnly
(
dst_Uy
));
return
kernel
.
run
(
2
,
globalSize
,
localSize
,
false
);
}
void
DISOpticalFlowImpl
::
ocl_prepareBuffers
(
UMat
&
I0
,
UMat
&
I1
,
UMat
&
flow
,
bool
use_flow
)
{
u_I0s
.
resize
(
coarsest_scale
+
1
);
u_I1s
.
resize
(
coarsest_scale
+
1
);
u_I1s_ext
.
resize
(
coarsest_scale
+
1
);
u_I0xs
.
resize
(
coarsest_scale
+
1
);
u_I0ys
.
resize
(
coarsest_scale
+
1
);
u_Ux
.
resize
(
coarsest_scale
+
1
);
u_Uy
.
resize
(
coarsest_scale
+
1
);
vector
<
UMat
>
flow_uv
(
2
);
if
(
use_flow
)
{
split
(
flow
,
flow_uv
);
u_initial_Ux
.
resize
(
coarsest_scale
+
1
);
u_initial_Uy
.
resize
(
coarsest_scale
+
1
);
}
int
fraction
=
1
;
int
cur_rows
=
0
,
cur_cols
=
0
;
for
(
int
i
=
0
;
i
<=
coarsest_scale
;
i
++
)
{
/* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */
if
(
i
==
finest_scale
)
{
cur_rows
=
I0
.
rows
/
fraction
;
cur_cols
=
I0
.
cols
/
fraction
;
u_I0s
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_8UC1
);
resize
(
I0
,
u_I0s
[
i
],
u_I0s
[
i
].
size
(),
0.0
,
0.0
,
INTER_AREA
);
u_I1s
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_8UC1
);
resize
(
I1
,
u_I1s
[
i
],
u_I1s
[
i
].
size
(),
0.0
,
0.0
,
INTER_AREA
);
/* These buffers are reused in each scale so we initialize them once on the finest scale: */
u_Sx
.
create
(
cur_rows
/
patch_stride
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_Sy
.
create
(
cur_rows
/
patch_stride
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0xx_buf
.
create
(
cur_rows
/
patch_stride
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0yy_buf
.
create
(
cur_rows
/
patch_stride
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0xy_buf
.
create
(
cur_rows
/
patch_stride
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0x_buf
.
create
(
cur_rows
/
patch_stride
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0y_buf
.
create
(
cur_rows
/
patch_stride
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0xx_buf_aux
.
create
(
cur_rows
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0yy_buf_aux
.
create
(
cur_rows
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0xy_buf_aux
.
create
(
cur_rows
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0x_buf_aux
.
create
(
cur_rows
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_I0y_buf_aux
.
create
(
cur_rows
,
cur_cols
/
patch_stride
,
CV_32FC1
);
u_U
.
create
(
cur_rows
,
cur_cols
,
CV_32FC2
);
}
else
if
(
i
>
finest_scale
)
{
cur_rows
=
u_I0s
[
i
-
1
].
rows
/
2
;
cur_cols
=
u_I0s
[
i
-
1
].
cols
/
2
;
u_I0s
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_8UC1
);
resize
(
u_I0s
[
i
-
1
],
u_I0s
[
i
],
u_I0s
[
i
].
size
(),
0.0
,
0.0
,
INTER_AREA
);
u_I1s
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_8UC1
);
resize
(
u_I1s
[
i
-
1
],
u_I1s
[
i
],
u_I1s
[
i
].
size
(),
0.0
,
0.0
,
INTER_AREA
);
}
if
(
i
>=
finest_scale
)
{
u_I1s_ext
[
i
].
create
(
cur_rows
+
2
*
border_size
,
cur_cols
+
2
*
border_size
,
CV_8UC1
);
copyMakeBorder
(
u_I1s
[
i
],
u_I1s_ext
[
i
],
border_size
,
border_size
,
border_size
,
border_size
,
BORDER_REPLICATE
);
u_I0xs
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_16SC1
);
u_I0ys
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_16SC1
);
spatialGradient
(
u_I0s
[
i
],
u_I0xs
[
i
],
u_I0ys
[
i
]);
u_Ux
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_32FC1
);
u_Uy
[
i
].
create
(
cur_rows
,
cur_cols
,
CV_32FC1
);
variational_refinement_processors
[
i
]
->
setAlpha
(
variational_refinement_alpha
);
variational_refinement_processors
[
i
]
->
setDelta
(
variational_refinement_delta
);
variational_refinement_processors
[
i
]
->
setGamma
(
variational_refinement_gamma
);
variational_refinement_processors
[
i
]
->
setSorIterations
(
5
);
variational_refinement_processors
[
i
]
->
setFixedPointIterations
(
variational_refinement_iter
);
if
(
use_flow
)
{
resize
(
flow_uv
[
0
],
u_initial_Ux
[
i
],
Size
(
cur_cols
,
cur_rows
));
divide
(
u_initial_Ux
[
i
],
static_cast
<
float
>
(
fraction
),
u_initial_Ux
[
i
]);
resize
(
flow_uv
[
1
],
u_initial_Uy
[
i
],
Size
(
cur_cols
,
cur_rows
));
divide
(
u_initial_Uy
[
i
],
static_cast
<
float
>
(
fraction
),
u_initial_Uy
[
i
]);
}
}
fraction
*=
2
;
}
}
bool
DISOpticalFlowImpl
::
ocl_precomputeStructureTensor
(
UMat
&
dst_I0xx
,
UMat
&
dst_I0yy
,
UMat
&
dst_I0xy
,
UMat
&
dst_I0x
,
UMat
&
dst_I0y
,
UMat
&
I0x
,
UMat
&
I0y
)
{
size_t
globalSizeX
[]
=
{(
size_t
)
h
};
size_t
localSizeX
[]
=
{
16
};
ocl
::
Kernel
kernelX
(
"dis_precomputeStructureTensor_hor"
,
ocl
::
optflow
::
dis_flow_oclsrc
);
kernelX
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
I0x
),
ocl
::
KernelArg
::
PtrReadOnly
(
I0y
),
(
int
)
patch_size
,
(
int
)
patch_stride
,
(
int
)
w
,
(
int
)
h
,
(
int
)
ws
,
ocl
::
KernelArg
::
PtrWriteOnly
(
u_I0xx_buf_aux
),
ocl
::
KernelArg
::
PtrWriteOnly
(
u_I0yy_buf_aux
),
ocl
::
KernelArg
::
PtrWriteOnly
(
u_I0xy_buf_aux
),
ocl
::
KernelArg
::
PtrWriteOnly
(
u_I0x_buf_aux
),
ocl
::
KernelArg
::
PtrWriteOnly
(
u_I0y_buf_aux
));
if
(
!
kernelX
.
run
(
1
,
globalSizeX
,
localSizeX
,
false
))
return
false
;
size_t
globalSizeY
[]
=
{(
size_t
)
ws
};
size_t
localSizeY
[]
=
{
16
};
ocl
::
Kernel
kernelY
(
"dis_precomputeStructureTensor_ver"
,
ocl
::
optflow
::
dis_flow_oclsrc
);
kernelY
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0xx_buf_aux
),
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0yy_buf_aux
),
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0xy_buf_aux
),
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0x_buf_aux
),
ocl
::
KernelArg
::
PtrReadOnly
(
u_I0y_buf_aux
),
(
int
)
patch_size
,
(
int
)
patch_stride
,
(
int
)
w
,
(
int
)
h
,
(
int
)
ws
,
ocl
::
KernelArg
::
PtrWriteOnly
(
dst_I0xx
),
ocl
::
KernelArg
::
PtrWriteOnly
(
dst_I0yy
),
ocl
::
KernelArg
::
PtrWriteOnly
(
dst_I0xy
),
ocl
::
KernelArg
::
PtrWriteOnly
(
dst_I0x
),
ocl
::
KernelArg
::
PtrWriteOnly
(
dst_I0y
));
return
kernelY
.
run
(
1
,
globalSizeY
,
localSizeY
,
false
);
}
bool
DISOpticalFlowImpl
::
ocl_calc
(
InputArray
I0
,
InputArray
I1
,
InputOutputArray
flow
)
{
UMat
I0Mat
=
I0
.
getUMat
();
UMat
I1Mat
=
I1
.
getUMat
();
bool
use_input_flow
=
false
;
if
(
flow
.
sameSize
(
I0
)
&&
flow
.
depth
()
==
CV_32F
&&
flow
.
channels
()
==
2
)
use_input_flow
=
true
;
else
flow
.
create
(
I1Mat
.
size
(),
CV_32FC2
);
UMat
&
u_flowMat
=
flow
.
getUMatRef
();
coarsest_scale
=
(
int
)(
log
((
2
*
I0Mat
.
cols
)
/
(
4.0
*
patch_size
))
/
log
(
2.0
)
+
0.5
)
-
1
;
ocl_prepareBuffers
(
I0Mat
,
I1Mat
,
u_flowMat
,
use_input_flow
);
u_Ux
[
coarsest_scale
].
setTo
(
0.0
f
);
u_Uy
[
coarsest_scale
].
setTo
(
0.0
f
);
for
(
int
i
=
coarsest_scale
;
i
>=
finest_scale
;
i
--
)
{
w
=
u_I0s
[
i
].
cols
;
h
=
u_I0s
[
i
].
rows
;
ws
=
1
+
(
w
-
patch_size
)
/
patch_stride
;
hs
=
1
+
(
h
-
patch_size
)
/
patch_stride
;
if
(
!
ocl_precomputeStructureTensor
(
u_I0xx_buf
,
u_I0yy_buf
,
u_I0xy_buf
,
u_I0x_buf
,
u_I0y_buf
,
u_I0xs
[
i
],
u_I0ys
[
i
]))
return
false
;
if
(
!
ocl_PatchInverseSearch
(
u_Ux
[
i
],
u_Uy
[
i
],
u_I0s
[
i
],
u_I1s_ext
[
i
],
u_I0xs
[
i
],
u_I0ys
[
i
],
2
,
i
))
return
false
;
if
(
!
ocl_Densification
(
u_Ux
[
i
],
u_Uy
[
i
],
u_Sx
,
u_Sy
,
u_I0s
[
i
],
u_I1s
[
i
]))
return
false
;
if
(
variational_refinement_iter
>
0
)
variational_refinement_processors
[
i
]
->
calcUV
(
u_I0s
[
i
],
u_I1s
[
i
],
u_Ux
[
i
].
getMat
(
ACCESS_WRITE
),
u_Uy
[
i
].
getMat
(
ACCESS_WRITE
));
if
(
i
>
finest_scale
)
{
resize
(
u_Ux
[
i
],
u_Ux
[
i
-
1
],
u_Ux
[
i
-
1
].
size
());
resize
(
u_Uy
[
i
],
u_Uy
[
i
-
1
],
u_Uy
[
i
-
1
].
size
());
multiply
(
u_Ux
[
i
-
1
],
2
,
u_Ux
[
i
-
1
]);
multiply
(
u_Uy
[
i
-
1
],
2
,
u_Uy
[
i
-
1
]);
}
}
vector
<
UMat
>
uxy
(
2
);
uxy
[
0
]
=
u_Ux
[
finest_scale
];
uxy
[
1
]
=
u_Uy
[
finest_scale
];
merge
(
uxy
,
u_U
);
resize
(
u_U
,
u_flowMat
,
u_flowMat
.
size
());
multiply
(
u_flowMat
,
1
<<
finest_scale
,
u_flowMat
);
return
true
;
}
#endif
void
DISOpticalFlowImpl
::
calc
(
InputArray
I0
,
InputArray
I1
,
InputOutputArray
flow
)
{
CV_Assert
(
!
I0
.
empty
()
&&
I0
.
depth
()
==
CV_8U
&&
I0
.
channels
()
==
1
);
...
...
@@ -1012,6 +1371,10 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo
CV_Assert
(
I0
.
isContinuous
());
CV_Assert
(
I1
.
isContinuous
());
CV_OCL_RUN
(
ocl
::
Device
::
getDefault
().
isIntel
()
&&
flow
.
isUMat
()
&&
(
patch_size
==
8
)
&&
(
use_spatial_propagation
==
true
),
ocl_calc
(
I0
,
I1
,
flow
));
Mat
I0Mat
=
I0
.
getMat
();
Mat
I1Mat
=
I1
.
getMat
();
bool
use_input_flow
=
false
;
...
...
@@ -1019,7 +1382,7 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo
use_input_flow
=
true
;
else
flow
.
create
(
I1Mat
.
size
(),
CV_32FC2
);
Mat
&
flowMat
=
flow
.
getMatRef
();
Mat
flowMat
=
flow
.
getMat
();
coarsest_scale
=
(
int
)(
log
((
2
*
I0Mat
.
cols
)
/
(
4.0
*
patch_size
))
/
log
(
2.0
)
+
0.5
)
-
1
;
int
num_stripes
=
getNumThreads
();
...
...
@@ -1088,6 +1451,25 @@ void DISOpticalFlowImpl::collectGarbage()
I0yy_buf_aux
.
release
();
I0xy_buf_aux
.
release
();
#ifdef HAVE_OPENCL
u_I0s
.
clear
();
u_I1s
.
clear
();
u_I1s_ext
.
clear
();
u_I0xs
.
clear
();
u_I0ys
.
clear
();
u_Ux
.
clear
();
u_Uy
.
clear
();
u_U
.
release
();
u_Sx
.
release
();
u_Sy
.
release
();
u_I0xx_buf
.
release
();
u_I0yy_buf
.
release
();
u_I0xy_buf
.
release
();
u_I0xx_buf_aux
.
release
();
u_I0yy_buf_aux
.
release
();
u_I0xy_buf_aux
.
release
();
#endif
for
(
int
i
=
finest_scale
;
i
<=
coarsest_scale
;
i
++
)
variational_refinement_processors
[
i
]
->
collectGarbage
();
variational_refinement_processors
.
clear
();
...
...
modules/optflow/src/opencl/dis_flow.cl
0 → 100644
View file @
05eb9f35
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
#
define
EPS
0.001f
#
define
INF
1E+10F
__kernel
void
dis_precomputeStructureTensor_hor
(
__global
const
short
*I0x,
__global
const
short
*I0y,
int
patch_size,
int
patch_stride,
int
w,
int
h,
int
ws,
__global
float
*I0xx_aux_ptr,
__global
float
*I0yy_aux_ptr,
__global
float
*I0xy_aux_ptr,
__global
float
*I0x_aux_ptr,
__global
float
*I0y_aux_ptr
)
{
int
i
=
get_global_id
(
0
)
;
if
(
i
>=
h
)
return
;
const
__global
short
*x_row
=
I0x
+
i
*
w
;
const
__global
short
*y_row
=
I0y
+
i
*
w
;
float
sum_xx
=
0.0f,
sum_yy
=
0.0f,
sum_xy
=
0.0f,
sum_x
=
0.0f,
sum_y
=
0.0f
;
float8
x_vec
=
convert_float8
(
vload8
(
0
,
x_row
))
;
float8
y_vec
=
convert_float8
(
vload8
(
0
,
y_row
))
;
sum_xx
=
dot
(
x_vec.lo,
x_vec.lo
)
+
dot
(
x_vec.hi,
x_vec.hi
)
;
sum_yy
=
dot
(
y_vec.lo,
y_vec.lo
)
+
dot
(
y_vec.hi,
y_vec.hi
)
;
sum_xy
=
dot
(
x_vec.lo,
y_vec.lo
)
+
dot
(
x_vec.hi,
y_vec.hi
)
;
sum_x
=
dot
(
x_vec.lo,
1.0f
)
+
dot
(
x_vec.hi,
1.0f
)
;
sum_y
=
dot
(
y_vec.lo,
1.0f
)
+
dot
(
y_vec.hi,
1.0f
)
;
I0xx_aux_ptr[i
*
ws]
=
sum_xx
;
I0yy_aux_ptr[i
*
ws]
=
sum_yy
;
I0xy_aux_ptr[i
*
ws]
=
sum_xy
;
I0x_aux_ptr[i
*
ws]
=
sum_x
;
I0y_aux_ptr[i
*
ws]
=
sum_y
;
int
js
=
1
;
for
(
int
j
=
patch_size
; j < w; j++)
{
short
x_val1
=
x_row[j]
;
short
x_val2
=
x_row[j
-
patch_size]
;
short
y_val1
=
y_row[j]
;
short
y_val2
=
y_row[j
-
patch_size]
;
sum_xx
+=
(
x_val1
*
x_val1
-
x_val2
*
x_val2
)
;
sum_yy
+=
(
y_val1
*
y_val1
-
y_val2
*
y_val2
)
;
sum_xy
+=
(
x_val1
*
y_val1
-
x_val2
*
y_val2
)
;
sum_x
+=
(
x_val1
-
x_val2
)
;
sum_y
+=
(
y_val1
-
y_val2
)
;
if
((
j
-
patch_size
+
1
)
%
patch_stride
==
0
)
{
int
index
=
i
*
ws
+
js
;
I0xx_aux_ptr[index]
=
sum_xx
;
I0yy_aux_ptr[index]
=
sum_yy
;
I0xy_aux_ptr[index]
=
sum_xy
;
I0x_aux_ptr[index]
=
sum_x
;
I0y_aux_ptr[index]
=
sum_y
;
js++
;
}
}
}
__kernel
void
dis_precomputeStructureTensor_ver
(
__global
const
float
*I0xx_aux_ptr,
__global
const
float
*I0yy_aux_ptr,
__global
const
float
*I0xy_aux_ptr,
__global
const
float
*I0x_aux_ptr,
__global
const
float
*I0y_aux_ptr,
int
patch_size,
int
patch_stride,
int
w,
int
h,
int
ws,
__global
float
*I0xx_ptr,
__global
float
*I0yy_ptr,
__global
float
*I0xy_ptr,
__global
float
*I0x_ptr,
__global
float
*I0y_ptr
)
{
int
j
=
get_global_id
(
0
)
;
if
(
j
>=
ws
)
return
;
float
sum_xx,
sum_yy,
sum_xy,
sum_x,
sum_y
;
sum_xx
=
sum_yy
=
sum_xy
=
sum_x
=
sum_y
=
0.0f
;
for
(
int
i
=
0
; i < patch_size; i++)
{
sum_xx
+=
I0xx_aux_ptr[i
*
ws
+
j]
;
sum_yy
+=
I0yy_aux_ptr[i
*
ws
+
j]
;
sum_xy
+=
I0xy_aux_ptr[i
*
ws
+
j]
;
sum_x
+=
I0x_aux_ptr[i
*
ws
+
j]
;
sum_y
+=
I0y_aux_ptr[i
*
ws
+
j]
;
}
I0xx_ptr[j]
=
sum_xx
;
I0yy_ptr[j]
=
sum_yy
;
I0xy_ptr[j]
=
sum_xy
;
I0x_ptr[j]
=
sum_x
;
I0y_ptr[j]
=
sum_y
;
int
is
=
1
;
for
(
int
i
=
patch_size
; i < h; i++)
{
sum_xx
+=
(
I0xx_aux_ptr[i
*
ws
+
j]
-
I0xx_aux_ptr[
(
i
-
patch_size
)
*
ws
+
j]
)
;
sum_yy
+=
(
I0yy_aux_ptr[i
*
ws
+
j]
-
I0yy_aux_ptr[
(
i
-
patch_size
)
*
ws
+
j]
)
;
sum_xy
+=
(
I0xy_aux_ptr[i
*
ws
+
j]
-
I0xy_aux_ptr[
(
i
-
patch_size
)
*
ws
+
j]
)
;
sum_x
+=
(
I0x_aux_ptr[i
*
ws
+
j]
-
I0x_aux_ptr[
(
i
-
patch_size
)
*
ws
+
j]
)
;
sum_y
+=
(
I0y_aux_ptr[i
*
ws
+
j]
-
I0y_aux_ptr[
(
i
-
patch_size
)
*
ws
+
j]
)
;
if
((
i
-
patch_size
+
1
)
%
patch_stride
==
0
)
{
I0xx_ptr[is
*
ws
+
j]
=
sum_xx
;
I0yy_ptr[is
*
ws
+
j]
=
sum_yy
;
I0xy_ptr[is
*
ws
+
j]
=
sum_xy
;
I0x_ptr[is
*
ws
+
j]
=
sum_x
;
I0y_ptr[is
*
ws
+
j]
=
sum_y
;
is++
;
}
}
}
__kernel
void
dis_densification
(
__global
const
float
*sx,
__global
const
float
*sy,
__global
const
uchar
*i0,
__global
const
uchar
*i1,
int
psz,
int
pstr,
int
w,
int
h,
int
ws,
__global
float
*ux,
__global
float
*uy
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
i,
j
;
if
(
x
>=
w
|
| y >= h) return;
int start_is, end_is;
int start_js, end_js;
end_is = min(y / pstr, (h - psz) / pstr);
start_is = max(0, y - psz + pstr) / pstr;
start_is = min(start_is, end_is);
end_js = min(x / pstr, (w - psz) / pstr);
start_js = max(0, x - psz + pstr) / pstr;
start_js = min(start_js, end_js);
float coef, sum_coef = 0.0f;
float sum_Ux = 0.0f;
float sum_Uy = 0.0f;
int i_l, i_u;
int j_l, j_u;
float i_m, j_m, diff;
i = y;
j = x;
/* Iterate through all the patches that overlap the current location (i,j) */
for (int is = start_is; is <= end_is; is++)
for (int js = start_js; js <= end_js; js++)
{
float sx_val = sx[is * ws + js];
float sy_val = sy[is * ws + js];
uchar2 i1_vec1, i1_vec2;
j_m = min(max(j + sx_val, 0.0f), w - 1.0f - EPS);
i_m = min(max(i + sy_val, 0.0f), h - 1.0f - EPS);
j_l = (int)j_m;
j_u = j_l + 1;
i_l = (int)i_m;
i_u = i_l + 1;
i1_vec1 = vload2(0, i1 + i_u * w + j_l);
i1_vec2 = vload2(0, i1 + i_l * w + j_l);
diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y +
(j_u - j_m) * (i_m - i_l) * i1_vec1.x +
(j_m - j_l) * (i_u - i_m) * i1_vec2.y +
(j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j];
coef = 1 / max(1.0f, fabs(diff));
sum_Ux += coef * sx_val;
sum_Uy += coef * sy_val;
sum_coef += coef;
}
ux[i * w + j] = sum_Ux / sum_coef;
uy[i * w + j] = sum_Uy / sum_coef;
}
#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \
i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \
j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \
\
w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \
w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \
w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \
w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1);
float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
int I0_stride, int I1_stride,
float w00, float w01, float w10, float w11, int patch_sz, int i)
{
float sum_diff = 0.0f, sum_diff_sq = 0.0f;
int n = patch_sz * patch_sz;
uchar8 I1_vec1, I1_vec2, I0_vec;
uchar I1_val1, I1_val2;
I0_vec = vload8(0, I0_ptr + i * I0_stride);
I1_vec1 = vload8(0, I1_ptr + i * I1_stride);
I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
I1_val1 = I1_ptr[i * I1_stride + 8];
I1_val2 = I1_ptr[(i + 1) * I1_stride + 8];
float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
convert_float8(I0_vec);
sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
sum_diff = sub_group_reduce_add(sum_diff);
sum_diff_sq = sub_group_reduce_add(sum_diff_sq);
return sum_diff_sq - sum_diff * sum_diff / n;
}
__kernel void dis_patch_inverse_search_fwd_1(__global const float *Ux_ptr, __global const float *Uy_ptr,
__global const uchar *I0_ptr, __global const uchar *I1_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int pyr_level,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int id = get_global_id(0);
int is = id / 8;
if (id >= (hs * 8)) return;
int i = is * patch_stride;
int j = 0;
int psz = patch_size;
int psz2 = psz / 2;
int w_ext = w + 2 * border_size;
int bsz = border_size;
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + w - 1.0f;
float i_I1, j_I1, w00, w01, w10, w11;
float prev_Ux = Ux_ptr[(i + psz2) * w + j + psz2];
float prev_Uy = Uy_ptr[(i + psz2) * w + j + psz2];
Sx_ptr[is * ws] = prev_Ux;
Sy_ptr[is * ws] = prev_Uy;
j += patch_stride;
int sid = get_sub_group_local_id();
for (int js = 1; js < ws; js++, j += patch_stride)
{
float min_SSD, cur_SSD;
float Ux = Ux_ptr[(i + psz2) * w + j + psz2];
float Uy = Uy_ptr[(i + psz2) * w + j + psz2];
INIT_BILINEAR_WEIGHTS(Ux, Uy);
min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
INIT_BILINEAR_WEIGHTS(prev_Ux, prev_Uy);
cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
if (cur_SSD < min_SSD)
{
Ux = prev_Ux;
Uy = prev_Uy;
}
prev_Ux = Ux;
prev_Uy = Uy;
Sx_ptr[is * ws + js] = Ux;
Sy_ptr[is * ws + js] = Uy;
}
}
float3 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
const __global short *I0x_ptr, const __global short *I0y_ptr,
int I0_stride, int I1_stride, float w00, float w01, float w10,
float w11, int patch_sz, float x_grad_sum, float y_grad_sum)
{
float sum_diff = 0.0, sum_diff_sq = 0.0;
float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
int n = patch_sz * patch_sz;
uchar8 I1_vec1, I1_vec2;
uchar I1_val1, I1_val2;
for (int i = 0; i < 8; i++)
{
uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride);
I1_vec1 = (i == 0) ? vload8(0, I1_ptr + i * I1_stride) : I1_vec2;
I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
I1_val1 = (i == 0) ? I1_ptr[i * I1_stride + patch_sz] : I1_val2;
I1_val2 = I1_ptr[(i + 1) * I1_stride + patch_sz];
float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
convert_float8(I0_vec);
sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride);
short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride);
sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo));
sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi));
sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo));
sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi));
}
float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n;
float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n;
float SSD = sum_diff_sq - sum_diff * sum_diff / n;
return (float3)(SSD, dst_dUx, dst_dUy);
}
__kernel void dis_patch_inverse_search_fwd_2(__global const float *Ux_ptr, __global const float *Uy_ptr,
__global const uchar *I0_ptr, __global const uchar *I1_ptr,
__global const short *I0x_ptr, __global const short *I0y_ptr,
__global const float *xx_ptr, __global const float *yy_ptr,
__global const float *xy_ptr,
__global const float *x_ptr, __global const float *y_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int num_inner_iter, int pyr_level,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int js = get_global_id(0);
int is = get_global_id(1);
int i = is * patch_stride;
int j = js * patch_stride;
int psz = patch_size;
int psz2 = psz / 2;
int w_ext = w + 2 * border_size;
int bsz = border_size;
int index = is * ws + js;
if (js >= ws || is >= hs) return;
float Ux = Sx_ptr[index];
float Uy = Sy_ptr[index];
float cur_Ux = Ux;
float cur_Uy = Uy;
float cur_xx = xx_ptr[index];
float cur_yy = yy_ptr[index];
float cur_xy = xy_ptr[index];
float detH = cur_xx * cur_yy - cur_xy * cur_xy;
if (fabs(detH) < EPS) detH = EPS;
float invH11 = cur_yy / detH;
float invH12 = -cur_xy / detH;
float invH22 = cur_xx / detH;
float prev_SSD = INF, SSD;
float x_grad_sum = x_ptr[index];
float y_grad_sum = y_ptr[index];
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + w - 1.0f;
float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy;
float3 res;
for (int t = 0; t < num_inner_iter; t++)
{
INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy);
res = processPatchMeanNorm(I0_ptr + i * w + j,
I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j,
I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz,
x_grad_sum, y_grad_sum);
SSD = res.x;
dUx = res.y;
dUy = res.z;
dx = invH11 * dUx + invH12 * dUy;
dy = invH12 * dUx + invH22 * dUy;
cur_Ux -= dx;
cur_Uy -= dy;
if (SSD >= prev_SSD)
break;
prev_SSD = SSD;
}
float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy);
if (dot(vec, vec) <= (float)(psz * psz))
{
Sx_ptr[index] = cur_Ux;
Sy_ptr[index] = cur_Uy;
}
}
__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int pyr_level,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int id = get_global_id(0);
int is = id / 8;
if (id >= (hs * 8)) return;
is = (hs - 1 - is);
int i = is * patch_stride;
int j = (ws - 2) * patch_stride;
int psz = patch_size;
int psz2 = psz / 2;
int w_ext = w + 2 * border_size;
int bsz = border_size;
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + w - 1.0f;
float i_I1, j_I1, w00, w01, w10, w11;
int sid = get_sub_group_local_id();
for (int js = (ws - 2); js > -1; js--, j -= patch_stride)
{
float min_SSD, cur_SSD;
float2 Ux = vload2(0, Sx_ptr + is * ws + js);
float2 Uy = vload2(0, Sy_ptr + is * ws + js);
INIT_BILINEAR_WEIGHTS(Ux.x, Uy.x);
min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
INIT_BILINEAR_WEIGHTS(Ux.y, Uy.y);
cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, psz, sid);
if (cur_SSD < min_SSD)
{
Sx_ptr[is * ws + js] = Ux.y;
Sy_ptr[is * ws + js] = Uy.y;
}
}
}
__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
__global const short *I0x_ptr, __global const short *I0y_ptr,
__global const float *xx_ptr, __global const float *yy_ptr,
__global const float *xy_ptr,
__global const float *x_ptr, __global const float *y_ptr,
int border_size, int patch_size, int patch_stride,
int w, int h, int ws, int hs, int num_inner_iter,
__global float *Sx_ptr, __global float *Sy_ptr)
{
int js = get_global_id(0);
int is = get_global_id(1);
if (js >= ws |
|
is
>=
hs
)
return
;
js
=
(
ws
-
1
-
js
)
;
is
=
(
hs
-
1
-
is
)
;
int
j
=
js
*
patch_stride
;
int
i
=
is
*
patch_stride
;
int
psz
=
patch_size
;
int
psz2
=
psz
/
2
;
int
w_ext
=
w
+
2
*
border_size
;
int
bsz
=
border_size
;
int
index
=
is
*
ws
+
js
;
float
Ux
=
Sx_ptr[index]
;
float
Uy
=
Sy_ptr[index]
;
float
cur_Ux
=
Ux
;
float
cur_Uy
=
Uy
;
float
cur_xx
=
xx_ptr[index]
;
float
cur_yy
=
yy_ptr[index]
;
float
cur_xy
=
xy_ptr[index]
;
float
detH
=
cur_xx
*
cur_yy
-
cur_xy
*
cur_xy
;
if
(
fabs
(
detH
)
<
EPS
)
detH
=
EPS
;
float
invH11
=
cur_yy
/
detH
;
float
invH12
=
-cur_xy
/
detH
;
float
invH22
=
cur_xx
/
detH
;
float
prev_SSD
=
INF,
SSD
;
float
x_grad_sum
=
x_ptr[index]
;
float
y_grad_sum
=
y_ptr[index]
;
float
i_lower_limit
=
bsz
-
psz
+
1.0f
;
float
i_upper_limit
=
bsz
+
h
-
1.0f
;
float
j_lower_limit
=
bsz
-
psz
+
1.0f
;
float
j_upper_limit
=
bsz
+
w
-
1.0f
;
float
dUx,
dUy,
i_I1,
j_I1,
w00,
w01,
w10,
w11,
dx,
dy
;
float3
res
;
for
(
int
t
=
0
; t < num_inner_iter; t++)
{
INIT_BILINEAR_WEIGHTS
(
cur_Ux,
cur_Uy
)
;
res
=
processPatchMeanNorm
(
I0_ptr
+
i
*
w
+
j,
I1_ptr
+
(
int
)
i_I1
*
w_ext
+
(
int
)
j_I1,
I0x_ptr
+
i
*
w
+
j,
I0y_ptr
+
i
*
w
+
j,
w,
w_ext,
w00,
w01,
w10,
w11,
psz,
x_grad_sum,
y_grad_sum
)
;
SSD
=
res.x
;
dUx
=
res.y
;
dUy
=
res.z
;
dx
=
invH11
*
dUx
+
invH12
*
dUy
;
dy
=
invH12
*
dUx
+
invH22
*
dUy
;
cur_Ux
-=
dx
;
cur_Uy
-=
dy
;
if
(
SSD
>=
prev_SSD
)
break
;
prev_SSD
=
SSD
;
}
float2
vec
=
(
float2
)(
cur_Ux
-
Ux,
cur_Uy
-
Uy
)
;
if
((
dot
(
vec,
vec
))
<=
(
float
)(
psz
*
psz
))
{
Sx_ptr[index]
=
cur_Ux
;
Sy_ptr[index]
=
cur_Uy
;
}
}
modules/optflow/src/precomp.hpp
View file @
05eb9f35
...
...
@@ -45,6 +45,8 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/optflow.hpp>
#include <opencv2/video.hpp>
#include <opencv2/imgproc.hpp>
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#include <algorithm>
...
...
modules/optflow/test/ocl/test_dis.cpp
0 → 100644
View file @
05eb9f35
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
using
namespace
cv
;
using
namespace
optflow
;
namespace
cvtest
{
namespace
ocl
{
PARAM_TEST_CASE
(
OCL_DenseOpticalFlow_DIS
,
int
)
{
int
preset
;
virtual
void
SetUp
()
{
preset
=
GET_PARAM
(
0
);
}
};
OCL_TEST_P
(
OCL_DenseOpticalFlow_DIS
,
Mat
)
{
Mat
frame1
,
frame2
,
GT
;
frame1
=
imread
(
TS
::
ptr
()
->
get_data_path
()
+
"optflow/RubberWhale1.png"
);
frame2
=
imread
(
TS
::
ptr
()
->
get_data_path
()
+
"optflow/RubberWhale2.png"
);
CV_Assert
(
!
frame1
.
empty
()
&&
!
frame2
.
empty
());
cvtColor
(
frame1
,
frame1
,
COLOR_BGR2GRAY
);
cvtColor
(
frame2
,
frame2
,
COLOR_BGR2GRAY
);
Ptr
<
DenseOpticalFlow
>
algo
;
// iterate over presets:
for
(
int
i
=
0
;
i
<
test_loop_times
;
i
++
)
{
Mat
flow
;
UMat
ocl_flow
;
algo
=
createOptFlow_DIS
(
preset
);
OCL_OFF
(
algo
->
calc
(
frame1
,
frame2
,
flow
));
OCL_ON
(
algo
->
calc
(
frame1
,
frame2
,
ocl_flow
));
ASSERT_EQ
(
flow
.
rows
,
ocl_flow
.
rows
);
ASSERT_EQ
(
flow
.
cols
,
ocl_flow
.
cols
);
EXPECT_MAT_SIMILAR
(
flow
,
ocl_flow
,
6e-3
);
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
Contrib
,
OCL_DenseOpticalFlow_DIS
,
Values
(
DISOpticalFlow
::
PRESET_ULTRAFAST
,
DISOpticalFlow
::
PRESET_FAST
,
DISOpticalFlow
::
PRESET_MEDIUM
));
}
}
// namespace cvtest::ocl
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment