Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
54ea5bba
Commit
54ea5bba
authored
Nov 13, 2013
by
Roman Donchenko
Committed by
OpenCV Buildbot
Nov 13, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1779 from perping:integral_2.4
parents
28e0d3d7
18505995
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
200 additions
and
109 deletions
+200
-109
image_processing.rst
modules/ocl/doc/image_processing.rst
+4
-4
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+4
-4
perf_match_template.cpp
modules/ocl/perf/perf_match_template.cpp
+2
-2
haar.cpp
modules/ocl/src/haar.cpp
+43
-4
imgproc.cpp
modules/ocl/src/imgproc.cpp
+34
-15
match_template.cpp
modules/ocl/src/match_template.cpp
+18
-6
imgproc_integral.cl
modules/ocl/src/opencl/imgproc_integral.cl
+75
-66
test_imgproc.cpp
modules/ocl/test/test_imgproc.cpp
+20
-8
No files found.
modules/ocl/doc/image_processing.rst
View file @
54ea5bba
...
...
@@ -65,15 +65,15 @@ ocl::integral
-----------------
Computes an integral image.
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum
, int sdepth=-1
)
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum)
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum
, int sdepth=-1
)
:param src: Source image. Only ``CV_8UC1`` images are supported for now.
:param sum: Integral image containing 32-bit unsigned integer
values packed into ``CV_32SC1``
.
:param sum: Integral image containing 32-bit unsigned integer
or 32-bit floating-point
.
:param sqsum: Sqsum values is ``CV_32FC1`` type.
:param sqsum: Sqsum values is ``CV_32FC1``
or ``CV_64FC1``
type.
.. seealso:: :ocv:func:`integral`
...
...
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
54ea5bba
...
...
@@ -861,10 +861,10 @@ namespace cv
CV_EXPORTS
void
warpPerspective
(
const
oclMat
&
src
,
oclMat
&
dst
,
const
Mat
&
M
,
Size
dsize
,
int
flags
=
INTER_LINEAR
);
//! computes the integral image and integral for the squared image
// sum will
have CV_32S type, sqsum - CV32F type
// sum will
support CV_32S, CV_32F, sqsum - support CV32F, CV_64F
// supports only CV_8UC1 source type
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
,
int
sdepth
=-
1
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
int
sdepth
=-
1
);
CV_EXPORTS
void
cornerHarris
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
CV_EXPORTS
void
cornerHarris_dxdy
(
const
oclMat
&
src
,
oclMat
&
dst
,
oclMat
&
Dx
,
oclMat
&
Dy
,
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
...
...
@@ -939,7 +939,7 @@ namespace cv
Size
m_maxSize
;
vector
<
CvSize
>
sizev
;
vector
<
float
>
scalev
;
oclMat
gimg1
,
gsum
,
gsqsum
;
oclMat
gimg1
,
gsum
,
gsqsum
,
gsqsum_t
;
void
*
buffers
;
};
...
...
modules/ocl/perf/perf_match_template.cpp
View file @
54ea5bba
...
...
@@ -108,13 +108,13 @@ PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate, OCL_TYPICAL_MAT_SIZES)
oclDst
.
download
(
dst
);
SANITY_CHECK
(
dst
,
2
e-2
);
SANITY_CHECK
(
dst
,
3
e-2
);
}
else
if
(
RUN_PLAIN_IMPL
)
{
TEST_CYCLE
()
cv
::
matchTemplate
(
src
,
templ
,
dst
,
CV_TM_CCORR_NORMED
);
SANITY_CHECK
(
dst
,
2
e-2
);
SANITY_CHECK
(
dst
,
3
e-2
);
}
else
OCL_PERF_ELSE
...
...
modules/ocl/src/haar.cpp
View file @
54ea5bba
...
...
@@ -747,6 +747,15 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
oclMat
gsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32SC1
);
oclMat
gsqsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32FC1
);
int
sdepth
=
0
;
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
sdepth
=
CV_64FC1
;
else
sdepth
=
CV_32FC1
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
oclMat
gsqsum_t
(
totalheight
+
4
,
gimg
.
cols
+
1
,
type
);
cl_mem
stagebuffer
;
cl_mem
nodebuffer
;
cl_mem
candidatebuffer
;
...
...
@@ -754,6 +763,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
cv
::
Rect
roi
,
roi2
;
cv
::
Mat
imgroi
,
imgroisq
;
cv
::
ocl
::
oclMat
resizeroi
,
gimgroi
,
gimgroisq
;
int
grp_per_CU
=
12
;
size_t
blocksize
=
8
;
...
...
@@ -773,7 +783,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
roi2
=
Rect
(
0
,
0
,
sz
.
width
-
1
,
sz
.
height
-
1
);
resizeroi
=
gimg1
(
roi2
);
gimgroi
=
gsum
(
roi
);
gimgroisq
=
gsqsum
(
roi
);
gimgroisq
=
gsqsum
_t
(
roi
);
int
width
=
gimgroi
.
cols
-
1
-
cascade
->
orig_window_size
.
width
;
int
height
=
gimgroi
.
rows
-
1
-
cascade
->
orig_window_size
.
height
;
scaleinfo
[
i
].
width_height
=
(
width
<<
16
)
|
height
;
...
...
@@ -787,8 +797,13 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
scaleinfo
[
i
].
factor
=
factor
;
cv
::
ocl
::
resize
(
gimg
,
resizeroi
,
Size
(
sz
.
width
-
1
,
sz
.
height
-
1
),
0
,
0
,
INTER_LINEAR
);
cv
::
ocl
::
integral
(
resizeroi
,
gimgroi
,
gimgroisq
);
indexy
+=
sz
.
height
;
}
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
...
...
@@ -996,7 +1011,12 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
int
n_factors
=
0
;
oclMat
gsum
;
oclMat
gsqsum
;
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum
);
oclMat
gsqsum_t
;
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum_t
);
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
CvSize
sz
;
vector
<
CvSize
>
sizev
;
vector
<
float
>
scalev
;
...
...
@@ -1271,12 +1291,16 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
roi2
=
Rect
(
0
,
0
,
sz
.
width
-
1
,
sz
.
height
-
1
);
resizeroi
=
gimg1
(
roi2
);
gimgroi
=
gsum
(
roi
);
gimgroisq
=
gsqsum
(
roi
);
gimgroisq
=
gsqsum
_t
(
roi
);
cv
::
ocl
::
resize
(
gimg
,
resizeroi
,
Size
(
sz
.
width
-
1
,
sz
.
height
-
1
),
0
,
0
,
INTER_LINEAR
);
cv
::
ocl
::
integral
(
resizeroi
,
gimgroi
,
gimgroisq
);
indexy
+=
sz
.
height
;
}
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
...
...
@@ -1338,7 +1362,11 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
}
else
{
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum
);
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum_t
);
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
...
...
@@ -1564,6 +1592,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
gimg1
.
release
();
gsum
.
release
();
gsqsum
.
release
();
gsqsum_t
.
release
();
}
else
if
(
!
(
m_flags
&
CV_HAAR_SCALE_IMAGE
)
&&
(
flags
&
CV_HAAR_SCALE_IMAGE
))
{
...
...
@@ -1638,6 +1667,16 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
gsum
.
create
(
totalheight
+
4
,
cols
+
1
,
CV_32SC1
);
gsqsum
.
create
(
totalheight
+
4
,
cols
+
1
,
CV_32FC1
);
int
sdepth
=
0
;
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
sdepth
=
CV_64FC1
;
else
sdepth
=
CV_32FC1
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
gsqsum_t
.
create
(
totalheight
+
4
,
cols
+
1
,
type
);
scaleinfo
=
(
detect_piramid_info
*
)
malloc
(
sizeof
(
detect_piramid_info
)
*
loopcount
);
for
(
int
i
=
0
;
i
<
loopcount
;
i
++
)
{
...
...
modules/ocl/src/imgproc.cpp
View file @
54ea5bba
...
...
@@ -781,7 +781,7 @@ namespace cv
////////////////////////////////////////////////////////////////////////
// integral
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
)
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
,
int
sdepth
)
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
if
(
!
src
.
clCxt
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
...
...
@@ -790,6 +790,11 @@ namespace cv
return
;
}
if
(
sdepth
<=
0
)
sdepth
=
CV_32S
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
int
vlen
=
4
;
int
offset
=
src
.
offset
/
vlen
;
int
pre_invalid
=
src
.
offset
%
vlen
;
...
...
@@ -797,17 +802,26 @@ namespace cv
oclMat
t_sum
,
t_sqsum
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
depth
=
src
.
depth
()
==
CV_8U
?
CV_32S
:
CV_64F
;
int
type
=
CV_MAKE_TYPE
(
depth
,
1
);
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
sum
.
create
(
h
,
w
,
type
);
char
build_option
[
250
];
if
(
Context
::
getContext
()
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
))
{
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_64FC1
);
sqsum
.
create
(
h
,
w
,
CV_64FC1
);
sprintf
(
build_option
,
"-D TYPE=double -D TYPE4=double4 -D convert_TYPE4=convert_double4"
);
}
else
{
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_32FC1
);
sqsum
.
create
(
h
,
w
,
CV_32FC1
);
sprintf
(
build_option
,
"-D TYPE=float -D TYPE4=float4 -D convert_TYPE4=convert_float4"
);
}
int
sum_offset
=
sum
.
offset
/
vlen
;
int
sqsum_offset
=
sqsum
.
offset
/
vlen
;
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
sum
.
create
(
h
,
w
,
type
);
int
sum_offset
=
sum
.
offset
/
sum
.
elemSize
();
int
sqsum_offset
=
sqsum
.
offset
/
sqsum
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
...
...
@@ -819,8 +833,9 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sqsum
.
step
));
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_cols"
,
gt
,
lt
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_cols"
,
gt
,
lt
,
args
,
-
1
,
sdepth
,
build_option
);
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
...
...
@@ -830,15 +845,16 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sqsum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum_offset
));
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_rows"
,
gt2
,
lt2
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_rows"
,
gt2
,
lt2
,
args
,
-
1
,
sdepth
,
build_option
);
}
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
)
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
int
sdepth
)
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
int
vlen
=
4
;
...
...
@@ -846,10 +862,13 @@ namespace cv
int
pre_invalid
=
src
.
offset
%
vlen
;
int
vcols
=
(
pre_invalid
+
src
.
cols
+
vlen
-
1
)
/
vlen
;
if
(
sdepth
<=
0
)
sdepth
=
CV_32S
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
oclMat
t_sum
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
depth
=
src
.
depth
()
==
CV_8U
?
CV_32S
:
CV_32F
;
int
type
=
CV_MAKE_TYPE
(
depth
,
1
);
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
sum
.
create
(
h
,
w
,
type
);
...
...
@@ -865,7 +884,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_cols"
,
gt
,
lt
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_cols"
,
gt
,
lt
,
args
,
-
1
,
s
depth
);
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
...
...
@@ -876,7 +895,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_rows"
,
gt2
,
lt2
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_rows"
,
gt2
,
lt2
,
args
,
-
1
,
s
depth
);
}
/////////////////////// corner //////////////////////////////
...
...
modules/ocl/src/match_template.cpp
View file @
54ea5bba
...
...
@@ -245,12 +245,15 @@ namespace cv
void
matchTemplate_CCORR_NORMED
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
{
cv
::
ocl
::
oclMat
temp
;
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
integral
(
image
.
reshape
(
1
),
buf
.
image_sums
[
0
],
buf
.
image_sqsums
[
0
]);
integral
(
image
.
reshape
(
1
),
buf
.
image_sums
[
0
],
temp
);
if
(
temp
.
depth
()
==
CV_64F
)
temp
.
convertTo
(
buf
.
image_sqsums
[
0
],
CV_32FC1
);
else
buf
.
image_sqsums
[
0
]
=
temp
;
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
Context
*
clCxt
=
image
.
clCxt
;
...
...
@@ -416,7 +419,12 @@ namespace cv
{
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
integral
(
image
,
buf
.
image_sums
[
0
],
buf
.
image_sqsums
[
0
]);
cv
::
ocl
::
oclMat
temp
;
integral
(
image
,
buf
.
image_sums
[
0
],
temp
);
if
(
temp
.
depth
()
==
CV_64F
)
temp
.
convertTo
(
buf
.
image_sqsums
[
0
],
CV_32FC1
);
else
buf
.
image_sqsums
[
0
]
=
temp
;
templ_sum
[
0
]
=
(
float
)
sum
(
templ
)[
0
];
...
...
@@ -452,10 +460,14 @@ namespace cv
templ_sum
*=
scale
;
buf
.
image_sums
.
resize
(
buf
.
images
.
size
());
buf
.
image_sqsums
.
resize
(
buf
.
images
.
size
());
cv
::
ocl
::
oclMat
temp
;
for
(
int
i
=
0
;
i
<
image
.
oclchannels
();
i
++
)
{
integral
(
buf
.
images
[
i
],
buf
.
image_sums
[
i
],
buf
.
image_sqsums
[
i
]);
integral
(
buf
.
images
[
i
],
buf
.
image_sums
[
i
],
temp
);
if
(
temp
.
depth
()
==
CV_64F
)
temp
.
convertTo
(
buf
.
image_sqsums
[
i
],
CV_32FC1
);
else
buf
.
image_sqsums
[
i
]
=
temp
;
}
switch
(
image
.
oclchannels
())
...
...
modules/ocl/src/opencl/imgproc_integral.cl
View file @
54ea5bba
...
...
@@ -49,6 +49,9 @@
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CONVERT
(
step
)
((
step
)
>>1
)
#
else
#
define
CONVERT
(
step
)
((
step
))
#
endif
#
define
LSIZE
256
...
...
@@ -61,17 +64,17 @@
#
define
GET_CONFLICT_OFFSET
(
lid
)
((
lid
)
>>
LOG_NUM_BANKS
)
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
float
*sqsum,
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
)
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
TYPE
*sqsum,
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
,int
dst1_step
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int4
src_t[2],
sum_t[2]
;
float
4
sqsum_t[2]
;
TYPE
4
sqsum_t[2]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
TYPE
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
int*
sum_p
;
__local
float
*
sqsum_p
;
__local
TYPE
*
sqsum_p
;
src_step
=
src_step
>>
2
;
gid
=
gid
<<
1
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
...
...
@@ -80,17 +83,17 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
src_t[1]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid
+
1
,
cols
-
1
)
]
)
:
0
)
;
sum_t[0]
=
(
i
==
0
?
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sqsum[0][bf_loc]
=
convert_
float
4
(
src_t[0]
*
src_t[0]
)
;
lm_sqsum[0][bf_loc]
=
convert_
TYPE
4
(
src_t[0]
*
src_t[0]
)
;
lm_sum[1][bf_loc]
=
src_t[1]
;
lm_sqsum[1][bf_loc]
=
convert_
float
4
(
src_t[1]
*
src_t[1]
)
;
lm_sqsum[1][bf_loc]
=
convert_
TYPE
4
(
src_t[1]
*
src_t[1]
)
;
int
offset
=
1
;
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
...
...
@@ -131,7 +134,8 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
loc_s0
=
gid
*
dst_step
+
i
+
lid
-
1
-
pre_invalid
*
dst_step
/
4
,
loc_s1
=
loc_s0
+
dst_step
;
int
loc_s0
=
gid
*
dst_step
+
i
+
lid
-
1
-
pre_invalid
*
dst_step
/4,
loc_s1
=
loc_s0
+
dst_step
;
int
loc_sq0
=
gid
*
CONVERT
(
dst1_step
)
+
i
+
lid
-
1
-
pre_invalid
*
dst1_step
/
sizeof
(
TYPE
)
,
loc_sq1
=
loc_sq0
+
CONVERT
(
dst1_step
)
;
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
lm_sum[0][bf_loc]
+=
sum_t[0]
;
...
...
@@ -139,20 +143,20 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
sum_p
=
(
__local
int*
)(
&
(
lm_sum[0][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
4
+
k
>=
cols
+
pre_invalid
|
| gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
sqsum[loc_s
0 + k * dst_step / 4
] = sqsum_p[k];
sqsum[loc_s
q0 + k * dst1_step / sizeof(TYPE)
] = sqsum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[1][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
sqsum[loc_s
1 + k * dst_step / 4
] = sqsum_p[k];
sqsum[loc_s
q1 + k * dst1_step / sizeof(TYPE)
] = sqsum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
...
...
@@ -160,30 +164,32 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
}
kernel void integral_rows_D4(__global int4 *srcsum,__global
float
4 * srcsqsum,__global int *sum ,
__global
float *sqsum,int rows,int cols,int src
_step,int sum_step,
kernel void integral_rows_D4(__global int4 *srcsum,__global
TYPE
4 * srcsqsum,__global int *sum ,
__global
TYPE *sqsum,int rows,int cols,int src_step,int src1
_step,int sum_step,
int sqsum_step,int sum_offset,int sqsum_offset)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
float
4 sqsrc_t[2],sqsum_t[2];
TYPE
4 sqsrc_t[2],sqsum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local
float
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local
TYPE
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local int *sum_p;
__local
float
*sqsum_p;
__local
TYPE
*sqsum_p;
src_step = src_step >> 4;
src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
gid <<= 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid
* 2
] : (int4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src
_step + gid * 2] : (float
4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid
* 2
+ 1] : (int4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src
_step + gid * 2 + 1] : (float
4)0;
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid ] : (int4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src
1_step + gid ] : (TYPE
4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid + 1] : (int4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src
1_step + gid + 1] : (TYPE
4)0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
float
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
float
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
...
...
@@ -239,17 +245,18 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
}
if(i + lid == 0)
{
int loc0 = gid
* 2
* sum_step;
int loc1 = gid
* 2 * sqsum_step
;
int loc0 = gid * sum_step;
int loc1 = gid
* CONVERT(sqsum_step)
;
for(int k = 1; k <= 8; k++)
{
if(gid *
8
+ k > cols) break;
if(gid *
4
+ k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step /
4
] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step /
sizeof(TYPE)
] = 0;
}
}
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
int loc_s0 = sum_offset + gid * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
...
...
@@ -257,37 +264,37 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[0][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid *
8
+ k >= cols) break;
if(gid *
4
+ k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq0 + k * sqsum_step /
4
] = sqsum_p[k];
sqsum[loc_sq0 + k * sqsum_step /
sizeof(TYPE)
] = sqsum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[1][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid *
8
+ 4 + k >= cols) break;
if(gid *
4
+ 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq1 + k * sqsum_step /
4
] = sqsum_p[k];
sqsum[loc_sq1 + k * sqsum_step /
sizeof(TYPE)
] = sqsum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
float
*sqsum,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
TYPE
*sqsum,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step
, int dst1_step
)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
float
4 sqsum_t[2];
TYPE
4 sqsum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local
float
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local
TYPE
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local float* sum_p;
__local
float
* sqsum_p;
__local
TYPE
* sqsum_p;
src_step = src_step >> 2;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
...
...
@@ -296,17 +303,17 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
float
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
float
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sqsum[0][bf_loc] = convert_
float
4(src_t[0] * src_t[0]);
lm_sqsum[0][bf_loc] = convert_
TYPE
4(src_t[0] * src_t[0]);
lm_sum[1][bf_loc] = src_t[1];
lm_sqsum[1][bf_loc] = convert_
float
4(src_t[1] * src_t[1]);
lm_sqsum[1][bf_loc] = convert_
TYPE
4(src_t[1] * src_t[1]);
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
...
...
@@ -348,6 +355,7 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
}
barrier(CLK_LOCAL_MEM_FENCE);
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
...
...
@@ -355,20 +363,20 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[0][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid |
|
gid
*
4
+
k
<
pre_invalid
)
continue
;
sum[loc_s0
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sqsum[loc_s
0
+
k
*
dst_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_s
q0
+
k
*
dst1_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
4
+
k
+
4
>=
cols
+
pre_invalid
)
break
;
sum[loc_s1
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sqsum[loc_s
1
+
k
*
dst_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_s
q1
+
k
*
dst1_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -376,30 +384,31 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
}
kernel
void
integral_rows_D5
(
__global
float4
*srcsum,__global
float
4
*
srcsqsum,__global
float
*sum
,
__global
float
*sqsum,int
rows,int
cols,int
src_step,
int
sum_step,
kernel
void
integral_rows_D5
(
__global
float4
*srcsum,__global
TYPE
4
*
srcsqsum,__global
float
*sum
,
__global
TYPE
*sqsum,int
rows,int
cols,int
src_step,int
src1_step,
int
sum_step,
int
sqsum_step,int
sum_offset,int
sqsum_offset
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
float4
src_t[2],
sum_t[2]
;
float
4
sqsrc_t[2],sqsum_t[2]
;
TYPE
4
sqsrc_t[2],sqsum_t[2]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
TYPE
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
*sum_p
;
__local
float
*sqsum_p
;
__local
TYPE
*sqsum_p
;
src_step
=
src_step
>>
4
;
src1_step
=
(
src1_step
/
sizeof
(
TYPE
))
>>
2
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
{
src_t[0]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2]
:
(
float4
)
0
;
sqsrc_t[0]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
_step
+
gid
*
2]
:
(
float
4
)
0
;
sqsrc_t[0]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
1_step
+
gid
*
2]
:
(
TYPE
4
)
0
;
src_t[1]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2
+
1]
:
(
float4
)
0
;
sqsrc_t[1]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
_step
+
gid
*
2
+
1]
:
(
float
4
)
0
;
sqsrc_t[1]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
1_step
+
gid
*
2
+
1]
:
(
TYPE
4
)
0
;
sum_t[0]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
...
...
@@ -456,16 +465,16 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
if
(
i
+
lid
==
0
)
{
int
loc0
=
gid
*
2
*
sum_step
;
int
loc1
=
gid
*
2
*
sqsum_step
;
int
loc1
=
gid
*
2
*
CONVERT
(
sqsum_step
)
;
for
(
int
k
=
1
; k <= 8; k++)
{
if
(
gid
*
8
+
k
>
cols
)
break
;
sum[sum_offset
+
loc0
+
k
*
sum_step
/
4]
=
0
;
sqsum[sqsum_offset
+
loc1
+
k
*
sqsum_step
/
4
]
=
0
;
sqsum[sqsum_offset
+
loc1
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
0
;
}
}
int
loc_s0
=
sum_offset
+
gid
*
2
*
sum_step
+
sum_step
/
4
+
i
+
lid,
loc_s1
=
loc_s0
+
sum_step
;
int
loc_sq0
=
sqsum_offset
+
gid
*
2
*
sqsum_step
+
sqsum_step
/
4
+
i
+
lid,
loc_sq1
=
loc_sq0
+
sqsum_step
;
int
loc_sq0
=
sqsum_offset
+
gid
*
2
*
CONVERT
(
sqsum_step
)
+
sqsum_step
/
sizeof
(
TYPE
)
+
i
+
lid,
loc_sq1
=
loc_sq0
+
CONVERT
(
sqsum_step
)
;
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
lm_sum[0][bf_loc]
+=
sum_t[0]
;
...
...
@@ -473,20 +482,20 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
sum_p
=
(
__local
float*
)(
&
(
lm_sum[0][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
8
+
k
>=
cols
)
break
;
sum[loc_s0
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sqsum[loc_sq0
+
k
*
sqsum_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_sq0
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
8
+
4
+
k
>=
cols
)
break
;
sum[loc_s1
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sqsum[loc_sq1
+
k
*
sqsum_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_sq1
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
modules/ocl/test/test_imgproc.cpp
View file @
54ea5bba
...
...
@@ -275,23 +275,33 @@ OCL_TEST_P(CornerHarris, Mat)
//////////////////////////////////integral/////////////////////////////////////////////////
typedef
ImgprocTestBase
Integral
;
struct
Integral
:
public
ImgprocTestBase
{
int
sdepth
;
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
blockSize
=
GET_PARAM
(
1
);
sdepth
=
GET_PARAM
(
2
);
useRoi
=
GET_PARAM
(
3
);
}
};
OCL_TEST_P
(
Integral
,
Mat1
)
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
random_roi
();
ocl
::
integral
(
gsrc_roi
,
gdst_roi
);
integral
(
src_roi
,
dst_roi
);
ocl
::
integral
(
gsrc_roi
,
gdst_roi
,
sdepth
);
integral
(
src_roi
,
dst_roi
,
sdepth
);
Near
();
}
}
// TODO wrong output type
OCL_TEST_P
(
Integral
,
DISABLED_Mat2
)
OCL_TEST_P
(
Integral
,
Mat2
)
{
Mat
dst1
;
ocl
::
oclMat
gdst1
;
...
...
@@ -300,10 +310,12 @@ OCL_TEST_P(Integral, DISABLED_Mat2)
{
random_roi
();
integral
(
src_roi
,
dst
1
,
dst_roi
);
ocl
::
integral
(
gsrc_roi
,
gdst
1
,
gdst_roi
);
integral
(
src_roi
,
dst
_roi
,
dst1
,
sdepth
);
ocl
::
integral
(
gsrc_roi
,
gdst
_roi
,
gdst1
,
sdepth
);
Near
();
if
(
gdst1
.
clCxt
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
))
EXPECT_MAT_NEAR
(
dst1
,
Mat
(
gdst1
),
0.
);
}
}
...
...
@@ -543,7 +555,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Integral
,
Combine
(
Values
((
MatType
)
CV_8UC1
),
// TODO does not work with CV_32F, CV_64F
Values
(
0
),
// not used
Values
(
0
),
// not used
Values
(
(
MatType
)
CV_32SC1
,
(
MatType
)
CV_32FC1
),
Bool
()));
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Threshold
,
Combine
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment