Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
50579d25
Commit
50579d25
authored
Nov 11, 2013
by
perping
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Make Integral sum support cv_32f, sqsum support cv_64f.
parent
0ac61240
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
134 additions
and
93 deletions
+134
-93
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+3
-3
imgproc.cpp
modules/ocl/src/imgproc.cpp
+35
-15
imgproc_integral.cl
modules/ocl/src/opencl/imgproc_integral.cl
+76
-67
test_imgproc.cpp
modules/ocl/test/test_imgproc.cpp
+20
-8
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
50579d25
...
...
@@ -861,10 +861,10 @@ namespace cv
CV_EXPORTS
void
warpPerspective
(
const
oclMat
&
src
,
oclMat
&
dst
,
const
Mat
&
M
,
Size
dsize
,
int
flags
=
INTER_LINEAR
);
//! computes the integral image and integral for the squared image
// sum will
have CV_32S type, sqsum - CV32F type
// sum will
support CV_32S, CV_32F, sqsum - support CV32F, CV_64F
// supports only CV_8UC1 source type
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
,
int
sdepth
=-
1
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
int
sdepth
=-
1
);
CV_EXPORTS
void
cornerHarris
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
CV_EXPORTS
void
cornerHarris_dxdy
(
const
oclMat
&
src
,
oclMat
&
dst
,
oclMat
&
Dx
,
oclMat
&
Dy
,
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
...
...
modules/ocl/src/imgproc.cpp
View file @
50579d25
...
...
@@ -783,7 +783,7 @@ namespace cv
////////////////////////////////////////////////////////////////////////
// integral
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
)
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
,
int
sdepth
)
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
if
(
!
src
.
clCxt
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
...
...
@@ -792,6 +792,12 @@ namespace cv
return
;
}
int
depth
=
src
.
depth
();
if
(
sdepth
<=
0
)
sdepth
=
CV_32S
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
int
vlen
=
4
;
int
offset
=
src
.
offset
/
vlen
;
int
pre_invalid
=
src
.
offset
%
vlen
;
...
...
@@ -799,17 +805,26 @@ namespace cv
oclMat
t_sum
,
t_sqsum
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
depth
=
src
.
depth
()
==
CV_8U
?
CV_32S
:
CV_64F
;
int
type
=
CV_MAKE_TYPE
(
depth
,
1
);
char
build_option
[
250
];
if
(
Context
::
getContext
()
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
))
{
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_64FC1
);
sqsum
.
create
(
h
,
w
,
CV_64FC1
);
sprintf
(
build_option
,
"-D TYPE=double -D TYPE4=double4 -D convert_TYPE4=convert_double4"
);
}
else
{
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_32FC1
);
sqsum
.
create
(
h
,
w
,
CV_32FC1
);
sprintf
(
build_option
,
"-D TYPE=float -D TYPE4=float4 -D convert_TYPE4=convert_float4"
);
}
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
sum
.
create
(
h
,
w
,
type
);
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_32FC1
);
sqsum
.
create
(
h
,
w
,
CV_32FC1
);
int
sum_offset
=
sum
.
offset
/
vlen
;
int
sqsum_offset
=
sqsum
.
offset
/
vlen
;
int
sum_offset
=
sum
.
offset
/
sum
.
elemSize
();
int
sqsum_offset
=
sqsum
.
offset
/
sqsum
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
...
...
@@ -821,8 +836,9 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sqsum
.
step
));
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_cols"
,
gt
,
lt
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_cols"
,
gt
,
lt
,
args
,
-
1
,
sdepth
,
build_option
);
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
...
...
@@ -832,15 +848,16 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sqsum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum_offset
));
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_rows"
,
gt2
,
lt2
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_rows"
,
gt2
,
lt2
,
args
,
-
1
,
sdepth
,
build_option
);
}
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
)
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
int
sdepth
)
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
int
vlen
=
4
;
...
...
@@ -848,10 +865,13 @@ namespace cv
int
pre_invalid
=
src
.
offset
%
vlen
;
int
vcols
=
(
pre_invalid
+
src
.
cols
+
vlen
-
1
)
/
vlen
;
if
(
sdepth
<=
0
)
sdepth
=
CV_32S
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
oclMat
t_sum
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
depth
=
src
.
depth
()
==
CV_8U
?
CV_32S
:
CV_32F
;
int
type
=
CV_MAKE_TYPE
(
depth
,
1
);
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
sum
.
create
(
h
,
w
,
type
);
...
...
@@ -867,7 +887,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_cols"
,
gt
,
lt
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_cols"
,
gt
,
lt
,
args
,
-
1
,
s
depth
);
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
...
...
@@ -878,7 +898,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_rows"
,
gt2
,
lt2
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_rows"
,
gt2
,
lt2
,
args
,
-
1
,
s
depth
);
}
/////////////////////// corner //////////////////////////////
...
...
modules/ocl/src/opencl/imgproc_integral.cl
View file @
50579d25
...
...
@@ -49,6 +49,9 @@
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
define
CONVERT
(
step
)
((
step
)
>>1
)
#
else
#
define
CONVERT
(
step
)
((
step
))
#
endif
#
define
LSIZE
256
#
define
LSIZE_1
255
...
...
@@ -60,17 +63,17 @@
#
define
GET_CONFLICT_OFFSET
(
lid
)
((
lid
)
>>
LOG_NUM_BANKS
)
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
float
*sqsum,
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
)
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
TYPE
*sqsum,
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
,int
dst1_step
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int4
src_t[2],
sum_t[2]
;
float
4
sqsum_t[2]
;
TYPE
4
sqsum_t[2]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
TYPE
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
int*
sum_p
;
__local
float
*
sqsum_p
;
__local
TYPE
*
sqsum_p
;
src_step
=
src_step
>>
2
;
gid
=
gid
<<
1
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
...
...
@@ -79,17 +82,17 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
src_t[1]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid
+
1
,
cols
-
1
)
]
)
:
0
)
;
sum_t[0]
=
(
i
==
0
?
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sqsum[0][bf_loc]
=
convert_
float
4
(
src_t[0]
*
src_t[0]
)
;
lm_sqsum[0][bf_loc]
=
convert_
TYPE
4
(
src_t[0]
*
src_t[0]
)
;
lm_sum[1][bf_loc]
=
src_t[1]
;
lm_sqsum[1][bf_loc]
=
convert_
float
4
(
src_t[1]
*
src_t[1]
)
;
lm_sqsum[1][bf_loc]
=
convert_
TYPE
4
(
src_t[1]
*
src_t[1]
)
;
int
offset
=
1
;
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
...
...
@@ -130,7 +133,8 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
loc_s0
=
gid
*
dst_step
+
i
+
lid
-
1
-
pre_invalid
*
dst_step
/
4
,
loc_s1
=
loc_s0
+
dst_step
;
int
loc_s0
=
gid
*
dst_step
+
i
+
lid
-
1
-
pre_invalid
*
dst_step
/4,
loc_s1
=
loc_s0
+
dst_step
;
int
loc_sq0
=
gid
*
CONVERT
(
dst1_step
)
+
i
+
lid
-
1
-
pre_invalid
*
dst1_step
/
sizeof
(
TYPE
)
,
loc_sq1
=
loc_sq0
+
CONVERT
(
dst1_step
)
;
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
lm_sum[0][bf_loc]
+=
sum_t[0]
;
...
...
@@ -138,20 +142,20 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
sum_p
=
(
__local
int*
)(
&
(
lm_sum[0][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
4
+
k
>=
cols
+
pre_invalid
|
| gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
sqsum[loc_s
0 + k * dst_step / 4
] = sqsum_p[k];
sqsum[loc_s
q0 + k * dst1_step / sizeof(TYPE)
] = sqsum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[1][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
sqsum[loc_s
1 + k * dst_step / 4
] = sqsum_p[k];
sqsum[loc_s
q1 + k * dst1_step / sizeof(TYPE)
] = sqsum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
...
...
@@ -159,30 +163,32 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
}
kernel void integral_rows_D4(__global int4 *srcsum,__global
float
4 * srcsqsum,__global int *sum ,
__global
float *sqsum,int rows,int cols,int src
_step,int sum_step,
kernel void integral_rows_D4(__global int4 *srcsum,__global
TYPE
4 * srcsqsum,__global int *sum ,
__global
TYPE *sqsum,int rows,int cols,int src_step,int src1
_step,int sum_step,
int sqsum_step,int sum_offset,int sqsum_offset)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
float
4 sqsrc_t[2],sqsum_t[2];
TYPE
4 sqsrc_t[2],sqsum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local
float
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local
TYPE
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local int *sum_p;
__local
float
*sqsum_p;
__local
TYPE
*sqsum_p;
src_step = src_step >> 4;
src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
gid <<= 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid
* 2
] : (int4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src
_step + gid * 2] : (float
4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid
* 2
+ 1] : (int4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src
_step + gid * 2 + 1] : (float
4)0;
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid ] : (int4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src
1_step + gid ] : (TYPE
4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid + 1] : (int4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src
1_step + gid + 1] : (TYPE
4)0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
float
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
float
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
...
...
@@ -238,17 +244,18 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
}
if(i + lid == 0)
{
int loc0 = gid
* 2
* sum_step;
int loc1 = gid
* 2 * sqsum_step
;
int loc0 = gid * sum_step;
int loc1 = gid
* CONVERT(sqsum_step)
;
for(int k = 1; k <= 8; k++)
{
if(gid *
8
+ k > cols) break;
if(gid *
4
+ k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step /
4
] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step /
sizeof(TYPE)
] = 0;
}
}
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
int loc_s0 = sum_offset + gid * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
...
...
@@ -256,37 +263,37 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[0][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid *
8
+ k >= cols) break;
if(gid *
4
+ k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq0 + k * sqsum_step /
4
] = sqsum_p[k];
sqsum[loc_sq0 + k * sqsum_step /
sizeof(TYPE)
] = sqsum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[1][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid *
8
+ 4 + k >= cols) break;
if(gid *
4
+ 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq1 + k * sqsum_step /
4
] = sqsum_p[k];
sqsum[loc_sq1 + k * sqsum_step /
sizeof(TYPE)
] = sqsum_p[k];
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
float
*sqsum,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
TYPE
*sqsum,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step
, int dst1_step
)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
float
4 sqsum_t[2];
TYPE
4 sqsum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local
float
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local
TYPE
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local float* sum_p;
__local
float
* sqsum_p;
__local
TYPE
* sqsum_p;
src_step = src_step >> 2;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
...
...
@@ -295,17 +302,17 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
float
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
float
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sqsum[0][bf_loc] = convert_
float
4(src_t[0] * src_t[0]);
lm_sqsum[0][bf_loc] = convert_
TYPE
4(src_t[0] * src_t[0]);
lm_sum[1][bf_loc] = src_t[1];
lm_sqsum[1][bf_loc] = convert_
float
4(src_t[1] * src_t[1]);
lm_sqsum[1][bf_loc] = convert_
TYPE
4(src_t[1] * src_t[1]);
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
...
...
@@ -347,6 +354,7 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
}
barrier(CLK_LOCAL_MEM_FENCE);
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
...
...
@@ -354,20 +362,20 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[0][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid |
|
gid
*
4
+
k
<
pre_invalid
)
continue
;
sum[loc_s0
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sqsum[loc_s
0
+
k
*
dst_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_s
q0
+
k
*
dst1_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
4
+
k
+
4
>=
cols
+
pre_invalid
)
break
;
sum[loc_s1
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sqsum[loc_s
1
+
k
*
dst_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_s
q1
+
k
*
dst1_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -375,30 +383,31 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
}
kernel
void
integral_rows_D5
(
__global
float4
*srcsum,__global
float
4
*
srcsqsum,__global
float
*sum
,
__global
float
*sqsum,int
rows,int
cols,int
src_step,
int
sum_step,
kernel
void
integral_rows_D5
(
__global
float4
*srcsum,__global
TYPE
4
*
srcsqsum,__global
float
*sum
,
__global
TYPE
*sqsum,int
rows,int
cols,int
src_step,int
src1_step,
int
sum_step,
int
sqsum_step,int
sum_offset,int
sqsum_offset
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
float4
src_t[2],
sum_t[2]
;
float
4
sqsrc_t[2],sqsum_t[2]
;
TYPE
4
sqsrc_t[2],sqsum_t[2]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
TYPE
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
*sum_p
;
__local
float
*sqsum_p
;
__local
TYPE
*sqsum_p
;
src_step
=
src_step
>>
4
;
src1_step
=
(
src1_step
/
sizeof
(
TYPE
))
>>
2
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
{
src_t[0]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2]
:
(
float4
)
0
;
sqsrc_t[0]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
_step
+
gid
*
2]
:
(
float
4
)
0
;
sqsrc_t[0]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
1_step
+
gid
*
2]
:
(
TYPE
4
)
0
;
src_t[1]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2
+
1]
:
(
float4
)
0
;
sqsrc_t[1]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
_step
+
gid
*
2
+
1]
:
(
float
4
)
0
;
sqsrc_t[1]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
1_step
+
gid
*
2
+
1]
:
(
TYPE
4
)
0
;
sum_t[0]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
...
...
@@ -455,16 +464,16 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
if
(
i
+
lid
==
0
)
{
int
loc0
=
gid
*
2
*
sum_step
;
int
loc1
=
gid
*
2
*
sqsum_step
;
int
loc1
=
gid
*
2
*
CONVERT
(
sqsum_step
)
;
for
(
int
k
=
1
; k <= 8; k++)
{
if
(
gid
*
8
+
k
>
cols
)
break
;
sum[sum_offset
+
loc0
+
k
*
sum_step
/
4]
=
0
;
sqsum[sqsum_offset
+
loc1
+
k
*
sqsum_step
/
4
]
=
0
;
sqsum[sqsum_offset
+
loc1
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
0
;
}
}
int
loc_s0
=
sum_offset
+
gid
*
2
*
sum_step
+
sum_step
/
4
+
i
+
lid,
loc_s1
=
loc_s0
+
sum_step
;
int
loc_sq0
=
sqsum_offset
+
gid
*
2
*
sqsum_step
+
sqsum_step
/
4
+
i
+
lid,
loc_sq1
=
loc_sq0
+
sqsum_step
;
int
loc_sq0
=
sqsum_offset
+
gid
*
2
*
CONVERT
(
sqsum_step
)
+
sqsum_step
/
sizeof
(
TYPE
)
+
i
+
lid,
loc_sq1
=
loc_sq0
+
CONVERT
(
sqsum_step
)
;
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
lm_sum[0][bf_loc]
+=
sum_t[0]
;
...
...
@@ -472,20 +481,20 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
sum_p
=
(
__local
float*
)(
&
(
lm_sum[0][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
8
+
k
>=
cols
)
break
;
sum[loc_s0
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sqsum[loc_sq0
+
k
*
sqsum_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_sq0
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
8
+
4
+
k
>=
cols
)
break
;
sum[loc_s1
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sqsum[loc_sq1
+
k
*
sqsum_step
/
4
]
=
sqsum_p[k]
;
sqsum[loc_sq1
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
modules/ocl/test/test_imgproc.cpp
View file @
50579d25
...
...
@@ -275,23 +275,33 @@ OCL_TEST_P(CornerHarris, Mat)
//////////////////////////////////integral/////////////////////////////////////////////////
typedef
ImgprocTestBase
Integral
;
struct
Integral
:
public
ImgprocTestBase
{
int
sdepth
;
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
blockSize
=
GET_PARAM
(
1
);
sdepth
=
GET_PARAM
(
2
);
useRoi
=
GET_PARAM
(
3
);
}
};
OCL_TEST_P
(
Integral
,
Mat1
)
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
random_roi
();
ocl
::
integral
(
gsrc_roi
,
gdst_roi
);
integral
(
src_roi
,
dst_roi
);
ocl
::
integral
(
gsrc_roi
,
gdst_roi
,
sdepth
);
integral
(
src_roi
,
dst_roi
,
sdepth
);
Near
();
}
}
// TODO wrong output type
OCL_TEST_P
(
Integral
,
DISABLED_Mat2
)
OCL_TEST_P
(
Integral
,
Mat2
)
{
Mat
dst1
;
ocl
::
oclMat
gdst1
;
...
...
@@ -300,10 +310,12 @@ OCL_TEST_P(Integral, DISABLED_Mat2)
{
random_roi
();
integral
(
src_roi
,
dst
1
,
dst_roi
);
ocl
::
integral
(
gsrc_roi
,
gdst
1
,
gdst_roi
);
integral
(
src_roi
,
dst
_roi
,
dst1
,
sdepth
);
ocl
::
integral
(
gsrc_roi
,
gdst
_roi
,
gdst1
,
sdepth
);
Near
();
if
(
gdst1
.
clCxt
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
))
EXPECT_MAT_NEAR
(
dst1
,
Mat
(
gdst1
),
0.
);
}
}
...
...
@@ -543,7 +555,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Integral
,
Combine
(
Values
((
MatType
)
CV_8UC1
),
// TODO does not work with CV_32F, CV_64F
Values
(
0
),
// not used
Values
(
0
),
// not used
Values
(
(
MatType
)
CV_32SC1
,
(
MatType
)
CV_32FC1
),
Bool
()));
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Threshold
,
Combine
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment