Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fc10ffef
Commit
fc10ffef
authored
Apr 03, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Enabled integer arithmetic for row_filter_C1_D0
parent
a3825ace
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
11 deletions
+22
-11
filter.cpp
modules/imgproc/src/filter.cpp
+6
-7
filterSepRow.cl
modules/imgproc/src/opencl/filterSepRow.cl
+16
-4
No files found.
modules/imgproc/src/filter.cpp
View file @
fc10ffef
...
@@ -3475,14 +3475,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
...
@@ -3475,14 +3475,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
int
bdepth
=
CV_32F
;
int
bdepth
=
CV_32F
;
bool
int_arithm
=
false
;
bool
int_arithm
=
false
;
if
(
sdepth
==
CV_8U
&&
if
(
sdepth
==
CV_8U
&&
ddepth
==
CV_8U
&&
((
rtype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
&&
rtype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
&&
ctype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
&&
ctype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
)
ddepth
==
CV_8U
)))
{
{
bdepth
=
CV_32S
;
bdepth
=
CV_32S
;
_kernelX
.
getMat
().
reshape
(
1
,
1
)
.
convertTo
(
kernelX
,
CV_32S
,
1
<<
shift_bits
);
kernelX
.
convertTo
(
kernelX
,
CV_32S
,
1
<<
shift_bits
);
_kernelY
.
getMat
().
reshape
(
1
,
1
)
.
convertTo
(
kernelY
,
CV_32S
,
1
<<
shift_bits
);
kernelY
.
convertTo
(
kernelY
,
CV_32S
,
1
<<
shift_bits
);
int_arithm
=
true
;
int_arithm
=
true
;
}
}
...
@@ -3500,7 +3499,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
...
@@ -3500,7 +3499,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
bool
fast8uc1
=
type
==
CV_8UC1
&&
srcOffset
.
x
%
4
==
0
&&
bool
fast8uc1
=
type
==
CV_8UC1
&&
srcOffset
.
x
%
4
==
0
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
&&
!
int_arithm
;
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
;
Size
srcSize
=
src
.
size
();
Size
srcSize
=
src
.
size
();
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
...
...
modules/imgproc/src/opencl/filterSepRow.cl
View file @
fc10ffef
...
@@ -141,6 +141,14 @@
...
@@ -141,6 +141,14 @@
#
define
DIG
(
a
)
a,
#
define
DIG
(
a
)
a,
__constant
dstT1
mat_kernel[]
=
{
COEFF
}
;
__constant
dstT1
mat_kernel[]
=
{
COEFF
}
;
#
ifndef
INTEGER_ARITHMETIC
#
define
dstT4
float4
#
define
convertDstVec
convert_float4
#
else
#
define
dstT4
int4
#
define
convertDstVec
convert_int4
#
endif
__kernel
void
row_filter_C1_D0
(
__global
const
uchar
*
src,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
__kernel
void
row_filter_C1_D0
(
__global
const
uchar
*
src,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
__global
float
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
__global
float
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
...
@@ -156,7 +164,7 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
...
@@ -156,7 +164,7 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
int
start_y
=
y
+
src_offset_y
-
radiusy
;
int
start_y
=
y
+
src_offset_y
-
radiusy
;
int
start_addr
=
mad24
(
start_y,
src_step_in_pixel,
start_x
)
;
int
start_addr
=
mad24
(
start_y,
src_step_in_pixel,
start_x
)
;
float
4
sum
;
dstT
4
sum
;
uchar4
temp[READ_TIMES_ROW]
;
uchar4
temp[READ_TIMES_ROW]
;
__local
uchar4
LDS_DAT[LSIZE1][READ_TIMES_ROW
*
LSIZE0
+
1]
;
__local
uchar4
LDS_DAT[LSIZE1][READ_TIMES_ROW
*
LSIZE0
+
1]
;
...
@@ -250,19 +258,23 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
...
@@ -250,19 +258,23 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
read
pixels
from
lds
and
calculate
the
result
//
read
pixels
from
lds
and
calculate
the
result
sum
=
convert
_float4
(
vload4
(
0
,
(
__local
uchar
*
)
&LDS_DAT[l_y][l_x]+RADIUSX+offset
))
*
mat_kernel[RADIUSX]
;
sum
=
convert
DstVec
(
vload4
(
0
,
(
__local
uchar
*
)
&LDS_DAT[l_y][l_x]+RADIUSX+offset
))
*
mat_kernel[RADIUSX]
;
for
(
int
i
=
1
; i <= RADIUSX; ++i)
for
(
int
i
=
1
; i <= RADIUSX; ++i)
{
{
temp[0]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
-
i
)
;
temp[0]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
-
i
)
;
temp[1]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
+
i
)
;
temp[1]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
+
i
)
;
sum
+=
mad
(
convert_float4
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convert_float4
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
#
ifndef
INTEGER_ARITHMETIC
sum
+=
mad
(
convertDstVec
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convertDstVec
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
#
else
sum
+=
mad24
(
convertDstVec
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convertDstVec
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
#
endif
}
}
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
)
;
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
)
;
//
write
the
result
to
dst
//
write
the
result
to
dst
if
((
x+3<dst_cols
)
&
(
y<dst_rows
))
if
((
x+3<dst_cols
)
&
(
y<dst_rows
))
*
(
__global
float
4*
)
&dst[start_addr]
=
sum
;
*
(
__global
dstT
4*
)
&dst[start_addr]
=
sum
;
else
if
((
x+2<dst_cols
)
&&
(
y<dst_rows
))
else
if
((
x+2<dst_cols
)
&&
(
y<dst_rows
))
{
{
dst[start_addr]
=
sum.x
;
dst[start_addr]
=
sum.x
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment