Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b449b0bf
Commit
b449b0bf
authored
Mar 19, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
simplified cv::sepFilter2D OpenCL part
parent
82e6edfb
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
163 additions
and
416 deletions
+163
-416
filter.cpp
modules/imgproc/src/filter.cpp
+54
-79
filterSepCol.cl
modules/imgproc/src/opencl/filterSepCol.cl
+22
-40
filterSepRow.cl
modules/imgproc/src/opencl/filterSepRow.cl
+79
-290
filterSep_singlePass.cl
modules/imgproc/src/opencl/filterSep_singlePass.cl
+5
-6
test_sepfilter2D.cpp
modules/imgproc/test/ocl/test_sepfilter2D.cpp
+3
-1
No files found.
modules/imgproc/src/filter.cpp
View file @
b449b0bf
...
@@ -41,6 +41,7 @@
...
@@ -41,6 +41,7 @@
//M*/
//M*/
#include "precomp.hpp"
#include "precomp.hpp"
#define CV_OPENCL_RUN_ASSERT
#include "opencl_kernels.hpp"
#include "opencl_kernels.hpp"
#include <sstream>
#include <sstream>
...
@@ -3317,11 +3318,9 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
...
@@ -3317,11 +3318,9 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
}
}
static
bool
ocl_sepRowFilter2D
(
UMat
&
src
,
UMat
&
buf
,
Mat
&
kernelX
,
int
anchor
,
int
borderType
,
bool
sync
)
static
bool
ocl_sepRowFilter2D
(
UMat
&
src
,
UMat
&
buf
,
Mat
&
kernelX
,
int
anchor
,
int
borderType
)
{
{
int
type
=
src
.
type
();
int
type
=
src
.
type
(),
cn
=
CV_MAT_CN
(
type
),
sdepth
=
CV_MAT_DEPTH
(
type
);
int
cn
=
CV_MAT_CN
(
type
);
int
sdepth
=
CV_MAT_DEPTH
(
type
);
Size
bufSize
=
buf
.
size
();
Size
bufSize
=
buf
.
size
();
#ifdef ANDROID
#ifdef ANDROID
...
@@ -3329,27 +3328,14 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
...
@@ -3329,27 +3328,14 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
#else
#else
size_t
localsize
[
2
]
=
{
16
,
16
};
size_t
localsize
[
2
]
=
{
16
,
16
};
#endif
#endif
size_t
globalsize
[
2
]
=
{
DIVUP
(
bufSize
.
width
,
localsize
[
0
])
*
localsize
[
0
],
DIVUP
(
bufSize
.
height
,
localsize
[
1
])
*
localsize
[
1
]};
size_t
globalsize
[
2
]
=
{
DIVUP
(
bufSize
.
width
,
localsize
[
0
])
*
localsize
[
0
],
DIVUP
(
bufSize
.
height
,
localsize
[
1
])
*
localsize
[
1
]};
if
(
CV_8U
==
sdepth
)
if
(
type
==
CV_8UC1
)
{
switch
(
cn
)
{
case
1
:
globalsize
[
0
]
=
DIVUP
((
bufSize
.
width
+
3
)
>>
2
,
localsize
[
0
])
*
localsize
[
0
];
globalsize
[
0
]
=
DIVUP
((
bufSize
.
width
+
3
)
>>
2
,
localsize
[
0
])
*
localsize
[
0
];
break
;
case
2
:
globalsize
[
0
]
=
DIVUP
((
bufSize
.
width
+
1
)
>>
1
,
localsize
[
0
])
*
localsize
[
0
];
break
;
case
4
:
globalsize
[
0
]
=
DIVUP
(
bufSize
.
width
,
localsize
[
0
])
*
localsize
[
0
];
break
;
}
}
int
radiusX
=
anchor
;
int
radiusX
=
anchor
,
radiusY
=
(
buf
.
rows
-
src
.
rows
)
>>
1
;
int
radiusY
=
(
int
)((
buf
.
rows
-
src
.
rows
)
>>
1
);
bool
is
IsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
bool
is
olated
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
},
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
},
*
const
btype
=
borderMap
[
borderType
&
~
BORDER_ISOLATED
];
*
const
btype
=
borderMap
[
borderType
&
~
BORDER_ISOLATED
];
...
@@ -3358,49 +3344,38 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
...
@@ -3358,49 +3344,38 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
extra_extrapolation
|=
src
.
cols
<
(
int
)((
-
radiusX
+
globalsize
[
0
]
+
8
*
localsize
[
0
]
+
3
)
>>
1
)
+
1
;
extra_extrapolation
|=
src
.
cols
<
(
int
)((
-
radiusX
+
globalsize
[
0
]
+
8
*
localsize
[
0
]
+
3
)
>>
1
)
+
1
;
extra_extrapolation
|=
src
.
cols
<
radiusX
;
extra_extrapolation
|=
src
.
cols
<
radiusX
;
cv
::
String
build_options
=
cv
::
format
(
"-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s"
,
char
cvt
[
40
];
radiusX
,
(
int
)
localsize
[
0
],
(
int
)
localsize
[
1
],
cn
,
cv
::
String
build_options
=
cv
::
format
(
"-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s"
btype
,
" -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s"
,
radiusX
,
(
int
)
localsize
[
0
],
(
int
)
localsize
[
1
],
cn
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32FC
(
cn
)),
ocl
::
convertTypeStr
(
sdepth
,
CV_32F
,
cn
,
cvt
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
CV_32F
));
build_options
+=
ocl
::
kernelToStr
(
kernelX
,
CV_32F
);
build_options
+=
ocl
::
kernelToStr
(
kernelX
,
CV_32F
);
Size
srcWholeSize
;
Point
srcOffset
;
Size
srcWholeSize
;
Point
srcOffset
;
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
std
::
stringstream
strKernel
;
String
kernelName
(
"row_filter"
);
strKernel
<<
"row_filter"
;
if
(
type
==
CV_8UC1
)
if
(
-
1
!=
cn
)
kernelName
+=
"_C1_D0"
;
strKernel
<<
"_C"
<<
cn
;
if
(
-
1
!=
sdepth
)
strKernel
<<
"_D"
<<
sdepth
;
ocl
::
Kernel
k
ernelRow
;
ocl
::
Kernel
k
(
kernelName
.
c_str
(),
cv
::
ocl
::
imgproc
::
filterSepRow_oclsrc
,
if
(
!
kernelRow
.
create
(
strKernel
.
str
().
c_str
(),
cv
::
ocl
::
imgproc
::
filterSepRow_oclsrc
,
build_options
);
build_options
))
if
(
k
.
empty
(
))
return
false
;
return
false
;
int
idxArg
=
0
;
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)(
src
.
step
/
src
.
elemSize
()),
srcOffset
.
x
,
idxArg
=
kernelRow
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
srcOffset
.
y
,
src
.
cols
,
src
.
rows
,
srcWholeSize
.
width
,
srcWholeSize
.
height
,
idxArg
=
kernelRow
.
set
(
idxArg
,
(
int
)(
src
.
step
/
src
.
elemSize
()));
ocl
::
KernelArg
::
PtrWriteOnly
(
buf
),
(
int
)(
buf
.
step
/
buf
.
elemSize
()),
buf
.
cols
,
buf
.
rows
,
radiusY
);
idxArg
=
kernelRow
.
set
(
idxArg
,
srcOffset
.
x
);
idxArg
=
kernelRow
.
set
(
idxArg
,
srcOffset
.
y
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
idxArg
=
kernelRow
.
set
(
idxArg
,
src
.
cols
);
idxArg
=
kernelRow
.
set
(
idxArg
,
src
.
rows
);
idxArg
=
kernelRow
.
set
(
idxArg
,
srcWholeSize
.
width
);
idxArg
=
kernelRow
.
set
(
idxArg
,
srcWholeSize
.
height
);
idxArg
=
kernelRow
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrWriteOnly
(
buf
));
idxArg
=
kernelRow
.
set
(
idxArg
,
(
int
)(
buf
.
step
/
buf
.
elemSize
()));
idxArg
=
kernelRow
.
set
(
idxArg
,
buf
.
cols
);
idxArg
=
kernelRow
.
set
(
idxArg
,
buf
.
rows
);
idxArg
=
kernelRow
.
set
(
idxArg
,
radiusY
);
return
kernelRow
.
run
(
2
,
globalsize
,
localsize
,
sync
);
}
}
static
bool
ocl_sepColFilter2D
(
const
UMat
&
buf
,
UMat
&
dst
,
Mat
&
kernelY
,
int
anchor
,
bool
sync
)
static
bool
ocl_sepColFilter2D
(
const
UMat
&
buf
,
UMat
&
dst
,
Mat
&
kernelY
,
int
anchor
)
{
{
#ifdef ANDROID
#ifdef ANDROID
size_t
localsize
[
2
]
=
{
16
,
10
};
size_t
localsize
[
2
]
=
{
16
,
10
};
...
@@ -3420,28 +3395,23 @@ static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anc
...
@@ -3420,28 +3395,23 @@ static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anc
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
localsize
[
0
])
*
localsize
[
0
];
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
localsize
[
0
])
*
localsize
[
0
];
char
cvt
[
40
];
char
cvt
[
40
];
cv
::
String
build_options
=
cv
::
format
(
"-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s"
,
cv
::
String
build_options
=
cv
::
format
(
"-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
anchor
,
(
int
)
localsize
[
0
],
(
int
)
localsize
[
1
],
cn
,
ocl
::
typeToStr
(
buf
.
type
()),
" -D srcT=%s -D dstT=%s -D convertToDstT=%s"
,
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
CV_32F
,
ddepth
,
cn
,
cvt
));
anchor
,
(
int
)
localsize
[
0
],
(
int
)
localsize
[
1
],
cn
,
ocl
::
typeToStr
(
buf
.
type
()),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
CV_32F
,
ddepth
,
cn
,
cvt
));
build_options
+=
ocl
::
kernelToStr
(
kernelY
,
CV_32F
);
build_options
+=
ocl
::
kernelToStr
(
kernelY
,
CV_32F
);
ocl
::
Kernel
kernelCol
;
ocl
::
Kernel
k
(
"col_filter"
,
cv
::
ocl
::
imgproc
::
filterSepCol_oclsrc
,
if
(
!
kernelCol
.
create
(
"col_filter"
,
cv
::
ocl
::
imgproc
::
filterSepCol_oclsrc
,
build_options
))
build_options
);
if
(
k
.
empty
())
return
false
;
return
false
;
int
idxArg
=
0
;
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
buf
),
(
int
)(
buf
.
step
/
buf
.
elemSize
()),
buf
.
cols
,
idxArg
=
kernelCol
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
buf
));
buf
.
rows
,
ocl
::
KernelArg
::
PtrWriteOnly
(
dst
),
(
int
)(
dst
.
offset
/
dst
.
elemSize
()),
idxArg
=
kernelCol
.
set
(
idxArg
,
(
int
)(
buf
.
step
/
buf
.
elemSize
()));
(
int
)(
dst
.
step
/
dst
.
elemSize
()),
dst
.
cols
,
dst
.
rows
);
idxArg
=
kernelCol
.
set
(
idxArg
,
buf
.
cols
);
idxArg
=
kernelCol
.
set
(
idxArg
,
buf
.
rows
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
idxArg
=
kernelCol
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrWriteOnly
(
dst
));
idxArg
=
kernelCol
.
set
(
idxArg
,
(
int
)(
dst
.
offset
/
dst
.
elemSize
()));
idxArg
=
kernelCol
.
set
(
idxArg
,
(
int
)(
dst
.
step
/
dst
.
elemSize
()));
idxArg
=
kernelCol
.
set
(
idxArg
,
dst
.
cols
);
idxArg
=
kernelCol
.
set
(
idxArg
,
dst
.
rows
);
return
kernelCol
.
run
(
2
,
globalsize
,
localsize
,
sync
);
}
}
const
int
optimizedSepFilterLocalSize
=
16
;
const
int
optimizedSepFilterLocalSize
=
16
;
...
@@ -3473,12 +3443,14 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
...
@@ -3473,12 +3443,14 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
String
opts
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s"
String
opts
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s"
" -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s"
" -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s"
" -D %s"
,
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
_row_kernel
.
size
().
height
/
2
,
_col_kernel
.
size
().
height
/
2
,
" -D %s -D srcT1=%s -D dstT1=%s -D cn=%d"
,
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
_row_kernel
.
size
().
height
/
2
,
_col_kernel
.
size
().
height
/
2
,
ocl
::
kernelToStr
(
_row_kernel
,
CV_32F
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
_row_kernel
,
CV_32F
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
_col_kernel
,
CV_32F
,
"KERNEL_MATRIX_Y"
).
c_str
(),
ocl
::
kernelToStr
(
_col_kernel
,
CV_32F
,
"KERNEL_MATRIX_Y"
).
c_str
(),
ocl
::
typeToStr
(
stype
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
stype
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
borderMap
[
borderType
]);
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
borderMap
[
borderType
],
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
cn
);
ocl
::
Kernel
k
(
"sep_filter"
,
ocl
::
imgproc
::
filterSep_singlePass_oclsrc
,
opts
);
ocl
::
Kernel
k
(
"sep_filter"
,
ocl
::
imgproc
::
filterSep_singlePass_oclsrc
,
opts
);
if
(
k
.
empty
())
if
(
k
.
empty
())
...
@@ -3529,10 +3501,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
...
@@ -3529,10 +3501,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
if
(
ddepth
<
0
)
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
ddepth
=
sdepth
;
CV_OCL_RUN_
(
kernelY
.
rows
<=
21
&&
kernelX
.
rows
<=
21
&&
// printf("%d %d\n", imgSize.width, optimizedSepFilterLocalSize + (kernelX.rows >> 1));
imgSize
.
width
>
optimizedSepFilterLocalSize
+
(
kernelX
.
rows
>>
1
)
&&
// printf("%d %d\n", imgSize.height, optimizedSepFilterLocalSize + (kernelY.rows >> 1));
imgSize
.
height
>
optimizedSepFilterLocalSize
+
(
kernelY
.
rows
>>
1
),
ocl_sepFilter2D_SinglePass
(
_src
,
_dst
,
_kernelX
,
_kernelY
,
borderType
,
ddepth
),
true
)
// CV_OCL_RUN_(kernelY.rows <= 21 && kernelX.rows <= 21 &&
// imgSize.width > optimizedSepFilterLocalSize + (kernelX.rows >> 1) &&
// imgSize.height > optimizedSepFilterLocalSize + (kernelY.rows >> 1),
// ocl_sepFilter2D_SinglePass(_src, _dst, _kernelX, _kernelY, borderType, ddepth), true)
UMat
src
=
_src
.
getUMat
();
UMat
src
=
_src
.
getUMat
();
Size
srcWholeSize
;
Point
srcOffset
;
Size
srcWholeSize
;
Point
srcOffset
;
...
@@ -3546,12 +3521,12 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
...
@@ -3546,12 +3521,12 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
Size
srcSize
=
src
.
size
();
Size
srcSize
=
src
.
size
();
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
UMat
buf
;
buf
.
create
(
bufSize
,
CV_MAKETYPE
(
CV_32F
,
cn
));
UMat
buf
;
buf
.
create
(
bufSize
,
CV_MAKETYPE
(
CV_32F
,
cn
));
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
,
false
))
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
))
return
false
;
return
false
;
_dst
.
create
(
srcSize
,
CV_MAKETYPE
(
ddepth
,
cn
));
_dst
.
create
(
srcSize
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
anchor
.
y
,
false
);
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
anchor
.
y
);
}
}
#endif
#endif
...
...
modules/imgproc/src/opencl/filterSepCol.cl
View file @
b449b0bf
...
@@ -36,16 +36,6 @@
...
@@ -36,16 +36,6 @@
#
define
READ_TIMES_COL
((
2*
(
RADIUSY+LSIZE1
)
-1
)
/LSIZE1
)
#
define
READ_TIMES_COL
((
2*
(
RADIUSY+LSIZE1
)
-1
)
/LSIZE1
)
#
define
RADIUS
1
#
define
RADIUS
1
#
if
CN
==1
#
define
ALIGN
(((
RADIUS
)
+3
)
>>2<<2
)
#
elif
CN==2
#
define
ALIGN
(((
RADIUS
)
+1
)
>>1<<1
)
#
elif
CN==3
#
define
ALIGN
(((
RADIUS
)
+3
)
>>2<<2
)
#
elif
CN==4
#
define
ALIGN
(
RADIUS
)
#
define
READ_TIMES_ROW
((
2*
(
RADIUS+LSIZE0
)
-1
)
/LSIZE0
)
#
endif
#
define
noconvert
#
define
noconvert
...
@@ -65,16 +55,8 @@ The info above maybe obsolete.
...
@@ -65,16 +55,8 @@ The info above maybe obsolete.
#
define
DIG
(
a
)
a,
#
define
DIG
(
a
)
a,
__constant
float
mat_kernel[]
=
{
COEFF
}
;
__constant
float
mat_kernel[]
=
{
COEFF
}
;
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
col_filter
__kernel
void
col_filter
(
__global
const
srcT
*
src,
int
src_step_in_pixel,
int
src_whole_cols,
int
src_whole_rows,
(
__global
const
GENTYPE_SRC
*
restrict
src,
__global
dstT
*
dst,
int
dst_offset_in_pixel,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows
)
const
int
src_step_in_pixel,
const
int
src_whole_cols,
const
int
src_whole_rows,
__global
GENTYPE_DST
*
dst,
const
int
dst_offset_in_pixel,
const
int
dst_step_in_pixel,
const
int
dst_cols,
const
int
dst_rows
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
...
@@ -85,35 +67,35 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
...
@@ -85,35 +67,35 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
int
start_addr
=
mad24
(
y,
src_step_in_pixel,
x
)
;
int
start_addr
=
mad24
(
y,
src_step_in_pixel,
x
)
;
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,
src_whole_cols
)
;
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,
src_whole_cols
)
;
int
i
;
srcT
sum,
temp[READ_TIMES_COL]
;
GENTYPE_SRC
sum,
temp[READ_TIMES_COL]
;
__local
srcT
LDS_DAT[LSIZE1
*
READ_TIMES_COL][LSIZE0
+
1]
;
__local
GENTYPE_SRC
LDS_DAT[LSIZE1
*
READ_TIMES_COL][LSIZE0
+
1]
;
//read
pixels
from
src
//
read
pixels
from
src
for
(
i
=
0
;i<READ_TIMES_COL;i++
)
for
(
int
i
=
0
; i < READ_TIMES_COL; ++i
)
{
{
int
current_addr
=
start_addr+i*LSIZE1*src_step_in_pixel
;
int
current_addr
=
mad24
(
i,
LSIZE1
*
src_step_in_pixel,
start_addr
)
;
current_addr
=
current_addr
<
end_addr
?
current_addr
:
0
;
current_addr
=
current_addr
<
end_addr
?
current_addr
:
0
;
temp[i]
=
src[current_addr]
;
temp[i]
=
src[current_addr]
;
}
}
//save
pixels
to
lds
for
(
i
=
0
;i<READ_TIMES_COL;i++)
//
save
pixels
to
lds
{
for
(
int
i
=
0
; i < READ_TIMES_COL; ++i)
LDS_DAT[l_y+i*LSIZE1][l_x]
=
temp[i]
;
LDS_DAT[mad24
(
i,
LSIZE1,
l_y
)
][l_x]
=
temp[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//read
pixels
from
lds
and
calculate
the
result
sum
=
LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY]
;
//
read
pixels
from
lds
and
calculate
the
result
for
(
i=1
;i<=RADIUSY;i++)
sum
=
LDS_DAT[l_y
+
RADIUSY][l_x]
*
mat_kernel[RADIUSY]
;
for
(
int
i
=
1
; i <= RADIUSY; ++i)
{
{
temp[0]
=LDS_DAT[l_y+RADIUSY-
i][l_x]
;
temp[0]
=
LDS_DAT[l_y
+
RADIUSY
-
i][l_x]
;
temp[1]
=LDS_DAT[l_y+RADIUSY+
i][l_x]
;
temp[1]
=
LDS_DAT[l_y
+
RADIUSY
+
i][l_x]
;
sum
+=
temp[0]
*
mat_kernel[RADIUSY-i]+temp[1]
*
mat_kernel[RADIUSY+i]
;
sum
+=
mad
(
temp[0],
mat_kernel[RADIUSY
-
i],
temp[1]
*
mat_kernel[RADIUSY
+
i]
)
;
}
}
//write
the
result
to
dst
if
((
x<dst_cols
)
&
(
y<dst_rows
))
//
write
the
result
to
dst
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
{
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
+
dst_offset_in_pixel
)
;
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
+
dst_offset_in_pixel
)
;
dst[start_addr]
=
convert
_to_DS
T
(
sum
)
;
dst[start_addr]
=
convert
ToDst
T
(
sum
)
;
}
}
}
}
modules/imgproc/src/opencl/filterSepRow.cl
View file @
b449b0bf
...
@@ -35,40 +35,28 @@
...
@@ -35,40 +35,28 @@
//
//
#
define
READ_TIMES_ROW
((
2*
(
RADIUSX+LSIZE0
)
-1
)
/LSIZE0
)
//for
c4
only
#
define
READ_TIMES_ROW
((
2*
(
RADIUSX+LSIZE0
)
-1
)
/LSIZE0
)
//for
c4
only
#
define
READ_TIMES_COL
((
2*
(
RADIUSY+LSIZE1
)
-1
)
/LSIZE1
)
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
define
RADIUS
1
#
define
RADIUS
1
#
if
CN
==1
#
define
ALIGN
(((
RADIUS
)
+3
)
>>2<<2
)
#
elif
CN==2
#
define
ALIGN
(((
RADIUS
)
+1
)
>>1<<1
)
#
elif
CN==3
#
define
ALIGN
(((
RADIUS
)
+3
)
>>2<<2
)
#
elif
CN==4
#
define
ALIGN
(
RADIUS
)
#
endif
#
ifdef
BORDER_REPLICATE
#
ifdef
BORDER_REPLICATE
//
BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
//
BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
endif
#
endif
#
ifdef
BORDER_REFLECT
#
ifdef
BORDER_REFLECT
//
BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
//
BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
endif
#
endif
#
ifdef
BORDER_REFLECT_101
#
ifdef
BORDER_REFLECT_101
//
BORDER_REFLECT_101:
gfedcb|abcdefgh|gfedcba
//
BORDER_REFLECT_101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
endif
#
endif
//blur
function
does
not
support
BORDER_WRAP
#
ifdef
BORDER_WRAP
#
ifdef
BORDER_WRAP
//
BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
//
BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
endif
#
endif
...
@@ -127,35 +115,26 @@
...
@@ -127,35 +115,26 @@
#
endif
//BORDER_CONSTANT
#
endif
//BORDER_CONSTANT
#
endif
//EXTRA_EXTRAPOLATION
#
endif
//EXTRA_EXTRAPOLATION
/**********************************************************************************
#
define
noconvert
These
kernels
are
written
for
separable
filters
such
as
Sobel,
Scharr,
GaussianBlur.
Now
(
6/29/2011
)
the
kernels
only
support
8U
data
type
and
the
anchor
of
the
convovle
#
if
cn
!=
3
kernel
must
be
in
the
center.
ROI
is
not
supported
either.
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
For
channels
=1,2,4,
each
kernels
read
4
elements
(
not
4
pixels
)
,
and
for
channels
=3,
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
the
kernel
read
4
pixels,
save
them
to
LDS
and
read
the
data
needed
from
LDS
to
#
define
SRCSIZE
((
int
)
sizeof
(
srcT
))
calculate
the
result.
#
define
DSTSIZE
((
int
)
sizeof
(
dstT
))
The
length
of
the
convovle
kernel
supported
is
related
to
the
LSIZE0
and
the
MAX
size
#
else
of
LDS,
which
is
HW
related.
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
For
channels
=
1
,
3
the
RADIUS
is
no
more
than
LSIZE0*2
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
For
channels
=
2
,
the
RADIUS
is
no
more
than
LSIZE0
#
define
SRCSIZE
((
int
)
sizeof
(
srcT1
)
*3
)
For
channels
=
4
,
arbitary
RADIUS
is
supported
unless
the
LDS
is
not
enough
#
define
DSTSIZE
((
int
)
sizeof
(
dstT1
)
*3
)
Niko
#
endif
6/29/2011
The
info
above
maybe
obsolete.
***********************************************************************************
/
#
define
DIG
(
a
)
a,
#
define
DIG
(
a
)
a,
__constant
float
mat_kernel[]
=
{
COEFF
}
;
__constant
float
mat_kernel[]
=
{
COEFF
}
;
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
row_filter_C1_D0
__kernel
void
row_filter_C1_D0
(
__global
const
uchar
*
src,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
(
__global
uchar
*
restrict
src,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
int
src_step_in_pixel,
__global
float
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
int
src_offset_x,
int
src_offset_y,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
__global
float
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
int
radiusy
)
int
radiusy
)
{
{
int
x
=
get_global_id
(
0
)
<<2
;
int
x
=
get_global_id
(
0
)
<<2
;
...
@@ -163,29 +142,29 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -163,29 +142,29 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
int
l_x
=
get_local_id
(
0
)
;
int
l_x
=
get_local_id
(
0
)
;
int
l_y
=
get_local_id
(
1
)
;
int
l_y
=
get_local_id
(
1
)
;
int
start_x
=
x
+
src_offset_x
-
RADIUSX
&
0xfffffffc
;
int
start_x
=
x
+
src_offset_x
-
RADIUSX
&
0xfffffffc
;
int
offset
=
src_offset_x
-
RADIUSX
&
3
;
int
offset
=
src_offset_x
-
RADIUSX
&
3
;
int
start_y
=
y
+
src_offset_y
-
radiusy
;
int
start_y
=
y
+
src_offset_y
-
radiusy
;
int
start_addr
=
mad24
(
start_y,
src_step_in_pixel,
start_x
)
;
int
start_addr
=
mad24
(
start_y,
src_step_in_pixel,
start_x
)
;
int
i
;
float4
sum
;
float4
sum
;
uchar4
temp[READ_TIMES_ROW]
;
uchar4
temp[READ_TIMES_ROW]
;
__local
uchar4
LDS_DAT[LSIZE1][READ_TIMES_ROW
*LSIZE0+
1]
;
__local
uchar4
LDS_DAT[LSIZE1][READ_TIMES_ROW
*
LSIZE0
+
1]
;
#
ifdef
BORDER_CONSTANT
#
ifdef
BORDER_CONSTANT
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,
src_whole_cols
)
;
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,
src_whole_cols
)
;
//
read
pixels
from
src
//
read
pixels
from
src
for
(
i
=
0
; i < READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
int
current_addr
=
start_addr+i*LSIZE0*4
;
int
current_addr
=
mad24
(
i,
LSIZE0
<<
2
,
start_addr
)
;
current_addr
=
((
current_addr
<
end_addr
)
&&
(
current_addr
>
0
))
?
current_addr
:
0
;
current_addr
=
current_addr
<
end_addr
&&
current_addr
>
0
?
current_addr
:
0
;
temp[i]
=
*
(
__global
uchar4
*
)
&src[current_addr]
;
temp[i]
=
*
(
__global
const
uchar4
*
)
&src[current_addr]
;
}
}
//
judge
if
read
out
of
boundary
//
judge
if
read
out
of
boundary
#
ifdef
BORDER_ISOLATED
#
ifdef
BORDER_ISOLATED
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
temp[i].x
=
ELEM
(
start_x+i*LSIZE0*4,
src_offset_x,
src_offset_x
+
src_cols,
0
,
temp[i].x
)
;
temp[i].x
=
ELEM
(
start_x+i*LSIZE0*4,
src_offset_x,
src_offset_x
+
src_cols,
0
,
temp[i].x
)
;
temp[i].y
=
ELEM
(
start_x+i*LSIZE0*4+1,
src_offset_x,
src_offset_x
+
src_cols,
0
,
temp[i].y
)
;
temp[i].y
=
ELEM
(
start_x+i*LSIZE0*4+1,
src_offset_x,
src_offset_x
+
src_cols,
0
,
temp[i].y
)
;
...
@@ -194,7 +173,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -194,7 +173,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
temp[i]
=
ELEM
(
start_y,
src_offset_y,
src_offset_y
+
src_rows,
(
uchar4
)
0
,
temp[i]
)
;
temp[i]
=
ELEM
(
start_y,
src_offset_y,
src_offset_y
+
src_rows,
(
uchar4
)
0
,
temp[i]
)
;
}
}
#
else
#
else
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
temp[i].x
=
ELEM
(
start_x+i*LSIZE0*4,
0
,
src_whole_cols,
0
,
temp[i].x
)
;
temp[i].x
=
ELEM
(
start_x+i*LSIZE0*4,
0
,
src_whole_cols,
0
,
temp[i].x
)
;
temp[i].y
=
ELEM
(
start_x+i*LSIZE0*4+1,
0
,
src_whole_cols,
0
,
temp[i].y
)
;
temp[i].y
=
ELEM
(
start_x+i*LSIZE0*4+1,
0
,
src_whole_cols,
0
,
temp[i].y
)
;
...
@@ -209,16 +188,15 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -209,16 +188,15 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
#else
#else
int not_all_in_range = (start_x<0) |
(
start_x
+
READ_TIMES_ROW*LSIZE0*4+4>src_whole_cols
)
| (start_y<0) |
(
start_y
>=
src_whole_rows
)
;
int not_all_in_range = (start_x<0) |
(
start_x
+
READ_TIMES_ROW*LSIZE0*4+4>src_whole_cols
)
| (start_y<0) |
(
start_y
>=
src_whole_rows
)
;
#
endif
#
endif
int4
index[READ_TIMES_ROW]
;
int4
index[READ_TIMES_ROW],
addr
;
int4
addr
;
int
s_y
;
int
s_y
;
if
(
not_all_in_range
)
if
(
not_all_in_range
)
{
{
//
judge
if
read
out
of
boundary
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i < READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
index[i]
=
(
int4
)(
start_x+i*LSIZE0*4
)
+
(
int4
)(
0
,
1
,
2
,
3
)
;
index[i]
=
(
int4
)(
mad24
(
i,
LSIZE0
<<
2
,
start_x
)
)
+
(
int4
)(
0
,
1
,
2
,
3
)
;
#
ifdef
BORDER_ISOLATED
#
ifdef
BORDER_ISOLATED
EXTRAPOLATE
(
index[i].x,
src_offset_x,
src_offset_x
+
src_cols
)
;
EXTRAPOLATE
(
index[i].x,
src_offset_x,
src_offset_x
+
src_cols
)
;
EXTRAPOLATE
(
index[i].y,
src_offset_x,
src_offset_x
+
src_cols
)
;
EXTRAPOLATE
(
index[i].y,
src_offset_x,
src_offset_x
+
src_cols
)
;
...
@@ -231,6 +209,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -231,6 +209,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
EXTRAPOLATE
(
index[i].w,
0
,
src_whole_cols
)
;
EXTRAPOLATE
(
index[i].w,
0
,
src_whole_cols
)
;
#
endif
#
endif
}
}
s_y
=
start_y
;
s_y
=
start_y
;
#
ifdef
BORDER_ISOLATED
#
ifdef
BORDER_ISOLATED
EXTRAPOLATE
(
s_y,
src_offset_y,
src_offset_y
+
src_rows
)
;
EXTRAPOLATE
(
s_y,
src_offset_y,
src_offset_y
+
src_rows
)
;
...
@@ -239,9 +218,9 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -239,9 +218,9 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
#
endif
#
endif
//
read
pixels
from
src
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
addr
=
mad24
((
int4
)
s_y,
(
int4
)
src_step_in_pixel,
index[i]
)
;
addr
=
mad24
((
int4
)
s_y,
(
int4
)
src_step_in_pixel,
index[i]
)
;
temp[i].x
=
src[addr.x]
;
temp[i].x
=
src[addr.x]
;
temp[i].y
=
src[addr.y]
;
temp[i].y
=
src[addr.y]
;
temp[i].z
=
src[addr.z]
;
temp[i].z
=
src[addr.z]
;
...
@@ -251,26 +230,26 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -251,26 +230,26 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
else
else
{
{
//
read
pixels
from
src
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
temp[i]
=
*
(
__global
uchar4*
)
&src[
start_addr+i*LSIZE0*4
]
;
temp[i]
=
*
(
__global
uchar4*
)
&src[
mad24
(
i,
LSIZE0
<<
2
,
start_addr
)
]
;
}
}
#
endif
//BORDER_CONSTANT
#
endif
//BORDER_CONSTANT
//
save
pixels
to
lds
//
save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
LDS_DAT[l_y][
l_x+i*LSIZE0]=
temp[i]
;
LDS_DAT[l_y][
mad24
(
i,
LSIZE0,
l_x
)
]
=
temp[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
read
pixels
from
lds
and
calculate
the
result
//
read
pixels
from
lds
and
calculate
the
result
sum
=
convert_float4
(
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]+RADIUSX+offset
))
*
mat_kernel[RADIUSX]
;
sum
=
convert_float4
(
vload4
(
0
,
(
__local
uchar
*
)
&LDS_DAT[l_y][l_x]+RADIUSX+offset
))
*
mat_kernel[RADIUSX]
;
for
(
i
=1
; i<=RADIUSX; i++
)
for
(
i
nt
i
=
1
; i <= RADIUSX; ++i
)
{
{
temp[0]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
-
i
)
;
temp[0]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
-
i
)
;
temp[1]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
+
i
)
;
temp[1]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
+
i
)
;
sum
+=
convert_float4
(
temp[0]
)
*
mat_kernel[RADIUSX-i]
+
convert_float4
(
temp[1]
)
*
mat_kernel[RADIUSX+i]
;
sum
+=
mad
(
convert_float4
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convert_float4
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
}
}
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
)
;
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
)
;
//
write
the
result
to
dst
//
write
the
result
to
dst
if
((
x+3<dst_cols
)
&
(
y<dst_rows
))
if
((
x+3<dst_cols
)
&
(
y<dst_rows
))
...
@@ -290,154 +269,58 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -290,154 +269,58 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
dst[start_addr]
=
sum.x
;
dst[start_addr]
=
sum.x
;
}
}
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
row_filter_C4_D0
__kernel
void
row_filter
(
__global
const
srcT
*
src,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
(
__global
uchar4
*
restrict
src,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
int
src_step_in_pixel,
__global
dstT
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
int
src_offset_x,
int
src_offset_y,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
__global
float4
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
int
radiusy
)
int
radiusy
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
int
l_x
=
get_local_id
(
0
)
;
int
l_x
=
get_local_id
(
0
)
;
int
l_y
=
get_local_id
(
1
)
;
int
l_y
=
get_local_id
(
1
)
;
int
start_x
=
x
+src_offset_x-
RADIUSX
;
int
start_x
=
x
+
src_offset_x
-
RADIUSX
;
int
start_y
=
y
+src_offset_y-
radiusy
;
int
start_y
=
y
+
src_offset_y
-
radiusy
;
int
start_addr
=
mad24
(
start_y,
src_step_in_pixel,
start_x
)
;
int
start_addr
=
mad24
(
start_y,
src_step_in_pixel,
start_x
)
;
int
i
;
float4
sum
;
dstT
sum
;
uchar4
temp[READ_TIMES_ROW]
;
srcT
temp[READ_TIMES_ROW]
;
__local
uchar4
LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+
1]
;
__local
srcT
LDS_DAT[LSIZE1][READ_TIMES_ROW
*
LSIZE0
+
1]
;
#
ifdef
BORDER_CONSTANT
#
ifdef
BORDER_CONSTANT
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,
src_whole_cols
)
;
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,
src_whole_cols
)
;
//
read
pixels
from
src
//
read
pixels
from
src
for
(
i
=
0
; i<
READ_TIMES_ROW; i++)
for
(
i
nt
i
=
0
; i <
READ_TIMES_ROW; i++)
{
{
int
current_addr
=
start_addr+i*LSIZE0
;
int
current_addr
=
mad24
(
i,
LSIZE0,
start_addr
)
;
current_addr
=
((
current_addr
<
end_addr
)
&&
(
current_addr
>
0
))
?
current_addr
:
0
;
current_addr
=
current_addr
<
end_addr
&&
current_addr
>
0
?
current_addr
:
0
;
temp[i]
=
src[current_addr]
;
temp[i]
=
src[current_addr]
;
}
}
//judge
if
read
out
of
boundary
//
judge
if
read
out
of
boundary
#
ifdef
BORDER_ISOLATED
#
ifdef
BORDER_ISOLATED
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
temp[i]
=
ELEM
(
start_x+i*LSIZE0,
src_offset_x,
src_offset_x
+
src_cols,
(
uchar4
)
0
,
temp[i]
)
;
temp[i]
=
ELEM
(
mad24
(
i,
LSIZE0,
start_x
)
,
src_offset_x,
src_offset_x
+
src_cols,
(
srcT
)(
0
)
,
temp[i]
)
;
temp[i]
=
ELEM
(
start_y,
src_offset_y,
src_offset_y
+
src_rows,
(
uchar4
)
0
,
temp[i]
)
;
temp[i]
=
ELEM
(
start_y,
src_offset_y,
src_offset_y
+
src_rows,
(
srcT
)(
0
)
,
temp[i]
)
;
}
}
#
else
#
else
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
temp[i]
=
ELEM
(
start_x+i*LSIZE0,
0
,
src_whole_cols,
(
uchar4
)
0
,
temp[i]
)
;
temp[i]
=
ELEM
(
mad24
(
i,
LSIZE0,
start_x
)
,
0
,
src_whole_cols,
(
srcT
)(
0
)
,
temp[i]
)
;
temp[i]
=
ELEM
(
start_y,
0
,
src_whole_rows,
(
uchar4
)
0
,
temp[i]
)
;
temp[i]
=
ELEM
(
start_y,
0
,
src_whole_rows,
(
srcT
)(
0
)
,
temp[i]
)
;
}
}
#
endif
#
endif
#
else
#
else
int
index[READ_TIMES_ROW]
;
int
index[READ_TIMES_ROW]
;
int
s_x,s_y
;
int
s_x,
s_y
;
//
judge
if
read
out
of
boundary
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
{
{
s_x
=
start_x+i*LSIZE0
;
s_x
=
mad24
(
i,
LSIZE0,
start_x
)
;
s_y
=
start_y
;
s_y
=
start_y
;
#
ifdef
BORDER_ISOLATED
EXTRAPOLATE
(
s_x,
src_offset_x,
src_offset_x
+
src_cols
)
;
EXTRAPOLATE
(
s_y,
src_offset_y,
src_offset_y
+
src_rows
)
;
#
else
EXTRAPOLATE
(
s_x,
0
,
src_whole_cols
)
;
EXTRAPOLATE
(
s_y,
0
,
src_whole_rows
)
;
#
endif
index[i]=mad24
(
s_y,
src_step_in_pixel,
s_x
)
;
}
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
temp[i]
=
src[index[i]]
;
#
endif
//BORDER_CONSTANT
//save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//read
pixels
from
lds
and
calculate
the
result
sum
=convert_float4
(
LDS_DAT[l_y][l_x+RADIUSX]
)
*mat_kernel[RADIUSX]
;
for
(
i=1
; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i]
;
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i]
;
sum
+=
convert_float4
(
temp[0]
)
*mat_kernel[RADIUSX-i]+convert_float4
(
temp[1]
)
*mat_kernel[RADIUSX+i]
;
}
//write
the
result
to
dst
if
(
x<dst_cols
&&
y<dst_rows
)
{
start_addr
=
mad24
(
y,dst_step_in_pixel,x
)
;
dst[start_addr]
=
sum
;
}
}
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
row_filter_C1_D5
(
__global
float
*
restrict
src,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
__global
float
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
int
radiusy
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
l_x
=
get_local_id
(
0
)
;
int
l_y
=
get_local_id
(
1
)
;
int
start_x
=
x+src_offset_x-RADIUSX
;
int
start_y
=
y+src_offset_y-radiusy
;
int
start_addr
=
mad24
(
start_y,src_step_in_pixel,start_x
)
;
int
i
;
float
sum
;
float
temp[READ_TIMES_ROW]
;
__local
float
LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1]
;
#
ifdef
BORDER_CONSTANT
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,src_whole_cols
)
;
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
int
current_addr
=
start_addr+i*LSIZE0
;
current_addr
=
((
current_addr
<
end_addr
)
&&
(
current_addr
>
0
))
?
current_addr
:
0
;
temp[i]
=
src[current_addr]
;
}
//
judge
if
read
out
of
boundary
#
ifdef
BORDER_ISOLATED
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
temp[i]=
ELEM
(
start_x+i*LSIZE0,
src_offset_x,
src_offset_x
+
src_cols,
(
float
)
0
,
temp[i]
)
;
temp[i]=
ELEM
(
start_y,
src_offset_y,
src_offset_y
+
src_rows,
(
float
)
0
,
temp[i]
)
;
}
#
else
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
temp[i]=
ELEM
(
start_x+i*LSIZE0,
0
,
src_whole_cols,
(
float
)
0
,
temp[i]
)
;
temp[i]=
ELEM
(
start_y,
0
,
src_whole_rows,
(
float
)
0
,
temp[i]
)
;
}
#
endif
#
else
//
BORDER_CONSTANT
int
index[READ_TIMES_ROW]
;
int
s_x,s_y
;
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
s_x
=
start_x
+
i*LSIZE0,
s_y
=
start_y
;
#
ifdef
BORDER_ISOLATED
#
ifdef
BORDER_ISOLATED
EXTRAPOLATE
(
s_x,
src_offset_x,
src_offset_x
+
src_cols
)
;
EXTRAPOLATE
(
s_x,
src_offset_x,
src_offset_x
+
src_cols
)
;
EXTRAPOLATE
(
s_y,
src_offset_y,
src_offset_y
+
src_rows
)
;
EXTRAPOLATE
(
s_y,
src_offset_y,
src_offset_y
+
src_rows
)
;
...
@@ -445,125 +328,31 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
...
@@ -445,125 +328,31 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
EXTRAPOLATE
(
s_x,
0
,
src_whole_cols
)
;
EXTRAPOLATE
(
s_x,
0
,
src_whole_cols
)
;
EXTRAPOLATE
(
s_y,
0
,
src_whole_rows
)
;
EXTRAPOLATE
(
s_y,
0
,
src_whole_rows
)
;
#
endif
#
endif
index[i]
=
mad24
(
s_y,
src_step_in_pixel,
s_x
)
;
index[i]=mad24
(
s_y,
src_step_in_pixel,
s_x
)
;
}
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
temp[i]
=
src[index[i]]
;
#
endif//
BORDER_CONSTANT
//save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
read
pixels
from
lds
and
calculate
the
result
sum
=LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX]
;
for
(
i=1
; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i]
;
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i]
;
sum
+=
temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i]
;
}
//
write
the
result
to
dst
if
(
x<dst_cols
&&
y<dst_rows
)
{
start_addr
=
mad24
(
y,dst_step_in_pixel,x
)
;
dst[start_addr]
=
sum
;
}
}
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
row_filter_C4_D5
(
__global
float4
*
restrict
src,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
int
src_cols,
int
src_rows,
int
src_whole_cols,
int
src_whole_rows,
__global
float4
*
dst,
int
dst_step_in_pixel,
int
dst_cols,
int
dst_rows,
int
radiusy
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
l_x
=
get_local_id
(
0
)
;
int
l_y
=
get_local_id
(
1
)
;
int
start_x
=
x+src_offset_x-RADIUSX
;
int
start_y
=
y+src_offset_y-radiusy
;
int
start_addr
=
mad24
(
start_y,src_step_in_pixel,start_x
)
;
int
i
;
float4
sum
;
float4
temp[READ_TIMES_ROW]
;
__local
float4
LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1]
;
#
ifdef
BORDER_CONSTANT
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,src_whole_cols
)
;
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
int
current_addr
=
start_addr+i*LSIZE0
;
current_addr
=
((
current_addr
<
end_addr
)
&&
(
current_addr
>
0
))
?
current_addr
:
0
;
temp[i]
=
src[current_addr]
;
}
//
judge
if
read
out
of
boundary
#
ifdef
BORDER_ISOLATED
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
temp[i]=
ELEM
(
start_x+i*LSIZE0,
src_offset_x,
src_offset_x
+
src_cols,
(
float4
)
0
,
temp[i]
)
;
temp[i]=
ELEM
(
start_y,
src_offset_y,
src_offset_y
+
src_rows,
(
float4
)
0
,
temp[i]
)
;
}
}
#
else
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
temp[i]=
ELEM
(
start_x+i*LSIZE0,
0
,
src_whole_cols,
(
float4
)
0
,
temp[i]
)
;
temp[i]=
ELEM
(
start_y,
0
,
src_whole_rows,
(
float4
)
0
,
temp[i]
)
;
}
#
endif
#
else
int
index[READ_TIMES_ROW]
;
int
s_x,s_y
;
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
s_x
=
start_x
+
i*LSIZE0,
s_y
=
start_y
;
#
ifdef
BORDER_ISOLATED
EXTRAPOLATE
(
s_x,
src_offset_x,
src_offset_x
+
src_cols
)
;
EXTRAPOLATE
(
s_y,
src_offset_y,
src_offset_y
+
src_rows
)
;
#
else
EXTRAPOLATE
(
s_x,
0
,
src_whole_cols
)
;
EXTRAPOLATE
(
s_y,
0
,
src_whole_rows
)
;
#
endif
index[i]=mad24
(
s_y,src_step_in_pixel,s_x
)
;
}
//
read
pixels
from
src
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
temp[i]
=
src[index[i]]
;
temp[i]
=
src[index[i]]
;
#
endif
#
endif
//
BORDER_CONSTANT
//
save
pixels
to
lds
//
save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++
)
for
(
i
nt
i
=
0
; i < READ_TIMES_ROW; ++i
)
LDS_DAT[l_y][
l_x+i*LSIZE0]=
temp[i]
;
LDS_DAT[l_y][
mad24
(
i,
LSIZE0,
l_x
)
]
=
temp[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
read
pixels
from
lds
and
calculate
the
result
//
read
pixels
from
lds
and
calculate
the
result
sum
=
LDS_DAT[l_y][l_x+RADIUSX]*
mat_kernel[RADIUSX]
;
sum
=
convertToDstT
(
LDS_DAT[l_y][l_x
+
RADIUSX]
)
*
mat_kernel[RADIUSX]
;
for
(
i
=1
; i<=RADIUSX; i++
)
for
(
i
nt
i
=
1
; i <= RADIUSX; ++i
)
{
{
temp[0]
=LDS_DAT[l_y][l_x+RADIUSX-
i]
;
temp[0]
=
LDS_DAT[l_y][l_x
+
RADIUSX
-
i]
;
temp[1]
=LDS_DAT[l_y][l_x+RADIUSX+
i]
;
temp[1]
=
LDS_DAT[l_y][l_x
+
RADIUSX
+
i]
;
sum
+=
temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i]
;
sum
+=
mad
(
convertToDstT
(
temp[0]
)
,
mat_kernel[RADIUSX
-
i],
convertToDstT
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
}
}
//
write
the
result
to
dst
//
write
the
result
to
dst
if
(
x
<dst_cols
&&
y<
dst_rows
)
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
{
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
)
;
start_addr
=
mad24
(
y,
dst_step_in_pixel,
x
)
;
dst[start_addr]
=
sum
;
dst[start_addr]
=
sum
;
}
}
}
}
modules/imgproc/src/opencl/filterSep_singlePass.cl
View file @
b449b0bf
...
@@ -75,6 +75,7 @@
...
@@ -75,6 +75,7 @@
#endif
#endif
#define SRC(_x,_y) convertToWT(((global srcT*)(Src+(_y)*src_step))[_x])
#define SRC(_x,_y) convertToWT(((global srcT*)(Src+(_y)*src_step))[_x])
#define DST(_x,_y) (((global dstT*)(Dst+dst_offset+(_y)*dst_step))[_x])
#ifdef BORDER_CONSTANT
#ifdef BORDER_CONSTANT
// CCCCCC|abcdefgh|CCCCCCC
// CCCCCC|abcdefgh|CCCCCCC
...
@@ -83,8 +84,6 @@
...
@@ -83,8 +84,6 @@
#
define
ELEM
(
_x,_y,r_edge,t_edge,const_v
)
SRC
((
_x
)
,
(
_y
))
#
define
ELEM
(
_x,_y,r_edge,t_edge,const_v
)
SRC
((
_x
)
,
(
_y
))
#
endif
#
endif
#
define
DST
(
_x,_y
)
(((
global
dstT*
)(
Dst+dst_offset+
(
_y
)
*dst_step
))
[_x]
)
#
define
noconvert
#
define
noconvert
//
horizontal
and
vertical
filter
kernels
//
horizontal
and
vertical
filter
kernels
...
@@ -101,15 +100,15 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
...
@@ -101,15 +100,15 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
//
all
these
should
be
defined
on
host
during
compile
time
//
all
these
should
be
defined
on
host
during
compile
time
//
first
lsmem
array
for
source
pixels
used
in
first
pass,
//
first
lsmem
array
for
source
pixels
used
in
first
pass,
//
second
lsmemDy
for
storing
first
pass
results
//
second
lsmemDy
for
storing
first
pass
results
__local
WT
lsmem[BLK_Y
+2*RADIUSY][BLK_X+2*
RADIUSX]
;
__local
WT
lsmem[BLK_Y
+
2
*
RADIUSY][BLK_X
+
2
*
RADIUSX]
;
__local
WT
lsmemDy[BLK_Y][BLK_X
+2*
RADIUSX]
;
__local
WT
lsmemDy[BLK_Y][BLK_X
+
2
*
RADIUSX]
;
//
get
local
and
global
ids
-
used
as
image
and
local
memory
array
indexes
//
get
local
and
global
ids
-
used
as
image
and
local
memory
array
indexes
int
lix
=
get_local_id
(
0
)
;
int
lix
=
get_local_id
(
0
)
;
int
liy
=
get_local_id
(
1
)
;
int
liy
=
get_local_id
(
1
)
;
int
x
=
(
int
)
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
(
int
)
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
//
calculate
pixel
position
in
source
image
taking
image
offset
into
account
//
calculate
pixel
position
in
source
image
taking
image
offset
into
account
int
srcX
=
x
+
srcOffsetX
-
RADIUSX
;
int
srcX
=
x
+
srcOffsetX
-
RADIUSX
;
...
...
modules/imgproc/test/ocl/test_sepfilter2D.cpp
View file @
b449b0bf
...
@@ -79,12 +79,14 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
...
@@ -79,12 +79,14 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
ksize
.
width
++
;
ksize
.
width
++
;
if
(
1
!=
(
ksize
.
height
%
2
))
if
(
1
!=
(
ksize
.
height
%
2
))
ksize
.
height
++
;
ksize
.
height
++
;
Mat
temp
=
randomMat
(
Size
(
ksize
.
width
,
1
),
CV_MAKE_TYPE
(
CV_32F
,
1
),
-
MAX_VALUE
,
MAX_VALUE
);
Mat
temp
=
randomMat
(
Size
(
ksize
.
width
,
1
),
CV_MAKE_TYPE
(
CV_32F
,
1
),
-
MAX_VALUE
,
MAX_VALUE
);
cv
::
normalize
(
temp
,
kernelX
,
1.0
,
0.0
,
NORM_L1
);
cv
::
normalize
(
temp
,
kernelX
,
1.0
,
0.0
,
NORM_L1
);
temp
=
randomMat
(
Size
(
1
,
ksize
.
height
),
CV_MAKE_TYPE
(
CV_32F
,
1
),
-
MAX_VALUE
,
MAX_VALUE
);
temp
=
randomMat
(
Size
(
1
,
ksize
.
height
),
CV_MAKE_TYPE
(
CV_32F
,
1
),
-
MAX_VALUE
,
MAX_VALUE
);
cv
::
normalize
(
temp
,
kernelY
,
1.0
,
0.0
,
NORM_L1
);
cv
::
normalize
(
temp
,
kernelY
,
1.0
,
0.0
,
NORM_L1
);
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
Size
roiSize
=
randomSize
(
ksize
.
width
+
16
,
MAX_VALUE
,
ksize
.
height
+
20
,
MAX_VALUE
);
std
::
cout
<<
roiSize
<<
std
::
endl
;
int
rest
=
roiSize
.
width
%
4
;
int
rest
=
roiSize
.
width
%
4
;
if
(
0
!=
rest
)
if
(
0
!=
rest
)
roiSize
.
width
+=
(
4
-
rest
);
roiSize
.
width
+=
(
4
-
rest
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment