Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
9dca7555
Commit
9dca7555
authored
Sep 27, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed ocl::minMax
parent
178f0272
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
132 additions
and
203 deletions
+132
-203
arithm.cpp
modules/ocl/src/arithm.cpp
+59
-78
arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+72
-124
test_arithm.cpp
modules/ocl/test/test_arithm.cpp
+1
-1
No files found.
modules/ocl/src/arithm.cpp
View file @
9dca7555
...
@@ -68,7 +68,6 @@ namespace cv
...
@@ -68,7 +68,6 @@ namespace cv
extern
const
char
*
arithm_sum
;
extern
const
char
*
arithm_sum
;
extern
const
char
*
arithm_sum_3
;
extern
const
char
*
arithm_sum_3
;
extern
const
char
*
arithm_minMax
;
extern
const
char
*
arithm_minMax
;
extern
const
char
*
arithm_minMax_mask
;
extern
const
char
*
arithm_minMaxLoc
;
extern
const
char
*
arithm_minMaxLoc
;
extern
const
char
*
arithm_minMaxLoc_mask
;
extern
const
char
*
arithm_minMaxLoc_mask
;
extern
const
char
*
arithm_LUT
;
extern
const
char
*
arithm_LUT
;
...
@@ -455,139 +454,121 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
...
@@ -455,139 +454,121 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
//////////////////////////////////// minMax /////////////////////////////////
//////////////////////////////////// minMax /////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static
void
arithmetic_minMax_run
(
const
oclMat
&
src
,
const
oclMat
&
mask
,
cl_mem
&
dst
,
int
vlen
,
int
groupnum
,
string
kernelName
)
template
<
typename
T
,
typename
WT
>
static
void
arithmetic_minMax_run
(
const
oclMat
&
src
,
const
oclMat
&
mask
,
cl_mem
&
dst
,
int
groupnum
,
string
kernelName
)
{
{
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
all_cols
=
src
.
step
/
(
vlen
*
src
.
elemSize1
());
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
(
vlen
*
src
.
elemSize1
());
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
(
vlen
*
src
.
elemSize1
())
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;
int
offset
=
src
.
offset
/
(
vlen
*
src
.
elemSize1
());
int
offset
=
src
.
offset
/
src
.
elemSize
();
int
repeat_s
=
src
.
offset
/
src
.
elemSize1
()
-
offset
*
vlen
;
int
repeat_e
=
(
offset
+
cols
)
*
vlen
-
src
.
offset
/
src
.
elemSize1
()
-
src
.
cols
*
src
.
oclchannels
();
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
char
build_options
[
50
];
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
};
sprintf
(
build_options
,
"-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d"
,
src
.
depth
(),
repeat_s
,
repeat_e
);
ostringstream
stream
;
stream
<<
"-D T="
<<
typeMap
[
src
.
depth
()]
<<
channelMap
[
src
.
channels
()];
stream
<<
" -D MAX_VAL="
<<
(
WT
)
numeric_limits
<
T
>::
max
();
stream
<<
" -D MIN_VAL="
<<
(
WT
)
numeric_limits
<
T
>::
min
();
string
buildOptions
=
stream
.
str
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
int
minvalid_cols
=
0
,
moffset
=
0
;
if
(
!
mask
.
empty
())
if
(
!
mask
.
empty
())
{
{
int
mall_cols
=
mask
.
step
/
(
vlen
*
mask
.
elemSize1
()
);
int
mall_cols
=
mask
.
step
/
mask
.
elemSize
(
);
int
mpre_cols
=
(
mask
.
offset
%
mask
.
step
)
/
(
vlen
*
mask
.
elemSize1
()
);
int
mpre_cols
=
(
mask
.
offset
%
mask
.
step
)
/
mask
.
elemSize
(
);
int
msec_cols
=
mall_cols
-
(
mask
.
offset
%
mask
.
step
+
mask
.
cols
*
mask
.
elemSize
()
-
1
)
/
(
vlen
*
mask
.
elemSize1
()
)
-
1
;
int
msec_cols
=
mall_cols
-
(
mask
.
offset
%
mask
.
step
+
mask
.
cols
*
mask
.
elemSize
()
-
1
)
/
mask
.
elemSize
(
)
-
1
;
int
minvalid_cols
=
mpre_cols
+
msec_cols
;
minvalid_cols
=
mpre_cols
+
msec_cols
;
int
moffset
=
mask
.
offset
/
(
vlen
*
mask
.
elemSize1
()
);
moffset
=
mask
.
offset
/
mask
.
elemSize
(
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
minvalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
minvalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
moffset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
moffset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
mask
.
data
));
kernelName
+=
"_mask"
;
}
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
size_t
gt
[
3
]
=
{
groupnum
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
kernelName
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
}
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
static
void
arithmetic_minMax_mask_run
(
const
oclMat
&
src
,
const
oclMat
&
mask
,
cl_mem
&
dst
,
int
vlen
,
int
groupnum
,
string
kernelName
)
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
kernelName
,
globalThreads
,
localThreads
,
{
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
size_t
gt
[
3
]
=
{
groupnum
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
char
build_options
[
50
];
if
(
src
.
oclchannels
()
==
1
)
{
int
cols
=
(
src
.
cols
-
1
)
/
vlen
+
1
;
int
invalid_cols
=
src
.
step
/
(
vlen
*
src
.
elemSize1
())
-
cols
;
int
offset
=
src
.
offset
/
src
.
elemSize1
();
int
repeat_me
=
vlen
-
(
mask
.
cols
%
vlen
==
0
?
vlen
:
mask
.
cols
%
vlen
);
int
minvalid_cols
=
mask
.
step
/
(
vlen
*
mask
.
elemSize1
())
-
cols
;
int
moffset
=
mask
.
offset
/
mask
.
elemSize1
();
int
elemnum
=
cols
*
src
.
rows
;
sprintf
(
build_options
,
"-D DEPTH_%d -D REPEAT_E%d"
,
src
.
depth
(),
repeat_me
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
minvalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
moffset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax_mask
,
kernelName
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
}
}
}
template
<
typename
T
>
void
arithmetic_minMax
(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
template
<
typename
T
,
typename
WT
>
void
arithmetic_minMax
(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
oclMat
&
mask
,
oclMat
&
buf
)
const
oclMat
&
mask
,
oclMat
&
buf
)
{
{
size_t
groupnum
=
src
.
clCxt
->
computeUnits
();
size_t
groupnum
=
src
.
clCxt
->
computeUnits
();
CV_Assert
(
groupnum
!=
0
);
CV_Assert
(
groupnum
!=
0
);
groupnum
=
groupnum
*
2
;
int
vlen
=
8
;
int
dbsize
=
groupnum
*
2
*
vlen
*
sizeof
(
T
)
;
int
dbsize
=
groupnum
*
2
*
src
.
elemSize
();
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
buf
);
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
buf
);
cl_mem
buf_data
=
reinterpret_cast
<
cl_mem
>
(
buf
.
data
);
cl_mem
buf_data
=
reinterpret_cast
<
cl_mem
>
(
buf
.
data
);
arithmetic_minMax_run
<
T
,
WT
>
(
src
,
mask
,
buf_data
,
groupnum
,
"arithm_op_minMax"
);
if
(
mask
.
empty
())
{
arithmetic_minMax_run
(
src
,
mask
,
buf_data
,
vlen
,
groupnum
,
"arithm_op_minMax"
);
}
else
{
arithmetic_minMax_mask_run
(
src
,
mask
,
buf_data
,
vlen
,
groupnum
,
"arithm_op_minMax_mask"
);
}
Mat
matbuf
=
Mat
(
buf
);
Mat
matbuf
=
Mat
(
buf
);
T
*
p
=
matbuf
.
ptr
<
T
>
();
T
*
p
=
matbuf
.
ptr
<
T
>
();
if
(
minVal
!=
NULL
)
if
(
minVal
!=
NULL
)
{
{
*
minVal
=
std
::
numeric_limits
<
double
>::
max
();
*
minVal
=
std
::
numeric_limits
<
double
>::
max
();
for
(
int
i
=
0
;
i
<
vlen
*
(
int
)
groupnum
;
i
++
)
for
(
int
i
=
0
,
end
=
src
.
oclchannels
()
*
(
int
)
groupnum
;
i
<
end
;
i
++
)
{
*
minVal
=
*
minVal
<
p
[
i
]
?
*
minVal
:
p
[
i
];
*
minVal
=
*
minVal
<
p
[
i
]
?
*
minVal
:
p
[
i
];
}
}
}
if
(
maxVal
!=
NULL
)
if
(
maxVal
!=
NULL
)
{
{
*
maxVal
=
-
std
::
numeric_limits
<
double
>::
max
();
*
maxVal
=
-
std
::
numeric_limits
<
double
>::
max
();
for
(
int
i
=
vlen
*
(
int
)
groupnum
;
i
<
2
*
vlen
*
(
int
)
groupnum
;
i
++
)
for
(
int
i
=
src
.
oclchannels
()
*
(
int
)
groupnum
,
end
=
i
<<
1
;
i
<
end
;
i
++
)
{
*
maxVal
=
*
maxVal
>
p
[
i
]
?
*
maxVal
:
p
[
i
];
*
maxVal
=
*
maxVal
>
p
[
i
]
?
*
maxVal
:
p
[
i
];
}
}
}
}
}
typedef
void
(
*
minMaxFunc
)(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
oclMat
&
mask
,
oclMat
&
buf
);
void
cv
::
ocl
::
minMax
(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
minMax
(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
oclMat
&
mask
)
{
{
oclMat
buf
;
oclMat
buf
;
minMax_buf
(
src
,
minVal
,
maxVal
,
mask
,
buf
);
minMax_buf
(
src
,
minVal
,
maxVal
,
mask
,
buf
);
}
}
typedef
void
(
*
minMaxFunc
)(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
oclMat
&
mask
,
oclMat
&
buf
);
void
cv
::
ocl
::
minMax_buf
(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
oclMat
&
mask
,
oclMat
&
buf
)
void
cv
::
ocl
::
minMax_buf
(
const
oclMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
oclMat
&
mask
,
oclMat
&
buf
)
{
{
CV_Assert
(
src
.
oclchannels
()
==
1
);
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
src
.
size
()
==
mask
.
size
()
||
mask
.
empty
());
CV_Assert
(
src
.
step
%
src
.
elemSize
()
==
0
);
if
(
minVal
==
NULL
&&
maxVal
==
NULL
)
return
;
if
(
!
src
.
clCxt
->
supportsFeature
(
Context
::
CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
if
(
!
src
.
clCxt
->
supportsFeature
(
Context
::
CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
{
{
CV_Error
(
CV_GpuNotSupported
,
"Selected device doesn't support double"
);
CV_Error
(
CV_GpuNotSupported
,
"Selected device doesn't support double"
);
}
}
static
minMaxFunc
functab
[
8
]
=
static
minMaxFunc
functab
[
8
]
=
{
{
arithmetic_minMax
<
uchar
>
,
arithmetic_minMax
<
uchar
,
int
>
,
arithmetic_minMax
<
char
>
,
arithmetic_minMax
<
char
,
int
>
,
arithmetic_minMax
<
ushort
>
,
arithmetic_minMax
<
ushort
,
int
>
,
arithmetic_minMax
<
short
>
,
arithmetic_minMax
<
short
,
int
>
,
arithmetic_minMax
<
int
>
,
arithmetic_minMax
<
int
,
int
>
,
arithmetic_minMax
<
float
>
,
arithmetic_minMax
<
float
,
float
>
,
arithmetic_minMax
<
double
>
,
arithmetic_minMax
<
double
,
double
>
,
0
0
};
};
minMaxFunc
func
;
minMaxFunc
func
;
func
=
functab
[
src
.
depth
()];
func
=
functab
[
src
.
depth
()];
func
(
src
,
minVal
,
maxVal
,
mask
,
buf
);
func
(
src
,
minVal
,
maxVal
,
mask
,
buf
);
...
...
modules/ocl/src/opencl/arithm_minMax.cl
View file @
9dca7555
...
@@ -53,169 +53,117 @@
...
@@ -53,169 +53,117 @@
#
endif
#
endif
#
endif
#
endif
#
if
defined
(
DEPTH_0
)
#
define
VEC_TYPE
uchar8
#
define
CONVERT_TYPE
convert_uchar8
#
define
MIN_VAL
0
#
define
MAX_VAL
255
#
endif
#
if
defined
(
DEPTH_1
)
#
define
VEC_TYPE
char8
#
define
CONVERT_TYPE
convert_char8
#
define
MIN_VAL
-128
#
define
MAX_VAL
127
#
endif
#
if
defined
(
DEPTH_2
)
#
define
VEC_TYPE
ushort8
#
define
CONVERT_TYPE
convert_ushort8
#
define
MIN_VAL
0
#
define
MAX_VAL
65535
#
endif
#
if
defined
(
DEPTH_3
)
#
define
VEC_TYPE
short8
#
define
CONVERT_TYPE
convert_short8
#
define
MIN_VAL
-32768
#
define
MAX_VAL
32767
#
endif
#
if
defined
(
DEPTH_4
)
#
define
VEC_TYPE
int8
#
define
CONVERT_TYPE
convert_int8
#
define
MIN_VAL
INT_MIN
#
define
MAX_VAL
INT_MAX
#
endif
#
if
defined
(
DEPTH_5
)
#
define
VEC_TYPE
float8
#
define
CONVERT_TYPE
convert_float8
#
define
MIN_VAL
(
-FLT_MAX
)
#
define
MAX_VAL
FLT_MAX
#
endif
#
if
defined
(
DEPTH_6
)
#
define
VEC_TYPE
double8
#
define
CONVERT_TYPE
convert_double8
#
define
MIN_VAL
(
-DBL_MAX
)
#
define
MAX_VAL
DBL_MAX
#
endif
#
if
defined
(
REPEAT_S0
)
#
define
repeat_s
(
a
)
a
=
a
;
#
endif
#
if
defined
(
REPEAT_S1
)
#
define
repeat_s
(
a
)
a.s0
=
a.s1
;
#
endif
#
if
defined
(
REPEAT_S2
)
#
define
repeat_s
(
a
)
a.s0
=
a.s2
;a.s1 = a.s2;
#
endif
#
if
defined
(
REPEAT_S3
)
#
define
repeat_s
(
a
)
a.s0
=
a.s3
;a.s1 = a.s3;a.s2 = a.s3;
#
endif
#
if
defined
(
REPEAT_S4
)
#
define
repeat_s
(
a
)
a.s0
=
a.s4
;a.s1 = a.s4;a.s2 = a.s4;a.s3 = a.s4;
#
endif
#
if
defined
(
REPEAT_S5
)
#
define
repeat_s
(
a
)
a.s0
=
a.s5
;a.s1 = a.s5;a.s2 = a.s5;a.s3 = a.s5;a.s4 = a.s5;
#
endif
#
if
defined
(
REPEAT_S6
)
#
define
repeat_s
(
a
)
a.s0
=
a.s6
;a.s1 = a.s6;a.s2 = a.s6;a.s3 = a.s6;a.s4 = a.s6;a.s5 = a.s6;
#
endif
#
if
defined
(
REPEAT_S7
)
#
define
repeat_s
(
a
)
a.s0
=
a.s7
;a.s1 = a.s7;a.s2 = a.s7;a.s3 = a.s7;a.s4 = a.s7;a.s5 = a.s7;a.s6 = a.s7;
#
endif
#
if
defined
(
REPEAT_E0
)
#
define
repeat_e
(
a
)
a
=
a
;
#
endif
#
if
defined
(
REPEAT_E1
)
#
define
repeat_e
(
a
)
a.s7
=
a.s6
;
#
endif
#
if
defined
(
REPEAT_E2
)
#
define
repeat_e
(
a
)
a.s7
=
a.s5
;a.s6 = a.s5;
#
endif
#
if
defined
(
REPEAT_E3
)
#
define
repeat_e
(
a
)
a.s7
=
a.s4
;a.s6 = a.s4;a.s5 = a.s4;
#
endif
#
if
defined
(
REPEAT_E4
)
#
define
repeat_e
(
a
)
a.s7
=
a.s3
;a.s6 = a.s3;a.s5 = a.s3;a.s4 = a.s3;
#
endif
#
if
defined
(
REPEAT_E5
)
#
define
repeat_e
(
a
)
a.s7
=
a.s2
;a.s6 = a.s2;a.s5 = a.s2;a.s4 = a.s2;a.s3 = a.s2;
#
endif
#
if
defined
(
REPEAT_E6
)
#
define
repeat_e
(
a
)
a.s7
=
a.s1
;a.s6 = a.s1;a.s5 = a.s1;a.s4 = a.s1;a.s3 = a.s1;a.s2 = a.s1;
#
endif
#
if
defined
(
REPEAT_E7
)
#
define
repeat_e
(
a
)
a.s7
=
a.s0
;a.s6 = a.s0;a.s5 = a.s0;a.s4 = a.s0;a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
#
endif
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_extended_atomics:enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_extended_atomics:enable
/**************************************Array
minMax**************************************/
/**************************************Array
minMax**************************************/
__kernel
void
arithm_op_minMax
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum,
__global
VEC_TYPE
*src,
__global
VEC_TYPE
*dst
)
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
unsigned
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
VEC_TYPE
localmem_max[128],localmem_min[128]
;
VEC_TYPE
minval,maxval,temp
;
__local
T
localmem_max[128],
localmem_min[128]
;
if
(
id
<
elemnum
)
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
temp
=
src[idx]
;
if
(
id
%
cols
==
0
)
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
if
(
lid
>
127
)
{
{
repeat_s
(
temp
)
;
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
}
}
if
(
id
%
cols
==
cols
-
1
)
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
{
{
repeat_e
(
temp
)
;
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
}
}
minval
=
temp
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
maxval
=
temp
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
else
if
(
lid
==
0
)
{
{
minval
=
MAX_VAL
;
dst[gid]
=
localmem_min[0]
;
maxval
=
MIN_VAL
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
}
}
for
(
id=id
+
(
groupnum
<<
8
)
; id < elemnum;id = id + (groupnum << 8))
}
__kernel
void
arithm_op_minMax_mask
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
const
__global
uchar
*
mask,
int
minvalid_cols,
int
moffset
)
{
unsigned
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
unsigned
int
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
if
(
id
%
cols
==
0
)
{
if
(
mask[midx]
)
repeat_s
(
temp
)
;
}
if
(
id
%
cols
==
cols
-
1
)
{
{
repeat_e
(
temp
)
;
temp
=
src[idx]
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
}
minval
=
min
(
minval,temp
)
;
maxval
=
max
(
maxval,temp
)
;
}
}
if
(
lid
>
127
)
if
(
lid
>
127
)
{
{
localmem_min[lid
-
128]
=
minval
;
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
localmem_max[lid
-
128]
=
maxval
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
if
(
lid
<
128
)
{
{
localmem_min[lid]
=
min
(
minval,localmem_min[lid]
)
;
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,localmem_max[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
{
if
(
lid
<
lsize
)
if
(
lid
<
lsize
)
{
{
int
lid2
=
lsize
+
lid
;
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid]
,
localmem_min[lid2]
)
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid]
,
localmem_max[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
lid
==
0
)
if
(
lid
==
0
)
{
{
dst[gid]
=
localmem_min[0]
;
dst[gid]
=
localmem_min[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
...
...
modules/ocl/test/test_arithm.cpp
View file @
9dca7555
...
@@ -753,7 +753,7 @@ TEST_P(MinMax, MAT)
...
@@ -753,7 +753,7 @@ TEST_P(MinMax, MAT)
}
}
}
}
TEST_P
(
MinMax
,
DISABLED_
MASK
)
TEST_P
(
MinMax
,
MASK
)
{
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment