Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ba5e8bef
Commit
ba5e8bef
authored
Apr 26, 2018
by
Li Peng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fp16 ocl support for more layers
Signed-off-by:
Li Peng
<
peng.li@intel.com
>
parent
3dd91688
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
120 additions
and
49 deletions
+120
-49
batch_norm_layer.cpp
modules/dnn/src/layers/batch_norm_layer.cpp
+11
-2
blank_layer.cpp
modules/dnn/src/layers/blank_layer.cpp
+1
-1
detection_output_layer.cpp
modules/dnn/src/layers/detection_output_layer.cpp
+43
-7
normalize_bbox_layer.cpp
modules/dnn/src/layers/normalize_bbox_layer.cpp
+4
-1
permute_layer.cpp
modules/dnn/src/layers/permute_layer.cpp
+4
-2
prior_box_layer.cpp
modules/dnn/src/layers/prior_box_layer.cpp
+10
-3
proposal_layer.cpp
modules/dnn/src/layers/proposal_layer.cpp
+4
-1
region_layer.cpp
modules/dnn/src/layers/region_layer.cpp
+2
-2
reorg_layer.cpp
modules/dnn/src/layers/reorg_layer.cpp
+3
-2
reshape_layer.cpp
modules/dnn/src/layers/reshape_layer.cpp
+1
-1
batchnorm.cl
modules/dnn/src/opencl/batchnorm.cl
+16
-13
permute.cl
modules/dnn/src/opencl/permute.cl
+3
-1
prior_box.cl
modules/dnn/src/opencl/prior_box.cl
+14
-13
reorg.cl
modules/dnn/src/opencl/reorg.cl
+4
-0
No files found.
modules/dnn/src/layers/batch_norm_layer.cpp
View file @
ba5e8bef
...
...
@@ -120,12 +120,16 @@ public:
std
::
vector
<
UMat
>
inputs
;
std
::
vector
<
UMat
>
outputs
;
bool
use_half
=
(
inputs_
.
depth
()
==
CV_16S
);
inputs_
.
getUMatVector
(
inputs
);
outputs_
.
getUMatVector
(
outputs
);
CV_Assert
(
blobs
.
size
()
>=
2
);
CV_Assert
(
inputs
.
size
()
==
1
);
if
(
use_half
&&
inputs
[
0
].
dims
==
2
)
return
false
;
if
(
umat_weight
.
empty
())
{
umat_weight
=
weights_
.
getUMat
(
ACCESS_READ
);
...
...
@@ -139,6 +143,7 @@ public:
int
rows
=
inpBlob
.
dims
>
2
?
inpBlob
.
size
[
2
]
:
1
;
int
cols
=
inpBlob
.
dims
>
2
?
inpBlob
.
size
[
3
]
:
1
;
String
opts
=
(
use_half
)
?
" -DDtype=half"
:
" -DDtype=float"
;
for
(
size_t
ii
=
0
;
ii
<
outputs
.
size
();
ii
++
)
{
if
(
inpBlob
.
dims
==
2
)
...
...
@@ -154,8 +159,12 @@ public:
UMat
src
=
inputs
[
ii
].
reshape
(
1
,
s
.
size
(),
&
s
[
0
]);
UMat
dst
=
outputs
[
ii
].
reshape
(
1
,
s
.
size
(),
&
s
[
0
]);
int
number
=
(
s
[
1
]
%
8
==
0
)
?
8
:
((
s
[
1
]
%
4
==
0
)
?
4
:
1
);
String
buildopt
=
format
(
"-DNUM=%d"
,
number
);
String
buildopt
=
format
(
"-DNUM=%d"
,
number
)
+
opts
;
String
kname
=
format
(
"batch_norm%d"
,
number
);
if
(
number
==
1
)
buildopt
+=
format
(
" -Dconvert_T=convert_%s"
,
use_half
?
"half"
:
"float"
);
else
buildopt
+=
format
(
" -Dconvert_T=convert_%s%d"
,
use_half
?
"half"
:
"float"
,
number
);
ocl
::
Kernel
kernel
(
kname
.
c_str
(),
ocl
::
dnn
::
batchnorm_oclsrc
,
buildopt
);
if
(
kernel
.
empty
())
return
false
;
...
...
@@ -181,7 +190,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/blank_layer.cpp
View file @
ba5e8bef
...
...
@@ -95,7 +95,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/detection_output_layer.cpp
View file @
ba5e8bef
...
...
@@ -307,8 +307,24 @@ public:
std
::
vector
<
UMat
>
inputs
;
std
::
vector
<
UMat
>
outputs
;
inps
.
getUMatVector
(
inputs
);
outs
.
getUMatVector
(
outputs
);
bool
use_half
=
(
inps
.
depth
()
==
CV_16S
);
if
(
use_half
)
{
std
::
vector
<
UMat
>
orig_inputs
;
std
::
vector
<
UMat
>
orig_outputs
;
inps
.
getUMatVector
(
orig_inputs
);
outs
.
getUMatVector
(
orig_outputs
);
inputs
.
resize
(
orig_inputs
.
size
());
for
(
size_t
i
=
0
;
i
<
orig_inputs
.
size
();
i
++
)
convertFp16
(
orig_inputs
[
i
],
inputs
[
i
]);
}
else
{
inps
.
getUMatVector
(
inputs
);
outs
.
getUMatVector
(
outputs
);
}
std
::
vector
<
LabelBBox
>
allDecodedBBoxes
;
std
::
vector
<
Mat
>
allConfidenceScores
;
...
...
@@ -342,7 +358,13 @@ public:
{
// Set confidences to zeros.
Range
ranges
[]
=
{
Range
::
all
(),
Range
::
all
(),
Range
::
all
(),
Range
(
2
,
3
)};
outputs
[
0
](
ranges
).
setTo
(
0
);
if
(
use_half
)
{
std
::
vector
<
UMat
>
orig_outputs
;
outs
.
getUMatVector
(
orig_outputs
);
orig_outputs
[
0
](
ranges
).
setTo
(
0
);
}
else
outputs
[
0
](
ranges
).
setTo
(
0
);
return
true
;
}
int
outputShape
[]
=
{
1
,
1
,
(
int
)
numKept
,
7
};
...
...
@@ -360,9 +382,23 @@ public:
}
CV_Assert
(
count
==
numKept
);
}
outputs
.
clear
();
outputs
.
push_back
(
umat
);
outs
.
assign
(
outputs
);
if
(
use_half
)
{
UMat
half_umat
;
convertFp16
(
umat
,
half_umat
);
std
::
vector
<
UMat
>
orig_outputs
;
outs
.
getUMatVector
(
orig_outputs
);
orig_outputs
.
clear
();
orig_outputs
.
push_back
(
half_umat
);
outs
.
assign
(
orig_outputs
);
}
else
{
outputs
.
clear
();
outputs
.
push_back
(
umat
);
outs
.
assign
(
outputs
);
}
return
true
;
}
#endif
...
...
@@ -372,7 +408,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/normalize_bbox_layer.cpp
View file @
ba5e8bef
...
...
@@ -87,6 +87,9 @@ public:
std
::
vector
<
UMat
>
outputs
;
std
::
vector
<
UMat
>
internals
;
if
(
inputs_
.
depth
()
==
CV_16S
)
return
false
;
inputs_
.
getUMatVector
(
inputs
);
outputs_
.
getUMatVector
(
outputs
);
internals_
.
getUMatVector
(
internals
);
...
...
@@ -162,7 +165,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/permute_layer.cpp
View file @
ba5e8bef
...
...
@@ -288,9 +288,11 @@ public:
if
(
!
_needsPermute
)
return
false
;
bool
use_half
=
(
inps
.
depth
()
==
CV_16S
);
String
opts
=
format
(
"-DDtype=%s"
,
use_half
?
"half"
:
"float"
);
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
ocl
::
Kernel
kernel
(
"permute"
,
ocl
::
dnn
::
permute_oclsrc
);
ocl
::
Kernel
kernel
(
"permute"
,
ocl
::
dnn
::
permute_oclsrc
,
opts
);
kernel
.
set
(
0
,
(
int
)
_count
);
kernel
.
set
(
1
,
ocl
::
KernelArg
::
PtrReadOnly
(
inputs
[
i
]));
...
...
@@ -313,7 +315,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/prior_box_layer.cpp
View file @
ba5e8bef
...
...
@@ -316,6 +316,7 @@ public:
std
::
vector
<
UMat
>
inputs
;
std
::
vector
<
UMat
>
outputs
;
bool
use_half
=
(
inps
.
depth
()
==
CV_16S
);
inps
.
getUMatVector
(
inputs
);
outs
.
getUMatVector
(
outputs
);
...
...
@@ -340,9 +341,15 @@ public:
heights
.
copyTo
(
umat_heights
);
}
String
opts
;
if
(
use_half
)
opts
=
"-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4"
;
else
opts
=
"-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4"
;
size_t
nthreads
=
_layerHeight
*
_layerWidth
;
ocl
::
Kernel
kernel
(
"prior_box"
,
ocl
::
dnn
::
prior_box_oclsrc
,
opts
);
ocl
::
Kernel
kernel
(
"prior_box"
,
ocl
::
dnn
::
prior_box_oclsrc
);
kernel
.
set
(
0
,
(
int
)
nthreads
);
kernel
.
set
(
1
,
(
float
)
_stepX
);
kernel
.
set
(
2
,
(
float
)
_stepY
);
...
...
@@ -375,7 +382,7 @@ public:
// set the variance.
{
ocl
::
Kernel
kernel
(
"set_variance"
,
ocl
::
dnn
::
prior_box_oclsrc
);
ocl
::
Kernel
kernel
(
"set_variance"
,
ocl
::
dnn
::
prior_box_oclsrc
,
opts
);
int
offset
=
total
(
shape
(
outputs
[
0
]),
2
);
size_t
nthreads
=
_layerHeight
*
_layerWidth
*
_numPriors
;
kernel
.
set
(
0
,
(
int
)
nthreads
);
...
...
@@ -395,7 +402,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/proposal_layer.cpp
View file @
ba5e8bef
...
...
@@ -158,6 +158,9 @@ public:
std
::
vector
<
UMat
>
outputs
;
std
::
vector
<
UMat
>
internals
;
if
(
inputs_
.
depth
()
==
CV_16S
)
return
false
;
inputs_
.
getUMatVector
(
inputs
);
outputs_
.
getUMatVector
(
outputs
);
internals_
.
getUMatVector
(
internals
);
...
...
@@ -237,7 +240,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/region_layer.cpp
View file @
ba5e8bef
...
...
@@ -127,7 +127,7 @@ public:
std
::
vector
<
UMat
>
outputs
;
// TODO: implement a logistic activation to classification scores.
if
(
useLogistic
)
if
(
useLogistic
||
inps
.
depth
()
==
CV_16S
)
return
false
;
inps
.
getUMatVector
(
inputs
);
...
...
@@ -191,7 +191,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/reorg_layer.cpp
View file @
ba5e8bef
...
...
@@ -96,9 +96,10 @@ public:
std
::
vector
<
UMat
>
inputs
;
std
::
vector
<
UMat
>
outputs
;
bool
use_half
=
(
inps
.
depth
()
==
CV_16S
);
inps
.
getUMatVector
(
inputs
);
outs
.
getUMatVector
(
outputs
);
String
buildopt
=
String
(
"-DDtype="
)
+
ocl
::
typeToStr
(
inputs
[
0
].
type
())
+
String
(
"
"
);
String
buildopt
=
format
(
"-DDtype=%s "
,
use_half
?
"half"
:
"float
"
);
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
...
...
@@ -134,7 +135,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/layers/reshape_layer.cpp
View file @
ba5e8bef
...
...
@@ -219,7 +219,7 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_OCL_RUN
(
(
preferableTarget
==
DNN_TARGET_OPENCL
)
&&
CV_OCL_RUN
(
IS_DNN_OPENCL_TARGET
(
preferableTarget
)
&&
OCL_PERFORMANCE_CHECK
(
ocl
::
Device
::
getDefault
().
isIntel
()),
forward_ocl
(
inputs_arr
,
outputs_arr
,
internals_arr
))
...
...
modules/dnn/src/opencl/batchnorm.cl
View file @
ba5e8bef
...
...
@@ -40,24 +40,27 @@
//
//M*/
#
define
Dtype
float
#
define
Dtype4
float4
#
define
Dtype8
float8
#
if
defined
(
cl_khr_fp16
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
endif
#
if
NUM
==
8
#
define
load
(
src,
index
)
vload8
(
0
,
src
+
index
)
#
define
store
(
vec,
dst,
index
)
vstore8
(
vec,
0
,
dst
+
index
)
#
define
vec_type
Dtype8
#
define
float_type
float8
#
define
convert_f
convert_float8
#
define
BATCH_NORM
batch_norm8
#
elif
NUM
==
4
#
define
load
(
src,
index
)
vload4
(
0
,
src
+
index
)
#
define
store
(
vec,
dst,
index
)
vstore4
(
vec,
0
,
dst
+
index
)
#
define
vec_type
Dtype4
#
define
float_type
float4
#
define
convert_f
convert_float4
#
define
BATCH_NORM
batch_norm4
#
elif
NUM
==
1
#
define
load
(
src,
index
)
src[index]
#
define
store
(
vec,
dst,
index
)
dst[index]
=
vec
#
define
vec_type
Dtype
#
define
float_type
float
#
define
convert_f
convert_float
#
define
BATCH_NORM
batch_norm1
#
endif
...
...
@@ -65,8 +68,8 @@ __kernel void BATCH_NORM(__global const Dtype* src,
const
int
rows,
const
int
cols,
const
int
channels,
__global
const
Dtype
*
weight,
__global
const
Dtype
*
bias,
__global
const
float
*
weight,
__global
const
float
*
bias,
__global
Dtype*
dst
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -76,9 +79,9 @@ __kernel void BATCH_NORM(__global const Dtype* src,
if
(
x
>=
rows
||
y
>=
cols
)
return
;
Dtype
w
=
weight[x
%
channels]
;
Dtype
b
=
bias[x
%
channels]
;
vec_type
src_vec
=
load
(
src,
index
)
;
vec_type
dst_vec
=
src_vec
*
w
+
(
vec
_type
)
b
;
store
(
dst_vec
,
dst,
index
)
;
float
w
=
weight[x
%
channels]
;
float
b
=
bias[x
%
channels]
;
float_type
src_vec
=
convert_f
(
load
(
src,
index
)
)
;
float_type
dst_vec
=
src_vec
*
w
+
(
float
_type
)
b
;
store
(
convert_T
(
dst_vec
)
,
dst,
index
)
;
}
modules/dnn/src/opencl/permute.cl
View file @
ba5e8bef
...
...
@@ -40,7 +40,9 @@
//
//M*/
#
define
Dtype
float
#
if
defined
(
cl_khr_fp16
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
endif
__kernel
void
permute
(
const
int
nthreads,
__global
Dtype*
bottom_data,
...
...
modules/dnn/src/opencl/prior_box.cl
View file @
ba5e8bef
...
...
@@ -39,17 +39,18 @@
//
//M*/
#
define
Dtype
float
#
define
Dtype4
float4
#
if
defined
(
cl_khr_fp16
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
endif
__kernel
void
prior_box
(
const
int
nthreads,
const
Dtype
stepX,
const
Dtype
stepY,
__global
const
Dtype
*
_offsetsX,
__global
const
Dtype
*
_offsetsY,
const
float
stepX,
const
float
stepY,
__global
const
float
*
_offsetsX,
__global
const
float
*
_offsetsY,
const
int
offsetsX_size,
__global
const
Dtype
*
_widths,
__global
const
Dtype
*
_heights,
__global
const
float
*
_widths,
__global
const
float
*
_heights,
const
int
widths_size,
__global
Dtype*
dst,
const
int
_layerHeight,
...
...
@@ -65,7 +66,7 @@ __kernel void prior_box(const int nthreads,
outputPtr
=
dst
+
index
*
4
*
offsetsX_size
*
widths_size
;
Dtype
_boxWidth,
_boxHeight
;
float
_boxWidth,
_boxHeight
;
Dtype4
vec
;
for
(
int
i
=
0
; i < widths_size; ++i)
{
...
...
@@ -73,8 +74,8 @@ __kernel void prior_box(const int nthreads,
_boxHeight
=
_heights[i]
;
for
(
int
j
=
0
; j < offsetsX_size; ++j)
{
float
center_x
=
(
w
+
_offsetsX[j]
)
*
stepX
;
float
center_y
=
(
h
+
_offsetsY[j]
)
*
stepY
;
Dtype
center_x
=
(
w
+
_offsetsX[j]
)
*
(
Dtype
)
stepX
;
Dtype
center_y
=
(
h
+
_offsetsY[j]
)
*
(
Dtype
)
stepY
;
vec.x
=
(
center_x
-
_boxWidth
*
0.5f
)
/
imgWidth
; // xmin
vec.y
=
(
center_y
-
_boxHeight
*
0.5f
)
/
imgHeight
; // ymin
...
...
@@ -91,7 +92,7 @@ __kernel void prior_box(const int nthreads,
__kernel
void
set_variance
(
const
int
nthreads,
const
int
offset,
const
int
variance_size,
__global
const
Dtype
*
variance,
__global
const
float
*
variance,
__global
Dtype*
dst
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < nthreads; index += get_global_size(0))
...
...
@@ -101,7 +102,7 @@ __kernel void set_variance(const int nthreads,
if
(
variance_size
==
1
)
var_vec
=
(
Dtype4
)(
variance[0]
)
;
else
var_vec
=
vload4
(
0
,
variance
)
;
var_vec
=
convert_T
(
vload4
(
0
,
variance
)
)
;
vstore4
(
var_vec,
0
,
dst
+
offset
+
index
*
4
)
;
}
...
...
modules/dnn/src/opencl/reorg.cl
View file @
ba5e8bef
...
...
@@ -39,6 +39,10 @@
//
//M*/
#
if
defined
(
cl_khr_fp16
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
endif
__kernel
void
reorg
(
const
int
count,
__global
const
Dtype*
src,
const
int
channels,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment