Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b6a0acb8
Commit
b6a0acb8
authored
Feb 08, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
core: keep history of convert.cpp
parents
5527c414
1629e29b
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
527 additions
and
0 deletions
+527
-0
convert.simd.hpp
modules/core/src/convert.simd.hpp
+527
-0
No files found.
modules/core/src/convert.simd.hpp
0 → 100644
View file @
b6a0acb8
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert.hpp"
namespace
cv
{
/*namespace hal {
void cvt16f32f( const float16_t* src, float* dst, int len )
{
int j = 0;
#if CV_SIMD
const int VECSZ = v_float32::nlanes;
for( ; j < len; j += VECSZ )
{
if( j > len - VECSZ )
{
if( j == 0 )
break;
j = len - VECSZ;
}
v_store(dst + j, vx_load_expand(src + j));
}
#endif
for( ; j < len; j++ )
dst[j] = (float)src[j];
}
void cvt32f16f( const float* src, float16_t* dst, int len )
{
int j = 0;
#if CV_SIMD
const int VECSZ = v_float32::nlanes;
for( ; j < len; j += VECSZ )
{
if( j > len - VECSZ )
{
if( j == 0 )
break;
j = len - VECSZ;
}
v_pack_store(dst + j, vx_load(src + j));
}
#endif
for( ; j < len; j++ )
dst[j] = float16_t(src[j]);
}
/*void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len )
{
// the loop is simple enough, so we let the compiler vectorize it
for( int i = 0; i < len; i++ )
arr[i] = scaleBiasPairs[i*2 + 1];
}
void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len )
{
// the loop is simple enough, so we let the compiler vectorize it
for( int i = 0; i < len; i++ )
arr[i] = scaleBiasPairs[i*2 + 1];
}
}*/
// Converts a 2D block of elements from _Ts to _Td, row by row.
//   src, sstep - first source row and the row stride; the stride is divided by
//                the element size below, i.e. callers pass it in bytes
//   dst, dstep - first destination row and its stride (same convention)
//   size       - size.width elements per row, size.height rows
// With CV_SIMD enabled, each row is processed in chunks of two _Twvec vectors
// (VECSZ = 2 * _Twvec::nlanes elements); whatever the vector loop leaves
// unprocessed is converted one element at a time with saturate_cast<_Td>.
template<typename _Ts, typename _Td, typename _Twvec> inline void
cvt_(const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size)
{
    // Turn the strides into element counts so they can be added to the typed pointers.
    sstep /= sizeof(src[0]);
    dstep /= sizeof(dst[0]);

    const int width = size.width;
    for( int row = 0; row < size.height; row++, src += sstep, dst += dstep )
    {
        int x = 0;
#if CV_SIMD
        const int VECSZ = _Twvec::nlanes*2;
        const int lastFull = width - VECSZ;   // start of the last chunk that still fits in the row
        while( x < width )
        {
            if( x > lastFull )
            {
                // Fewer than VECSZ elements remain. Normally we step back and redo
                // the final full-width chunk, overlapping elements already written.
                // That is skipped when the row is shorter than one chunk (x == 0)
                // or when src and dst are the same buffer, where the overlapped
                // elements would be read back after having been overwritten.
                if( x == 0 || src == (_Ts*)dst )
                    break;
                x = lastFull;
            }
            _Twvec lo, hi;
            vx_load_pair_as(src + x, lo, hi);
            v_store_pair_as(dst + x, lo, hi);
            x += VECSZ;
        }
#endif
        // Scalar path: the whole row without SIMD, otherwise only the leftover tail.
        while( x < width )
        {
            dst[x] = saturate_cast<_Td>(src[x]);
            x++;
        }
    }
}
// in order to reduce the code size, for (16f <-> ...) conversions
// we add a conversion function without loop unrolling
// Single-vector variant of the row converter: same contract as cvt_, but each
// SIMD step handles one _Twvec vector (VECSZ = _Twvec::nlanes elements).
//   src, sstep - first source row and its stride (divided by the element size below)
//   dst, dstep - first destination row and its stride (same convention)
//   size       - size.width elements per row, size.height rows
template<typename _Ts, typename _Td, typename _Twvec> inline void
cvt1_(const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size)
{
    // Turn the strides into element counts so they can be added to the typed pointers.
    sstep /= sizeof(src[0]);
    dstep /= sizeof(dst[0]);

    const int width = size.width;
    for( int row = 0; row < size.height; row++, src += sstep, dst += dstep )
    {
        int x = 0;
#if CV_SIMD
        const int VECSZ = _Twvec::nlanes;
        const int lastFull = width - VECSZ;   // start of the last vector that still fits in the row
        while( x < width )
        {
            if( x > lastFull )
            {
                // Partial vector left: step back and redo the last full vector,
                // unless the row is shorter than one vector or the conversion
                // runs in place (src aliases dst) - then leave it to the scalar loop.
                if( x == 0 || src == (_Ts*)dst )
                    break;
                x = lastFull;
            }
            _Twvec v;
            vx_load_as(src + x, v);
            v_store_as(dst + x, v);
            x += VECSZ;
        }
        vx_cleanup();
#endif
        // Scalar path: the whole row without SIMD, otherwise only the leftover tail.
        for( ; x < width; x++ )
            dst[x] = saturate_cast<_Td>(src[x]);
    }
}
// Copies a 2D block of raw bytes without any conversion.
//   src, sstep - first source row and the byte offset between consecutive rows
//   dst, dstep - first destination row and the byte offset between consecutive rows
//   size       - size.width elements per row, size.height rows
//   elemsize   - size of one element in bytes
// Each row is copied with a single memcpy of size.width * elemsize bytes.
static void cvtCopy(const uchar* src, size_t sstep,
                    uchar* dst, size_t dstep, Size size, size_t elemsize)
{
    const size_t rowBytes = size.width*elemsize;

    for( int rowsLeft = size.height; rowsLeft > 0; --rowsLeft )
    {
        memcpy(dst, src, rowBytes);
        src += sstep;
        dst += dstep;
    }
}
// Defines a static function `cvt<suffix>` that forwards to
// `cvtfunc<_Ts, _Td, _Twvec>(src, sstep, dst, dstep, size)`.
// The generated function has two unnamed parameters (uchar*, size_t) between the
// source and destination arguments and a trailing unnamed void*; all three are ignored.
#define DEF_CVT_FUNC(suffix, cvtfunc, _Ts, _Td, _Twvec)             \
    static void cvt##suffix(const _Ts* src, size_t sstep,           \
                            uchar*, size_t,                         \
                            _Td* dst, size_t dstep,                 \
                            Size size, void*)                       \
    {                                                               \
        cvtfunc<_Ts, _Td, _Twvec>(src, sstep, dst, dstep, size);    \
    }
////////////////////// 8u -> ... ////////////////////////

// Each line defines cvt<suffix> (source depth first, destination depth second).
// The suffix must be a single token so that `cvt##suffix` yields the names
// referenced by the getConvertFunc table (cvt8u8s, cvt8u16u, ...).
DEF_CVT_FUNC(8u8s,  cvt_, uchar, schar,  v_int16)
DEF_CVT_FUNC(8u16u, cvt_, uchar, ushort, v_uint16)
DEF_CVT_FUNC(8u16s, cvt_, uchar, short,  v_int16)
DEF_CVT_FUNC(8u32s, cvt_, uchar, int,    v_int32)
DEF_CVT_FUNC(8u32f, cvt_, uchar, float,  v_float32)
DEF_CVT_FUNC(8u64f, cvt_, uchar, double, v_int32)
//DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32)
////////////////////// 8s -> ... ////////////////////////

// Each line defines cvt<suffix> (source depth first, destination depth second).
// The suffix must be a single token so that `cvt##suffix` yields the names
// referenced by the getConvertFunc table (cvt8s8u, cvt8s16u, ...).
DEF_CVT_FUNC(8s8u,  cvt_, schar, uchar,  v_int16)
DEF_CVT_FUNC(8s16u, cvt_, schar, ushort, v_uint16)
DEF_CVT_FUNC(8s16s, cvt_, schar, short,  v_int16)
DEF_CVT_FUNC(8s32s, cvt_, schar, int,    v_int32)
DEF_CVT_FUNC(8s32f, cvt_, schar, float,  v_float32)
DEF_CVT_FUNC(8s64f, cvt_, schar, double, v_int32)
//DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32)
////////////////////// 16u -> ... ////////////////////////

// Each line defines cvt<suffix> (source depth first, destination depth second).
// The suffix must be a single token so that `cvt##suffix` yields the names
// referenced by the getConvertFunc table (cvt16u8u, cvt16u8s, ...).
DEF_CVT_FUNC(16u8u,  cvt_, ushort, uchar,  v_uint16)
DEF_CVT_FUNC(16u8s,  cvt_, ushort, schar,  v_uint16)
DEF_CVT_FUNC(16u16s, cvt_, ushort, short,  v_int32)
DEF_CVT_FUNC(16u32s, cvt_, ushort, int,    v_int32)
DEF_CVT_FUNC(16u32f, cvt_, ushort, float,  v_float32)
DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32)
//DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32)
////////////////////// 16s -> ... ////////////////////////

// Each line defines cvt<suffix> (source depth first, destination depth second).
// The suffix must be a single token so that `cvt##suffix` yields the names
// referenced by the getConvertFunc table (cvt16s8u, cvt16s8s, ...).
DEF_CVT_FUNC(16s8u,  cvt_, short, uchar,  v_int16)
DEF_CVT_FUNC(16s8s,  cvt_, short, schar,  v_int16)
DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32)
DEF_CVT_FUNC(16s32s, cvt_, short, int,    v_int32)
DEF_CVT_FUNC(16s32f, cvt_, short, float,  v_float32)
DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32)
//DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32)
////////////////////// 32s -> ... ////////////////////////

// Each line defines cvt<suffix> (source depth first, destination depth second).
// The suffix must be a single token so that `cvt##suffix` yields the names
// referenced by the getConvertFunc table (cvt32s8u, cvt32s8s, ...).
DEF_CVT_FUNC(32s8u,  cvt_, int, uchar,  v_int32)
DEF_CVT_FUNC(32s8s,  cvt_, int, schar,  v_int32)
DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32)
DEF_CVT_FUNC(32s16s, cvt_, int, short,  v_int32)
DEF_CVT_FUNC(32s32f, cvt_, int, float,  v_float32)
DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32)
//DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32)
////////////////////// 32f -> ... ////////////////////////

// Each line defines cvt<suffix> (source depth first, destination depth second).
// The suffix must be a single token so that `cvt##suffix` yields the names
// referenced elsewhere in this file (cvt32f8u, ..., and cvt32f16f used by convertFp16).
DEF_CVT_FUNC(32f8u,  cvt_,  float, uchar,     v_float32)
DEF_CVT_FUNC(32f8s,  cvt_,  float, schar,     v_float32)
DEF_CVT_FUNC(32f16u, cvt_,  float, ushort,    v_float32)
DEF_CVT_FUNC(32f16s, cvt_,  float, short,     v_float32)
DEF_CVT_FUNC(32f32s, cvt_,  float, int,       v_float32)
DEF_CVT_FUNC(32f64f, cvt_,  float, double,    v_float32)
DEF_CVT_FUNC(32f16f, cvt1_, float, float16_t, v_float32)
////////////////////// 64f -> ... ////////////////////////

// Each line defines cvt<suffix> (source depth first, destination depth second).
// The suffix must be a single token so that `cvt##suffix` yields the names
// referenced by the getConvertFunc table (cvt64f8u, cvt64f8s, ...).
DEF_CVT_FUNC(64f8u,  cvt_, double, uchar,  v_int32)
DEF_CVT_FUNC(64f8s,  cvt_, double, schar,  v_int32)
DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32)
DEF_CVT_FUNC(64f16s, cvt_, double, short,  v_int32)
DEF_CVT_FUNC(64f32s, cvt_, double, int,    v_int32)
DEF_CVT_FUNC(64f32f, cvt_, double, float,  v_float32)
//DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32)
////////////////////// 16f -> ... ////////////////////////

// Only the 16f -> 32f conversion is enabled; the other 16f sources are commented out.
// The suffix must be a single token so that `cvt##suffix` yields cvt16f32f,
// the name convertFp16 refers to.
//DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32)
//DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32)
//DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32)
//DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32)
//DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32)
DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32)
//DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32)
///////////// "conversion" w/o conversion ///////////////
// Same-type "conversion" for 1-byte elements: a row-by-row byte copy via cvtCopy.
// The two unnamed middle parameters and the trailing void* are ignored.
static void cvt8u(const uchar* src, size_t sstep, uchar*, size_t,
                  uchar* dst, size_t dstep, Size size, void*)
{
    const size_t elemSize = 1;
    cvtCopy(src, sstep, dst, dstep, size, elemSize);
}
static
void
cvt16u
(
const
ushort
*
src
,
size_t
sstep
,
uchar
*
,
size_t
,
ushort
*
dst
,
size_t
dstep
,
Size
size
,
void
*
)
{
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
2
);
}
static
void
cvt32s
(
const
int
*
src
,
size_t
sstep
,
uchar
*
,
size_t
,
int
*
dst
,
size_t
dstep
,
Size
size
,
void
*
)
{
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
4
);
}
static
void
cvt64s
(
const
int64
*
src
,
size_t
sstep
,
uchar
*
,
size_t
,
int64
*
dst
,
size_t
dstep
,
Size
size
,
void
*
)
{
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
8
);
}
/* [TODO] Recover IPP calls
#if defined(HAVE_IPP)
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#else
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
#endif
#define DEF_CVT_FUNC(suffix, stype, dtype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CPY_FUNC(suffix, stype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
stype* dst, size_t dstep, Size size, double*) \
{ \
cpy_(src, sstep, dst, dstep, size); \
}
DEF_CPY_FUNC(8u, uchar)
DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs)
DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R)
DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R)
DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R)
DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs)
DEF_CVT_FUNC(64f8u, double, uchar)
DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs)
DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs)
DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs)
DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R)
DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs)
DEF_CVT_FUNC(64f8s, double, schar)
DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R)
DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs)
DEF_CPY_FUNC(16u, ushort)
DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
DEF_CVT_FUNC(64f16u, double, ushort)
DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R)
DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R)
DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
DEF_CVT_FUNC(32f16s, float, short)
DEF_CVT_FUNC(64f16s, double, short)
DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R)
DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R)
DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
DEF_CPY_FUNC(32s, int)
DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
DEF_CVT_FUNC(64f32s, double, int)
DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R)
DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R)
DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
DEF_CVT_FUNC(64f32f, double, float)
DEF_CVT_FUNC(8u64f, uchar, double)
DEF_CVT_FUNC(8s64f, schar, double)
DEF_CVT_FUNC(16u64f, ushort, double)
DEF_CVT_FUNC(16s64f, short, double)
DEF_CVT_FUNC(32s64f, int, double)
DEF_CVT_FUNC(32f64f, float, double)
DEF_CPY_FUNC(64s, int64)
*/
// Returns the conversion routine for the given source and destination depths.
// The table is indexed as cvtTab[destination depth][source depth]; both indices
// are reduced with CV_MAT_DEPTH first. Diagonal entries are the plain copy
// routines (cvt8u / cvt16u / cvt32s / cvt64s, chosen by element size).
// Row 7 and column 7 hold null pointers; the commented-out names suggest they
// are reserved for 16f conversions. Callers must therefore check the result.
BinaryFunc getConvertFunc(int sdepth, int ddepth)
{
    static BinaryFunc cvtTab[][8] =
    {
        {   // destination depth 0
            (BinaryFunc)(cvt8u),
            (BinaryFunc)GET_OPTIMIZED(cvt8s8u),
            (BinaryFunc)GET_OPTIMIZED(cvt16u8u),
            (BinaryFunc)GET_OPTIMIZED(cvt16s8u),
            (BinaryFunc)GET_OPTIMIZED(cvt32s8u),
            (BinaryFunc)GET_OPTIMIZED(cvt32f8u),
            (BinaryFunc)GET_OPTIMIZED(cvt64f8u),
            0 //(BinaryFunc)(cvt16f8u)
        },
        {   // destination depth 1
            (BinaryFunc)GET_OPTIMIZED(cvt8u8s),
            (BinaryFunc)cvt8u,
            (BinaryFunc)GET_OPTIMIZED(cvt16u8s),
            (BinaryFunc)GET_OPTIMIZED(cvt16s8s),
            (BinaryFunc)GET_OPTIMIZED(cvt32s8s),
            (BinaryFunc)GET_OPTIMIZED(cvt32f8s),
            (BinaryFunc)GET_OPTIMIZED(cvt64f8s),
            0 //(BinaryFunc)(cvt16f8s)
        },
        {   // destination depth 2
            (BinaryFunc)GET_OPTIMIZED(cvt8u16u),
            (BinaryFunc)GET_OPTIMIZED(cvt8s16u),
            (BinaryFunc)cvt16u,
            (BinaryFunc)GET_OPTIMIZED(cvt16s16u),
            (BinaryFunc)GET_OPTIMIZED(cvt32s16u),
            (BinaryFunc)GET_OPTIMIZED(cvt32f16u),
            (BinaryFunc)GET_OPTIMIZED(cvt64f16u),
            0 //(BinaryFunc)(cvt16f16u)
        },
        {   // destination depth 3
            (BinaryFunc)GET_OPTIMIZED(cvt8u16s),
            (BinaryFunc)GET_OPTIMIZED(cvt8s16s),
            (BinaryFunc)GET_OPTIMIZED(cvt16u16s),
            (BinaryFunc)cvt16u,
            (BinaryFunc)GET_OPTIMIZED(cvt32s16s),
            (BinaryFunc)GET_OPTIMIZED(cvt32f16s),
            (BinaryFunc)GET_OPTIMIZED(cvt64f16s),
            0 //(BinaryFunc)(cvt16f16s)
        },
        {   // destination depth 4
            (BinaryFunc)GET_OPTIMIZED(cvt8u32s),
            (BinaryFunc)GET_OPTIMIZED(cvt8s32s),
            (BinaryFunc)GET_OPTIMIZED(cvt16u32s),
            (BinaryFunc)GET_OPTIMIZED(cvt16s32s),
            (BinaryFunc)cvt32s,
            (BinaryFunc)GET_OPTIMIZED(cvt32f32s),
            (BinaryFunc)GET_OPTIMIZED(cvt64f32s),
            0 //(BinaryFunc)(cvt16f32s)
        },
        {   // destination depth 5
            (BinaryFunc)GET_OPTIMIZED(cvt8u32f),
            (BinaryFunc)GET_OPTIMIZED(cvt8s32f),
            (BinaryFunc)GET_OPTIMIZED(cvt16u32f),
            (BinaryFunc)GET_OPTIMIZED(cvt16s32f),
            (BinaryFunc)GET_OPTIMIZED(cvt32s32f),
            (BinaryFunc)cvt32s,
            (BinaryFunc)GET_OPTIMIZED(cvt64f32f),
            0 //(BinaryFunc)(cvt16f32f)
        },
        {   // destination depth 6
            (BinaryFunc)GET_OPTIMIZED(cvt8u64f),
            (BinaryFunc)GET_OPTIMIZED(cvt8s64f),
            (BinaryFunc)GET_OPTIMIZED(cvt16u64f),
            (BinaryFunc)GET_OPTIMIZED(cvt16s64f),
            (BinaryFunc)GET_OPTIMIZED(cvt32s64f),
            (BinaryFunc)GET_OPTIMIZED(cvt32f64f),
            (BinaryFunc)(cvt64s),
            0 //(BinaryFunc)(cvt16f64f)
        },
        {   // destination depth 7: no routines registered
            0, 0, 0, 0, 0, 0, 0, 0
            //(BinaryFunc)(cvt8u16f), (BinaryFunc)(cvt8s16f), (BinaryFunc)(cvt16u16f), (BinaryFunc)(cvt16s16f),
            //(BinaryFunc)(cvt32s16f), (BinaryFunc)(cvt32f16f), (BinaryFunc)(cvt64f16f), (BinaryFunc)(cvt16u)
        }
    };

    const int dstIdx = CV_MAT_DEPTH(ddepth);
    const int srcIdx = CV_MAT_DEPTH(sdepth);
    return cvtTab[dstIdx][srcIdx];
}
#ifdef HAVE_OPENCL

// OpenCL path for convertFp16.
// Allocates _dst with the size of _src and type (ddepth, channels of _src),
// builds the "convertFp16" kernel from ocl::core::halfconvert_oclsrc and runs it
// on a 2D grid of (cols * channels) x rows work items.
// The direction is selected by sdepth: CV_32F means float -> half (and adds
// -D FLOAT_TO_HALF to the build options), anything else means half -> float.
// Returns false if the kernel could not be created, otherwise the result of Kernel::run.
static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int sdepth, int ddepth )
{
    const int srcType = _src.type();
    const int cn = CV_MAT_CN(srcType);

    _dst.createSameSize( _src, CV_MAKETYPE(ddepth, cn) );

    const int kercn = 1;
    const int rowsPerWI = 1;

    const bool floatToHalf = (sdepth == CV_32F);
    const char* srcTypeName = floatToHalf ? "float" : "half";
    const char* dstTypeName = floatToHalf ? "half" : "float";
    const char* directionOpt = floatToHalf ? " -D FLOAT_TO_HALF " : "";

    String build_opt = format("-D HALF_SUPPORT -D srcT=%s -D dstT=%s -D rowsPerWI=%d%s",
                              srcTypeName, dstTypeName, rowsPerWI, directionOpt);

    ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt);
    if( k.empty() )
        return false;

    UMat src = _src.getUMat();
    UMat dst = _dst.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src);
    ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
    k.args(srcarg, dstarg);

    // One work item per kercn scalar elements horizontally, per rowsPerWI rows vertically.
    size_t globalsize[2] =
    {
        (size_t)src.cols * cn / kercn,
        ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI
    };
    return k.run(2, globalsize, NULL, false);
}

#endif
}
// cv::
// Converts this matrix to another depth, writing the result to _dst.
//   _dst        - output array; released if this matrix is empty
//   _type       - requested type; only its depth is used, the channel count is
//                 always taken from this matrix. If negative, the type of _dst
//                 is used when it is fixed, otherwise the type of this matrix.
//   alpha, beta - passed to the scaling routine as the pair {alpha, beta}
// When alpha is within DBL_EPSILON of 1 and beta within DBL_EPSILON of 0 the
// non-scaling routine from getConvertFunc is used; if in addition the depth does
// not change, the call reduces to copyTo. Otherwise getConvertScaleFunc supplies
// the routine. A missing routine triggers CV_Assert.
void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
{
    CV_INSTRUMENT_REGION();

    if( empty() )
    {
        _dst.release();
        return;
    }

    const bool unitAlpha = fabs(alpha - 1) < DBL_EPSILON;
    const bool zeroBeta = fabs(beta) < DBL_EPSILON;
    const bool identityScale = unitAlpha && zeroBeta;

    if( _type >= 0 )
        _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), channels());
    else if( _dst.fixedType() )
        _type = _dst.type();
    else
        _type = type();

    const int sdepth = depth();
    const int ddepth = CV_MAT_DEPTH(_type);
    if( identityScale && sdepth == ddepth )
    {
        copyTo(_dst);
        return;
    }

    // Take a header copy before _dst is (re)allocated.
    Mat src = *this;
    if( dims > 2 )
        _dst.create( dims, size, _type );
    else
        _dst.create( size(), _type );
    Mat dst = _dst.getMat();

    BinaryFunc func;
    if( identityScale )
        func = getConvertFunc(sdepth, ddepth);
    else
        func = getConvertScaleFunc(sdepth, ddepth);

    double scale[] = {alpha, beta};
    int cn = channels();
    CV_Assert( func != 0 );

    if( dims > 2 )
    {
        // n-dimensional case: process plane by plane, each plane as a single row.
        const Mat* arrays[] = {&src, &dst, 0};
        uchar* ptrs[2] = {};
        NAryMatIterator it(arrays, ptrs);
        Size planeSize((int)(it.size*cn), 1);

        for( size_t plane = 0; plane < it.nplanes; plane++, ++it )
            func(ptrs[0], 1, 0, 0, ptrs[1], 1, planeSize, scale);
    }
    else
    {
        Size sz = getContinuousSize2D(src, dst, cn);
        func( src.data, src.step, 0, 0, dst.data, dst.step, sz, scale );
    }
}
//==================================================================================================
// Converts between 32-bit float data and half-precision data stored in CV_16S arrays.
//   _src - input array; its depth selects the direction:
//            CV_32F -> output depth CV_16S, routine cvt32f16f
//            CV_16S -> output depth CV_32F, routine cvt16f32f
//          any other depth raises Error::StsUnsupportedFormat
//   _dst - output array; created with the dimensions of _src and the output depth.
//          For CV_32F input with a fixed-type _dst, its depth must be CV_16S and
//          its channel count must match _src (checked with CV_Assert).
// For inputs with at most 2 dimensions and a UMat destination the OpenCL
// implementation is tried first through CV_OCL_RUN.
void cv::convertFp16( InputArray _src, OutputArray _dst )
{
    CV_INSTRUMENT_REGION();

    const int sdepth = _src.depth();
    int ddepth = 0;
    BinaryFunc func = 0;

    if( sdepth == CV_32F )
    {
        ddepth = CV_16S;
        if( _dst.fixedType() )
        {
            ddepth = _dst.depth();
            CV_Assert(ddepth == CV_16S /*|| ddepth == CV_16F*/);
            CV_Assert(_dst.channels() == _src.channels());
        }
        func = (BinaryFunc)cvt32f16f;
    }
    else if( sdepth == CV_16S /*|| sdepth == CV_16F*/ )
    {
        ddepth = CV_32F;
        func = (BinaryFunc)cvt16f32f;
    }
    else
    {
        CV_Error(Error::StsUnsupportedFormat, "Unsupported input depth");
        return;
    }

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_convertFp16(_src, _dst, sdepth, ddepth))

    Mat src = _src.getMat();
    const int cn = src.channels();

    const int dstType = CV_MAKETYPE(ddepth, cn);
    _dst.create( src.dims, src.size, dstType );
    Mat dst = _dst.getMat();

    CV_Assert( func != 0 );

    if( src.dims > 2 )
    {
        // n-dimensional case: process plane by plane, each plane as a single row,
        // so the row strides passed to the routine are irrelevant (0).
        const Mat* arrays[] = {&src, &dst, 0};
        uchar* ptrs[2] = {};
        NAryMatIterator it(arrays, ptrs);
        Size planeSize((int)(it.size*cn), 1);

        for( size_t plane = 0; plane < it.nplanes; plane++, ++it )
            func(ptrs[0], 0, 0, 0, ptrs[1], 0, planeSize, 0);
    }
    else
    {
        Size sz = getContinuousSize2D(src, dst, cn);
        func( src.data, src.step, 0, 0, dst.data, dst.step, sz, 0);
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment