Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
047abb00
Commit
047abb00
authored
Sep 26, 2014
by
Maksim Shabunin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3258 from ilya-lavrenov:neon_convert
parents
12059416
345b1369
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
530 additions
and
17 deletions
+530
-17
base.hpp
modules/core/include/opencv2/core/base.hpp
+35
-0
convert.cpp
modules/core/src/convert.cpp
+495
-17
No files found.
modules/core/include/opencv2/core/base.hpp
View file @
047abb00
...
...
@@ -568,6 +568,41 @@ CV_EXPORTS int getIppStatus();
CV_EXPORTS
String
getIppErrorLocation
();
}
// ipp
#if CV_NEON
// Round each lane of a float32x2 vector to the nearest int32, with halfway
// cases rounded away from zero (matches cvRound semantics): copy the sign
// of the input onto 0.5, add it, then truncate-convert.
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
{
    // Sign-bit mask built from an unsigned literal: the original
    // `1 << 31` left-shifts into the sign bit of a signed int, which is
    // undefined behavior; reinterpreting 0x80000000u is fully defined.
    static int32x2_t v_sign = vreinterpret_s32_u32(vdup_n_u32(0x80000000U)),
        v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));

    // +0.5 for positive lanes, -0.5 for negative lanes (bitwise OR of the
    // input's sign bit into the bit pattern of 0.5f).
    int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
}
// Quad-word variant of cv_vrnd_s32_f32: round each lane of a float32x4
// vector to the nearest int32, halfway cases away from zero.
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
    // Sign-bit mask built from an unsigned literal: the original
    // `1 << 31` left-shifts into the sign bit of a signed int, which is
    // undefined behavior; reinterpreting 0x80000000u is fully defined.
    static int32x4_t v_sign = vreinterpretq_s32_u32(vdupq_n_u32(0x80000000U)),
        v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));

    // +0.5 for positive lanes, -0.5 for negative lanes, then truncate.
    int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
}
// Round each lane of a float32x2 vector to the nearest uint32 by adding
// 0.5 and truncating; halfway cases round up.
// NOTE(review): assumes all lanes are non-negative — behavior of the
// float->unsigned conversion for negative inputs is not defined here;
// confirm callers only pass absolute values.
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
{
    static float32x2_t v_05 = vdup_n_f32(0.5f);
    return vcvt_u32_f32(vadd_f32(v, v_05));
}
// Quad-word variant of cv_vrnd_u32_f32: round each lane of a float32x4
// vector to the nearest uint32 (add 0.5, truncate; halfway rounds up).
// NOTE(review): assumes all lanes are non-negative — confirm callers
// only pass absolute values.
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
{
    static float32x4_t v_05 = vdupq_n_f32(0.5f);
    return vcvtq_u32_f32(vaddq_f32(v, v_05));
}
#endif
}
// cv
#endif //__OPENCV_CORE_BASE_HPP__
modules/core/src/convert.cpp
View file @
047abb00
...
...
@@ -1276,10 +1276,10 @@ struct cvtScaleAbs_SIMD<uchar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1320,10 +1320,10 @@ struct cvtScaleAbs_SIMD<schar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1353,8 +1353,8 @@ struct cvtScaleAbs_SIMD<ushort, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1384,8 +1384,8 @@ struct cvtScaleAbs_SIMD<short, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1407,11 +1407,11 @@ struct cvtScaleAbs_SIMD<int, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
)),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
+
4
)),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -1434,11 +1434,11 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
+
4
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -1491,6 +1491,7 @@ cvtScale_( const T* src, size_t sstep,
for
(
;
size
.
height
--
;
src
+=
sstep
,
dst
+=
dstep
)
{
int
x
=
0
;
#if CV_ENABLE_UNROLLED
for
(
;
x
<=
size
.
width
-
4
;
x
+=
4
)
{
...
...
@@ -1604,16 +1605,493 @@ cvtScale_<short, int, float>( const short* src, size_t sstep,
}
}
// Generic fallback for vectorized element-type conversion: does no SIMD
// work at all and reports that zero elements were handled, so the caller's
// scalar tail loop converts the whole row. NEON specializations below
// override this for the profitable type pairs.
template<typename T, typename DT> struct Cvt_SIMD
{
    // Returns the number of leading elements already converted (always 0).
    int operator () (const T *, DT *, int) const { return 0; }
};
#if CV_NEON
// from uchar

// uchar -> schar: zero-extend to u16, reinterpret as s16, then saturating
// narrow to s8 (values > 127 clamp to 127); 8 elements per iteration.
template <>
struct Cvt_SIMD<uchar, schar>
{
    int operator () (const uchar * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1_s8(dst + x, vqmovn_s16(vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + x)))));
        return x; // elements processed; caller handles the scalar tail
    }
};

// uchar -> ushort: plain zero-extension, 8 elements per iteration.
template <>
struct Cvt_SIMD<uchar, ushort>
{
    int operator () (const uchar * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1q_u16(dst + x, vmovl_u8(vld1_u8(src + x)));
        return x;
    }
};

// uchar -> short: zero-extend to u16 and reinterpret as s16
// (every uchar value fits in short, so no saturation is needed).
template <>
struct Cvt_SIMD<uchar, short>
{
    int operator () (const uchar * src, short * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1q_s16(dst + x, vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + x))));
        return x;
    }
};

// uchar -> int: widen u8 -> u16 -> u32 and reinterpret as s32 (lossless),
// storing two groups of 4 ints per 8-element iteration.
template <>
struct Cvt_SIMD<uchar, int>
{
    int operator () (const uchar * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vmovl_u8(vld1_u8(src + x));
            vst1q_s32(dst + x, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_s32(dst + x + 4, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};

// uchar -> float: widen u8 -> u16 -> u32, then convert to f32
// (exact, since uchar values are at most 255).
template <>
struct Cvt_SIMD<uchar, float>
{
    int operator () (const uchar * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vmovl_u8(vld1_u8(src + x));
            vst1q_f32(dst + x, vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};
// from schar

// schar -> uchar: sign-extend to s16, then saturating narrow to u8
// (negative values clamp to 0); 8 elements per iteration.
template <>
struct Cvt_SIMD<schar, uchar>
{
    int operator () (const schar * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1_u8(dst + x, vqmovun_s16(vmovl_s8(vld1_s8(src + x))));
        return x; // elements processed; caller handles the scalar tail
    }
};

// schar -> short: plain sign-extension, 8 elements per iteration.
template <>
struct Cvt_SIMD<schar, short>
{
    int operator () (const schar * src, short * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1q_s16(dst + x, vmovl_s8(vld1_s8(src + x)));
        return x;
    }
};

// schar -> int: sign-extend s8 -> s16 -> s32 (lossless), two stores of
// 4 ints per 8-element iteration.
template <>
struct Cvt_SIMD<schar, int>
{
    int operator () (const schar * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vmovl_s8(vld1_s8(src + x));
            vst1q_s32(dst + x, vmovl_s16(vget_low_s16(v_src)));
            vst1q_s32(dst + x + 4, vmovl_s16(vget_high_s16(v_src)));
        }
        return x;
    }
};

// schar -> float: sign-extend s8 -> s16 -> s32, then convert to f32 (exact).
template <>
struct Cvt_SIMD<schar, float>
{
    int operator () (const schar * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vmovl_s8(vld1_s8(src + x));
            vst1q_f32(dst + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src))));
        }
        return x;
    }
};
// from ushort

// ushort -> uchar: saturating narrow u16 -> u8 (values > 255 clamp to
// 255); processes 16 elements per iteration via two 8-lane loads.
template <>
struct Cvt_SIMD<ushort, uchar>
{
    int operator () (const ushort * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            uint16x8_t v_src1 = vld1q_u16(src + x), v_src2 = vld1q_u16(src + x + 8);
            vst1q_u8(dst + x, vcombine_u8(vqmovn_u16(v_src1), vqmovn_u16(v_src2)));
        }
        return x; // elements processed; caller handles the scalar tail
    }
};

// ushort -> int: zero-extend u16 -> u32 and reinterpret as s32
// (lossless), two stores of 4 ints per 8-element iteration.
template <>
struct Cvt_SIMD<ushort, int>
{
    int operator () (const ushort * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vld1q_u16(src + x);
            vst1q_s32(dst + x, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_s32(dst + x + 4, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};

// ushort -> float: zero-extend u16 -> u32, then convert to f32 (exact).
template <>
struct Cvt_SIMD<ushort, float>
{
    int operator () (const ushort * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vld1q_u16(src + x);
            vst1q_f32(dst + x, vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};
// from short

// short -> uchar: saturating-unsigned narrow s16 -> u8 (negatives clamp
// to 0, values > 255 clamp to 255); 16 elements per iteration.
template <>
struct Cvt_SIMD<short, uchar>
{
    int operator () (const short * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int16x8_t v_src1 = vld1q_s16(src + x), v_src2 = vld1q_s16(src + x + 8);
            vst1q_u8(dst + x, vcombine_u8(vqmovun_s16(v_src1), vqmovun_s16(v_src2)));
        }
        return x; // elements processed; caller handles the scalar tail
    }
};

// short -> schar: saturating narrow s16 -> s8; 16 elements per iteration.
template <>
struct Cvt_SIMD<short, schar>
{
    int operator () (const short * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int16x8_t v_src1 = vld1q_s16(src + x), v_src2 = vld1q_s16(src + x + 8);
            vst1q_s8(dst + x, vcombine_s8(vqmovn_s16(v_src1), vqmovn_s16(v_src2)));
        }
        return x;
    }
};

// short -> ushort: widen to s32 first, then saturating-unsigned narrow
// back to u16 so negative shorts clamp to 0 instead of wrapping.
template <>
struct Cvt_SIMD<short, ushort>
{
    int operator () (const short * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vld1q_s16(src + x);
            uint16x4_t v_dst1 = vqmovun_s32(vmovl_s16(vget_low_s16(v_src)));
            uint16x4_t v_dst2 = vqmovun_s32(vmovl_s16(vget_high_s16(v_src)));
            vst1q_u16(dst + x, vcombine_u16(v_dst1, v_dst2));
        }
        return x;
    }
};

// short -> int: plain sign-extension s16 -> s32, two stores of 4 ints
// per 8-element iteration.
template <>
struct Cvt_SIMD<short, int>
{
    int operator () (const short * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vld1q_s16(src + x);
            vst1q_s32(dst + x, vmovl_s16(vget_low_s16(v_src)));
            vst1q_s32(dst + x + 4, vmovl_s16(vget_high_s16(v_src)));
        }
        return x;
    }
};

// short -> float: sign-extend s16 -> s32, then convert to f32 (exact).
template <>
struct Cvt_SIMD<short, float>
{
    int operator () (const short * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vld1q_s16(src + x);
            vst1q_f32(dst + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src))));
        }
        return x;
    }
};
// from int

// int -> uchar: saturating-unsigned narrow s32 -> u16 -> u8 (negatives
// clamp to 0, values > 255 clamp to 255); 16 elements per iteration
// through four 4-lane loads.
template <>
struct Cvt_SIMD<int, uchar>
{
    int operator () (const int * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            int32x4_t v_src3 = vld1q_s32(src + x + 8), v_src4 = vld1q_s32(src + x + 12);
            uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovun_s32(v_src1), vqmovun_s32(v_src2)));
            uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovun_s32(v_src3), vqmovun_s32(v_src4)));
            vst1q_u8(dst + x, vcombine_u8(v_dst1, v_dst2));
        }
        return x; // elements processed; caller handles the scalar tail
    }
};

// int -> schar: saturating narrow s32 -> s16 -> s8; 16 elements/iteration.
template <>
struct Cvt_SIMD<int, schar>
{
    int operator () (const int * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            int32x4_t v_src3 = vld1q_s32(src + x + 8), v_src4 = vld1q_s32(src + x + 12);
            int8x8_t v_dst1 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src1), vqmovn_s32(v_src2)));
            int8x8_t v_dst2 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src3), vqmovn_s32(v_src4)));
            vst1q_s8(dst + x, vcombine_s8(v_dst1, v_dst2));
        }
        return x;
    }
};

// int -> ushort: saturating-unsigned narrow s32 -> u16; 8 elements/iter.
template <>
struct Cvt_SIMD<int, ushort>
{
    int operator () (const int * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            vst1q_u16(dst + x, vcombine_u16(vqmovun_s32(v_src1), vqmovun_s32(v_src2)));
        }
        return x;
    }
};

// int -> short: saturating narrow s32 -> s16; 8 elements per iteration.
template <>
struct Cvt_SIMD<int, short>
{
    int operator () (const int * src, short * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            vst1q_s16(dst + x, vcombine_s16(vqmovn_s32(v_src1), vqmovn_s32(v_src2)));
        }
        return x;
    }
};

// int -> float: direct s32 -> f32 conversion, 4 elements per iteration.
// (Values with magnitude above 2^24 lose precision in f32.)
template <>
struct Cvt_SIMD<int, float>
{
    int operator () (const int * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 4; x += 4)
            vst1q_f32(dst + x, vcvtq_f32_s32(vld1q_s32(src + x)));
        return x;
    }
};
// from float

// float -> uchar: round-to-nearest via cv_vrndq_u32_f32, then saturating
// narrow u32 -> u16 -> u8; 16 elements per iteration.
// NOTE(review): relies on cv_vrndq_u32_f32, whose behavior for negative
// inputs is not defined here — confirm callers' value range.
template <>
struct Cvt_SIMD<float, uchar>
{
    int operator () (const float * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            uint32x4_t v_src1 = cv_vrndq_u32_f32(vld1q_f32(src + x));
            uint32x4_t v_src2 = cv_vrndq_u32_f32(vld1q_f32(src + x + 4));
            uint32x4_t v_src3 = cv_vrndq_u32_f32(vld1q_f32(src + x + 8));
            uint32x4_t v_src4 = cv_vrndq_u32_f32(vld1q_f32(src + x + 12));
            uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(v_src1), vqmovn_u32(v_src2)));
            uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(v_src3), vqmovn_u32(v_src4)));
            vst1q_u8(dst + x, vcombine_u8(v_dst1, v_dst2));
        }
        return x; // elements processed; caller handles the scalar tail
    }
};

// float -> schar: round-to-nearest via cv_vrndq_s32_f32, then saturating
// narrow s32 -> s16 -> s8; 16 elements per iteration.
template <>
struct Cvt_SIMD<float, schar>
{
    int operator () (const float * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int32x4_t v_src1 = cv_vrndq_s32_f32(vld1q_f32(src + x));
            int32x4_t v_src2 = cv_vrndq_s32_f32(vld1q_f32(src + x + 4));
            int32x4_t v_src3 = cv_vrndq_s32_f32(vld1q_f32(src + x + 8));
            int32x4_t v_src4 = cv_vrndq_s32_f32(vld1q_f32(src + x + 12));
            int8x8_t v_dst1 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src1), vqmovn_s32(v_src2)));
            int8x8_t v_dst2 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src3), vqmovn_s32(v_src4)));
            vst1q_s8(dst + x, vcombine_s8(v_dst1, v_dst2));
        }
        return x;
    }
};

// float -> ushort: round-to-nearest, then saturating narrow u32 -> u16;
// 8 elements per iteration.
template <>
struct Cvt_SIMD<float, ushort>
{
    int operator () (const float * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint32x4_t v_src1 = cv_vrndq_u32_f32(vld1q_f32(src + x));
            uint32x4_t v_src2 = cv_vrndq_u32_f32(vld1q_f32(src + x + 4));
            vst1q_u16(dst + x, vcombine_u16(vqmovn_u32(v_src1), vqmovn_u32(v_src2)));
        }
        return x;
    }
};

// float -> int: round-to-nearest (away-from-zero halfway) via
// cv_vrndq_s32_f32; 4 elements per iteration.
template <>
struct Cvt_SIMD<float, int>
{
    int operator () (const float * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 4; x += 4)
            vst1q_s32(dst + x, cv_vrndq_s32_f32(vld1q_f32(src + x)));
        return x;
    }
};
#endif
template
<
typename
T
,
typename
DT
>
static
void
cvt_
(
const
T
*
src
,
size_t
sstep
,
DT
*
dst
,
size_t
dstep
,
Size
size
)
{
sstep
/=
sizeof
(
src
[
0
]);
dstep
/=
sizeof
(
dst
[
0
]);
Cvt_SIMD
<
T
,
DT
>
vop
;
for
(
;
size
.
height
--
;
src
+=
sstep
,
dst
+=
dstep
)
{
int
x
=
0
;
int
x
=
vop
(
src
,
dst
,
size
.
width
)
;
#if CV_ENABLE_UNROLLED
for
(
;
x
<=
size
.
width
-
4
;
x
+=
4
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment