Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
345b1369
Commit
345b1369
authored
Sep 25, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
correct neon rounding
parent
4b3f2c19
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
65 additions
and
30 deletions
+65
-30
base.hpp
modules/core/include/opencv2/core/base.hpp
+35
-0
convert.cpp
modules/core/src/convert.cpp
+30
-30
No files found.
modules/core/include/opencv2/core/base.hpp
View file @
345b1369
...
...
@@ -568,6 +568,41 @@ CV_EXPORTS int getIppStatus();
CV_EXPORTS
String
getIppErrorLocation
();
}
// ipp
#if CV_NEON
/**
 * Rounds two single-precision lanes to 32-bit signed integers.
 *
 * Each lane gets 0.5 carrying that lane's own sign added to it, and the sum
 * is then converted with vcvt_s32_f32. Combined with a truncating conversion
 * this yields round-to-nearest with halfway cases going away from zero.
 *
 * NOTE(review): the addition is itself rounded in float, so inputs whose sum
 * with 0.5 is not exactly representable may land on a neighbouring integer --
 * confirm whether callers care.
 *
 * @param v  lanes to round
 * @return   converted lanes
 */
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
{
    // Sign-bit mask. Spelled as INT_MIN arithmetic because `1 << 31` shifts
    // into the sign bit of a signed int. Plain locals (not function statics)
    // avoid a guarded-initialisation check on every call.
    const int32x2_t v_sign = vdup_n_s32(-0x7fffffff - 1);
    const int32x2_t v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));

    // Copy each lane's sign bit onto the bit pattern of 0.5 -> +0.5 or -0.5.
    const int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));

    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
}
/**
 * Rounds four single-precision lanes to 32-bit signed integers.
 *
 * Each lane gets 0.5 carrying that lane's own sign added to it, and the sum
 * is then converted with vcvtq_s32_f32. Combined with a truncating conversion
 * this yields round-to-nearest with halfway cases going away from zero.
 *
 * NOTE(review): the addition is itself rounded in float, so inputs whose sum
 * with 0.5 is not exactly representable may land on a neighbouring integer --
 * confirm whether callers care.
 *
 * @param v  lanes to round
 * @return   converted lanes
 */
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
    // Sign-bit mask. Spelled as INT_MIN arithmetic because `1 << 31` shifts
    // into the sign bit of a signed int. Plain locals (not function statics)
    // avoid a guarded-initialisation check on every call.
    const int32x4_t v_sign = vdupq_n_s32(-0x7fffffff - 1);
    const int32x4_t v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));

    // Copy each lane's sign bit onto the bit pattern of 0.5 -> +0.5 or -0.5.
    const int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));

    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
}
/**
 * Rounds two single-precision lanes to 32-bit unsigned integers.
 *
 * Adds 0.5 to every lane and converts the sum with vcvt_u32_f32. No sign
 * handling is done here; what happens to negative lanes is whatever the
 * conversion intrinsic does with them.
 *
 * @param v  lanes to round
 * @return   converted lanes
 */
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
{
    // The 0.5 bias is built inline rather than kept in a function-local
    // static, which would need a guarded-initialisation check on every call.
    return vcvt_u32_f32(vadd_f32(v, vdup_n_f32(0.5f)));
}
/**
 * Rounds four single-precision lanes to 32-bit unsigned integers.
 *
 * Adds 0.5 to every lane and converts the sum with vcvtq_u32_f32. No sign
 * handling is done here; what happens to negative lanes is whatever the
 * conversion intrinsic does with them.
 *
 * @param v  lanes to round
 * @return   converted lanes
 */
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
{
    // The 0.5 bias is built inline rather than kept in a function-local
    // static, which would need a guarded-initialisation check on every call.
    return vcvtq_u32_f32(vaddq_f32(v, vdupq_n_f32(0.5f)));
}
#endif
}
// cv
#endif //__OPENCV_CORE_BASE_HPP__
modules/core/src/convert.cpp
View file @
345b1369
...
...
@@ -1276,10 +1276,10 @@ struct cvtScaleAbs_SIMD<uchar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1320,10 +1320,10 @@ struct cvtScaleAbs_SIMD<schar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1353,8 +1353,8 @@ struct cvtScaleAbs_SIMD<ushort, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1384,8 +1384,8 @@ struct cvtScaleAbs_SIMD<short, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1407,11 +1407,11 @@ struct cvtScaleAbs_SIMD<int, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
)),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
+
4
)),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -1434,11 +1434,11 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
+
4
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -2011,12 +2011,12 @@ struct Cvt_SIMD<float, uchar>
for
(
;
x
<=
width
-
16
;
x
+=
16
)
{
int32x4_t
v_src1
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
int32x4_t
v_src3
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
8
));
int32x4_t
v_src4
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
12
));
uint8x8_t
v_dst1
=
vqmovn_u16
(
vcombine_u16
(
vqmov
un_s32
(
v_src1
),
vqmovun_s
32
(
v_src2
)));
uint8x8_t
v_dst2
=
vqmovn_u16
(
vcombine_u16
(
vqmov
un_s32
(
v_src3
),
vqmovun_s
32
(
v_src4
)));
uint32x4_t
v_src1
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
));
uint32x4_t
v_src2
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
uint32x4_t
v_src3
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
8
));
uint32x4_t
v_src4
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
12
));
uint8x8_t
v_dst1
=
vqmovn_u16
(
vcombine_u16
(
vqmov
n_u32
(
v_src1
),
vqmovn_u
32
(
v_src2
)));
uint8x8_t
v_dst2
=
vqmovn_u16
(
vcombine_u16
(
vqmov
n_u32
(
v_src3
),
vqmovn_u
32
(
v_src4
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
v_dst1
,
v_dst2
));
}
...
...
@@ -2033,10 +2033,10 @@ struct Cvt_SIMD<float, schar>
for
(
;
x
<=
width
-
16
;
x
+=
16
)
{
int32x4_t
v_src1
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
+
4
));
int32x4_t
v_src3
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
+
8
));
int32x4_t
v_src4
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
+
12
));
int32x4_t
v_src1
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
+
4
));
int32x4_t
v_src3
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
+
8
));
int32x4_t
v_src4
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
+
12
));
int8x8_t
v_dst1
=
vqmovn_s16
(
vcombine_s16
(
vqmovn_s32
(
v_src1
),
vqmovn_s32
(
v_src2
)));
int8x8_t
v_dst2
=
vqmovn_s16
(
vcombine_s16
(
vqmovn_s32
(
v_src3
),
vqmovn_s32
(
v_src4
)));
vst1q_s8
(
dst
+
x
,
vcombine_s8
(
v_dst1
,
v_dst2
));
...
...
@@ -2056,9 +2056,9 @@ struct Cvt_SIMD<float, ushort>
for
(
;
x
<=
width
-
8
;
x
+=
8
)
{
int32x4_t
v_src1
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
vst1q_u16
(
dst
+
x
,
vcombine_u16
(
vqmov
un_s32
(
v_src1
),
vqmovun_s
32
(
v_src2
)));
uint32x4_t
v_src1
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
));
uint32x4_t
v_src2
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
vst1q_u16
(
dst
+
x
,
vcombine_u16
(
vqmov
n_u32
(
v_src1
),
vqmovn_u
32
(
v_src2
)));
}
return
x
;
...
...
@@ -2073,7 +2073,7 @@ struct Cvt_SIMD<float, int>
int
x
=
0
;
for
(
;
x
<=
width
-
4
;
x
+=
4
)
vst1q_s32
(
dst
+
x
,
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
)));
vst1q_s32
(
dst
+
x
,
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
)));
return
x
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment