Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1e667de1
Commit
1e667de1
authored
May 24, 2016
by
Maksim Shabunin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
HAL math interfaces: fastAtan2, magnitude, sqrt, invSqrt, log, exp
parent
f6fa1cee
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
411 additions
and
315 deletions
+411
-315
hal.hpp
modules/core/include/opencv2/core/hal/hal.hpp
+3
-1
intrin.hpp
modules/core/include/opencv2/core/hal/intrin.hpp
+94
-0
perf_math.cpp
modules/core/perf/perf_math.cpp
+14
-0
hal_replacement.hpp
modules/core/src/hal_replacement.hpp
+104
-0
mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+6
-136
mathfuncs_core.cpp
modules/core/src/mathfuncs_core.cpp
+171
-84
test_intrin.cpp
modules/core/test/test_intrin.cpp
+11
-11
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+0
-76
test_math.cpp
modules/core/test/test_math.cpp
+7
-6
AKAZEFeatures.cpp
modules/features2d/src/kaze/AKAZEFeatures.cpp
+1
-1
No files found.
modules/core/include/opencv2/core/hal/hal.hpp
View file @
1e667de1
...
...
@@ -85,7 +85,8 @@ CV_EXPORTS void exp64f(const double* src, double* dst, int n);
CV_EXPORTS
void
log32f
(
const
float
*
src
,
float
*
dst
,
int
n
);
CV_EXPORTS
void
log64f
(
const
double
*
src
,
double
*
dst
,
int
n
);
CV_EXPORTS
void
fastAtan2
(
const
float
*
y
,
const
float
*
x
,
float
*
dst
,
int
n
,
bool
angleInDegrees
);
CV_EXPORTS
void
fastAtan32f
(
const
float
*
y
,
const
float
*
x
,
float
*
dst
,
int
n
,
bool
angleInDegrees
);
CV_EXPORTS
void
fastAtan64f
(
const
double
*
y
,
const
double
*
x
,
double
*
dst
,
int
n
,
bool
angleInDegrees
);
CV_EXPORTS
void
magnitude32f
(
const
float
*
x
,
const
float
*
y
,
float
*
dst
,
int
n
);
CV_EXPORTS
void
magnitude64f
(
const
double
*
x
,
const
double
*
y
,
double
*
dst
,
int
n
);
CV_EXPORTS
void
sqrt32f
(
const
float
*
src
,
float
*
dst
,
int
len
);
...
...
@@ -228,6 +229,7 @@ CV_EXPORTS void exp(const double* src, double* dst, int n);
CV_EXPORTS
void
log
(
const
float
*
src
,
float
*
dst
,
int
n
);
CV_EXPORTS
void
log
(
const
double
*
src
,
double
*
dst
,
int
n
);
CV_EXPORTS
void
fastAtan2
(
const
float
*
y
,
const
float
*
x
,
float
*
dst
,
int
n
,
bool
angleInDegrees
);
CV_EXPORTS
void
magnitude
(
const
float
*
x
,
const
float
*
y
,
float
*
dst
,
int
n
);
CV_EXPORTS
void
magnitude
(
const
double
*
x
,
const
double
*
y
,
double
*
dst
,
int
n
);
CV_EXPORTS
void
sqrt
(
const
float
*
src
,
float
*
dst
,
int
len
);
...
...
modules/core/include/opencv2/core/hal/intrin.hpp
View file @
1e667de1
...
...
@@ -317,4 +317,98 @@ template <typename T> struct V_SIMD128Traits
//! @}
//==================================================================================================
//! @cond IGNORED
namespace
cv
{
template
<
typename
R
>
struct
V_RegTrait128
;
template
<>
struct
V_RegTrait128
<
uchar
>
{
typedef
v_uint8x16
reg
;
typedef
v_uint16x8
w_reg
;
typedef
v_uint32x4
q_reg
;
typedef
v_uint8x16
u_reg
;
static
v_uint8x16
zero
()
{
return
v_setzero_u8
();
}
static
v_uint8x16
all
(
uchar
val
)
{
return
v_setall_u8
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
schar
>
{
typedef
v_int8x16
reg
;
typedef
v_int16x8
w_reg
;
typedef
v_int32x4
q_reg
;
typedef
v_uint8x16
u_reg
;
static
v_int8x16
zero
()
{
return
v_setzero_s8
();
}
static
v_int8x16
all
(
schar
val
)
{
return
v_setall_s8
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
ushort
>
{
typedef
v_uint16x8
reg
;
typedef
v_uint32x4
w_reg
;
typedef
v_int16x8
int_reg
;
typedef
v_uint16x8
u_reg
;
static
v_uint16x8
zero
()
{
return
v_setzero_u16
();
}
static
v_uint16x8
all
(
ushort
val
)
{
return
v_setall_u16
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
short
>
{
typedef
v_int16x8
reg
;
typedef
v_int32x4
w_reg
;
typedef
v_uint16x8
u_reg
;
static
v_int16x8
zero
()
{
return
v_setzero_s16
();
}
static
v_int16x8
all
(
short
val
)
{
return
v_setall_s16
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
unsigned
>
{
typedef
v_uint32x4
reg
;
typedef
v_uint64x2
w_reg
;
typedef
v_int32x4
int_reg
;
typedef
v_uint32x4
u_reg
;
static
v_uint32x4
zero
()
{
return
v_setzero_u32
();
}
static
v_uint32x4
all
(
unsigned
val
)
{
return
v_setall_u32
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
int
>
{
typedef
v_int32x4
reg
;
typedef
v_int64x2
w_reg
;
typedef
v_uint32x4
u_reg
;
static
v_int32x4
zero
()
{
return
v_setzero_s32
();
}
static
v_int32x4
all
(
int
val
)
{
return
v_setall_s32
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
uint64
>
{
typedef
v_uint64x2
reg
;
static
v_uint64x2
zero
()
{
return
v_setzero_u64
();
}
static
v_uint64x2
all
(
uint64
val
)
{
return
v_setall_u64
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
int64
>
{
typedef
v_int64x2
reg
;
static
v_int64x2
zero
()
{
return
v_setzero_s64
();
}
static
v_int64x2
all
(
int64
val
)
{
return
v_setall_s64
(
val
);
}
};
template
<>
struct
V_RegTrait128
<
float
>
{
typedef
v_float32x4
reg
;
typedef
v_int32x4
int_reg
;
typedef
v_float32x4
u_reg
;
static
v_float32x4
zero
()
{
return
v_setzero_f32
();
}
static
v_float32x4
all
(
float
val
)
{
return
v_setall_f32
(
val
);
}
};
#if CV_SIMD128_64F
template
<>
struct
V_RegTrait128
<
double
>
{
typedef
v_float64x2
reg
;
typedef
v_int32x4
int_reg
;
typedef
v_float64x2
u_reg
;
static
v_float64x2
zero
()
{
return
v_setzero_f64
();
}
static
v_float64x2
all
(
double
val
)
{
return
v_setall_f64
(
val
);
}
};
#endif
}
// cv::
//! @endcond
#endif
modules/core/perf/perf_math.cpp
View file @
1e667de1
...
...
@@ -25,6 +25,20 @@ PERF_TEST_P(VectorLength, phase32f, testing::Values(128, 1000, 128*1024, 512*102
SANITY_CHECK
(
angle
,
5e-5
);
}
PERF_TEST_P
(
VectorLength
,
phase64f
,
testing
::
Values
(
128
,
1000
,
128
*
1024
,
512
*
1024
,
1024
*
1024
))
{
size_t
length
=
GetParam
();
vector
<
double
>
X
(
length
);
vector
<
double
>
Y
(
length
);
vector
<
double
>
angle
(
length
);
declare
.
in
(
X
,
Y
,
WARMUP_RNG
).
out
(
angle
);
TEST_CYCLE_N
(
200
)
cv
::
phase
(
X
,
Y
,
angle
,
true
);
SANITY_CHECK
(
angle
,
5e-5
);
}
PERF_TEST_P
(
MaxDim_MaxPoints
,
kmeans
,
testing
::
Combine
(
testing
::
Values
(
16
,
32
,
64
),
testing
::
Values
(
300
,
400
,
500
)
)
)
...
...
modules/core/src/hal_replacement.hpp
View file @
1e667de1
...
...
@@ -376,6 +376,110 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int
#define cv_hal_merge64s hal_ni_merge64s
//! @endcond
/**
@param y,x source Y and X arrays
@param dst destination array
@param len length of arrays
@param angleInDegrees if set to true return angles in degrees, otherwise in radians
*/
//! @addtogroup core_hal_interface_fastAtan Atan calculation
//! @{
inline
int
hal_ni_fastAtan32f
(
const
float
*
y
,
const
float
*
x
,
float
*
dst
,
int
len
,
bool
angleInDegrees
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
inline
int
hal_ni_fastAtan64f
(
const
double
*
y
,
const
double
*
x
,
double
*
dst
,
int
len
,
bool
angleInDegrees
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
//! @}
//! @cond IGNORED
#define cv_hal_fastAtan32f hal_ni_fastAtan32f
#define cv_hal_fastAtan64f hal_ni_fastAtan64f
//! @endcond
/**
@param x,y source X and Y arrays
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_magnitude Magnitude calculation
//! @{
inline
int
hal_ni_magnitude32f
(
const
float
*
x
,
const
float
*
y
,
float
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
inline
int
hal_ni_magnitude64f
(
const
double
*
x
,
const
double
*
y
,
double
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
//! @}
//! @cond IGNORED
#define cv_hal_magnitude32f hal_ni_magnitude32f
#define cv_hal_magnitude64f hal_ni_magnitude64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_invSqrt Inverse square root calculation
//! @{
inline
int
hal_ni_invSqrt32f
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
inline
int
hal_ni_invSqrt64f
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
//! @}
//! @cond IGNORED
#define cv_hal_invSqrt32f hal_ni_invSqrt32f
#define cv_hal_invSqrt64f hal_ni_invSqrt64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_sqrt Square root calculation
//! @{
inline
int
hal_ni_sqrt32f
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
inline
int
hal_ni_sqrt64f
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
//! @}
//! @cond IGNORED
#define cv_hal_sqrt32f hal_ni_sqrt32f
#define cv_hal_sqrt64f hal_ni_sqrt64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_log Natural logarithm calculation
//! @{
inline
int
hal_ni_log32f
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
inline
int
hal_ni_log64f
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
//! @}
//! @cond IGNORED
#define cv_hal_log32f hal_ni_log32f
#define cv_hal_log64f hal_ni_log64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_exp Exponent calculation
//! @{
inline
int
hal_ni_exp32f
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
inline
int
hal_ni_exp64f
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
return
CV_HAL_ERROR_NOT_IMPLEMENTED
;
}
//! @}
//! @cond IGNORED
#define cv_hal_exp32f hal_ni_exp32f
#define cv_hal_exp64f hal_ni_exp64f
//! @endcond
/**
@brief Dummy structure storing DFT/DCT context
...
...
modules/core/src/mathfuncs.cpp
View file @
1e667de1
...
...
@@ -51,11 +51,6 @@ namespace cv
typedef
void
(
*
MathFunc
)(
const
void
*
src
,
void
*
dst
,
int
len
);
static
const
float
atan2_p1
=
0.9997878412794807
f
*
(
float
)(
180
/
CV_PI
);
static
const
float
atan2_p3
=
-
0.3258083974640975
f
*
(
float
)(
180
/
CV_PI
);
static
const
float
atan2_p5
=
0.1555786518463281
f
*
(
float
)(
180
/
CV_PI
);
static
const
float
atan2_p7
=
-
0.04432655554792128
f
*
(
float
)(
180
/
CV_PI
);
#ifdef HAVE_OPENCL
enum
{
OCL_OP_LOG
=
0
,
OCL_OP_EXP
=
1
,
OCL_OP_MAG
=
2
,
OCL_OP_PHASE_DEGREES
=
3
,
OCL_OP_PHASE_RADIANS
=
4
};
...
...
@@ -100,29 +95,6 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
#endif
float
fastAtan2
(
float
y
,
float
x
)
{
float
ax
=
std
::
abs
(
x
),
ay
=
std
::
abs
(
y
);
float
a
,
c
,
c2
;
if
(
ax
>=
ay
)
{
c
=
ay
/
(
ax
+
(
float
)
DBL_EPSILON
);
c2
=
c
*
c
;
a
=
(((
atan2_p7
*
c2
+
atan2_p5
)
*
c2
+
atan2_p3
)
*
c2
+
atan2_p1
)
*
c
;
}
else
{
c
=
ax
/
(
ay
+
(
float
)
DBL_EPSILON
);
c2
=
c
*
c
;
a
=
90.
f
-
(((
atan2_p7
*
c2
+
atan2_p5
)
*
c2
+
atan2_p3
)
*
c2
+
atan2_p1
)
*
c
;
}
if
(
x
<
0
)
a
=
180.
f
-
a
;
if
(
y
<
0
)
a
=
360.
f
-
a
;
return
a
;
}
/* ************************************************************************** *\
Fast cube root by Ken Turkowski
(http://www.worldserver.com/turk/computergraphics/papers.html)
...
...
@@ -202,7 +174,6 @@ void magnitude( InputArray src1, InputArray src2, OutputArray dst )
}
}
void
phase
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
bool
angleInDegrees
)
{
int
type
=
src1
.
type
(),
depth
=
src1
.
depth
(),
cn
=
src1
.
channels
();
...
...
@@ -218,19 +189,8 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
const
Mat
*
arrays
[]
=
{
&
X
,
&
Y
,
&
Angle
,
0
};
uchar
*
ptrs
[
3
];
NAryMatIterator
it
(
arrays
,
ptrs
);
cv
::
AutoBuffer
<
float
>
_buf
;
float
*
buf
[
2
]
=
{
0
,
0
};
int
j
,
k
,
total
=
(
int
)(
it
.
size
*
cn
),
blockSize
=
total
;
int
j
,
total
=
(
int
)(
it
.
size
*
cn
),
blockSize
=
total
;
size_t
esz1
=
X
.
elemSize1
();
if
(
depth
==
CV_64F
)
{
blockSize
=
std
::
min
(
blockSize
,
((
BLOCK_SIZE
+
cn
-
1
)
/
cn
)
*
cn
);
_buf
.
allocate
(
blockSize
*
2
);
buf
[
0
]
=
_buf
;
buf
[
1
]
=
buf
[
0
]
+
blockSize
;
}
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
{
for
(
j
=
0
;
j
<
total
;
j
+=
blockSize
)
...
...
@@ -240,53 +200,13 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
{
const
float
*
x
=
(
const
float
*
)
ptrs
[
0
],
*
y
=
(
const
float
*
)
ptrs
[
1
];
float
*
angle
=
(
float
*
)
ptrs
[
2
];
hal
::
fastAtan
2
(
y
,
x
,
angle
,
len
,
angleInDegrees
);
hal
::
fastAtan
32f
(
y
,
x
,
angle
,
len
,
angleInDegrees
);
}
else
{
const
double
*
x
=
(
const
double
*
)
ptrs
[
0
],
*
y
=
(
const
double
*
)
ptrs
[
1
];
double
*
angle
=
(
double
*
)
ptrs
[
2
];
k
=
0
;
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
+
2
)));
__m128
v_dst1
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
+
2
)));
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
}
}
#endif
for
(
;
k
<
len
;
k
++
)
{
buf
[
0
][
k
]
=
(
float
)
x
[
k
];
buf
[
1
][
k
]
=
(
float
)
y
[
k
];
}
hal
::
fastAtan2
(
buf
[
1
],
buf
[
0
],
buf
[
0
],
len
,
angleInDegrees
);
k
=
0
;
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
}
}
#endif
for
(
;
k
<
len
;
k
++
)
angle
[
k
]
=
buf
[
0
][
k
];
hal
::
fastAtan64f
(
y
,
x
,
angle
,
len
,
angleInDegrees
);
}
ptrs
[
0
]
+=
len
*
esz1
;
ptrs
[
1
]
+=
len
*
esz1
;
...
...
@@ -353,18 +273,9 @@ void cartToPolar( InputArray src1, InputArray src2,
const
Mat
*
arrays
[]
=
{
&
X
,
&
Y
,
&
Mag
,
&
Angle
,
0
};
uchar
*
ptrs
[
4
];
NAryMatIterator
it
(
arrays
,
ptrs
);
cv
::
AutoBuffer
<
float
>
_buf
;
float
*
buf
[
2
]
=
{
0
,
0
};
int
j
,
k
,
total
=
(
int
)(
it
.
size
*
cn
),
blockSize
=
std
::
min
(
total
,
((
BLOCK_SIZE
+
cn
-
1
)
/
cn
)
*
cn
);
int
j
,
total
=
(
int
)(
it
.
size
*
cn
),
blockSize
=
std
::
min
(
total
,
((
BLOCK_SIZE
+
cn
-
1
)
/
cn
)
*
cn
);
size_t
esz1
=
X
.
elemSize1
();
if
(
depth
==
CV_64F
)
{
_buf
.
allocate
(
blockSize
*
2
);
buf
[
0
]
=
_buf
;
buf
[
1
]
=
buf
[
0
]
+
blockSize
;
}
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
{
for
(
j
=
0
;
j
<
total
;
j
+=
blockSize
)
...
...
@@ -375,55 +286,14 @@ void cartToPolar( InputArray src1, InputArray src2,
const
float
*
x
=
(
const
float
*
)
ptrs
[
0
],
*
y
=
(
const
float
*
)
ptrs
[
1
];
float
*
mag
=
(
float
*
)
ptrs
[
2
],
*
angle
=
(
float
*
)
ptrs
[
3
];
hal
::
magnitude32f
(
x
,
y
,
mag
,
len
);
hal
::
fastAtan
2
(
y
,
x
,
angle
,
len
,
angleInDegrees
);
hal
::
fastAtan
32f
(
y
,
x
,
angle
,
len
,
angleInDegrees
);
}
else
{
const
double
*
x
=
(
const
double
*
)
ptrs
[
0
],
*
y
=
(
const
double
*
)
ptrs
[
1
];
double
*
angle
=
(
double
*
)
ptrs
[
3
];
hal
::
magnitude64f
(
x
,
y
,
(
double
*
)
ptrs
[
2
],
len
);
k
=
0
;
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_dst0
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
x
+
k
+
2
)));
__m128
v_dst1
=
_mm_movelh_ps
(
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
)),
_mm_cvtpd_ps
(
_mm_loadu_pd
(
y
+
k
+
2
)));
_mm_storeu_ps
(
buf
[
0
]
+
k
,
v_dst0
);
_mm_storeu_ps
(
buf
[
1
]
+
k
,
v_dst1
);
}
}
#endif
for
(
;
k
<
len
;
k
++
)
{
buf
[
0
][
k
]
=
(
float
)
x
[
k
];
buf
[
1
][
k
]
=
(
float
)
y
[
k
];
}
hal
::
fastAtan2
(
buf
[
1
],
buf
[
0
],
buf
[
0
],
len
,
angleInDegrees
);
k
=
0
;
#if CV_SSE2
if
(
USE_SSE2
)
{
for
(
;
k
<=
len
-
4
;
k
+=
4
)
{
__m128
v_src
=
_mm_loadu_ps
(
buf
[
0
]
+
k
);
_mm_storeu_pd
(
angle
+
k
,
_mm_cvtps_pd
(
v_src
));
_mm_storeu_pd
(
angle
+
k
+
2
,
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
v_src
),
8
))));
}
}
#endif
for
(
;
k
<
len
;
k
++
)
angle
[
k
]
=
buf
[
0
][
k
];
hal
::
fastAtan64f
(
y
,
x
,
angle
,
len
,
angleInDegrees
);
}
ptrs
[
0
]
+=
len
*
esz1
;
ptrs
[
1
]
+=
len
*
esz1
;
...
...
modules/core/src/mathfuncs_core.cpp
View file @
1e667de1
...
...
@@ -42,116 +42,188 @@
#include "precomp.hpp"
using
namespace
std
;
#undef HAVE_IPP
namespace
cv
{
namespace
hal
{
namespace
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
static
const
float
atan2_p1
=
0.9997878412794807
f
*
(
float
)(
180
/
CV_PI
);
static
const
float
atan2_p3
=
-
0.3258083974640975
f
*
(
float
)(
180
/
CV_PI
);
static
const
float
atan2_p5
=
0.1555786518463281
f
*
(
float
)(
180
/
CV_PI
);
static
const
float
atan2_p7
=
-
0.04432655554792128
f
*
(
float
)(
180
/
CV_PI
);
void
fastAtan2
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
using
namespace
cv
;
#if CV_SIMD128
template
<
typename
T
>
struct
v_atan
{
int
i
=
0
;
float
scale
=
angleInDegrees
?
1
:
(
float
)(
CV_PI
/
180
);
typedef
V_RegTrait128
<
T
>
Trait
;
typedef
typename
Trait
::
reg
VT
;
// vector type
enum
{
WorkWidth
=
VT
::
nlanes
*
2
};
#ifdef HAVE_TEGRA_OPTIMIZATION
if
(
tegra
::
useTegra
()
&&
tegra
::
FastAtan2_32f
(
Y
,
X
,
angle
,
len
,
scale
))
return
;
#endif
v_atan
(
const
T
&
scale
)
:
s
(
Trait
::
all
(
scale
))
{
eps
=
Trait
::
all
(
DBL_EPSILON
);
z
=
Trait
::
zero
();
p7
=
Trait
::
all
(
atan2_p7
);
p5
=
Trait
::
all
(
atan2_p5
);
p3
=
Trait
::
all
(
atan2_p3
);
p1
=
Trait
::
all
(
atan2_p1
);
val90
=
Trait
::
all
(
90.
f
);
val180
=
Trait
::
all
(
180.
f
);
val360
=
Trait
::
all
(
360.
f
);
}
#if CV_SSE2
Cv32suf
iabsmask
;
iabsmask
.
i
=
0x7fffffff
;
__m128
eps
=
_mm_set1_ps
((
float
)
DBL_EPSILON
),
absmask
=
_mm_set1_ps
(
iabsmask
.
f
);
__m128
_90
=
_mm_set1_ps
(
90.
f
),
_180
=
_mm_set1_ps
(
180.
f
),
_360
=
_mm_set1_ps
(
360.
f
);
__m128
z
=
_mm_setzero_ps
(),
scale4
=
_mm_set1_ps
(
scale
);
__m128
p1
=
_mm_set1_ps
(
atan2_p1
),
p3
=
_mm_set1_ps
(
atan2_p3
);
__m128
p5
=
_mm_set1_ps
(
atan2_p5
),
p7
=
_mm_set1_ps
(
atan2_p7
);
inline
int
operator
()(
int
len
,
const
T
*
Y
,
const
T
*
X
,
T
*
angle
)
{
int
i
=
0
;
const
int
c
=
VT
::
nlanes
;
for
(
;
i
<=
len
-
c
*
2
;
i
+=
c
*
2
)
{
VT
x1
=
v_load
(
X
+
i
);
VT
x2
=
v_load
(
X
+
i
+
c
);
VT
y1
=
v_load
(
Y
+
i
);
VT
y2
=
v_load
(
Y
+
i
+
c
);
v_store
(
&
angle
[
i
],
s
*
one
(
x1
,
y1
));
v_store
(
&
angle
[
i
+
c
],
s
*
one
(
x2
,
y2
));
}
return
i
;
}
for
(
;
i
<=
len
-
4
;
i
+=
4
)
private
:
inline
VT
one
(
VT
&
x
,
VT
&
y
)
{
__m128
x
=
_mm_loadu_ps
(
X
+
i
),
y
=
_mm_loadu_ps
(
Y
+
i
);
__m128
ax
=
_mm_and_ps
(
x
,
absmask
),
ay
=
_mm_and_ps
(
y
,
absmask
);
__m128
mask
=
_mm_cmplt_ps
(
ax
,
ay
);
__m128
tmin
=
_mm_min_ps
(
ax
,
ay
),
tmax
=
_mm_max_ps
(
ax
,
ay
);
__m128
c
=
_mm_div_ps
(
tmin
,
_mm_add_ps
(
tmax
,
eps
));
__m128
c2
=
_mm_mul_ps
(
c
,
c
);
__m128
a
=
_mm_mul_ps
(
c2
,
p7
);
a
=
_mm_mul_ps
(
_mm_add_ps
(
a
,
p5
),
c2
);
a
=
_mm_mul_ps
(
_mm_add_ps
(
a
,
p3
),
c2
);
a
=
_mm_mul_ps
(
_mm_add_ps
(
a
,
p1
),
c
);
__m128
b
=
_mm_sub_ps
(
_90
,
a
);
a
=
_mm_xor_ps
(
a
,
_mm_and_ps
(
_mm_xor_ps
(
a
,
b
),
mask
));
b
=
_mm_sub_ps
(
_180
,
a
);
mask
=
_mm_cmplt_ps
(
x
,
z
);
a
=
_mm_xor_ps
(
a
,
_mm_and_ps
(
_mm_xor_ps
(
a
,
b
),
mask
));
b
=
_mm_sub_ps
(
_360
,
a
);
mask
=
_mm_cmplt_ps
(
y
,
z
);
a
=
_mm_xor_ps
(
a
,
_mm_and_ps
(
_mm_xor_ps
(
a
,
b
),
mask
));
a
=
_mm_mul_ps
(
a
,
scale4
);
_mm_storeu_ps
(
angle
+
i
,
a
);
VT
ax
=
v_abs
(
x
);
VT
ay
=
v_abs
(
y
);
VT
c
=
v_min
(
ax
,
ay
)
/
(
v_max
(
ax
,
ay
)
+
eps
);
VT
cc
=
c
*
c
;
VT
a
=
(((
p7
*
cc
+
p5
)
*
cc
+
p3
)
*
cc
+
p1
)
*
c
;
a
=
v_select
(
ax
>=
ay
,
a
,
val90
-
a
);
a
=
v_select
(
x
<
z
,
val180
-
a
,
a
);
a
=
v_select
(
y
<
z
,
val360
-
a
,
a
);
return
a
;
}
#elif CV_NEON
float32x4_t
eps
=
vdupq_n_f32
((
float
)
DBL_EPSILON
);
float32x4_t
_90
=
vdupq_n_f32
(
90.
f
),
_180
=
vdupq_n_f32
(
180.
f
),
_360
=
vdupq_n_f32
(
360.
f
);
float32x4_t
z
=
vdupq_n_f32
(
0.0
f
),
scale4
=
vdupq_n_f32
(
scale
);
float32x4_t
p1
=
vdupq_n_f32
(
atan2_p1
),
p3
=
vdupq_n_f32
(
atan2_p3
);
float32x4_t
p5
=
vdupq_n_f32
(
atan2_p5
),
p7
=
vdupq_n_f32
(
atan2_p7
);
for
(
;
i
<=
len
-
4
;
i
+=
4
)
private
:
VT
eps
;
VT
z
;
VT
p7
;
VT
p5
;
VT
p3
;
VT
p1
;
VT
val90
;
VT
val180
;
VT
val360
;
VT
s
;
};
#if !CV_SIMD128_64F
// emulation
template
<>
struct
v_atan
<
double
>
{
v_atan
(
double
scale
)
:
impl
(
static_cast
<
float
>
(
scale
))
{}
inline
int
operator
()(
int
len
,
const
double
*
Y
,
const
double
*
X
,
double
*
angle
)
{
int
i
=
0
;
const
int
c
=
v_atan
<
float
>::
WorkWidth
;
float
bufY
[
c
];
float
bufX
[
c
];
float
bufA
[
c
];
for
(
;
i
<=
len
-
c
;
i
+=
c
)
{
for
(
int
j
=
0
;
j
<
c
;
++
j
)
{
bufY
[
j
]
=
static_cast
<
float
>
(
Y
[
i
+
j
]);
bufX
[
j
]
=
static_cast
<
float
>
(
X
[
i
+
j
]);
}
impl
(
c
,
bufY
,
bufX
,
bufA
);
for
(
int
j
=
0
;
j
<
c
;
++
j
)
{
angle
[
i
+
j
]
=
bufA
[
j
];
}
}
return
i
;
}
private
:
v_atan
<
float
>
impl
;
};
#endif
#endif
template
<
typename
T
>
static
inline
T
atanImpl
(
T
y
,
T
x
)
{
T
ax
=
std
::
abs
(
x
),
ay
=
std
::
abs
(
y
);
T
a
,
c
,
c2
;
if
(
ax
>=
ay
)
{
float32x4_t
x
=
vld1q_f32
(
X
+
i
),
y
=
vld1q_f32
(
Y
+
i
);
float32x4_t
ax
=
vabsq_f32
(
x
),
ay
=
vabsq_f32
(
y
);
float32x4_t
tmin
=
vminq_f32
(
ax
,
ay
),
tmax
=
vmaxq_f32
(
ax
,
ay
);
float32x4_t
c
=
vmulq_f32
(
tmin
,
cv_vrecpq_f32
(
vaddq_f32
(
tmax
,
eps
)));
float32x4_t
c2
=
vmulq_f32
(
c
,
c
);
float32x4_t
a
=
vmulq_f32
(
c2
,
p7
);
a
=
vmulq_f32
(
vaddq_f32
(
a
,
p5
),
c2
);
a
=
vmulq_f32
(
vaddq_f32
(
a
,
p3
),
c2
);
a
=
vmulq_f32
(
vaddq_f32
(
a
,
p1
),
c
);
a
=
vbslq_f32
(
vcgeq_f32
(
ax
,
ay
),
a
,
vsubq_f32
(
_90
,
a
));
a
=
vbslq_f32
(
vcltq_f32
(
x
,
z
),
vsubq_f32
(
_180
,
a
),
a
);
a
=
vbslq_f32
(
vcltq_f32
(
y
,
z
),
vsubq_f32
(
_360
,
a
),
a
);
vst1q_f32
(
angle
+
i
,
vmulq_f32
(
a
,
scale4
));
c
=
ay
/
(
ax
+
static_cast
<
T
>
(
DBL_EPSILON
));
c2
=
c
*
c
;
a
=
(((
atan2_p7
*
c2
+
atan2_p5
)
*
c2
+
atan2_p3
)
*
c2
+
atan2_p1
)
*
c
;
}
else
{
c
=
ax
/
(
ay
+
static_cast
<
T
>
(
DBL_EPSILON
));
c2
=
c
*
c
;
a
=
90.
f
-
(((
atan2_p7
*
c2
+
atan2_p5
)
*
c2
+
atan2_p3
)
*
c2
+
atan2_p1
)
*
c
;
}
if
(
x
<
0
)
a
=
180.
f
-
a
;
if
(
y
<
0
)
a
=
360.
f
-
a
;
return
a
;
}
template
<
typename
T
>
static
inline
void
atanImpl
(
const
T
*
Y
,
const
T
*
X
,
T
*
angle
,
int
len
,
bool
angleInDegrees
)
{
int
i
=
0
;
T
scale
=
angleInDegrees
?
1
:
static_cast
<
T
>
(
CV_PI
/
180
);
#if CV_SIMD128
i
=
v_atan
<
T
>
(
scale
)(
len
,
Y
,
X
,
angle
);
#endif
for
(
;
i
<
len
;
i
++
)
{
float
x
=
X
[
i
],
y
=
Y
[
i
];
float
ax
=
std
::
abs
(
x
),
ay
=
std
::
abs
(
y
);
float
a
,
c
,
c2
;
if
(
ax
>=
ay
)
{
c
=
ay
/
(
ax
+
(
float
)
DBL_EPSILON
);
c2
=
c
*
c
;
a
=
(((
atan2_p7
*
c2
+
atan2_p5
)
*
c2
+
atan2_p3
)
*
c2
+
atan2_p1
)
*
c
;
}
else
{
c
=
ax
/
(
ay
+
(
float
)
DBL_EPSILON
);
c2
=
c
*
c
;
a
=
90.
f
-
(((
atan2_p7
*
c2
+
atan2_p5
)
*
c2
+
atan2_p3
)
*
c2
+
atan2_p1
)
*
c
;
}
if
(
x
<
0
)
a
=
180.
f
-
a
;
if
(
y
<
0
)
a
=
360.
f
-
a
;
angle
[
i
]
=
(
float
)(
a
*
scale
);
angle
[
i
]
=
atanImpl
<
T
>
(
Y
[
i
],
X
[
i
])
*
scale
;
}
}
}
// anonymous::
namespace
cv
{
namespace
hal
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
void
fastAtan32f
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CALL_HAL
(
fastAtan32f
,
cv_hal_fastAtan32f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
float
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
void
fastAtan64f
(
const
double
*
Y
,
const
double
*
X
,
double
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CALL_HAL
(
fastAtan64f
,
cv_hal_fastAtan64f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
double
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
// deprecated
void
fastAtan2
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
fastAtan32f
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
void
magnitude32f
(
const
float
*
x
,
const
float
*
y
,
float
*
mag
,
int
len
)
{
CALL_HAL
(
magnitude32f
,
cv_hal_magnitude32f
,
x
,
y
,
mag
,
len
);
#if defined HAVE_IPP
CV_IPP_CHECK
()
{
...
...
@@ -188,6 +260,7 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
void
magnitude64f
(
const
double
*
x
,
const
double
*
y
,
double
*
mag
,
int
len
)
{
CALL_HAL
(
magnitude64f
,
cv_hal_magnitude64f
,
x
,
y
,
mag
,
len
);
#if defined(HAVE_IPP)
CV_IPP_CHECK
()
{
...
...
@@ -225,6 +298,7 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
void
invSqrt32f
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
CALL_HAL
(
invSqrt32f
,
cv_hal_invSqrt32f
,
src
,
dst
,
len
);
#if defined(HAVE_IPP)
CV_IPP_CHECK
()
{
...
...
@@ -256,6 +330,7 @@ void invSqrt32f(const float* src, float* dst, int len)
void
invSqrt64f
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
CALL_HAL
(
invSqrt64f
,
cv_hal_invSqrt64f
,
src
,
dst
,
len
);
int
i
=
0
;
#if CV_SSE2
...
...
@@ -271,6 +346,7 @@ void invSqrt64f(const double* src, double* dst, int len)
void
sqrt32f
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
CALL_HAL
(
sqrt32f
,
cv_hal_sqrt32f
,
src
,
dst
,
len
);
#if defined(HAVE_IPP)
CV_IPP_CHECK
()
{
...
...
@@ -302,6 +378,7 @@ void sqrt32f(const float* src, float* dst, int len)
void
sqrt64f
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
CALL_HAL
(
sqrt64f
,
cv_hal_sqrt64f
,
src
,
dst
,
len
);
#if defined(HAVE_IPP)
CV_IPP_CHECK
()
{
...
...
@@ -433,6 +510,7 @@ static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) <
void
exp32f
(
const
float
*
_x
,
float
*
y
,
int
n
)
{
CALL_HAL
(
exp32f
,
cv_hal_exp32f
,
_x
,
y
,
n
);
static
const
float
A4
=
(
float
)(
1.000000000000002438532970795181890933776
/
EXPPOLY_32F_A0
),
A3
=
(
float
)(
.6931471805521448196800669615864773144641
/
EXPPOLY_32F_A0
),
...
...
@@ -632,6 +710,7 @@ void exp32f( const float *_x, float *y, int n )
void
exp64f
(
const
double
*
_x
,
double
*
y
,
int
n
)
{
CALL_HAL
(
exp64f
,
cv_hal_exp64f
,
_x
,
y
,
n
);
static
const
double
A5
=
.99999999999999999998285227504999
/
EXPPOLY_32F_A0
,
A4
=
.69314718055994546743029643825322
/
EXPPOLY_32F_A0
,
...
...
@@ -1076,6 +1155,7 @@ static const double ln_2 = 0.69314718055994530941723212145818;
void
log32f
(
const
float
*
_x
,
float
*
y
,
int
n
)
{
CALL_HAL
(
log32f
,
cv_hal_log32f
,
_x
,
y
,
n
);
static
const
float
shift
[]
=
{
0
,
-
1.
f
/
512
};
static
const
float
A0
=
0.3333333333333333333333333
f
,
...
...
@@ -1220,6 +1300,7 @@ void log32f( const float *_x, float *y, int n )
void
log64f
(
const
double
*
x
,
double
*
y
,
int
n
)
{
CALL_HAL
(
log64f
,
cv_hal_log64f
,
x
,
y
,
n
);
static
const
double
shift
[]
=
{
0
,
-
1.
/
512
};
static
const
double
A7
=
1.0
,
...
...
@@ -1457,4 +1538,10 @@ void invSqrt(const double* src, double* dst, int len)
}
}}
// cv::hal::
}
// cv::hal::
}
// cv::
float
cv
::
fastAtan2
(
float
y
,
float
x
)
{
return
atanImpl
<
float
>
(
y
,
x
);
}
modules/core/test/test_intrin.cpp
View file @
1e667de1
...
...
@@ -69,8 +69,8 @@ template<typename R> struct TheTest
EXPECT_EQ
(
d
,
res
);
// zero, all
Data
<
R
>
resZ
=
RegTrait
<
R
>::
zero
();
Data
<
R
>
resV
=
RegTrait
<
R
>::
all
(
8
);
Data
<
R
>
resZ
=
V_RegTrait128
<
LaneType
>::
zero
();
Data
<
R
>
resV
=
V_RegTrait128
<
LaneType
>::
all
(
8
);
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
{
EXPECT_EQ
((
LaneType
)
0
,
resZ
[
i
]);
...
...
@@ -135,7 +135,7 @@ template<typename R> struct TheTest
// v_expand and v_load_expand
TheTest
&
test_expand
()
{
typedef
typename
RegTrait
<
R
>::
w_reg
Rx2
;
typedef
typename
V_RegTrait128
<
LaneType
>::
w_reg
Rx2
;
Data
<
R
>
dataA
;
R
a
=
dataA
;
...
...
@@ -158,7 +158,7 @@ template<typename R> struct TheTest
TheTest
&
test_expand_q
()
{
typedef
typename
RegTrait
<
R
>::
q_reg
Rx4
;
typedef
typename
V_RegTrait128
<
LaneType
>::
q_reg
Rx4
;
Data
<
R
>
data
;
Data
<
Rx4
>
out
=
v_load_expand_q
(
data
.
d
);
const
int
n
=
Rx4
::
nlanes
;
...
...
@@ -232,7 +232,7 @@ template<typename R> struct TheTest
TheTest
&
test_mul_expand
()
{
typedef
typename
RegTrait
<
R
>::
w_reg
Rx2
;
typedef
typename
V_RegTrait128
<
LaneType
>::
w_reg
Rx2
;
Data
<
R
>
dataA
,
dataB
(
2
);
R
a
=
dataA
,
b
=
dataB
;
Rx2
c
,
d
;
...
...
@@ -295,7 +295,7 @@ template<typename R> struct TheTest
TheTest
&
test_dot_prod
()
{
typedef
typename
RegTrait
<
R
>::
w_reg
Rx2
;
typedef
typename
V_RegTrait128
<
LaneType
>::
w_reg
Rx2
;
Data
<
R
>
dataA
,
dataB
(
2
);
R
a
=
dataA
,
b
=
dataB
;
...
...
@@ -361,7 +361,7 @@ template<typename R> struct TheTest
TheTest
&
test_absdiff
()
{
typedef
typename
RegTrait
<
R
>::
u_reg
Ru
;
typedef
typename
V_RegTrait128
<
LaneType
>::
u_reg
Ru
;
typedef
typename
Ru
::
lane_type
u_type
;
Data
<
R
>
dataA
(
std
::
numeric_limits
<
LaneType
>::
max
()),
dataB
(
std
::
numeric_limits
<
LaneType
>::
min
());
...
...
@@ -445,7 +445,7 @@ template<typename R> struct TheTest
template
<
int
s
>
TheTest
&
test_pack
()
{
typedef
typename
RegTrait
<
R
>::
w_reg
Rx2
;
typedef
typename
V_RegTrait128
<
LaneType
>::
w_reg
Rx2
;
typedef
typename
Rx2
::
lane_type
w_type
;
Data
<
Rx2
>
dataA
,
dataB
;
dataA
+=
std
::
numeric_limits
<
LaneType
>::
is_signed
?
-
10
:
10
;
...
...
@@ -480,8 +480,8 @@ template<typename R> struct TheTest
template
<
int
s
>
TheTest
&
test_pack_u
()
{
typedef
typename
RegTrait
<
R
>::
w_reg
Rx2
;
typedef
typename
RegTrait
<
Rx2
>::
int_reg
Ri2
;
typedef
typename
V_TypeTraits
<
LaneType
>::
w_type
LaneType_w
;
typedef
typename
V_RegTrait128
<
LaneType_w
>::
int_reg
Ri2
;
typedef
typename
Ri2
::
lane_type
w_type
;
Data
<
Ri2
>
dataA
,
dataB
;
...
...
@@ -572,7 +572,7 @@ template<typename R> struct TheTest
TheTest
&
test_float_math
()
{
typedef
typename
RegTrait
<
R
>::
int_reg
Ri
;
typedef
typename
V_RegTrait128
<
LaneType
>::
int_reg
Ri
;
Data
<
R
>
data1
,
data2
,
data3
;
data1
*=
1.1
;
data2
+=
10
;
...
...
modules/core/test/test_intrin_utils.hpp
View file @
1e667de1
...
...
@@ -155,80 +155,4 @@ template <typename R> std::ostream & operator<<(std::ostream & out, const Data<R
return
out
;
}
//==================================================================================================
template
<
typename
R
>
struct
RegTrait
;
template
<>
struct
RegTrait
<
cv
::
v_uint8x16
>
{
typedef
cv
::
v_uint16x8
w_reg
;
typedef
cv
::
v_uint32x4
q_reg
;
typedef
cv
::
v_uint8x16
u_reg
;
static
cv
::
v_uint8x16
zero
()
{
return
cv
::
v_setzero_u8
();
}
static
cv
::
v_uint8x16
all
(
uchar
val
)
{
return
cv
::
v_setall_u8
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int8x16
>
{
typedef
cv
::
v_int16x8
w_reg
;
typedef
cv
::
v_int32x4
q_reg
;
typedef
cv
::
v_uint8x16
u_reg
;
static
cv
::
v_int8x16
zero
()
{
return
cv
::
v_setzero_s8
();
}
static
cv
::
v_int8x16
all
(
schar
val
)
{
return
cv
::
v_setall_s8
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_uint16x8
>
{
typedef
cv
::
v_uint32x4
w_reg
;
typedef
cv
::
v_int16x8
int_reg
;
typedef
cv
::
v_uint16x8
u_reg
;
static
cv
::
v_uint16x8
zero
()
{
return
cv
::
v_setzero_u16
();
}
static
cv
::
v_uint16x8
all
(
ushort
val
)
{
return
cv
::
v_setall_u16
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int16x8
>
{
typedef
cv
::
v_int32x4
w_reg
;
typedef
cv
::
v_uint16x8
u_reg
;
static
cv
::
v_int16x8
zero
()
{
return
cv
::
v_setzero_s16
();
}
static
cv
::
v_int16x8
all
(
short
val
)
{
return
cv
::
v_setall_s16
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_uint32x4
>
{
typedef
cv
::
v_uint64x2
w_reg
;
typedef
cv
::
v_int32x4
int_reg
;
typedef
cv
::
v_uint32x4
u_reg
;
static
cv
::
v_uint32x4
zero
()
{
return
cv
::
v_setzero_u32
();
}
static
cv
::
v_uint32x4
all
(
unsigned
val
)
{
return
cv
::
v_setall_u32
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int32x4
>
{
typedef
cv
::
v_int64x2
w_reg
;
typedef
cv
::
v_uint32x4
u_reg
;
static
cv
::
v_int32x4
zero
()
{
return
cv
::
v_setzero_s32
();
}
static
cv
::
v_int32x4
all
(
int
val
)
{
return
cv
::
v_setall_s32
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_uint64x2
>
{
static
cv
::
v_uint64x2
zero
()
{
return
cv
::
v_setzero_u64
();
}
static
cv
::
v_uint64x2
all
(
uint64
val
)
{
return
cv
::
v_setall_u64
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int64x2
>
{
static
cv
::
v_int64x2
zero
()
{
return
cv
::
v_setzero_s64
();
}
static
cv
::
v_int64x2
all
(
int64
val
)
{
return
cv
::
v_setall_s64
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_float32x4
>
{
typedef
cv
::
v_int32x4
int_reg
;
typedef
cv
::
v_float32x4
u_reg
;
static
cv
::
v_float32x4
zero
()
{
return
cv
::
v_setzero_f32
();
}
static
cv
::
v_float32x4
all
(
float
val
)
{
return
cv
::
v_setall_f32
(
val
);
}
};
#if CV_SIMD128_64F
template
<>
struct
RegTrait
<
cv
::
v_float64x2
>
{
typedef
cv
::
v_int32x4
int_reg
;
typedef
cv
::
v_float64x2
u_reg
;
static
cv
::
v_float64x2
zero
()
{
return
cv
::
v_setzero_f64
();
}
static
cv
::
v_float64x2
all
(
double
val
)
{
return
cv
::
v_setall_f64
(
val
);
}
};
#endif
#endif
modules/core/test/test_math.cpp
View file @
1e667de1
...
...
@@ -2411,8 +2411,9 @@ TEST(Core_SolvePoly, regression_5599)
class
Core_PhaseTest
:
public
cvtest
::
BaseTest
{
int
t
;
public
:
Core_PhaseTest
()
{}
Core_PhaseTest
(
int
t_
)
:
t
(
t_
)
{}
~
Core_PhaseTest
()
{}
protected
:
virtual
void
run
(
int
)
...
...
@@ -2421,9 +2422,9 @@ protected:
const
int
axisCount
=
8
;
const
int
dim
=
theRNG
().
uniform
(
1
,
10
);
const
float
scale
=
theRNG
().
uniform
(
1.
f
,
100.
f
);
Mat
x
(
axisCount
+
1
,
dim
,
CV_32FC1
),
y
(
axisCount
+
1
,
dim
,
CV_32FC1
);
Mat
anglesInDegrees
(
axisCount
+
1
,
dim
,
CV_32FC1
);
Mat
x
(
axisCount
+
1
,
dim
,
t
),
y
(
axisCount
+
1
,
dim
,
t
);
Mat
anglesInDegrees
(
axisCount
+
1
,
dim
,
t
);
// fill the data
x
.
row
(
0
).
setTo
(
Scalar
(
0
));
...
...
@@ -2696,8 +2697,8 @@ TEST(Core_SVD, accuracy) { Core_SVDTest test; test.safe_run(); }
TEST
(
Core_SVBkSb
,
accuracy
)
{
Core_SVBkSbTest
test
;
test
.
safe_run
();
}
TEST
(
Core_Trace
,
accuracy
)
{
Core_TraceTest
test
;
test
.
safe_run
();
}
TEST
(
Core_SolvePoly
,
accuracy
)
{
Core_SolvePolyTest
test
;
test
.
safe_run
();
}
TEST
(
Core_Phase
,
accuracy
)
{
Core_PhaseTest
test
;
test
.
safe_run
();
}
TEST
(
Core_Phase
,
accuracy
32f
)
{
Core_PhaseTest
test
(
CV_32FC1
)
;
test
.
safe_run
();
}
TEST
(
Core_Phase
,
accuracy64f
)
{
Core_PhaseTest
test
(
CV_64FC1
);
test
.
safe_run
();
}
TEST
(
Core_SVD
,
flt
)
{
...
...
modules/features2d/src/kaze/AKAZEFeatures.cpp
View file @
1e667de1
...
...
@@ -812,7 +812,7 @@ void AKAZEFeatures::Compute_Main_Orientation(KeyPoint& kpt, const std::vector<TE
}
}
}
hal
::
fastAtan
2
(
resY
,
resX
,
Ang
,
ang_size
,
false
);
hal
::
fastAtan
32f
(
resY
,
resX
,
Ang
,
ang_size
,
false
);
// Loop slides pi/3 window around feature point
for
(
ang1
=
0
;
ang1
<
(
float
)(
2.0
*
CV_PI
);
ang1
+=
0.15
f
)
{
ang2
=
(
ang1
+
(
float
)(
CV_PI
/
3.0
)
>
(
float
)(
2.0
*
CV_PI
)
?
ang1
-
(
float
)(
5.0
*
CV_PI
/
3.0
)
:
ang1
+
(
float
)(
CV_PI
/
3.0
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment