opencv · commit e00d0528

Merge pull request #8397 from woodychow:initUndistortRectifyMap_avx2

Authored 8 years ago by Vadim Pisarevsky
Parents: b683e682, 05476d66
Showing 1 changed file with 148 additions and 1 deletion.

modules/imgproc/src/undistort.cpp  (+148, -1)
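The change vectorizes the inner per-pixel loop of cv::initUndistortRectifyMap with AVX2 intrinsics: four map entries are computed per iteration in __m256d lanes, with FMA variants taken when CV_FMA3 is available, and the existing scalar loop is kept as the tail for the remaining columns. For orientation only (this formula is from OpenCV's documented camera model, not part of the diff), the radial factor the loop accumulates into __kr is the rational distortion model

    k_r = (1 + k1*r^2 + k2*r^4 + k3*r^6) / (1 + k4*r^2 + k5*r^4 + k6*r^6),  where r^2 = x^2 + y^2

and the tangential (p1, p2), thin-prism (s1..s4), and sensor-tilt (matTilt) terms enter xd, yd, and the final (u, v) the same way as in the scalar code that follows the vector block.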
@@ -136,6 +136,10 @@ void cv::initUndistortRectifyMap( InputArray _cameraMatrix, InputArray _distCoef
     cv::Matx33d matTilt = cv::Matx33d::eye();
     cv::detail::computeTiltProjectionMatrix(tauX, tauY, &matTilt);
+#if CV_AVX2
+    bool USE_AVX2 = cv::checkHardwareSupport(CV_CPU_AVX2);
+#endif
+
     for( int i = 0; i < size.height; i++ )
     {
         float* m1f = map1.ptr<float>(i);
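This first hunk introduces the compile-time/run-time guard that the large hunk below relies on. A minimal sketch of the dispatch pattern, with the loop bodies elided (the variable names follow the diff; this is an illustration, not the committed code):

    #if CV_AVX2                 // the compiler is allowed to emit AVX2 code
        bool USE_AVX2 = cv::checkHardwareSupport(CV_CPU_AVX2);  // the CPU actually has it
    #endif
        int j = 0;
    #if CV_AVX2
        if (USE_AVX2)
            for (; j <= size.width - 4; j += 4)
            { /* process 4 pixels per iteration in __m256d lanes */ }
    #endif
        for (; j < size.width; j++)
        { /* scalar code finishes the row */ }

Because j persists across both loops, any row width is handled, and machines without AVX2 simply take the scalar path.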
@@ -144,7 +148,150 @@ void cv::initUndistortRectifyMap( InputArray _cameraMatrix, InputArray _distCoef
         ushort* m2 = (ushort*)m2f;
         double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8];
 
-        for( int j = 0; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] )
+        int j = 0;
+
+#if CV_AVX2
+        if (USE_AVX2)
+        {
+            static const __m256d __one = _mm256_set1_pd(1.0);
+            static const __m256d __two = _mm256_set1_pd(2.0);
+
+            const __m256d __matTilt_00 = _mm256_set1_pd(matTilt(0, 0));
+            const __m256d __matTilt_10 = _mm256_set1_pd(matTilt(1, 0));
+            const __m256d __matTilt_20 = _mm256_set1_pd(matTilt(2, 0));
+
+            const __m256d __matTilt_01 = _mm256_set1_pd(matTilt(0, 1));
+            const __m256d __matTilt_11 = _mm256_set1_pd(matTilt(1, 1));
+            const __m256d __matTilt_21 = _mm256_set1_pd(matTilt(2, 1));
+
+            const __m256d __matTilt_02 = _mm256_set1_pd(matTilt(0, 2));
+            const __m256d __matTilt_12 = _mm256_set1_pd(matTilt(1, 2));
+            const __m256d __matTilt_22 = _mm256_set1_pd(matTilt(2, 2));
+
+            for (; j <= size.width - 4; j += 4, _x += 4 * ir[0], _y += 4 * ir[3], _w += 4 * ir[6])
+            {
+                // Question: Should we load the constants first?
+                __m256d __w = _mm256_div_pd(__one,
+                    _mm256_set_pd(_w + 3 * ir[6], _w + 2 * ir[6], _w + ir[6], _w));
+                __m256d __x = _mm256_mul_pd(
+                    _mm256_set_pd(_x + 3 * ir[0], _x + 2 * ir[0], _x + ir[0], _x), __w);
+                __m256d __y = _mm256_mul_pd(
+                    _mm256_set_pd(_y + 3 * ir[3], _y + 2 * ir[3], _y + ir[3], _y), __w);
+                __m256d __x2 = _mm256_mul_pd(__x, __x);
+                __m256d __y2 = _mm256_mul_pd(__y, __y);
+                __m256d __r2 = _mm256_add_pd(__x2, __y2);
+                __m256d __2xy = _mm256_mul_pd(__two, _mm256_mul_pd(__x, __y));
+                __m256d __kr = _mm256_div_pd(
+#if CV_FMA3
+                    _mm256_fmadd_pd(_mm256_fmadd_pd(_mm256_fmadd_pd(
+                        _mm256_set1_pd(k3), __r2, _mm256_set1_pd(k2)), __r2, _mm256_set1_pd(k1)), __r2, __one),
+                    _mm256_fmadd_pd(_mm256_fmadd_pd(_mm256_fmadd_pd(
+                        _mm256_set1_pd(k6), __r2, _mm256_set1_pd(k5)), __r2, _mm256_set1_pd(k4)), __r2, __one)
+#else
+                    _mm256_add_pd(__one, _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(
+                        _mm256_mul_pd(_mm256_set1_pd(k3), __r2), _mm256_set1_pd(k2)), __r2), _mm256_set1_pd(k1)), __r2)),
+                    _mm256_add_pd(__one, _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(
+                        _mm256_mul_pd(_mm256_set1_pd(k6), __r2), _mm256_set1_pd(k5)), __r2), _mm256_set1_pd(k4)), __r2))
+#endif
+                );
+                __m256d __r22 = _mm256_mul_pd(__r2, __r2);
+#if CV_FMA3
+                __m256d __xd = _mm256_fmadd_pd(__x, __kr,
+                    _mm256_add_pd(
+                        _mm256_fmadd_pd(_mm256_set1_pd(p1), __2xy,
+                            _mm256_mul_pd(_mm256_set1_pd(p2), _mm256_fmadd_pd(__two, __x2, __r2))),
+                        _mm256_fmadd_pd(_mm256_set1_pd(s1), __r2,
+                            _mm256_mul_pd(_mm256_set1_pd(s2), __r22))));
+                __m256d __yd = _mm256_fmadd_pd(__y, __kr,
+                    _mm256_add_pd(
+                        _mm256_fmadd_pd(_mm256_set1_pd(p1), _mm256_fmadd_pd(__two, __y2, __r2),
+                            _mm256_mul_pd(_mm256_set1_pd(p2), __2xy)),
+                        _mm256_fmadd_pd(_mm256_set1_pd(s3), __r2,
+                            _mm256_mul_pd(_mm256_set1_pd(s4), __r22))));
+
+                __m256d __vecTilt2 = _mm256_fmadd_pd(__matTilt_20, __xd,
+                    _mm256_fmadd_pd(__matTilt_21, __yd, __matTilt_22));
+#else
+                __m256d __xd = _mm256_add_pd(_mm256_mul_pd(__x, __kr),
+                    _mm256_add_pd(
+                        _mm256_add_pd(
+                            _mm256_mul_pd(_mm256_set1_pd(p1), __2xy),
+                            _mm256_mul_pd(_mm256_set1_pd(p2),
+                                _mm256_add_pd(__r2, _mm256_mul_pd(__two, __x2)))),
+                        _mm256_add_pd(
+                            _mm256_mul_pd(_mm256_set1_pd(s1), __r2),
+                            _mm256_mul_pd(_mm256_set1_pd(s2), __r22))));
+                __m256d __yd = _mm256_add_pd(_mm256_mul_pd(__y, __kr),
+                    _mm256_add_pd(
+                        _mm256_add_pd(
+                            _mm256_mul_pd(_mm256_set1_pd(p1),
+                                _mm256_add_pd(__r2, _mm256_mul_pd(__two, __y2))),
+                            _mm256_mul_pd(_mm256_set1_pd(p2), __2xy)),
+                        _mm256_add_pd(
+                            _mm256_mul_pd(_mm256_set1_pd(s3), __r2),
+                            _mm256_mul_pd(_mm256_set1_pd(s4), __r22))));
+
+                __m256d __vecTilt2 = _mm256_add_pd(
+                    _mm256_add_pd(_mm256_mul_pd(__matTilt_20, __xd),
+                                  _mm256_mul_pd(__matTilt_21, __yd)),
+                    __matTilt_22);
+#endif
+                __m256d __invProj = _mm256_blendv_pd(__one,
+                    _mm256_div_pd(__one, __vecTilt2),
+                    _mm256_cmp_pd(__vecTilt2, _mm256_setzero_pd(), _CMP_EQ_OQ));
+#if CV_FMA3
+                __m256d __u = _mm256_fmadd_pd(__matTilt_00, __xd,
+                    _mm256_fmadd_pd(__matTilt_01, __yd, __matTilt_02));
+                __u = _mm256_fmadd_pd(_mm256_mul_pd(_mm256_set1_pd(fx), __invProj), __u,
+                    _mm256_set1_pd(u0));
+
+                __m256d __v = _mm256_fmadd_pd(__matTilt_10, __xd,
+                    _mm256_fmadd_pd(__matTilt_11, __yd, __matTilt_12));
+                __v = _mm256_fmadd_pd(_mm256_mul_pd(_mm256_set1_pd(fy), __invProj), __v,
+                    _mm256_set1_pd(v0));
+#else
+                __m256d __u = _mm256_add_pd(
+                    _mm256_add_pd(_mm256_mul_pd(__matTilt_00, __xd),
+                                  _mm256_mul_pd(__matTilt_01, __yd)),
+                    __matTilt_02);
+                __u = _mm256_add_pd(
+                    _mm256_mul_pd(_mm256_mul_pd(_mm256_set1_pd(fx), __invProj), __u),
+                    _mm256_set1_pd(u0));
+
+                __m256d __v = _mm256_add_pd(
+                    _mm256_add_pd(_mm256_mul_pd(__matTilt_10, __xd),
+                                  _mm256_mul_pd(__matTilt_11, __yd)),
+                    __matTilt_12);
+                __v = _mm256_add_pd(
+                    _mm256_mul_pd(_mm256_mul_pd(_mm256_set1_pd(fy), __invProj), __v),
+                    _mm256_set1_pd(v0));
+#endif
+                if (m1type == CV_32FC1)
+                {
+                    _mm_storeu_ps(&m1f[j], _mm256_cvtpd_ps(__u));
+                    _mm_storeu_ps(&m2f[j], _mm256_cvtpd_ps(__v));
+                }
+                else if (m1type == CV_32FC2)
+                {
+                    __m128 __u_float = _mm256_cvtpd_ps(__u);
+                    __m128 __v_float = _mm256_cvtpd_ps(__v);
+
+                    _mm_storeu_ps(&m1f[j * 2], _mm_unpacklo_ps(__u_float, __v_float));
+                    _mm_storeu_ps(&m1f[j * 2 + 4], _mm_unpackhi_ps(__u_float, __v_float));
+                }
+                else // m1type == CV_16SC2
+                {
+                    __u = _mm256_mul_pd(__u, _mm256_set1_pd(INTER_TAB_SIZE));
+                    __v = _mm256_mul_pd(__v, _mm256_set1_pd(INTER_TAB_SIZE));
+
+                    __m128 __u_float = _mm256_cvtpd_ps(__u);
+                    __m128 __v_float = _mm256_cvtpd_ps(__v);
+                    _mm256_zeroupper();
+
+                    static const __m128 __int_max = _mm_set1_ps(std::numeric_limits<int>::max());
+                    static const __m128 __int_min = _mm_set1_ps(std::numeric_limits<int>::min());
+                    __u_float = _mm_max_ps(_mm_min_ps(__u_float, __int_max), __int_min);
+                    __v_float = _mm_max_ps(_mm_min_ps(__v_float, __int_max), __int_min);
+
+                    __m128i __iu = _mm_cvtps_epi32(__u_float);
+                    __m128i __iv = _mm_cvtps_epi32(__v_float);
+
+                    static const __m128i __INTER_TAB_SIZE_m1 = _mm_set1_epi32(INTER_TAB_SIZE - 1);
+                    __m128i __m2 = _mm_add_epi32(
+                        _mm_mul_epi32(_mm_and_si128(__iv, __INTER_TAB_SIZE_m1),
+                                      _mm_set1_epi32(INTER_TAB_SIZE)),
+                        _mm_and_si128(__iu, __INTER_TAB_SIZE_m1));
+                    __m2 = _mm_packus_epi16(__m2, __m2);
+                    _mm_maskstore_epi64((long long int*)&m2[j],
+                        _mm_set_epi32(0, 0, 0xFFFFFFFF, 0xFFFFFFFF), __m2);
+
+                    // gcc4.9 does not support _mm256_set_m128
+                    // __m256i __m1 = _mm256_set_m128i(__iv, __iu);
+                    __m256i __m1;
+                    __m1 = _mm256_inserti128_si256(__m1, __iu, 0);
+                    __m1 = _mm256_inserti128_si256(__m1, __iv, 1);
+                    __m1 = _mm256_srli_epi32(__m1, INTER_BITS); // v3 v2 v1 v0 u3 u2 u1 u0 (int32_t)
+                    static const __m256i __permute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+                    __m1 = _mm256_permutevar8x32_epi32(__m1, __permute_mask); // v3 u3 v2 u2 v1 u1 v0 u0 (int32_t)
+                    __m1 = _mm256_packs_epi32(__m1, __m1); // x x x x v3 u3 v2 u2 x x x x v1 u1 v0 u0 (int16_t)
+                    _mm_storeu_si128((__m128i*)&m1[j * 2],
+                        _mm256_extracti128_si256(_mm256_permute4x64_epi64(__m1, (2 << 2) + 0), 0));
+                }
+            }
+        }
+#endif
+
+        for( ; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] )
         {
             double w = 1./_w, x = _x*w, y = _y*w;
             double x2 = x*x, y2 = y*y;
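The maps this function fills are consumed by cv::remap. A minimal, self-contained usage sketch follows; the camera matrix and distortion coefficients are hypothetical illustration values (not from this commit), and OpenCV 3.x headers are assumed. The AVX2 path above is selected automatically at run time.

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    // Hypothetical pinhole intrinsics and an 8-element rational-model
    // coefficient vector: (k1, k2, p1, p2, k3, k4, k5, k6).
    cv::Mat K = (cv::Mat_<double>(3, 3) << 800, 0, 320,
                                             0, 800, 240,
                                             0,   0,   1);
    cv::Mat dist = (cv::Mat_<double>(1, 8) << -0.2, 0.05, 1e-3, 1e-3, 0, 0, 0, 0);
    cv::Size size(640, 480);

    // Build the maps once; CV_16SC2 exercises the fixed-point branch above.
    cv::Mat map1, map2;
    cv::initUndistortRectifyMap(K, dist, cv::Mat(), K, size, CV_16SC2, map1, map2);

    // Undistort frames by remapping through the precomputed maps.
    cv::Mat src(size, CV_8UC3, cv::Scalar::all(128)), dst;
    cv::remap(src, dst, map1, map2, cv::INTER_LINEAR);
    return 0;
}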