opencv / Commits / 790927bb

Commit 790927bb authored Sep 25, 2019 by Alexander Alekhin

Merge pull request #15582 from terfendail:pyrdown_oob

Parents: e923712d, 1c17b328

Showing 1 changed file with 42 additions and 42 deletions

modules/imgproc/src/pyramids.cpp  (+42, -42)
@@ -87,13 +87,13 @@ template<typename T1, typename T2> int PyrUpVecV(T1**, T2**, int) { return 0; }
 template<> int PyrDownVecH<uchar, int, 1>(const uchar* src, int* row, int width)
 {
     int x = 0;
-    const uchar *src0 = src, *src2 = src + 2, *src4 = src + 3;
+    const uchar *src01 = src, *src23 = src + 2, *src4 = src + 3;

     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_reinterpret_as_s16(vx_load_expand(src0)), v_1_4) +
-                     v_dotprod(v_reinterpret_as_s16(vx_load_expand(src2)), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_reinterpret_as_s16(vx_load_expand(src01)), v_1_4) +
+                     v_dotprod(v_reinterpret_as_s16(vx_load_expand(src23)), v_6_4) +
                      (v_reinterpret_as_s32(vx_load_expand(src4)) >> 16));
     vx_cleanup();
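Note on the hunk above (not part of the commit): the renamed pointers make the tap grouping explicit. src01 feeds the {1, 4} coefficients packed into 0x00040001, src23 feeds the {6, 4} coefficients in 0x00040006, and src4 supplies the last tap through the reinterpret-and-shift. A minimal scalar sketch of what this specialization computes per output element, assuming the usual 1-4-6-4-1 pyramid row filter:

// Scalar sketch (not from the patch) of one output of PyrDownVecH<uchar, int, 1>:
// a 1-4-6-4-1 weighted sum over five consecutive source pixels, advancing two
// source pixels per output (the downsampling step).
static void pyrDownRowScalarSketch(const unsigned char* src, int* row, int width)
{
    for (int x = 0; x < width; x++)
    {
        const unsigned char* s = src + 2 * x;   // src01 + 2x in the SIMD code
        row[x] = s[0] * 1 + s[1] * 4            // v_dotprod(..., v_1_4)
               + s[2] * 6 + s[3] * 4            // v_dotprod(..., v_6_4)
               + s[4];                          // (s32(load_expand(src4)) >> 16)
    }
}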
@@ -102,13 +102,13 @@ template<> int PyrDownVecH<uchar, int, 1>(const uchar* src, int* row, int width)
 template<> int PyrDownVecH<uchar, int, 2>(const uchar* src, int* row, int width)
 {
     int x = 0;
-    const uchar *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const uchar *src01 = src, *src23 = src + 4, *src4 = src + 6;

     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src0))), v_1_4) +
-                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src2))), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src01))), v_1_4) +
+                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src23))), v_6_4) +
                      (v_reinterpret_as_s32(v_interleave_pairs(vx_load_expand(src4))) >> 16));
     vx_cleanup();
@@ -150,13 +150,13 @@ template<> int PyrDownVecH<uchar, int, 3>(const uchar* src, int* row, int width)
 template<> int PyrDownVecH<uchar, int, 4>(const uchar* src, int* row, int width)
 {
     int x = 0;
-    const uchar *src0 = src, *src2 = src + 8, *src4 = src + 12;
+    const uchar *src01 = src, *src23 = src + 8, *src4 = src + 12;

     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src0))), v_1_4) +
-                     v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src2))), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src01))), v_1_4) +
+                     v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src23))), v_6_4) +
                      (v_reinterpret_as_s32(v_interleave_quads(vx_load_expand(src4))) >> 16));
     vx_cleanup();
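For the 2- and 4-channel specializations above, same-channel samples are not adjacent in memory, so the loaded vectors are reshuffled before the pairwise dot products. The following scalar model reflects my reading of the universal-intrinsics reference behaviour of v_interleave_pairs / v_interleave_quads (an assumption, not code from this patch): the shuffle puts same-channel samples of two neighbouring pixels side by side, so the packed {1, 4} and {6, 4} coefficients act per channel.

// Scalar model (assumed lane order, not from the patch) of the shuffles used above.
// Within each group of four lanes v_interleave_pairs yields {a0, a2, a1, a3};
// within each group of eight lanes v_interleave_quads yields {a0, a4, a1, a5, a2, a6, a3, a7}.
#include <cstdio>

static void interleave_pairs(const int* a, int* c, int n)   // n is a multiple of 4
{
    for (int i = 0; i < n; i += 4)
    {
        c[i] = a[i]; c[i + 1] = a[i + 2]; c[i + 2] = a[i + 1]; c[i + 3] = a[i + 3];
    }
}

static void interleave_quads(const int* a, int* c, int n)   // n is a multiple of 8
{
    for (int i = 0; i < n; i += 8)
    {
        c[i]     = a[i];     c[i + 1] = a[i + 4];
        c[i + 2] = a[i + 1]; c[i + 3] = a[i + 5];
        c[i + 4] = a[i + 2]; c[i + 5] = a[i + 6];
        c[i + 6] = a[i + 3]; c[i + 7] = a[i + 7];
    }
}

int main()
{
    // Two 4-channel pixels as they sit in memory: p0c0..p0c3, p1c0..p1c3.
    int a[8] = { 100, 101, 102, 103, 110, 111, 112, 113 }, c[8];
    interleave_quads(a, c, 8);
    for (int i = 0; i < 8; i++) std::printf("%d ", c[i]);   // 100 110 101 111 102 112 103 113
    std::printf("\n");
    (void)interleave_pairs;   // pairs variant corresponds to the 2-channel case
    return 0;
}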
@@ -166,13 +166,13 @@ template<> int PyrDownVecH<uchar, int, 4>(const uchar* src, int* row, int width)
 template<> int PyrDownVecH<short, int, 1>(const short* src, int* row, int width)
 {
     int x = 0;
-    const short *src0 = src, *src2 = src + 2, *src4 = src + 3;
+    const short *src01 = src, *src23 = src + 2, *src4 = src + 3;

     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(vx_load(src0), v_1_4) +
-                     v_dotprod(vx_load(src2), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(vx_load(src01), v_1_4) +
+                     v_dotprod(vx_load(src23), v_6_4) +
                      (v_reinterpret_as_s32(vx_load(src4)) >> 16));
     vx_cleanup();
@@ -181,13 +181,13 @@ template<> int PyrDownVecH<short, int, 1>(const short* src, int* row, int width)
 template<> int PyrDownVecH<short, int, 2>(const short* src, int* row, int width)
 {
     int x = 0;
-    const short *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const short *src01 = src, *src23 = src + 4, *src4 = src + 6;

     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_pairs(vx_load(src0)), v_1_4) +
-                     v_dotprod(v_interleave_pairs(vx_load(src2)), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_pairs(vx_load(src01)), v_1_4) +
+                     v_dotprod(v_interleave_pairs(vx_load(src23)), v_6_4) +
                      (v_reinterpret_as_s32(v_interleave_pairs(vx_load(src4))) >> 16));
     vx_cleanup();
@@ -247,15 +247,15 @@ template<> int PyrDownVecH<short, int, 4>(const short* src, int* row, int width)
 template<> int PyrDownVecH<ushort, int, 1>(const ushort* src, int* row, int width)
 {
     int x = 0;
-    const ushort *src0 = src, *src2 = src + 2, *src4 = src + 3;
+    const ushort *src01 = src, *src23 = src + 2, *src4 = src + 3;

     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
     v_uint16 v_half = vx_setall_u16(0x8000);
     v_int32 v_half15 = vx_setall_s32(0x00078000);
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src0), v_half)), v_1_4) +
-                     v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src2), v_half)), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src01), v_half)), v_1_4) +
+                     v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src23), v_half)), v_6_4) +
                      v_reinterpret_as_s32(v_reinterpret_as_u32(vx_load(src4)) >> 16) + v_half15);
     vx_cleanup();
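The ushort specializations above cannot feed 16-bit unsigned values straight into v_dotprod, which multiplies signed 16-bit lanes. A small arithmetic sketch (not from the patch) of the bias trick visible in the code: the four dot-product inputs are shifted down by 0x8000 with v_sub_wrap, the fifth tap is taken unbiased from the high halves of the src4 load, and v_half15 = 15 * 0x8000 = 0x00078000 restores the bias scaled by the total weight 1 + 4 + 6 + 4 of the biased taps.

// Arithmetic sketch (not from the patch) of the unsigned-to-signed bias trick.
#include <cassert>
#include <cstdint>

static int32_t biasedTap(uint16_t v, int w) { return int16_t(uint16_t(v - 0x8000)) * w; }

int main()
{
    uint16_t s[5] = { 65535, 40000, 12345, 0, 54321 };
    int32_t direct = s[0] + 4 * s[1] + 6 * s[2] + 4 * s[3] + s[4];
    int32_t biased = biasedTap(s[0], 1) + biasedTap(s[1], 4)
                   + biasedTap(s[2], 6) + biasedTap(s[3], 4)
                   + s[4]                // unbiased fifth tap (high half of src4 load)
                   + 15 * 0x8000;        // v_half15 compensation
    assert(direct == biased);            // the bias cancels exactly
    return 0;
}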
@@ -264,15 +264,15 @@ template<> int PyrDownVecH<ushort, int, 1>(const ushort* src, int* row, int width)
 template<> int PyrDownVecH<ushort, int, 2>(const ushort* src, int* row, int width)
 {
     int x = 0;
-    const ushort *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const ushort *src01 = src, *src23 = src + 4, *src4 = src + 6;

     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
     v_uint16 v_half = vx_setall_u16(0x8000);
     v_int32 v_half15 = vx_setall_s32(0x00078000);
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src0), v_half))), v_1_4) +
-                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src2), v_half))), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src01), v_half))), v_1_4) +
+                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src23), v_half))), v_6_4) +
                      v_reinterpret_as_s32(v_reinterpret_as_u32(v_interleave_pairs(vx_load(src4))) >> 16) + v_half15);
     vx_cleanup();
@@ -344,15 +344,15 @@ template<> int PyrDownVecH<ushort, int, 4>(const ushort* src, int* row, int width)
 template<> int PyrDownVecH<float, float, 1>(const float* src, float* row, int width)
 {
     int x = 0;
-    const float *src0 = src, *src2 = src + 2, *src4 = src + 4;
+    const float *src01 = src, *src23 = src + 2, *src4 = src + 3;

     v_float32 _4 = vx_setall_f32(4.f), _6 = vx_setall_f32(6.f);
-    for (; x <= width - v_float32::nlanes; x += v_float32::nlanes, src0 += 2*v_float32::nlanes, src2 += 2*v_float32::nlanes, src4 += 2*v_float32::nlanes, row += v_float32::nlanes)
+    for (; x <= width - v_float32::nlanes; x += v_float32::nlanes, src01 += 2*v_float32::nlanes, src23 += 2*v_float32::nlanes, src4 += 2*v_float32::nlanes, row += v_float32::nlanes)
     {
         v_float32 r0, r1, r2, r3, r4, rtmp;
-        v_load_deinterleave(src0, r0, r1);
-        v_load_deinterleave(src2, r2, r3);
-        v_load_deinterleave(src4, r4, rtmp);
+        v_load_deinterleave(src01, r0, r1);
+        v_load_deinterleave(src23, r2, r3);
+        v_load_deinterleave(src4, rtmp, r4);
         v_store(row, v_muladd(r2, _6, v_muladd(r1 + r3, _4, r0 + r4)));
     }
     vx_cleanup();
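As far as the shown hunks go, the floating-point specializations are where the read positions actually change, which matches the branch name pyrdown_oob of #15582: src4 moves from src + 4 to src + 3 and the two v_load_deinterleave outputs are swapped, so r4 still receives the samples at offsets 4, 6, 8, ... while the load itself starts one element earlier and no longer touches one element past the last sample the 5-tap filter needs. A small index sketch (not from the patch, with an assumed lane count standing in for v_float32::nlanes) of that bookkeeping:

// Index sketch (not from the patch) of the src4 load before and after the change.
#include <cstdio>

int main()
{
    const int n = 4;                       // stand-in for v_float32::nlanes
    // old: deinterleave 2n elements starting at offset 4; r4 = even slots of the block
    int old_r4_last   = 4 + 2 * (n - 1);   // last sample kept in r4
    int old_load_last = 4 + 2 * n - 1;     // last element touched by the load
    // new: deinterleave 2n elements starting at offset 3; r4 = odd slots of the block
    int new_r4_last   = 3 + 2 * (n - 1) + 1;
    int new_load_last = 3 + 2 * n - 1;
    std::printf("last sample in r4: old %d, new %d (identical)\n", old_r4_last, new_r4_last);
    std::printf("last element read: old %d, new %d\n", old_load_last, new_load_last);
    return 0;   // the new variant reads exactly up to offset 2n + 2, the filter's last tap
}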
@@ -362,14 +362,14 @@ template<> int PyrDownVecH<float, float, 1>(const float* src, float* row, int width)
 template<> int PyrDownVecH<float, float, 2>(const float* src, float* row, int width)
 {
     int x = 0;
-    const float *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const float *src01 = src, *src23 = src + 4, *src4 = src + 6;

     v_float32 _4 = vx_setall_f32(4.f), _6 = vx_setall_f32(6.f);
-    for (; x <= width - 2*v_float32::nlanes; x += 2*v_float32::nlanes, src0 += 4*v_float32::nlanes, src2 += 4*v_float32::nlanes, src4 += 4*v_float32::nlanes, row += 2*v_float32::nlanes)
+    for (; x <= width - 2*v_float32::nlanes; x += 2*v_float32::nlanes, src01 += 4*v_float32::nlanes, src23 += 4*v_float32::nlanes, src4 += 4*v_float32::nlanes, row += 2*v_float32::nlanes)
     {
         v_float32 r0a, r0b, r1a, r1b, r2a, r2b, r3a, r3b, r4a, r4b, rtmpa, rtmpb;
-        v_load_deinterleave(src0, r0a, r0b, r1a, r1b);
-        v_load_deinterleave(src2, r2a, r2b, r3a, r3b);
+        v_load_deinterleave(src01, r0a, r0b, r1a, r1b);
+        v_load_deinterleave(src23, r2a, r2b, r3a, r3b);
         v_load_deinterleave(src4, rtmpa, rtmpb, r4a, r4b);
         v_store_interleave(row, v_muladd(r2a, _6, v_muladd(r1a + r3a, _4, r0a + r4a)), v_muladd(r2b, _6, v_muladd(r1b + r3b, _4, r0b + r4b)));
     }
@@ -430,15 +430,15 @@ template<> int PyrDownVecH<float, float, 4>(const float* src, float* row, int width)
 template<> int PyrDownVecH<double, double, 1>(const double* src, double* row, int width)
 {
     int x = 0;
-    const double *src0 = src, *src2 = src + 2, *src4 = src + 4;
+    const double *src01 = src, *src23 = src + 2, *src4 = src + 3;

     v_float64 _4 = vx_setall_f64(4.f), _6 = vx_setall_f64(6.f);
-    for (; x <= width - v_float64::nlanes; x += v_float64::nlanes, src0 += 2*v_float64::nlanes, src2 += 2*v_float64::nlanes, src4 += 2*v_float64::nlanes, row += v_float64::nlanes)
+    for (; x <= width - v_float64::nlanes; x += v_float64::nlanes, src01 += 2*v_float64::nlanes, src23 += 2*v_float64::nlanes, src4 += 2*v_float64::nlanes, row += v_float64::nlanes)
     {
         v_float64 r0, r1, r2, r3, r4, rtmp;
-        v_load_deinterleave(src0, r0, r1);
-        v_load_deinterleave(src2, r2, r3);
-        v_load_deinterleave(src4, r4, rtmp);
+        v_load_deinterleave(src01, r0, r1);
+        v_load_deinterleave(src23, r2, r3);
+        v_load_deinterleave(src4, rtmp, r4);
         v_store(row, v_muladd(r2, _6, v_muladd(r1 + r3, _4, r0 + r4)));
     }
     vx_cleanup();