Commit 1eb2fa9e
authored Dec 07, 2017 by Vitaly Tuzov
Added universal intrinsics based implementations for CV_8UC2, CV_8UC3, CV_8UC4 bit-exact resizes.
parent dcdd6af5
Showing 2 changed files with 271 additions and 27 deletions:

modules/imgproc/src/resize.cpp (+219, -16)
modules/imgproc/test/test_resize_bitexact.cpp (+52, -11)
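For orientation before the diff: the code paths touched here sit behind cv::resize with the INTER_LINEAR_EXACT flag, which is also the call the updated test below exercises. A minimal usage sketch covering the channel counts this commit adds; the sizes are illustrative only, not from the commit:

    #include "opencv2/imgproc.hpp"

    int main()
    {
        for (int type : { CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4 })
        {
            cv::Mat src(768, 1024, type), dst;
            cv::randu(src, 0, 256);
            // Bit-exact linear interpolation; the CV_8UC2/C3/C4 cases now run
            // through the universal-intrinsics horizontal resize added below.
            cv::resize(src, dst, cv::Size(512, 384), 0, 0, cv::INTER_LINEAR_EXACT);
        }
        return 0;
    }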
modules/imgproc/src/resize.cpp
@@ -356,7 +356,7 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 1>(uint8_t* src, int, int *o
     }
     for (; i < dst_max - 7 && ofst[i + 7] + 15 <= ofst[dst_width - 1]; i += 8, m += 16, dst += 8)
     {
-        v_uint32x4 v_src01 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 1])));
+        v_uint32x4 v_src01 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 1])));
         v_uint32x4 v_src23 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 2])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 3])));
         v_uint32x4 v_src45 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 4])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 5])));
         v_uint32x4 v_src67 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 6])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 7])));
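For readers new to OpenCV's universal intrinsics: the gather in this loop builds one 128-bit register from two 2-pixel loads. v_load_expand widens eight uint8 lanes to uint16, and v_combine_low concatenates the low halves of two registers. A self-contained sketch of just those two primitives, with illustrative buffer contents:

    #include <cstdint>
    #include "opencv2/core/hal/intrin.hpp"

    int main()
    {
        uint8_t buf[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
        cv::v_uint16x8 a = cv::v_load_expand(buf);     // lanes 1..8 widened to u16
        cv::v_uint16x8 b = cv::v_load_expand(buf + 8); // lanes 9..16 widened to u16
        // Reinterpret as u32 lanes and keep the low half of each register, i.e.
        // the first four u16 lanes of 'a' followed by the first four of 'b'.
        cv::v_uint32x4 lo = cv::v_combine_low(cv::v_reinterpret_as_u32(a),
                                              cv::v_reinterpret_as_u32(b));
        uint32_t out[4];
        cv::v_store(out, lo);
        return 0;
    }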
@@ -390,18 +390,221 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 1>(uint8_t* src, int, int *o
         *(dst++) = src_0;
     }
 }
+template <>
+void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 2>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
+{
+    int i = 0;
+    ufixedpoint16 srccn[8] = { src[0], src[1], src[0], src[1], src[0], src[1], src[0], src[1] };
+    v_uint16x8 v_srccn = v_load((uint16_t*)srccn);
+    for (; i < dst_min - 3; i += 4, m += 8, dst += 8) // Points that fall left of the src image become equal to the leftmost src point
+    {
+        v_store((uint16_t*)dst, v_srccn);
+    }
+    for (; i < dst_min; i++, m += 2)
+    {
+        *(dst++) = srccn[0];
+        *(dst++) = srccn[1];
+    }
+    for (; i < dst_max - 3 && ofst[i + 3] + 7 <= ofst[dst_width - 1]; i += 4, m += 8, dst += 8)
+    {
+        v_uint32x4 v_src0 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i])), v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 1])));
+        v_uint32x4 v_src1 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 2])), v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 3])));
+        v_uint32x4 v_zip0, v_zip1;
+        v_zip(v_src0, v_src1, v_zip0, v_zip1);
+        v_zip(v_zip0, v_zip1, v_src0, v_src1);
+        v_int16x8 v_src0123, v_src4567;
+        v_zip(v_reinterpret_as_s16(v_src0), v_reinterpret_as_s16(v_src1), v_src0123, v_src4567);
+        v_uint32x4 v_mul = v_load((uint32_t*)m); //AaBbCcDd
+        v_zip(v_mul, v_mul, v_zip0, v_zip1); //AaAaBbBb CcCcDdDd
+        v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_src0123, v_reinterpret_as_s16(v_zip0)));
+        v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_src4567, v_reinterpret_as_s16(v_zip1)));
+        v_store((uint16_t*)dst, v_pack(v_res0, v_res1)); //AB1AB2CD1CD2
+    }
+    for (; i < dst_max; i += 1, m += 2)
+    {
+        uint8_t* px = src + 2 * ofst[i];
+        *(dst++) = m[0] * px[0] + m[1] * px[2];
+        *(dst++) = m[0] * px[1] + m[1] * px[3];
+    }
+    srccn[0] = (src + 2 * ofst[dst_width - 1])[0];
+    srccn[1] = (src + 2 * ofst[dst_width - 1])[1];
+    srccn[2] = (src + 2 * ofst[dst_width - 1])[0];
+    srccn[3] = (src + 2 * ofst[dst_width - 1])[1];
+    srccn[4] = (src + 2 * ofst[dst_width - 1])[0];
+    srccn[5] = (src + 2 * ofst[dst_width - 1])[1];
+    srccn[6] = (src + 2 * ofst[dst_width - 1])[0];
+    srccn[7] = (src + 2 * ofst[dst_width - 1])[1];
+    v_srccn = v_load((uint16_t*)srccn);
+    for (; i < dst_width - 3; i += 4, dst += 8) // Points that fall right of the src image become equal to the rightmost src point
+    {
+        v_store((uint16_t*)dst, v_srccn);
+    }
+    for (; i < dst_width; i++)
+    {
+        *(dst++) = srccn[0];
+        *(dst++) = srccn[1];
+    }
+}
+template <>
+void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 3>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
+{
+    int i = 0;
+    ufixedpoint16 srccn[3] = { src[0], src[1], src[2] };
+    v_uint16x8 v_srccn0 = v_setall_u16(((uint16_t*)srccn)[0]);
+    v_uint16x8 v_srccn1 = v_setall_u16(((uint16_t*)srccn)[1]);
+    v_uint16x8 v_srccn2 = v_setall_u16(((uint16_t*)srccn)[2]);
+    for (; i < dst_min - 7; i += 8, m += 16, dst += 24) // Points that fall left of the src image become equal to the leftmost src point
+    {
+        v_store_interleave((uint16_t*)dst, v_srccn0, v_srccn1, v_srccn2);
+    }
+    for (; i < dst_min; i++, m += 2)
+    {
+        *(dst++) = srccn[0];
+        *(dst++) = srccn[1];
+        *(dst++) = srccn[2];
+    }
+    for (; i < dst_max - 7 && ofst[i + 7] + 15 <= ofst[dst_width - 1]; i += 8, m += 16, dst += 24)
+    {
+        v_uint8x16 v_src0_c1, v_src0_c2, v_src0_c3;
+        v_load_deinterleave(src + 3 * ofst[i], v_src0_c1, v_src0_c2, v_src0_c3);
+        v_uint8x16 v_src1_c1, v_src1_c2, v_src1_c3;
+        v_load_deinterleave(src + 3 * ofst[i + 1], v_src1_c1, v_src1_c2, v_src1_c3);
+        v_uint8x16 v_src2_c1, v_src2_c2, v_src2_c3;
+        v_load_deinterleave(src + 3 * ofst[i + 2], v_src2_c1, v_src2_c2, v_src2_c3);
+        v_uint8x16 v_src3_c1, v_src3_c2, v_src3_c3;
+        v_load_deinterleave(src + 3 * ofst[i + 3], v_src3_c1, v_src3_c2, v_src3_c3);
+        v_uint16x8 v_tmp0, v_tmp1, v_tmp2;
+        v_uint16x8 v_src0123_c1, v_src0123_c2, v_src0123_c3;
+        v_zip(v_reinterpret_as_u16(v_src0_c1), v_reinterpret_as_u16(v_src2_c1), v_tmp0, v_tmp1);
+        v_zip(v_reinterpret_as_u16(v_src1_c1), v_reinterpret_as_u16(v_src3_c1), v_tmp1, v_tmp2);
+        v_zip(v_tmp0, v_tmp1, v_src0123_c1, v_tmp2);
+        v_zip(v_reinterpret_as_u16(v_src0_c2), v_reinterpret_as_u16(v_src2_c2), v_tmp0, v_tmp1);
+        v_zip(v_reinterpret_as_u16(v_src1_c2), v_reinterpret_as_u16(v_src3_c2), v_tmp1, v_tmp2);
+        v_zip(v_tmp0, v_tmp1, v_src0123_c2, v_tmp2);
+        v_zip(v_reinterpret_as_u16(v_src0_c3), v_reinterpret_as_u16(v_src2_c3), v_tmp0, v_tmp1);
+        v_zip(v_reinterpret_as_u16(v_src1_c3), v_reinterpret_as_u16(v_src3_c3), v_tmp1, v_tmp2);
+        v_zip(v_tmp0, v_tmp1, v_src0123_c3, v_tmp2);
+        v_load_deinterleave(src + 3 * ofst[i + 4], v_src0_c1, v_src0_c2, v_src0_c3);
+        v_load_deinterleave(src + 3 * ofst[i + 5], v_src1_c1, v_src1_c2, v_src1_c3);
+        v_load_deinterleave(src + 3 * ofst[i + 6], v_src2_c1, v_src2_c2, v_src2_c3);
+        v_load_deinterleave(src + 3 * ofst[i + 7], v_src3_c1, v_src3_c2, v_src3_c3);
+        v_uint16x8 v_src4567_c1, v_src4567_c2, v_src4567_c3;
+        v_zip(v_reinterpret_as_u16(v_src0_c1), v_reinterpret_as_u16(v_src2_c1), v_tmp0, v_tmp1);
+        v_zip(v_reinterpret_as_u16(v_src1_c1), v_reinterpret_as_u16(v_src3_c1), v_tmp1, v_tmp2);
+        v_zip(v_tmp0, v_tmp1, v_src4567_c1, v_tmp2);
+        v_zip(v_reinterpret_as_u16(v_src0_c2), v_reinterpret_as_u16(v_src2_c2), v_tmp0, v_tmp1);
+        v_zip(v_reinterpret_as_u16(v_src1_c2), v_reinterpret_as_u16(v_src3_c2), v_tmp1, v_tmp2);
+        v_zip(v_tmp0, v_tmp1, v_src4567_c2, v_tmp2);
+        v_zip(v_reinterpret_as_u16(v_src0_c3), v_reinterpret_as_u16(v_src2_c3), v_tmp0, v_tmp1);
+        v_zip(v_reinterpret_as_u16(v_src1_c3), v_reinterpret_as_u16(v_src3_c3), v_tmp1, v_tmp2);
+        v_zip(v_tmp0, v_tmp1, v_src4567_c3, v_tmp2);
+        v_expand(v_reinterpret_as_u8(v_combine_low(v_src0123_c1, v_src4567_c1)), v_src0123_c1, v_src4567_c1);
+        v_expand(v_reinterpret_as_u8(v_combine_low(v_src0123_c2, v_src4567_c2)), v_src0123_c2, v_src4567_c2);
+        v_expand(v_reinterpret_as_u8(v_combine_low(v_src0123_c3, v_src4567_c3)), v_src0123_c3, v_src4567_c3);
+        v_int16x8 v_mul0123 = v_load((int16_t*)m);
+        v_int16x8 v_mul4567 = v_load((int16_t*)m + 8);
+        v_uint32x4 v_res0123_c1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0123_c1), v_mul0123));
+        v_uint32x4 v_res0123_c2 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0123_c2), v_mul0123));
+        v_uint32x4 v_res0123_c3 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0123_c3), v_mul0123));
+        v_uint32x4 v_res4567_c1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src4567_c1), v_mul4567));
+        v_uint32x4 v_res4567_c2 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src4567_c2), v_mul4567));
+        v_uint32x4 v_res4567_c3 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src4567_c3), v_mul4567));
+        v_store_interleave((uint16_t*)dst, v_pack(v_res0123_c1, v_res4567_c1), v_pack(v_res0123_c2, v_res4567_c2), v_pack(v_res0123_c3, v_res4567_c3));
+    }
+    for (; i < dst_max; i += 1, m += 2)
+    {
+        uint8_t* px = src + 3 * ofst[i];
+        *(dst++) = m[0] * px[0] + m[1] * px[3];
+        *(dst++) = m[0] * px[1] + m[1] * px[4];
+        *(dst++) = m[0] * px[2] + m[1] * px[5];
+    }
+    srccn[0] = (src + 3 * ofst[dst_width - 1])[0]; v_srccn0 = v_setall_u16(((uint16_t*)srccn)[0]);
+    srccn[1] = (src + 3 * ofst[dst_width - 1])[1]; v_srccn1 = v_setall_u16(((uint16_t*)srccn)[1]);
+    srccn[2] = (src + 3 * ofst[dst_width - 1])[2]; v_srccn2 = v_setall_u16(((uint16_t*)srccn)[2]);
+    for (; i < dst_width - 7; i += 8, dst += 24) // Points that fall right of the src image become equal to the rightmost src point
+    {
+        v_store_interleave((uint16_t*)dst, v_srccn0, v_srccn1, v_srccn2);
+    }
+    for (; i < dst_width; i++)
+    {
+        *(dst++) = srccn[0];
+        *(dst++) = srccn[1];
+        *(dst++) = srccn[2];
+    }
+}
+template <>
+void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 4>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
+{
+    int i = 0;
+    ufixedpoint16 srccn[8] = { src[0], src[1], src[2], src[3], src[0], src[1], src[2], src[3] };
+    v_uint16x8 v_srccn = v_load((uint16_t*)srccn);
+    for (; i < dst_min - 1; i += 2, m += 4, dst += 8) // Points that fall left of the src image become equal to the leftmost src point
+    {
+        v_store((uint16_t*)dst, v_srccn);
+    }
+    if (i < dst_min) // Points that fall left of the src image become equal to the leftmost src point
+    {
+        *(dst++) = srccn[0];
+        *(dst++) = srccn[1];
+        *(dst++) = srccn[2];
+        *(dst++) = srccn[3];
+        i++; m += 2;
+    }
+    for (; i < dst_max - 1 && ofst[i + 1] + 3 <= ofst[dst_width - 1]; i += 2, m += 4, dst += 8)
+    {
+        v_int16x8 v_src01 = v_reinterpret_as_s16(v_load_expand(src + 4 * ofst[i]));
+        v_int16x8 v_src23 = v_reinterpret_as_s16(v_load_expand(src + 4 * ofst[i + 1]));
+        v_int16x8 v_tmp0, v_tmp1;
+        v_recombine(v_src01, v_src23, v_tmp0, v_tmp1);
+        v_zip(v_tmp0, v_tmp1, v_src01, v_src23);
+        v_int16x8 v_mul01 = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[0])); //AaAaAaAa
+        v_int16x8 v_mul23 = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[1])); //BbBbBbBb
+        v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_src01, v_mul01));
+        v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_src23, v_mul23));
+        v_store((uint16_t*)dst, v_pack(v_res0, v_res1)); //AB1AB2CD1CD2
+    }
+    for (; i < dst_max; i += 1, m += 2)
+    {
+        uint8_t* px = src + 4 * ofst[i];
+        *(dst++) = m[0] * px[0] + m[1] * px[4];
+        *(dst++) = m[0] * px[1] + m[1] * px[5];
+        *(dst++) = m[0] * px[2] + m[1] * px[6];
+        *(dst++) = m[0] * px[3] + m[1] * px[7];
+    }
+    srccn[0] = (src + 4 * ofst[dst_width - 1])[0];
+    srccn[1] = (src + 4 * ofst[dst_width - 1])[1];
+    srccn[2] = (src + 4 * ofst[dst_width - 1])[2];
+    srccn[3] = (src + 4 * ofst[dst_width - 1])[3];
+    srccn[4] = (src + 4 * ofst[dst_width - 1])[0];
+    srccn[5] = (src + 4 * ofst[dst_width - 1])[1];
+    srccn[6] = (src + 4 * ofst[dst_width - 1])[2];
+    srccn[7] = (src + 4 * ofst[dst_width - 1])[3];
+    v_srccn = v_load((uint16_t*)srccn);
+    for (; i < dst_width - 1; i += 2, dst += 8) // Points that fall right of the src image become equal to the rightmost src point
+    {
+        v_store((uint16_t*)dst, v_srccn);
+    }
+    if (i < dst_width)
+    {
+        *(dst++) = srccn[0];
+        *(dst++) = srccn[1];
+        *(dst++) = srccn[2];
+        *(dst++) = srccn[3];
+    }
+}
 template <>
 void hlineResizeCn<uint16_t, ufixedpoint32, 2, true, 1>(uint16_t* src, int, int *ofst, ufixedpoint32* m, ufixedpoint32* dst, int dst_min, int dst_max, int dst_width)
 {
-    typedef v_uint32x4 v_fixedtype;
-    typedef uint32_t lanetype;
     int i = 0;
     ufixedpoint32 src_0(src[0]);
-    v_fixedtype v_src_0 = v_setall_u32(*((lanetype*)&src_0));
+    v_uint32x4 v_src_0 = v_setall_u32(*((uint32_t*)&src_0));
     for (; i < dst_min - 3; i += 4, m += 8, dst += 4) // Points that fall left of the src image become equal to the leftmost src point
     {
-        v_store((lanetype*)dst, v_src_0);
+        v_store((uint32_t*)dst, v_src_0);
     }
     for (; i < dst_min; i++, m += 2)
     {
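The interior loops of the new specializations all reduce to the step the scalar tails spell out: dst = m[0]*left + m[1]*right per channel. In vector form that weighted pair-sum is v_dotprod, which multiplies adjacent int16 lanes and adds each pair into an int32 lane. A standalone sketch of the primitive; the values are illustrative:

    #include <cstdint>
    #include <cstdio>
    #include "opencv2/core/hal/intrin.hpp"

    int main()
    {
        // Four (left, right) sample pairs and their (m0, m1) weight pairs.
        int16_t samples[8] = {  10, 20,  30, 40,  50, 60,  70, 80 };
        int16_t weights[8] = { 192, 64, 192, 64, 192, 64, 192, 64 };
        cv::v_int16x8 s = cv::v_load(samples);
        cv::v_int16x8 w = cv::v_load(weights);
        // Lane k of the result is samples[2k]*weights[2k] + samples[2k+1]*weights[2k+1].
        cv::v_int32x4 acc = cv::v_dotprod(s, w);
        int32_t out[4];
        cv::v_store(out, acc);
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); // 3200 8320 13440 18560
        return 0;
    }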
@@ -409,17 +612,17 @@ void hlineResizeCn<uint16_t, ufixedpoint32, 2, true, 1>(uint16_t* src, int, int
     }
     for (; i < dst_max - 3 && ofst[i + 3] + 8 <= ofst[dst_width - 1]; i += 4, m += 8, dst += 4)
     {
-        v_fixedtype v_src0 = v_combine_low(v_load_expand(src + ofst[i]), v_load_expand(src + ofst[i + 1]));
-        v_fixedtype v_mul0 = v_load((lanetype*)m);
-        v_fixedtype v_src1 = v_combine_low(v_load_expand(src + ofst[i + 2]), v_load_expand(src + ofst[i + 3]));
-        v_fixedtype v_mul1 = v_load((lanetype*)m + 4);
-        v_fixedtype v_res0 = v_src0 * v_mul0; //a1a2b1b2
-        v_fixedtype v_res1 = v_src1 * v_mul1; //c1c2d1d2
-        v_fixedtype v_tmp0, v_tmp1;
+        v_uint32x4 v_src0 = v_combine_low(v_load_expand(src + ofst[i]), v_load_expand(src + ofst[i + 1]));
+        v_uint32x4 v_mul0 = v_load((uint32_t*)m);
+        v_uint32x4 v_src1 = v_combine_low(v_load_expand(src + ofst[i + 2]), v_load_expand(src + ofst[i + 3]));
+        v_uint32x4 v_mul1 = v_load((uint32_t*)m + 4);
+        v_uint32x4 v_res0 = v_src0 * v_mul0; //a1a2b1b2
+        v_uint32x4 v_res1 = v_src1 * v_mul1; //c1c2d1d2
+        v_uint32x4 v_tmp0, v_tmp1;
         v_recombine(v_res0, v_res1, v_tmp0, v_tmp1); //a1a2c1c2 b1b2d1d2
         v_zip(v_tmp0, v_tmp1, v_res0, v_res1); //a1b1a2b2 c1d1c2d2
         v_recombine(v_res0, v_res1, v_tmp0, v_tmp1); //a1b1c1d1 a2b2c2d2
-        v_store((lanetype*)dst, v_tmp0 + v_tmp1); //abcd
+        v_store((uint32_t*)dst, v_tmp0 + v_tmp1); //abcd
     }
     for (; i < dst_max; i += 1, m += 2)
     {
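The //a1a2c1c2-style comments in this hunk trace an in-register transpose built from v_recombine (concatenate low halves, then high halves, of two registers) and v_zip (interleave lanes). A sketch of the same shuffle sequence with symbolic lane values:

    #include <cstdint>
    #include "opencv2/core/hal/intrin.hpp"

    int main()
    {
        uint32_t pa[4] = { 0xa1, 0xa2, 0xb1, 0xb2 }; // v_res0 = a1 a2 b1 b2
        uint32_t pb[4] = { 0xc1, 0xc2, 0xd1, 0xd2 }; // v_res1 = c1 c2 d1 d2
        cv::v_uint32x4 res0 = cv::v_load(pa), res1 = cv::v_load(pb), tmp0, tmp1;
        cv::v_recombine(res0, res1, tmp0, tmp1); // a1 a2 c1 c2 | b1 b2 d1 d2
        cv::v_zip(tmp0, tmp1, res0, res1);       // a1 b1 a2 b2 | c1 d1 c2 d2
        cv::v_recombine(res0, res1, tmp0, tmp1); // a1 b1 c1 d1 | a2 b2 c2 d2
        cv::v_uint32x4 sum = tmp0 + tmp1;        // a1+a2 b1+b2 c1+c2 d1+d2
        uint32_t out[4];
        cv::v_store(out, sum);
        return 0;
    }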
@@ -427,10 +630,10 @@ void hlineResizeCn<uint16_t, ufixedpoint32, 2, true, 1>(uint16_t* src, int, int
         *(dst++) = m[0] * px[0] + m[1] * px[1];
     }
     src_0 = (src + ofst[dst_width - 1])[0];
-    v_src_0 = v_setall_u32(*((lanetype*)&src_0));
+    v_src_0 = v_setall_u32(*((uint32_t*)&src_0));
     for (; i < dst_width - 3; i += 4, dst += 4)
     {
-        v_store((lanetype*)dst, v_src_0);
+        v_store((uint32_t*)dst, v_src_0);
     }
     for (; i < dst_width; i++)
     {
@@ -3714,7 +3917,7 @@ void resize(int src_type,
     {
         // in case of inv_scale_x && inv_scale_y is equal to 0.5
        // INTER_AREA (fast) is equal to bit exact INTER_LINEAR
-        if (is_area_fast && iscale_x == 2 && iscale_y == 2)
+        if (is_area_fast && iscale_x == 2 && iscale_y == 2 && cn != 2) // Area resize implementation for 2-channel images isn't bit-exact
             interpolation = INTER_AREA;
         else
         {
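The new cn != 2 guard matters because the substitution is only valid where both kernels agree: per the comments above, at exactly 2x decimation the fast INTER_AREA average matches the bit-exact linear kernel for 1-, 3- and 4-channel images, while the 2-channel INTER_AREA path is not bit-exact. A hypothetical sanity check of the substitution (not part of the commit):

    #include "opencv2/imgproc.hpp"

    int main()
    {
        cv::Mat src(768, 1024, CV_8UC3), a, b;
        cv::randu(src, 0, 256);
        cv::resize(src, a, cv::Size(), 0.5, 0.5, cv::INTER_AREA);
        cv::resize(src, b, cv::Size(), 0.5, 0.5, cv::INTER_LINEAR_EXACT);
        CV_Assert(cv::norm(a, b, cv::NORM_INF) == 0); // expected to match exactly
        return 0;
    }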
modules/imgproc/test/test_resize_bitexact.cpp
@@ -26,18 +26,59 @@ TEST(Resize_Bitexact, Linear8U)
 {
     static const int64_t fixedOne = (1L << fixedShiftU8);
-    int types[] = { CV_8UC1, CV_8UC4 };
-    // NOTICE: 2x downscaling ommitted since it use different rounding
-    // 1/2 1   1 1/2   1/2 1/2   1/4 1/4   1/256 1/256   1/3 1/2   1/3 1/3   1/2 1/3   1/7 1/7
-    Size dstsizes[] = { Size(512, 768), Size(1024, 384), Size(512, 384), Size(256, 192), Size(4, 3), Size(342, 384), Size(342, 256), Size(512, 256), Size(146, 110),
-    // 10/11 10/11   10/12 10/12   251/256   2 2   3 3   7 7
-                        Size(931, 698), Size(853, 640), Size(1004, 753), Size(2048, 1536), Size(3072, 2304), Size(7168, 5376) };
+    struct testmode
+    {
+        int type;
+        Size sz;
+    } modes[] = {
+        { CV_8UC1, Size( 512,  768) }, //   1/2       1
+        { CV_8UC3, Size( 512,  768) },
+        { CV_8UC1, Size(1024,  384) }, //     1     1/2
+        { CV_8UC4, Size(1024,  384) },
+        { CV_8UC1, Size( 512,  384) }, //   1/2     1/2
+        { CV_8UC2, Size( 512,  384) },
+        { CV_8UC3, Size( 512,  384) },
+        { CV_8UC4, Size( 512,  384) },
+        { CV_8UC1, Size( 256,  192) }, //   1/4     1/4
+        { CV_8UC2, Size( 256,  192) },
+        { CV_8UC3, Size( 256,  192) },
+        { CV_8UC4, Size( 256,  192) },
+        { CV_8UC1, Size(   4,    3) }, // 1/256   1/256
+        { CV_8UC2, Size(   4,    3) },
+        { CV_8UC3, Size(   4,    3) },
+        { CV_8UC4, Size(   4,    3) },
+        { CV_8UC1, Size( 342,  384) }, //   1/3     1/2
+        { CV_8UC1, Size( 342,  256) }, //   1/3     1/3
+        { CV_8UC1, Size( 342,  256) },
+        { CV_8UC1, Size( 342,  256) },
+        { CV_8UC1, Size( 342,  256) },
+        { CV_8UC1, Size( 512,  256) }, //   1/2     1/3
+        { CV_8UC1, Size( 146,  110) }, //   1/7     1/7
+        { CV_8UC3, Size( 146,  110) },
+        { CV_8UC4, Size( 146,  110) },
+        { CV_8UC1, Size( 931,  698) }, // 10/11   10/11
+        { CV_8UC2, Size( 931,  698) },
+        { CV_8UC3, Size( 931,  698) },
+        { CV_8UC4, Size( 931,  698) },
+        { CV_8UC1, Size( 853,  640) }, // 10/12   10/12
+        { CV_8UC3, Size( 853,  640) },
+        { CV_8UC4, Size( 853,  640) },
+        { CV_8UC1, Size(1004,  753) }, // 251/256 251/256
+        { CV_8UC2, Size(1004,  753) },
+        { CV_8UC3, Size(1004,  753) },
+        { CV_8UC4, Size(1004,  753) },
+        { CV_8UC1, Size(2048, 1536) }, //     2       2
+        { CV_8UC2, Size(2048, 1536) },
+        { CV_8UC4, Size(2048, 1536) },
+        { CV_8UC1, Size(3072, 2304) }, //     3       3
+        { CV_8UC3, Size(3072, 2304) },
+        { CV_8UC1, Size(7168, 5376) }  //     7       7
+    };
-    for (int dsizeind = 0, _dsizecnt = sizeof(dstsizes) / sizeof(dstsizes[0]); dsizeind < _dsizecnt; ++dsizeind)
-        for (int typeind = 0, _typecnt = sizeof(types) / sizeof(types[0]); typeind < _typecnt; ++typeind)
+    for (int modeind = 0, _modecnt = sizeof(modes) / sizeof(modes[0]); modeind < _modecnt; ++modeind)
     {
-        int type = types[typeind], depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
-        int dcols = dstsizes[dsizeind].width, drows = dstsizes[dsizeind].height;
+        int type = modes[modeind].type, depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+        int dcols = modes[modeind].sz.width, drows = modes[modeind].sz.height;
         int cols = 1024, rows = 768;
         double inv_scale_x = (double)dcols / cols;
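The scale-ratio comments in modes[] follow from the fixed 1024x768 source set up by the context lines above, e.g.:

    // Size( 512,  384): inv_scale_x =  512/1024 = 1/2, inv_scale_y =  384/768 = 1/2
    // Size( 342,  256): inv_scale_x =  342/1024 ≈ 1/3, inv_scale_y =  256/768 = 1/3
    // Size(2048, 1536): inv_scale_x = 2048/1024 = 2,   inv_scale_y = 1536/768 = 2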
@@ -111,7 +152,7 @@ TEST(Resize_Bitexact, Linear8U)
             cv::resize(src, dst, Size(dcols, drows), 0, 0, cv::INTER_LINEAR_EXACT);
             EXPECT_GE(0, cvtest::norm(refdst, dst, cv::NORM_L1))
-                << "Resize from " << cols << "x" << rows << " to " << dcols << "x" << drows << " failed with max diff " << cvtest::norm(refdst, dst, cv::NORM_INF);
+                << "Resize " << cn << "-chan mat from " << cols << "x" << rows << " to " << dcols << "x" << drows << " failed with max diff " << cvtest::norm(refdst, dst, cv::NORM_INF);
         }
     }