Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
9df8400f
Commit
9df8400f
authored
Jul 20, 2016
by
k-shinotsuka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
improve RGB5x52Gray()
parent
b34272f8
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
50 additions
and
64 deletions
+50
-64
color.cpp
modules/imgproc/src/color.cpp
+50
-64
No files found.
modules/imgproc/src/color.cpp
View file @
9df8400f
...
...
@@ -1107,12 +1107,13 @@ struct RGB5x52Gray
v_fc
=
vdupq_n_u16
(
0xfc
);
#elif CV_SSE2
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
v_b2y
=
_mm_set1_epi16
(
B2Y
);
v_g2y
=
_mm_set1_epi16
(
G2Y
);
v_r2y
=
_mm_set1_epi16
(
R2Y
);
v_delta
=
_mm_set1_epi32
(
1
<<
(
yuv_shift
-
1
));
v_f8
=
_mm_set1_epi16
(
0xf8
);
v_fc
=
_mm_set1_epi16
(
0xfc
);
const
__m128i
v_b2y
=
_mm_set1_epi16
(
B2Y
);
const
__m128i
v_g2y
=
_mm_set1_epi16
(
G2Y
);
v_bg2y
=
_mm_unpacklo_epi16
(
v_b2y
,
v_g2y
);
const
__m128i
v_r2y
=
_mm_set1_epi16
(
R2Y
);
const
__m128i
v_one
=
_mm_set1_epi16
(
1
);
v_rd2y
=
_mm_unpacklo_epi16
(
v_r2y
,
v_one
);
v_delta
=
_mm_slli_epi16
(
v_one
,
yuv_shift
-
1
);
#endif
}
...
...
@@ -1141,37 +1142,30 @@ struct RGB5x52Gray
#elif CV_SSE2
if
(
haveSIMD
)
{
__m128i
v_zero
=
_mm_setzero_si128
();
for
(
;
i
<=
n
-
8
;
i
+=
8
)
{
__m128i
v_src
=
_mm_loadu_si128
((
__m128i
const
*
)((
ushort
*
)
src
+
i
));
__m128i
v_t0
=
_mm_and_si128
(
_mm_slli_epi16
(
v_src
,
3
),
v_f8
),
v_t1
=
_mm_and_si128
(
_mm_srli_epi16
(
v_src
,
3
),
v_fc
),
v_t2
=
_mm_and_si128
(
_mm_srli_epi16
(
v_src
,
8
),
v_f8
);
__m128i
v_mullo_b
=
_mm_mullo_epi16
(
v_t0
,
v_b2y
);
__m128i
v_mullo_g
=
_mm_mullo_epi16
(
v_t1
,
v_g2y
);
__m128i
v_mullo_r
=
_mm_mullo_epi16
(
v_t2
,
v_r2y
);
__m128i
v_mulhi_b
=
_mm_mulhi_epi16
(
v_t0
,
v_b2y
);
__m128i
v_mulhi_g
=
_mm_mulhi_epi16
(
v_t1
,
v_g2y
);
__m128i
v_mulhi_r
=
_mm_mulhi_epi16
(
v_t2
,
v_r2y
);
__m128i
v_dst0
=
_mm_add_epi32
(
_mm_unpacklo_epi16
(
v_mullo_b
,
v_mulhi_b
),
_mm_unpacklo_epi16
(
v_mullo_g
,
v_mulhi_g
));
v_dst0
=
_mm_add_epi32
(
_mm_add_epi32
(
v_dst0
,
v_delta
),
_mm_unpacklo_epi16
(
v_mullo_r
,
v_mulhi_r
));
__m128i
v_dst1
=
_mm_add_epi32
(
_mm_unpackhi_epi16
(
v_mullo_b
,
v_mulhi_b
),
_mm_unpackhi_epi16
(
v_mullo_g
,
v_mulhi_g
));
v_dst1
=
_mm_add_epi32
(
_mm_add_epi32
(
v_dst1
,
v_delta
),
_mm_unpackhi_epi16
(
v_mullo_r
,
v_mulhi_r
));
v_dst0
=
_mm_srli_epi32
(
v_dst0
,
yuv_shift
);
v_dst1
=
_mm_srli_epi32
(
v_dst1
,
yuv_shift
);
__m128i
v_dst
=
_mm_packs_epi32
(
v_dst0
,
v_dst1
);
_mm_storel_epi64
((
__m128i
*
)(
dst
+
i
),
_mm_packus_epi16
(
v_dst
,
v_zero
));
__m128i
v_b
=
_mm_srli_epi16
(
_mm_slli_epi16
(
v_src
,
11
),
8
),
v_g
=
_mm_srli_epi16
(
_mm_slli_epi16
(
_mm_srli_epi16
(
v_src
,
5
),
10
),
8
),
v_r
=
_mm_slli_epi16
(
_mm_srli_epi16
(
v_src
,
11
),
3
);
__m128i
v_bg_lo
=
_mm_unpacklo_epi16
(
v_b
,
v_g
);
__m128i
v_rd_lo
=
_mm_unpacklo_epi16
(
v_r
,
v_delta
);
__m128i
v_bg_hi
=
_mm_unpackhi_epi16
(
v_b
,
v_g
);
__m128i
v_rd_hi
=
_mm_unpackhi_epi16
(
v_r
,
v_delta
);
v_bg_lo
=
_mm_madd_epi16
(
v_bg_lo
,
v_bg2y
);
v_rd_lo
=
_mm_madd_epi16
(
v_rd_lo
,
v_rd2y
);
v_bg_hi
=
_mm_madd_epi16
(
v_bg_hi
,
v_bg2y
);
v_rd_hi
=
_mm_madd_epi16
(
v_rd_hi
,
v_rd2y
);
__m128i
v_bgr_lo
=
_mm_add_epi32
(
v_bg_lo
,
v_rd_lo
);
__m128i
v_bgr_hi
=
_mm_add_epi32
(
v_bg_hi
,
v_rd_hi
);
v_bgr_lo
=
_mm_srli_epi32
(
v_bgr_lo
,
yuv_shift
);
v_bgr_hi
=
_mm_srli_epi32
(
v_bgr_hi
,
yuv_shift
);
__m128i
v_dst
=
_mm_packs_epi32
(
v_bgr_lo
,
v_bgr_hi
);
v_dst
=
_mm_packus_epi16
(
v_dst
,
v_dst
);
_mm_storel_epi64
((
__m128i
*
)(
dst
+
i
),
v_dst
);
}
}
#endif
...
...
@@ -1205,37 +1199,30 @@ struct RGB5x52Gray
#elif CV_SSE2
if
(
haveSIMD
)
{
__m128i
v_zero
=
_mm_setzero_si128
();
for
(
;
i
<=
n
-
8
;
i
+=
8
)
{
__m128i
v_src
=
_mm_loadu_si128
((
__m128i
const
*
)((
ushort
*
)
src
+
i
));
__m128i
v_t0
=
_mm_and_si128
(
_mm_slli_epi16
(
v_src
,
3
),
v_f8
),
v_t1
=
_mm_and_si128
(
_mm_srli_epi16
(
v_src
,
2
),
v_f8
),
v_t2
=
_mm_and_si128
(
_mm_srli_epi16
(
v_src
,
7
),
v_f8
);
__m128i
v_mullo_b
=
_mm_mullo_epi16
(
v_t0
,
v_b2y
);
__m128i
v_mullo_g
=
_mm_mullo_epi16
(
v_t1
,
v_g2y
);
__m128i
v_mullo_r
=
_mm_mullo_epi16
(
v_t2
,
v_r2y
);
__m128i
v_mulhi_b
=
_mm_mulhi_epi16
(
v_t0
,
v_b2y
);
__m128i
v_mulhi_g
=
_mm_mulhi_epi16
(
v_t1
,
v_g2y
);
__m128i
v_mulhi_r
=
_mm_mulhi_epi16
(
v_t2
,
v_r2y
);
__m128i
v_dst0
=
_mm_add_epi32
(
_mm_unpacklo_epi16
(
v_mullo_b
,
v_mulhi_b
),
_mm_unpacklo_epi16
(
v_mullo_g
,
v_mulhi_g
));
v_dst0
=
_mm_add_epi32
(
_mm_add_epi32
(
v_dst0
,
v_delta
),
_mm_unpacklo_epi16
(
v_mullo_r
,
v_mulhi_r
));
__m128i
v_dst1
=
_mm_add_epi32
(
_mm_unpackhi_epi16
(
v_mullo_b
,
v_mulhi_b
),
_mm_unpackhi_epi16
(
v_mullo_g
,
v_mulhi_g
));
v_dst1
=
_mm_add_epi32
(
_mm_add_epi32
(
v_dst1
,
v_delta
),
_mm_unpackhi_epi16
(
v_mullo_r
,
v_mulhi_r
));
v_dst0
=
_mm_srli_epi32
(
v_dst0
,
yuv_shift
);
v_dst1
=
_mm_srli_epi32
(
v_dst1
,
yuv_shift
);
__m128i
v_dst
=
_mm_packs_epi32
(
v_dst0
,
v_dst1
);
_mm_storel_epi64
((
__m128i
*
)(
dst
+
i
),
_mm_packus_epi16
(
v_dst
,
v_zero
));
__m128i
v_b
=
_mm_srli_epi16
(
_mm_slli_epi16
(
v_src
,
11
),
8
),
v_g
=
_mm_srli_epi16
(
_mm_slli_epi16
(
_mm_srli_epi16
(
v_src
,
5
),
11
),
8
),
v_r
=
_mm_srli_epi16
(
_mm_slli_epi16
(
_mm_srli_epi16
(
v_src
,
10
),
11
),
8
);
__m128i
v_bg_lo
=
_mm_unpacklo_epi16
(
v_b
,
v_g
);
__m128i
v_rd_lo
=
_mm_unpacklo_epi16
(
v_r
,
v_delta
);
__m128i
v_bg_hi
=
_mm_unpackhi_epi16
(
v_b
,
v_g
);
__m128i
v_rd_hi
=
_mm_unpackhi_epi16
(
v_r
,
v_delta
);
v_bg_lo
=
_mm_madd_epi16
(
v_bg_lo
,
v_bg2y
);
v_rd_lo
=
_mm_madd_epi16
(
v_rd_lo
,
v_rd2y
);
v_bg_hi
=
_mm_madd_epi16
(
v_bg_hi
,
v_bg2y
);
v_rd_hi
=
_mm_madd_epi16
(
v_rd_hi
,
v_rd2y
);
__m128i
v_bgr_lo
=
_mm_add_epi32
(
v_bg_lo
,
v_rd_lo
);
__m128i
v_bgr_hi
=
_mm_add_epi32
(
v_bg_hi
,
v_rd_hi
);
v_bgr_lo
=
_mm_srli_epi32
(
v_bgr_lo
,
yuv_shift
);
v_bgr_hi
=
_mm_srli_epi32
(
v_bgr_hi
,
yuv_shift
);
__m128i
v_dst
=
_mm_packs_epi32
(
v_bgr_lo
,
v_bgr_hi
);
v_dst
=
_mm_packus_epi16
(
v_dst
,
v_dst
);
_mm_storel_epi64
((
__m128i
*
)(
dst
+
i
),
v_dst
);
}
}
#endif
...
...
@@ -1256,9 +1243,8 @@ struct RGB5x52Gray
uint16x8_t
v_f8
,
v_fc
;
#elif CV_SSE2
bool
haveSIMD
;
__m128i
v_b
2y
,
v_g2y
,
v_r
2y
;
__m128i
v_b
g2y
,
v_rd
2y
;
__m128i
v_delta
;
__m128i
v_f8
,
v_fc
;
#endif
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment