Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
3faaabd6
Commit
3faaabd6
authored
Aug 14, 2016
by
k-shinotsuka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
improve YCrCb2RGB_i<uchar>()
parent
658e4c3d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
151 additions
and
0 deletions
+151
-0
color.cpp
modules/imgproc/src/color.cpp
+151
-0
No files found.
modules/imgproc/src/color.cpp
View file @
3faaabd6
...
...
@@ -2987,6 +2987,72 @@ struct YCrCb2RGB_i<uchar>
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
#if CV_SSE4_1
// 16s x 8
// In-place SSE kernel: transforms the 24 interleaved bytes held in
// v_src[0] (16 bytes) and the low half of v_src[1] (8 bytes).
//
//   v_src     - in/out pair of registers; overwritten with the packed result
//               (16 result bytes in v_src[0], 8 meaningful bytes duplicated
//               across both halves of v_src[1]).
//   v_shuffle - v_shuffle[0] is the byte selector used to build the
//               multiply-add operands, v_shuffle[1] the selector for the
//               bytes that are added back after the multiply-add.
//   v_coeffs  - three 16-bit coefficient vectors, applied cyclically
//               (0, 1, 2, 0, 1, 2) to the six widened registers.
//
// NOTE(review): given the enclosing struct this presumably converts eight
// Y/Cr/Cb pixels to three colour channels each - confirm against the caller.
void process(__m128i* v_src, __m128i* v_shuffle, __m128i* v_coeffs) const
{
    const __m128i v_pair_sel = v_shuffle[0];
    const __m128i v_base_sel = v_shuffle[1];

    // Three 16-byte windows over the v_src[1]:v_src[0] concatenation,
    // starting at byte offsets 0, 8 and 16.
    __m128i v_window[3];
    v_window[0] = v_src[0];
    v_window[1] = _mm_alignr_epi8(v_src[1], v_src[0], 8);
    v_window[2] = v_src[1];

    // Bytes selected by v_shuffle[1], widened to 16 bit; they are added to the
    // scaled terms at the end (presumably the luma samples - TODO confirm).
    const __m128i v_base_a = _mm_shuffle_epi8(v_src[0], v_base_sel);
    const __m128i v_base_b =
        _mm_srli_si128(_mm_shuffle_epi8(_mm_alignr_epi8(v_src[1], v_src[0], 15), v_base_sel), 1);

    __m128i v_base[3];
    v_base[0] = _mm_unpacklo_epi8(v_base_a, v_zero);
    v_base[1] = _mm_unpackhi_epi8(v_base_a, v_zero);
    v_base[2] = _mm_unpacklo_epi8(v_base_b, v_zero);

    // Rearrange every window with v_shuffle[0] and zero-extend it to 16 bit,
    // which yields two registers per window.
    __m128i v_term[6];
    for (int w = 0; w < 3; ++w)
    {
        const __m128i v_pairs = _mm_shuffle_epi8(v_window[w], v_pair_sel);
        v_term[2 * w]     = _mm_unpacklo_epi8(v_pairs, v_zero);
        v_term[2 * w + 1] = _mm_unpackhi_epi8(v_pairs, v_zero);
    }

    // (x - v_delta) multiply-added pairwise with the coefficients, then
    // offset by v_delta2 and arithmetically shifted right by yuv_shift.
    for (int k = 0; k < 6; ++k)
    {
        __m128i v_t = _mm_sub_epi16(v_term[k], v_delta);
        v_t = _mm_madd_epi16(v_t, v_coeffs[k % 3]);
        v_t = _mm_add_epi32(v_t, v_delta2);
        v_term[k] = _mm_srai_epi32(v_t, yuv_shift);
    }

    // Narrow back to 16 bit with signed saturation and add the widened base bytes.
    __m128i v_sum[3];
    for (int w = 0; w < 3; ++w)
    {
        const __m128i v_packed = _mm_packs_epi32(v_term[2 * w], v_term[2 * w + 1]);
        v_sum[w] = _mm_add_epi16(v_packed, v_base[w]);
    }

    // Final narrowing to bytes with unsigned saturation; the last register
    // only carries 8 distinct bytes, so it is packed with itself.
    v_src[0] = _mm_packus_epi16(v_sum[0], v_sum[1]);
    v_src[1] = _mm_packus_epi16(v_sum[2], v_sum[2]);
}
#endif // CV_SSE4_1
// 16s x 8
void
process
(
__m128i
v_y
,
__m128i
v_cr
,
__m128i
v_cb
,
__m128i
&
v_r
,
__m128i
&
v_g
,
__m128i
&
v_b
)
const
...
...
@@ -3040,6 +3106,91 @@ struct YCrCb2RGB_i<uchar>
int
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
];
n
*=
3
;
#if CV_SSE4_1
if
(
checkHardwareSupport
(
CV_CPU_SSE4_1
)
&&
useSSE
)
{
__m128i
v_shuffle
[
2
];
v_shuffle
[
0
]
=
_mm_set_epi8
(
0x8
,
0x7
,
0x7
,
0x6
,
0x6
,
0x5
,
0x5
,
0x4
,
0x4
,
0x3
,
0x3
,
0x2
,
0x2
,
0x1
,
0x1
,
0x0
);
v_shuffle
[
1
]
=
_mm_set_epi8
(
0xf
,
0xc
,
0xc
,
0xc
,
0x9
,
0x9
,
0x9
,
0x6
,
0x6
,
0x6
,
0x3
,
0x3
,
0x3
,
0x0
,
0x0
,
0x0
);
__m128i
v_coeffs
[
3
];
v_coeffs
[
0
]
=
_mm_set_epi16
((
short
)
C0
,
0
,
0
,
(
short
)
C3
,
(
short
)
C2
,
(
short
)
C1
,
(
short
)
C0
,
0
);
v_coeffs
[
1
]
=
_mm_set_epi16
((
short
)
C2
,
(
short
)
C1
,
(
short
)
C0
,
0
,
0
,
(
short
)
C3
,
(
short
)
C2
,
(
short
)
C1
);
v_coeffs
[
2
]
=
_mm_set_epi16
(
0
,
(
short
)
C3
,
(
short
)
C2
,
(
short
)
C1
,
(
short
)
C0
,
0
,
0
,
(
short
)
C3
);
if
(
dcn
==
3
)
{
if
(
bidx
==
0
)
{
__m128i
v_shuffle_dst
=
_mm_set_epi8
(
0xf
,
0xc
,
0xd
,
0xe
,
0x9
,
0xa
,
0xb
,
0x6
,
0x7
,
0x8
,
0x3
,
0x4
,
0x5
,
0x0
,
0x1
,
0x2
);
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
__m128i
v_dst
[
2
];
v_dst
[
0
]
=
_mm_shuffle_epi8
(
v_src
[
0
],
v_shuffle_dst
);
v_dst
[
1
]
=
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
v_src
[
1
],
v_src
[
0
],
15
),
v_shuffle_dst
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
_mm_alignr_epi8
(
v_dst
[
1
],
_mm_slli_si128
(
v_dst
[
0
],
1
),
1
));
_mm_storel_epi64
((
__m128i
*
)(
dst
+
16
),
_mm_srli_si128
(
v_dst
[
1
],
1
));
}
}
else
{
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
v_src
[
0
]);
_mm_storel_epi64
((
__m128i
*
)(
dst
+
16
),
v_src
[
1
]);
}
}
}
else
{
if
(
bidx
==
0
)
{
__m128i
v_shuffle_dst
=
_mm_set_epi8
(
0x0
,
0xa
,
0xb
,
0xc
,
0x0
,
0x7
,
0x8
,
0x9
,
0x0
,
0x4
,
0x5
,
0x6
,
0x0
,
0x1
,
0x2
,
0x3
);
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
v_src
[
0
],
v_alpha
,
15
),
v_shuffle_dst
));
_mm_storeu_si128
((
__m128i
*
)(
dst
+
16
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
_mm_alignr_epi8
(
v_src
[
1
],
v_src
[
0
],
12
),
v_alpha
,
15
),
v_shuffle_dst
));
}
}
else
{
__m128i
v_shuffle_dst
=
_mm_set_epi8
(
0x0
,
0xc
,
0xb
,
0xa
,
0x0
,
0x9
,
0x8
,
0x7
,
0x0
,
0x6
,
0x5
,
0x4
,
0x0
,
0x3
,
0x2
,
0x1
);
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
v_src
[
0
],
v_alpha
,
15
),
v_shuffle_dst
));
_mm_storeu_si128
((
__m128i
*
)(
dst
+
16
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
_mm_alignr_epi8
(
v_src
[
1
],
v_src
[
0
],
12
),
v_alpha
,
15
),
v_shuffle_dst
));
}
}
}
}
else
#endif // CV_SSE4_1
if
(
haveSIMD
&&
useSSE
)
{
for
(
;
i
<=
n
-
96
;
i
+=
96
,
dst
+=
dcn
*
32
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment