Commit ba1f5269 authored by fbarchard@google.com's avatar fbarchard@google.com

rewrite of I420ToRGB565 etc using row functions

BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/345004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@131 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 20005e64
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 130
Version: 131
License: BSD
License File: LICENSE
......
......@@ -30,7 +30,6 @@
'include/libyuv/video_common.h',
# headers
'source/conversion_tables.h',
'source/rotate_priv.h',
'source/row.h',
......
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/**************************************************************
* conversion_tables.h
*
* Pre-compiled definitions of the conversion equations: YUV -> RGB.
*
***************************************************************/
#ifndef LIBYUV_SOURCE_CONVERSION_TABLES_H_
#define LIBYUV_SOURCE_CONVERSION_TABLES_H_
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
/******************************************************************************
* YUV TO RGB approximation
*
* R = clip( (298 * (Y - 16) + 409 * (V - 128) + 128 ) >> 8 )
* G = clip( (298 * (Y - 16) - 100 * (U - 128) - 208 * (V - 128) + 128 ) >> 8 )
* B = clip( (298 * (Y - 16) + 516 * (U - 128) + 128 ) >> 8 )
*******************************************************************************/
// Per-component terms of the fixed-point YUV -> RGB equations above (all
// scaled by 256). Each macro takes one sample value and yields an int.
// The argument and the whole expansion are parenthesized so that compound
// expressions (e.g. `a ? b : c`, `a | b`) expand correctly, and a C-style
// cast is used so the header also compiles when __cplusplus is not defined.
#define Yc(i) ((int)(298 * ((i) - 16)))     // Y contribution
#define Ucg(i) ((int)(-100 * ((i) - 128)))  // U contribution to G
#define Ucb(i) ((int)(516 * ((i) - 128)))   // U contribution to B
#define Vcr(i) ((int)(409 * ((i) - 128)))   // V contribution to R
#define Vcg(i) ((int)(-208 * ((i) - 128)))  // V contribution to G
// Lookup table indexed by an 8-bit Y sample: mapYc[y] == Yc(y), i.e.
// 298 * (y - 16), the luma term shared by the R, G and B equations above.
static const int mapYc[256] = {
Yc(0),Yc(1),Yc(2),Yc(3),Yc(4),Yc(5),Yc(6),Yc(7),Yc(8),Yc(9),
Yc(10),Yc(11),Yc(12),Yc(13),Yc(14),Yc(15),Yc(16),Yc(17),Yc(18),Yc(19),
Yc(20),Yc(21),Yc(22),Yc(23),Yc(24),Yc(25),Yc(26),Yc(27),Yc(28),Yc(29),
Yc(30),Yc(31),Yc(32),Yc(33),Yc(34),Yc(35),Yc(36),Yc(37),Yc(38),Yc(39),
Yc(40),Yc(41),Yc(42),Yc(43),Yc(44),Yc(45),Yc(46),Yc(47),Yc(48),Yc(49),
Yc(50),Yc(51),Yc(52),Yc(53),Yc(54),Yc(55),Yc(56),Yc(57),Yc(58),Yc(59),
Yc(60),Yc(61),Yc(62),Yc(63),Yc(64),Yc(65),Yc(66),Yc(67),Yc(68),Yc(69),
Yc(70),Yc(71),Yc(72),Yc(73),Yc(74),Yc(75),Yc(76),Yc(77),Yc(78),Yc(79),
Yc(80),Yc(81),Yc(82),Yc(83),Yc(84),Yc(85),Yc(86),Yc(87),Yc(88),Yc(89),
Yc(90),Yc(91),Yc(92),Yc(93),Yc(94),Yc(95),Yc(96),Yc(97),Yc(98),Yc(99),
Yc(100),Yc(101),Yc(102),Yc(103),Yc(104),Yc(105),Yc(106),Yc(107),Yc(108),
Yc(109),Yc(110),Yc(111),Yc(112),Yc(113),Yc(114),Yc(115),Yc(116),Yc(117),
Yc(118),Yc(119),Yc(120),Yc(121),Yc(122),Yc(123),Yc(124),Yc(125),Yc(126),
Yc(127),Yc(128),Yc(129),Yc(130),Yc(131),Yc(132),Yc(133),Yc(134),Yc(135),
Yc(136),Yc(137),Yc(138),Yc(139),Yc(140),Yc(141),Yc(142),Yc(143),Yc(144),
Yc(145),Yc(146),Yc(147),Yc(148),Yc(149),Yc(150),Yc(151),Yc(152),Yc(153),
Yc(154),Yc(155),Yc(156),Yc(157),Yc(158),Yc(159),Yc(160),Yc(161),Yc(162),
Yc(163),Yc(164),Yc(165),Yc(166),Yc(167),Yc(168),Yc(169),Yc(170),Yc(171),
Yc(172),Yc(173),Yc(174),Yc(175),Yc(176),Yc(177),Yc(178),Yc(179),Yc(180),
Yc(181),Yc(182),Yc(183),Yc(184),Yc(185),Yc(186),Yc(187),Yc(188),Yc(189),
Yc(190),Yc(191),Yc(192),Yc(193),Yc(194),Yc(195),Yc(196),Yc(197),Yc(198),
Yc(199),Yc(200),Yc(201),Yc(202),Yc(203),Yc(204),Yc(205),Yc(206),Yc(207),
Yc(208),Yc(209),Yc(210),Yc(211),Yc(212),Yc(213),Yc(214),Yc(215),Yc(216),
Yc(217),Yc(218),Yc(219),Yc(220),Yc(221),Yc(222),Yc(223),Yc(224),Yc(225),
Yc(226),Yc(227),Yc(228),Yc(229),Yc(230),Yc(231),Yc(232),Yc(233),Yc(234),
Yc(235),Yc(236),Yc(237),Yc(238),Yc(239),Yc(240),Yc(241),Yc(242),Yc(243),
Yc(244),Yc(245),Yc(246),Yc(247),Yc(248),Yc(249),Yc(250),Yc(251),Yc(252),
Yc(253),Yc(254),Yc(255)};
// Lookup table indexed by an 8-bit U sample: mapUcg[u] == Ucg(u), i.e.
// -100 * (u - 128), the U term of the G equation above.
static const int mapUcg[256] = {
Ucg(0),Ucg(1),Ucg(2),Ucg(3),Ucg(4),Ucg(5),Ucg(6),Ucg(7),Ucg(8),Ucg(9),
Ucg(10),Ucg(11),Ucg(12),Ucg(13),Ucg(14),Ucg(15),Ucg(16),Ucg(17),Ucg(18),
Ucg(19),Ucg(20),Ucg(21),Ucg(22),Ucg(23),Ucg(24),Ucg(25),Ucg(26),Ucg(27),
Ucg(28),Ucg(29),Ucg(30),Ucg(31),Ucg(32),Ucg(33),Ucg(34),Ucg(35),Ucg(36),
Ucg(37),Ucg(38),Ucg(39),Ucg(40),Ucg(41),Ucg(42),Ucg(43),Ucg(44),Ucg(45),
Ucg(46),Ucg(47),Ucg(48),Ucg(49),Ucg(50),Ucg(51),Ucg(52),Ucg(53),Ucg(54),
Ucg(55),Ucg(56),Ucg(57),Ucg(58),Ucg(59),Ucg(60),Ucg(61),Ucg(62),Ucg(63),
Ucg(64),Ucg(65),Ucg(66),Ucg(67),Ucg(68),Ucg(69),Ucg(70),Ucg(71),Ucg(72),
Ucg(73),Ucg(74),Ucg(75),Ucg(76),Ucg(77),Ucg(78),Ucg(79),Ucg(80),Ucg(81),
Ucg(82),Ucg(83),Ucg(84),Ucg(85),Ucg(86),Ucg(87),Ucg(88),Ucg(89),Ucg(90),
Ucg(91),Ucg(92),Ucg(93),Ucg(94),Ucg(95),Ucg(96),Ucg(97),Ucg(98),Ucg(99),
Ucg(100),Ucg(101),Ucg(102),Ucg(103),Ucg(104),Ucg(105),Ucg(106),Ucg(107),
Ucg(108),Ucg(109),Ucg(110),Ucg(111),Ucg(112),Ucg(113),Ucg(114),Ucg(115),
Ucg(116),Ucg(117),Ucg(118),Ucg(119),Ucg(120),Ucg(121),Ucg(122),Ucg(123),
Ucg(124),Ucg(125),Ucg(126),Ucg(127),Ucg(128),Ucg(129),Ucg(130),Ucg(131),
Ucg(132),Ucg(133),Ucg(134),Ucg(135),Ucg(136),Ucg(137),Ucg(138),Ucg(139),
Ucg(140),Ucg(141),Ucg(142),Ucg(143),Ucg(144),Ucg(145),Ucg(146),Ucg(147),
Ucg(148),Ucg(149),Ucg(150),Ucg(151),Ucg(152),Ucg(153),Ucg(154),Ucg(155),
Ucg(156),Ucg(157),Ucg(158),Ucg(159),Ucg(160),Ucg(161),Ucg(162),Ucg(163),
Ucg(164),Ucg(165),Ucg(166),Ucg(167),Ucg(168),Ucg(169),Ucg(170),Ucg(171),
Ucg(172),Ucg(173),Ucg(174),Ucg(175),Ucg(176),Ucg(177),Ucg(178),Ucg(179),
Ucg(180),Ucg(181),Ucg(182),Ucg(183),Ucg(184),Ucg(185),Ucg(186),Ucg(187),
Ucg(188),Ucg(189),Ucg(190),Ucg(191),Ucg(192),Ucg(193),Ucg(194),Ucg(195),
Ucg(196),Ucg(197),Ucg(198),Ucg(199),Ucg(200),Ucg(201),Ucg(202),Ucg(203),
Ucg(204),Ucg(205),Ucg(206),Ucg(207),Ucg(208),Ucg(209),Ucg(210),Ucg(211),
Ucg(212),Ucg(213),Ucg(214),Ucg(215),Ucg(216),Ucg(217),Ucg(218),Ucg(219),
Ucg(220),Ucg(221),Ucg(222),Ucg(223),Ucg(224),Ucg(225),Ucg(226),Ucg(227),
Ucg(228),Ucg(229),Ucg(230),Ucg(231),Ucg(232),Ucg(233),Ucg(234),Ucg(235),
Ucg(236),Ucg(237),Ucg(238),Ucg(239),Ucg(240),Ucg(241),Ucg(242),Ucg(243),
Ucg(244),Ucg(245),Ucg(246),Ucg(247),Ucg(248),Ucg(249),Ucg(250),Ucg(251),
Ucg(252),Ucg(253),Ucg(254),Ucg(255)};
// Lookup table indexed by an 8-bit U sample: mapUcb[u] == Ucb(u), i.e.
// 516 * (u - 128), the U term of the B equation above.
static const int mapUcb[256] = {
Ucb(0),Ucb(1),Ucb(2),Ucb(3),Ucb(4),Ucb(5),Ucb(6),Ucb(7),Ucb(8),Ucb(9),
Ucb(10),Ucb(11),Ucb(12),Ucb(13),Ucb(14),Ucb(15),Ucb(16),Ucb(17),Ucb(18),
Ucb(19),Ucb(20),Ucb(21),Ucb(22),Ucb(23),Ucb(24),Ucb(25),Ucb(26),Ucb(27),
Ucb(28),Ucb(29),Ucb(30),Ucb(31),Ucb(32),Ucb(33),Ucb(34),Ucb(35),Ucb(36),
Ucb(37),Ucb(38),Ucb(39),Ucb(40),Ucb(41),Ucb(42),Ucb(43),Ucb(44),Ucb(45),
Ucb(46),Ucb(47),Ucb(48),Ucb(49),Ucb(50),Ucb(51),Ucb(52),Ucb(53),Ucb(54),
Ucb(55),Ucb(56),Ucb(57),Ucb(58),Ucb(59),Ucb(60),Ucb(61),Ucb(62),Ucb(63),
Ucb(64),Ucb(65),Ucb(66),Ucb(67),Ucb(68),Ucb(69),Ucb(70),Ucb(71),Ucb(72),
Ucb(73),Ucb(74),Ucb(75),Ucb(76),Ucb(77),Ucb(78),Ucb(79),Ucb(80),Ucb(81),
Ucb(82),Ucb(83),Ucb(84),Ucb(85),Ucb(86),Ucb(87),Ucb(88),Ucb(89),Ucb(90),
Ucb(91),Ucb(92),Ucb(93),Ucb(94),Ucb(95),Ucb(96),Ucb(97),Ucb(98),Ucb(99),
Ucb(100),Ucb(101),Ucb(102),Ucb(103),Ucb(104),Ucb(105),Ucb(106),Ucb(107),
Ucb(108),Ucb(109),Ucb(110),Ucb(111),Ucb(112),Ucb(113),Ucb(114),Ucb(115),
Ucb(116),Ucb(117),Ucb(118),Ucb(119),Ucb(120),Ucb(121),Ucb(122),Ucb(123),
Ucb(124),Ucb(125),Ucb(126),Ucb(127),Ucb(128),Ucb(129),Ucb(130),Ucb(131),
Ucb(132),Ucb(133),Ucb(134),Ucb(135),Ucb(136),Ucb(137),Ucb(138),Ucb(139),
Ucb(140),Ucb(141),Ucb(142),Ucb(143),Ucb(144),Ucb(145),Ucb(146),Ucb(147),
Ucb(148),Ucb(149),Ucb(150),Ucb(151),Ucb(152),Ucb(153),Ucb(154),Ucb(155),
Ucb(156),Ucb(157),Ucb(158),Ucb(159),Ucb(160),Ucb(161),Ucb(162),Ucb(163),
Ucb(164),Ucb(165),Ucb(166),Ucb(167),Ucb(168),Ucb(169),Ucb(170),Ucb(171),
Ucb(172),Ucb(173),Ucb(174),Ucb(175),Ucb(176),Ucb(177),Ucb(178),Ucb(179),
Ucb(180),Ucb(181),Ucb(182),Ucb(183),Ucb(184),Ucb(185),Ucb(186),Ucb(187),
Ucb(188),Ucb(189),Ucb(190),Ucb(191),Ucb(192),Ucb(193),Ucb(194),Ucb(195),
Ucb(196),Ucb(197),Ucb(198),Ucb(199),Ucb(200),Ucb(201),Ucb(202),Ucb(203),
Ucb(204),Ucb(205),Ucb(206),Ucb(207),Ucb(208),Ucb(209),Ucb(210),Ucb(211),
Ucb(212),Ucb(213),Ucb(214),Ucb(215),Ucb(216),Ucb(217),Ucb(218),Ucb(219),
Ucb(220),Ucb(221),Ucb(222),Ucb(223),Ucb(224),Ucb(225),Ucb(226),Ucb(227),
Ucb(228),Ucb(229),Ucb(230),Ucb(231),Ucb(232),Ucb(233),Ucb(234),Ucb(235),
Ucb(236),Ucb(237),Ucb(238),Ucb(239),Ucb(240),Ucb(241),Ucb(242),Ucb(243),
Ucb(244),Ucb(245),Ucb(246),Ucb(247),Ucb(248),Ucb(249),Ucb(250),Ucb(251),
Ucb(252),Ucb(253),Ucb(254),Ucb(255)};
// Lookup table indexed by an 8-bit V sample: mapVcr[v] == Vcr(v), i.e.
// 409 * (v - 128), the V term of the R equation above.
static const int mapVcr[256] = {
Vcr(0),Vcr(1),Vcr(2),Vcr(3),Vcr(4),Vcr(5),Vcr(6),Vcr(7),Vcr(8),Vcr(9),
Vcr(10),Vcr(11),Vcr(12),Vcr(13),Vcr(14),Vcr(15),Vcr(16),Vcr(17),Vcr(18),
Vcr(19),Vcr(20),Vcr(21),Vcr(22),Vcr(23),Vcr(24),Vcr(25),Vcr(26),Vcr(27),
Vcr(28),Vcr(29),Vcr(30),Vcr(31),Vcr(32),Vcr(33),Vcr(34),Vcr(35),Vcr(36),
Vcr(37),Vcr(38),Vcr(39),Vcr(40),Vcr(41),Vcr(42),Vcr(43),Vcr(44),Vcr(45),
Vcr(46),Vcr(47),Vcr(48),Vcr(49),Vcr(50),Vcr(51),Vcr(52),Vcr(53),Vcr(54),
Vcr(55),Vcr(56),Vcr(57),Vcr(58),Vcr(59),Vcr(60),Vcr(61),Vcr(62),Vcr(63),
Vcr(64),Vcr(65),Vcr(66),Vcr(67),Vcr(68),Vcr(69),Vcr(70),Vcr(71),Vcr(72),
Vcr(73),Vcr(74),Vcr(75),Vcr(76),Vcr(77),Vcr(78),Vcr(79),Vcr(80),Vcr(81),
Vcr(82),Vcr(83),Vcr(84),Vcr(85),Vcr(86),Vcr(87),Vcr(88),Vcr(89),Vcr(90),
Vcr(91),Vcr(92),Vcr(93),Vcr(94),Vcr(95),Vcr(96),Vcr(97),Vcr(98),Vcr(99),
Vcr(100),Vcr(101),Vcr(102),Vcr(103),Vcr(104),Vcr(105),Vcr(106),Vcr(107),
Vcr(108),Vcr(109),Vcr(110),Vcr(111),Vcr(112),Vcr(113),Vcr(114),Vcr(115),
Vcr(116),Vcr(117),Vcr(118),Vcr(119),Vcr(120),Vcr(121),Vcr(122),Vcr(123),
Vcr(124),Vcr(125),Vcr(126),Vcr(127),Vcr(128),Vcr(129),Vcr(130),Vcr(131),
Vcr(132),Vcr(133),Vcr(134),Vcr(135),Vcr(136),Vcr(137),Vcr(138),Vcr(139),
Vcr(140),Vcr(141),Vcr(142),Vcr(143),Vcr(144),Vcr(145),Vcr(146),Vcr(147),
Vcr(148),Vcr(149),Vcr(150),Vcr(151),Vcr(152),Vcr(153),Vcr(154),Vcr(155),
Vcr(156),Vcr(157),Vcr(158),Vcr(159),Vcr(160),Vcr(161),Vcr(162),Vcr(163),
Vcr(164),Vcr(165),Vcr(166),Vcr(167),Vcr(168),Vcr(169),Vcr(170),Vcr(171),
Vcr(172),Vcr(173),Vcr(174),Vcr(175),Vcr(176),Vcr(177),Vcr(178),Vcr(179),
Vcr(180),Vcr(181),Vcr(182),Vcr(183),Vcr(184),Vcr(185),Vcr(186),Vcr(187),
Vcr(188),Vcr(189),Vcr(190),Vcr(191),Vcr(192),Vcr(193),Vcr(194),Vcr(195),
Vcr(196),Vcr(197),Vcr(198),Vcr(199),Vcr(200),Vcr(201),Vcr(202),Vcr(203),
Vcr(204),Vcr(205),Vcr(206),Vcr(207),Vcr(208),Vcr(209),Vcr(210),Vcr(211),
Vcr(212),Vcr(213),Vcr(214),Vcr(215),Vcr(216),Vcr(217),Vcr(218),Vcr(219),
Vcr(220),Vcr(221),Vcr(222),Vcr(223),Vcr(224),Vcr(225),Vcr(226),Vcr(227),
Vcr(228),Vcr(229),Vcr(230),Vcr(231),Vcr(232),Vcr(233),Vcr(234),Vcr(235),
Vcr(236),Vcr(237),Vcr(238),Vcr(239),Vcr(240),Vcr(241),Vcr(242),Vcr(243),
Vcr(244),Vcr(245),Vcr(246),Vcr(247),Vcr(248),Vcr(249),Vcr(250),Vcr(251),
Vcr(252),Vcr(253),Vcr(254),Vcr(255)};
// Lookup table indexed by an 8-bit V sample: mapVcg[v] == Vcg(v), i.e.
// -208 * (v - 128), the V term of the G equation above.
static const int mapVcg[256] = {
Vcg(0),Vcg(1),Vcg(2),Vcg(3),Vcg(4),Vcg(5),Vcg(6),Vcg(7),Vcg(8),Vcg(9),
Vcg(10),Vcg(11),Vcg(12),Vcg(13),Vcg(14),Vcg(15),Vcg(16),Vcg(17),Vcg(18),
Vcg(19),Vcg(20),Vcg(21),Vcg(22),Vcg(23),Vcg(24),Vcg(25),Vcg(26),Vcg(27),
Vcg(28),Vcg(29),Vcg(30),Vcg(31),Vcg(32),Vcg(33),Vcg(34),Vcg(35),Vcg(36),
Vcg(37),Vcg(38),Vcg(39),Vcg(40),Vcg(41),Vcg(42),Vcg(43),Vcg(44),Vcg(45),
Vcg(46),Vcg(47),Vcg(48),Vcg(49),Vcg(50),Vcg(51),Vcg(52),Vcg(53),Vcg(54),
Vcg(55),Vcg(56),Vcg(57),Vcg(58),Vcg(59),Vcg(60),Vcg(61),Vcg(62),Vcg(63),
Vcg(64),Vcg(65),Vcg(66),Vcg(67),Vcg(68),Vcg(69),Vcg(70),Vcg(71),Vcg(72),
Vcg(73),Vcg(74),Vcg(75),Vcg(76),Vcg(77),Vcg(78),Vcg(79),Vcg(80),Vcg(81),
Vcg(82),Vcg(83),Vcg(84),Vcg(85),Vcg(86),Vcg(87),Vcg(88),Vcg(89),Vcg(90),
Vcg(91),Vcg(92),Vcg(93),Vcg(94),Vcg(95),Vcg(96),Vcg(97),Vcg(98),Vcg(99),
Vcg(100),Vcg(101),Vcg(102),Vcg(103),Vcg(104),Vcg(105),Vcg(106),Vcg(107),
Vcg(108),Vcg(109),Vcg(110),Vcg(111),Vcg(112),Vcg(113),Vcg(114),Vcg(115),
Vcg(116),Vcg(117),Vcg(118),Vcg(119),Vcg(120),Vcg(121),Vcg(122),Vcg(123),
Vcg(124),Vcg(125),Vcg(126),Vcg(127),Vcg(128),Vcg(129),Vcg(130),Vcg(131),
Vcg(132),Vcg(133),Vcg(134),Vcg(135),Vcg(136),Vcg(137),Vcg(138),Vcg(139),
Vcg(140),Vcg(141),Vcg(142),Vcg(143),Vcg(144),Vcg(145),Vcg(146),Vcg(147),
Vcg(148),Vcg(149),Vcg(150),Vcg(151),Vcg(152),Vcg(153),Vcg(154),Vcg(155),
Vcg(156),Vcg(157),Vcg(158),Vcg(159),Vcg(160),Vcg(161),Vcg(162),Vcg(163),
Vcg(164),Vcg(165),Vcg(166),Vcg(167),Vcg(168),Vcg(169),Vcg(170),Vcg(171),
Vcg(172),Vcg(173),Vcg(174),Vcg(175),Vcg(176),Vcg(177),Vcg(178),Vcg(179),
Vcg(180),Vcg(181),Vcg(182),Vcg(183),Vcg(184),Vcg(185),Vcg(186),Vcg(187),
Vcg(188),Vcg(189),Vcg(190),Vcg(191),Vcg(192),Vcg(193),Vcg(194),Vcg(195),
Vcg(196),Vcg(197),Vcg(198),Vcg(199),Vcg(200),Vcg(201),Vcg(202),Vcg(203),
Vcg(204),Vcg(205),Vcg(206),Vcg(207),Vcg(208),Vcg(209),Vcg(210),Vcg(211),
Vcg(212),Vcg(213),Vcg(214),Vcg(215),Vcg(216),Vcg(217),Vcg(218),Vcg(219),
Vcg(220),Vcg(221),Vcg(222),Vcg(223),Vcg(224),Vcg(225),Vcg(226),Vcg(227),
Vcg(228),Vcg(229),Vcg(230),Vcg(231),Vcg(232),Vcg(233),Vcg(234),Vcg(235),
Vcg(236),Vcg(237),Vcg(238),Vcg(239),Vcg(240),Vcg(241),Vcg(242),Vcg(243),
Vcg(244),Vcg(245),Vcg(246),Vcg(247),Vcg(248),Vcg(249),Vcg(250),Vcg(251),
Vcg(252),Vcg(253),Vcg(254),Vcg(255)};
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif
......@@ -16,7 +16,6 @@
#include <emmintrin.h> // Not currently used
#endif
#include "conversion_tables.h"
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
......@@ -30,372 +29,6 @@ namespace libyuv {
extern "C" {
#endif
// Saturates a signed 32-bit value to the 8-bit range: values below 0 become
// 0, values above 255 become 255, everything else is returned unchanged.
static __inline uint8 Clip(int32 val) {
  const int32 saturated = (val < 0) ? 0 : ((val > 255) ? 255 : val);
  return (uint8) saturated;
}
// FourCC is 24BG. bgr in memory
// TODO(fbarchard): rewrite with row functions
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// TODO(fbarchard): support inversion
uint8* out = dst_frame;
uint8* out2 = out + dst_stride_frame;
int h, w;
int tmp_r, tmp_g, tmp_b;
const uint8 *y1, *y2 ,*u, *v;
y1 = src_y;
y2 = y1 + src_stride_y;
u = src_u;
v = src_v;
for (h = ((height + 1) >> 1); h > 0; h--){
// 2 rows at a time, 2 y's at a time
for (w = 0; w < ((width + 1) >> 1); w++){
// Vertical and horizontal sub-sampling
tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = Clip(tmp_b);
out[1] = Clip(tmp_g);
out[2] = Clip(tmp_r);
tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[3] = Clip(tmp_b);
out[4] = Clip(tmp_g);
out[5] = Clip(tmp_r);
tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = Clip(tmp_b);
out2[1] = Clip(tmp_g);
out2[2] = Clip(tmp_r);
tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[3] = Clip(tmp_b);
out2[4] = Clip(tmp_g);
out2[5] = Clip(tmp_r);
out += 6;
out2 += 6;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_stride_y - width;
y2 += 2 * src_stride_y - width;
u += src_stride_u - ((width + 1) >> 1);
v += src_stride_v - ((width + 1) >> 1);
out += dst_stride_frame;
out2 += dst_stride_frame;
}
return 0;
}
// FourCC is RAW. Same as RGB24 but r,g,b instead of b,g,r
// TODO(fbarchard): rewrite with row functions
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// RGB orientation - bottom up
// TODO(fbarchard): support inversion
uint8* out = dst_frame + dst_stride_frame * height - dst_stride_frame;
uint8* out2 = out - dst_stride_frame;
int h, w;
int tmp_r, tmp_g, tmp_b;
const uint8 *y1, *y2 ,*u, *v;
y1 = src_y;
y2 = y1 + src_stride_y;
u = src_u;
v = src_v;
for (h = ((height + 1) >> 1); h > 0; h--){
// 2 rows at a time, 2 y's at a time
for (w = 0; w < ((width + 1) >> 1); w++){
// Vertical and horizontal sub-sampling
tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = Clip(tmp_r);
out[1] = Clip(tmp_g);
out[2] = Clip(tmp_b);
tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[3] = Clip(tmp_r);
out[4] = Clip(tmp_g);
out[5] = Clip(tmp_b);
tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = Clip(tmp_r);
out2[1] = Clip(tmp_g);
out2[2] = Clip(tmp_b);
tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[3] = Clip(tmp_r);
out2[4] = Clip(tmp_g);
out2[5] = Clip(tmp_b);
out += 6;
out2 += 6;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += src_stride_y + src_stride_y - width;
y2 += src_stride_y + src_stride_y - width;
u += src_stride_u - ((width + 1) >> 1);
v += src_stride_v - ((width + 1) >> 1);
out -= dst_stride_frame * 3;
out2 -= dst_stride_frame * 3;
} // end height for
return 0;
}
// FourCC is R444. Little Endian...
// TODO(fbarchard): rewrite with row functions
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// RGB orientation - bottom up
uint8* out = dst_frame + dst_stride_frame * (height - 1);
uint8* out2 = out - dst_stride_frame;
int tmp_r, tmp_g, tmp_b;
const uint8 *y1,*y2, *u, *v;
y1 = src_y;
y2 = y1 + src_stride_y;
u = src_u;
v = src_v;
int h, w;
for (h = ((height + 1) >> 1); h > 0; h--) {
// 2 rows at a time, 2 y's at a time
for (w = 0; w < ((width + 1) >> 1); w++) {
// Vertical and horizontal sub-sampling
// Convert to RGB888 and re-scale to 4 bits
tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] =(uint8)((Clip(tmp_g) & 0xf0) + (Clip(tmp_b) >> 4));
out[1] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
out[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
out2[1] = (uint8) (0xf0 + (Clip(tmp_r) >> 4));
tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
out2[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
out += 4;
out2 += 4;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_stride_y - width;
y2 += 2 * src_stride_y - width;
u += src_stride_u - ((width + 1) >> 1);
v += src_stride_v - ((width + 1) >> 1);
out -= (dst_stride_frame + width) * 2;
out2 -= (dst_stride_frame + width) * 2;
} // end height for
return 0;
}
// FourCC RGBP little endian rgb565
// TODO(fbarchard): rewrite with row functions
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
uint16* out2 = out - dst_stride_frame;
int tmp_r, tmp_g, tmp_b;
const uint8* y1,* y2, * u, * v;
y1 = src_y;
y2 = y1 + src_stride_y;
u = src_u;
v = src_v;
int h, w;
for (h = ((height + 1) >> 1); h > 0; h--){
// 2 rows at a time, 2 y's at a time
for (w = 0; w < ((width + 1) >> 1); w++){
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565
tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
& 0xfc) << 3) + (Clip(tmp_b) >> 3);
tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
& 0xfc) << 3) + (Clip(tmp_b ) >> 3);
tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
& 0xfc) << 3) + (Clip(tmp_b) >> 3);
tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
& 0xfc) << 3) + (Clip(tmp_b) >> 3);
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
y1 += 2 * src_stride_y - width;
y2 += 2 * src_stride_y - width;
u += src_stride_u - ((width + 1) >> 1);
v += src_stride_v - ((width + 1) >> 1);
out -= 2 * dst_stride_frame + width;
out2 -= 2 * dst_stride_frame + width;
}
return 0;
}
// FourCC RGBO little endian rgb565
// TODO(fbarchard): rewrite with row functions
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
uint16* out2 = out - dst_stride_frame ;
int32 tmp_r, tmp_g, tmp_b;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_y;
y2 = y1 + src_stride_y;
u = src_u;
v = src_v;
for (h = ((height + 1) >> 1); h > 0; h--){
// 2 rows at a time, 2 y's at a time
for (w = 0; w < ((width + 1) >> 1); w++){
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 555
// 3. Add 1 for alpha value
tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
y1 += 2 * src_stride_y - width;
y2 += 2 * src_stride_y - width;
u += src_stride_u - ((width + 1) >> 1);
v += src_stride_v - ((width + 1) >> 1);
out -= 2 * dst_stride_frame + width;
out2 -= 2 * dst_stride_frame + width;
}
return 0;
}
// YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
......@@ -685,7 +318,7 @@ loop0:
return 0;
}
#if HAVE_NV12TORGB565
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_frame, int dst_stride_frame,
......@@ -754,6 +387,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
}
return 0;
}
#endif
// TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
......@@ -952,27 +586,37 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
src_frame = src_frame + (height - 1) * src_stride_frame;
src_stride_frame = -src_stride_frame;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RGB24TOYROW_SSSE3)
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
} else
#endif
{
RGB24ToARGBRow = RGB24ToARGBRow_C;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = RGB24ToYRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
} else
#endif
{
ARGBToYRow = RGB24ToYRow_C;
ARGBToYRow = ARGBToYRow_C;
}
#if defined(HAS_RGB24TOUVROW_SSSE3)
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = RGB24ToUVRow_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
} else
#endif
{
......@@ -980,17 +624,20 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
}
for (int y = 0; y < (height - 1); y += 2) {
ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
RGB24ToARGBRow(src_frame, row, width);
RGB24ToARGBRow(src_frame + src_stride_frame, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
src_frame += src_stride_frame * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
RGB24ToARGBRow(src_frame, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
}
return 0;
}
......@@ -1005,27 +652,37 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
src_frame = src_frame + (height - 1) * src_stride_frame;
src_stride_frame = -src_stride_frame;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RAWTOYROW_SSSE3)
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
} else
#endif
{
RAWToARGBRow = RAWToARGBRow_C;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = RAWToYRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
} else
#endif
{
ARGBToYRow = RAWToYRow_C;
ARGBToYRow = ARGBToYRow_C;
}
#if defined(HAS_RAWTOUVROW_SSSE3)
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = RAWToUVRow_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
} else
#endif
{
......@@ -1033,17 +690,20 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
}
for (int y = 0; y < (height - 1); y += 2) {
ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
RAWToARGBRow(src_frame, row, width);
RAWToARGBRow(src_frame + src_stride_frame, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
src_frame += src_stride_frame * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
RAWToARGBRow(src_frame, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
}
return 0;
}
......@@ -1058,27 +718,37 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
src_frame = src_frame + (height - 1) * src_stride_frame;
src_stride_frame = -src_stride_frame;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RGB565TOYROW_SSSE3)
#if defined(HAS_RGB565TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
} else
#endif
{
RGB565ToARGBRow = RGB565ToARGBRow_C;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = RGB565ToYRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
} else
#endif
{
ARGBToYRow = RGB565ToYRow_C;
ARGBToYRow = ARGBToYRow_C;
}
#if defined(HAS_RGB565TOUVROW_SSSE3)
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = RGB565ToUVRow_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
} else
#endif
{
......@@ -1086,52 +756,65 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
}
for (int y = 0; y < (height - 1); y += 2) {
ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
RGB565ToARGBRow(src_frame, row, width);
RGB565ToARGBRow(src_frame + src_stride_frame, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
src_frame += src_stride_frame * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
RGB565ToARGBRow(src_frame, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
}
return 0;
}
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (height < 0) {
height = -height;
src_frame = src_frame + (height - 1) * src_stride_frame;
src_stride_frame = -src_stride_frame;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ARGB1555TOYROW_SSSE3)
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
} else
#endif
{
ARGB1555ToARGBRow = ARGB1555ToARGBRow_C;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGB1555ToYRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
} else
#endif
{
ARGBToYRow = ARGB1555ToYRow_C;
ARGBToYRow = ARGBToYRow_C;
}
#if defined(HAS_ARGB1555TOUVROW_SSSE3)
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGB1555ToUVRow_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
} else
#endif
{
......@@ -1139,52 +822,65 @@ int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
}
for (int y = 0; y < (height - 1); y += 2) {
ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
ARGB1555ToARGBRow(src_frame, row, width);
ARGB1555ToARGBRow(src_frame + src_stride_frame, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
src_frame += src_stride_frame * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
ARGB1555ToARGBRow(src_frame, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
}
return 0;
}
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (height < 0) {
height = -height;
src_frame = src_frame + (height - 1) * src_stride_frame;
src_stride_frame = -src_stride_frame;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ARGB4444TOYROW_SSSE3)
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
} else
#endif
{
ARGB4444ToARGBRow = ARGB4444ToARGBRow_C;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGB4444ToYRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
} else
#endif
{
ARGBToYRow = ARGB4444ToYRow_C;
ARGBToYRow = ARGBToYRow_C;
}
#if defined(HAS_ARGB4444TOUVROW_SSSE3)
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGB4444ToUVRow_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
} else
#endif
{
......@@ -1192,17 +888,20 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
}
for (int y = 0; y < (height - 1); y += 2) {
ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
ARGB4444ToARGBRow(src_frame, row, width);
ARGB4444ToARGBRow(src_frame + src_stride_frame, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
src_frame += src_stride_frame * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
ARGBToYRow(src_frame, dst_y, width);
ARGB4444ToARGBRow(src_frame, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
}
return 0;
}
......
......@@ -1621,6 +1621,217 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
return 0;
}
// Convert I420 to RGB565.
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToRGB565Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_SSSE3;
} else
#endif
{
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToRGB565Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB1555.
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToARGB1555Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTOARGB1555ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_SSSE3;
} else
#endif
{
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToARGB1555Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB4444.
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToARGB4444Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTOARGB4444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTOARGB4444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_SSSE3;
} else
#endif
{
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToARGB4444Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RGB24.
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToRGB24Row)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_SSSE3;
} else
#endif
{
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToRGB24Row(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RAW.
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*FastConvertYUVToRAWRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYUVTORAWROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_NEON;
} else
#elif defined(HAS_FASTCONVERTYUVTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_SSSE3;
} else
#endif
{
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_C;
}
for (int y = 0; y < height; ++y) {
FastConvertYUVToRAWRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......@@ -1875,31 +2086,31 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
return 0;
}
// Convert BG24 to ARGB.
int BG24ToARGB(const uint8* src_bg24, int src_stride_bg24,
// Convert RGB24 to ARGB.
int BG24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (height < 0) {
height = -height;
src_bg24 = src_bg24 + (height - 1) * src_stride_bg24;
src_stride_bg24 = -src_stride_bg24;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
void (*BG24ToARGBRow)(const uint8* src_bg24, uint8* dst_argb, int pix);
#if defined(HAS_BG24TOARGBROW_SSSE3)
void (*RGB24ToARGBRow)(const uint8* src_rgb24, uint8* dst_argb, int pix);
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_bg24, 16) && IS_ALIGNED(src_stride_bg24, 16) &&
IS_ALIGNED(src_rgb24, 16) && IS_ALIGNED(src_stride_rgb24, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
BG24ToARGBRow = BG24ToARGBRow_SSSE3;
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
} else
#endif
{
BG24ToARGBRow = BG24ToARGBRow_C;
RGB24ToARGBRow = RGB24ToARGBRow_C;
}
for (int y = 0; y < height; ++y) {
BG24ToARGBRow(src_bg24, dst_argb, width);
src_bg24 += src_stride_bg24;
RGB24ToARGBRow(src_rgb24, dst_argb, width);
src_rgb24 += src_stride_rgb24;
dst_argb += dst_stride_argb;
}
return 0;
......
......@@ -25,7 +25,7 @@
!defined(YUV_DISABLE_ASM)
#define HAS_ABGRTOARGBROW_SSSE3
#define HAS_BGRATOARGBROW_SSSE3
#define HAS_BG24TOARGBROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
......@@ -48,6 +48,11 @@
#define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
#define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
#define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
#define HAS_FASTCONVERTYUVTORGB565ROW_SSSE3
#define HAS_FASTCONVERTYUVTOARGB1555ROW_SSSE3
#define HAS_FASTCONVERTYUVTOARGB4444ROW_SSSE3
#define HAS_FASTCONVERTYUVTORGB24ROW_SSSE3
#define HAS_FASTCONVERTYUVTORAWROW_SSSE3
#define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
#define HAS_REVERSE_ROW_SSSE3
#define HAS_REVERSE_ROW_SSE2
......@@ -64,6 +69,11 @@
#define HAS_FASTCONVERTYUVTOARGBROW_NEON
#define HAS_FASTCONVERTYUVTOBGRAROW_NEON
#define HAS_FASTCONVERTYUVTOABGRROW_NEON
#define HAS_FASTCONVERTYUVTORGB565ROW_NEON
#define HAS_FASTCONVERTYUVTOARGB1555ROW_NEON
#define HAS_FASTCONVERTYUVTOARGB4444ROW_NEON
#define HAS_FASTCONVERTYUVTORGB24ROW_NEON
#define HAS_FASTCONVERTYUVTORAWROW_NEON
#endif
#ifdef __cplusplus
......@@ -92,6 +102,41 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
uint8* rgb_buf,
int width);
#endif
// NEON row converters from separate Y, U and V buffers to packed 16-bit and
// 24-bit RGB formats. Each prototype is visible only when its matching
// HAS_FASTCONVERTYUVTO*ROW_NEON macro is defined.
#ifdef HAS_FASTCONVERTYUVTORGB565ROW_NEON
void FastConvertYUVToRGB565Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGB1555ROW_NEON
void FastConvertYUVToARGB1555Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGB4444ROW_NEON
void FastConvertYUVToARGB4444Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTORGB24ROW_NEON
void FastConvertYUVToRGB24Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTORAWROW_NEON
void FastConvertYUVToRAWRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
......@@ -104,7 +149,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#endif
#if defined(HAS_BG24TOARGBROW_SSSE3) && defined(HAS_ARGBTOYROW_SSSE3)
#if defined(HAS_RGB24TOARGBROW_SSSE3) && defined(HAS_ARGBTOYROW_SSSE3)
#define HASRGB24TOYROW_SSSE3
#endif
#ifdef HASRGB24TOYROW_SSSE3
......@@ -163,11 +208,11 @@ void ARGB1555ToUVRow_C(const uint8* src_argb0, int src_stride_argb,
void ARGB4444ToUVRow_C(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#ifdef HAS_BG24TOARGBROW_SSSE3
#ifdef HAS_RGB24TOARGBROW_SSSE3
void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix);
void RAWToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix);
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
// TODO(fbarchard): SSE2 565 etc
//void RGB565ToARGBRow_SSE2(const uint8* src_rgb, uint8* dst_argb, int pix);
//void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
......@@ -177,8 +222,8 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
#endif
void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix);
void RAWToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix);
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RAWToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
......@@ -222,6 +267,36 @@ void FastConvertYUVToABGRRow_C(const uint8* y_buf,
uint8* rgb_buf,
int width);
// Portable C row converters from separate Y, U and V buffers to packed RGB.
// Each call converts `width` pixels of one row into rgb_buf; the RGB565,
// ARGB1555 and ARGB4444 variants write 2 bytes per pixel, the RGB24 and RAW
// variants write 3 bytes per pixel.
void FastConvertYUVToRGB565Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB1555Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB4444Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB24Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRAWRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -293,6 +368,35 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
uint8* rgb_buf,
int width);
// SSSE3 counterparts of the C row converters to RGB565, ARGB1555, ARGB4444,
// RGB24 and RAW. Parameters match the C versions: one row of Y, U and V in,
// `width` packed pixels out in rgb_buf.
void FastConvertYUVToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB1555Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGB4444Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
......
......@@ -63,17 +63,17 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix) {
}
}
void BG24ToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix) {
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix) {
for (int x = 0; x < pix; ++x) {
uint8 b = src_bg24[0];
uint8 g = src_bg24[1];
uint8 r = src_bg24[2];
uint8 b = src_rgb24[0];
uint8 g = src_rgb24[1];
uint8 r = src_rgb24[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = 255u;
dst_argb += 4;
src_bg24 += 3;
src_rgb24 += 3;
}
}
......@@ -100,7 +100,7 @@ void ARGB1555ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix) {
dst_argb[0] = (b << 3) | (b >> 2);
dst_argb[1] = (g << 3) | (g >> 2);
dst_argb[2] = (r << 3) | (r >> 2);
dst_argb[3] = a ? 255u : 0u;
dst_argb[3] = -a;
dst_argb += 4;
src_rgb += 2;
}
......@@ -121,10 +121,71 @@ void ARGB4444ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix) {
}
}
// C versions do the same
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix) {
for (int x = 0; x < pix; ++x) {
uint8 b = src_argb[0];
uint8 g = src_argb[1];
uint8 r = src_argb[2];
dst_rgb[0] = b;
dst_rgb[1] = g;
dst_rgb[2] = r;
dst_rgb += 3;
src_argb += 4;
}
}
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix) {
for (int x = 0; x < pix; ++x) {
uint8 b = src_argb[0];
uint8 g = src_argb[1];
uint8 r = src_argb[2];
dst_rgb[0] = r;
dst_rgb[1] = g;
dst_rgb[2] = b;
dst_rgb += 3;
src_argb += 4;
}
}
// TODO(fbarchard): support big endian CPU
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix) {
for (int x = 0; x < pix; ++x) {
uint8 b = src_argb[0] >> 3;
uint8 g = src_argb[1] >> 2;
uint8 r = src_argb[2] >> 3;
*reinterpret_cast<uint16*>(dst_rgb) = (r << 11) | (g << 5) | b;
dst_rgb += 2;
src_argb += 4;
}
}
// Pack `pix` ARGB pixels into 16-bit ARGB1555 words: 1 bit of alpha in bit
// 15, then the top 5 bits each of red (bits 10-14), green (bits 5-9) and
// blue (bits 0-4). Each word is stored in the CPU's native byte order.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix) {
  for (int x = 0; x < pix; ++x) {
    uint8 b = src_argb[0] >> 3;
    uint8 g = src_argb[1] >> 3;
    uint8 r = src_argb[2] >> 3;
    // Alpha is byte 3 of each source pixel (byte 2 is red); keep its top bit.
    uint8 a = src_argb[3] >> 7;
    *reinterpret_cast<uint16*>(dst_rgb) = (a << 15) | (r << 10) | (g << 5) | b;
    dst_rgb += 2;
    src_argb += 4;
  }
}
// Pack `pix` ARGB pixels into 16-bit ARGB4444 words: the top 4 bits each of
// alpha (bits 12-15), red (bits 8-11), green (bits 4-7) and blue (bits 0-3).
// Each word is stored in the CPU's native byte order.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix) {
  for (int x = 0; x < pix; ++x) {
    uint8 b = src_argb[0] >> 4;
    uint8 g = src_argb[1] >> 4;
    uint8 r = src_argb[2] >> 4;
    // Alpha is byte 3 of each source pixel (byte 2 is red); keep its top
    // nibble.
    uint8 a = src_argb[3] >> 4;
    *reinterpret_cast<uint16*>(dst_rgb) = (a << 12) | (r << 8) | (g << 4) | b;
    dst_rgb += 2;
    src_argb += 4;
  }
}
void RGB24ToYRow_C(const uint8* src_argb, uint8* dst_y, int pix) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
BG24ToARGBRow_C(src_argb, row, pix);
RGB24ToARGBRow_C(src_argb, row, pix);
ARGBToYRow_C(row, dst_y, pix);
}
......@@ -155,8 +216,8 @@ void ARGB4444ToYRow_C(const uint8* src_argb, uint8* dst_y, int pix) {
void RGB24ToUVRow_C(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int pix) {
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
BG24ToARGBRow_C(src_argb, row, pix);
BG24ToARGBRow_C(src_argb + src_stride_argb, row + kMaxStride, pix);
RGB24ToARGBRow_C(src_argb, row, pix);
RGB24ToARGBRow_C(src_argb + src_stride_argb, row + kMaxStride, pix);
ARGBToUVRow_C(row, kMaxStride, dst_u, dst_v, pix);
}
......@@ -242,10 +303,9 @@ MAKEROWY(BGRA,1,2,3)
MAKEROWY(ABGR,0,1,2)
#if defined(HAS_RAWTOYROW_SSSE3)
void RGB24ToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
BG24ToARGBRow_SSSE3(src_argb, row, pix);
RGB24ToARGBRow_SSSE3(src_argb, row, pix);
ARGBToYRow_SSSE3(row, dst_y, pix);
}
......@@ -284,8 +344,8 @@ void ARGB4444ToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
void RGB24ToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int pix) {
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
BG24ToARGBRow_SSSE3(src_argb, row, pix);
BG24ToARGBRow_SSSE3(src_argb + src_stride_argb, row + kMaxStride, pix);
RGB24ToARGBRow_SSSE3(src_argb, row, pix);
RGB24ToARGBRow_SSSE3(src_argb + src_stride_argb, row + kMaxStride, pix);
ARGBToUVRow_SSSE3(row, kMaxStride, dst_u, dst_v, pix);
}
......@@ -332,8 +392,8 @@ void ARGB4444ToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
void RGB24ToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int pix) {
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
BG24ToARGBRow_SSSE3(src_argb, row, pix);
BG24ToARGBRow_SSSE3(src_argb + src_stride_argb, row + kMaxStride, pix);
RGB24ToARGBRow_SSSE3(src_argb, row, pix);
RGB24ToARGBRow_SSSE3(src_argb + src_stride_argb, row + kMaxStride, pix);
ARGBToUVRow_C(row, kMaxStride, dst_u, dst_v, pix);
}
......@@ -347,6 +407,110 @@ void RAWToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
#endif
#endif
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
// TODO(fbarchard): ARGBToRGB565Row_SSSE3
void FastConvertYUVToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
ARGBToRGB565Row_C(row, rgb_buf, width);
}
void FastConvertYUVToARGB1555Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
ARGBToARGB1555Row_C(row, rgb_buf, width);
}
void FastConvertYUVToARGB4444Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
ARGBToARGB4444Row_C(row, rgb_buf, width);
}
void FastConvertYUVToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
ARGBToRGB24Row_C(row, rgb_buf, width);
}
void FastConvertYUVToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
ARGBToRAWRow_C(row, rgb_buf, width);
}
#endif
#ifdef HAS_FASTCONVERTYUVTOARGBROW_NEON
// TODO(fbarchard): ARGBToRGB565Row_NEON
void FastConvertYUVToRGB565Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_NEON(y_buf, u_buf, v_buf, row, width);
ARGBToRGB565Row_C(row, rgb_buf, width);
}
void FastConvertYUVToARGB1555Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_NEON(y_buf, u_buf, v_buf, row, width);
ARGBToARGB1555Row_C(row, rgb_buf, width);
}
void FastConvertYUVToARGB4444Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_NEON(y_buf, u_buf, v_buf, row, width);
ARGBToARGB4444Row_C(row, rgb_buf, width);
}
void FastConvertYUVToRGB24Row_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_NEON(y_buf, u_buf, v_buf, row, width);
ARGBToRGB24Row_C(row, rgb_buf, width);
}
void FastConvertYUVToRAWRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_NEON(y_buf, u_buf, v_buf, row, width);
ARGBToRAWRow_C(row, rgb_buf, width);
}
#endif
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix) {
// Copy a Y to RGB.
......@@ -359,7 +523,7 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix) {
}
}
// C reference code that mimic the YUV assembly.
// C reference code that mimics the YUV assembly.
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
......@@ -465,6 +629,132 @@ void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
}
}
// Convert one Y, U, V sample to a packed 16-bit pixel stored at rgb_buf.
// Each 8-bit channel (alpha starts as 255) is first shifted right by its
// ar/rr/gr/br count to drop low bits, then shifted left by its
// ashift/rshift/gshift/bshift count into position and OR-ed into the word.
static __inline void YuvPixel16(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
                                int ar, int rr,
                                int gr, int br,
                                int ashift, int rshift,
                                int gshift, int bshift) {
  const int32 luma = (static_cast<int32>(y) - 16) * YG;
  // Channel sums before the final >> 6 scaling and clamp.
  const int32 blue_sum = static_cast<int32>((u * UB + v * VB) - (BB) + luma);
  const int32 green_sum = static_cast<int32>((u * UG + v * VG) - (BG) + luma);
  const int32 red_sum = static_cast<int32>((u * UR + v * VR) - (BR) + luma);

  const uint32 alpha = 255u >> ar;
  const uint32 blue = Clip(blue_sum >> 6) >> br;
  const uint32 green = Clip(green_sum >> 6) >> gr;
  const uint32 red = Clip(red_sum >> 6) >> rr;

  uint16* out = reinterpret_cast<uint16*>(rgb_buf);
  *out = (blue << bshift) | (green << gshift) | (red << rshift) |
         (alpha << ashift);
}
// Convert `width` pixels of Y with half-width U and V to RGB565 (2 bytes per
// pixel): no alpha, red at bit 11, green at bit 5, blue at bit 0.
void FastConvertYUVToRGB565Row_C(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
                                 uint8* rgb_buf,
                                 int width) {
  // Whole pixel pairs: both pixels of a pair use the same U and V sample.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel16(y_buf[0], *u_buf, *v_buf, rgb_buf,
               8, 3, 2, 3, 0, 11, 5, 0);
    YuvPixel16(y_buf[1], *u_buf, *v_buf, rgb_buf + 2,
               8, 3, 2, 3, 0, 11, 5, 0);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 4;  // Two 16-bit pixels.
  }
  // Odd width: one trailing pixel remains.
  if ((width & 1) != 0) {
    YuvPixel16(*y_buf, *u_buf, *v_buf, rgb_buf,
               8, 3, 2, 3, 0, 11, 5, 0);
  }
}
// Convert `width` pixels of Y with half-width U and V to ARGB1555 (2 bytes
// per pixel): alpha at bit 15, red at bit 10, green at bit 5, blue at bit 0.
void FastConvertYUVToARGB1555Row_C(const uint8* y_buf,
                                   const uint8* u_buf,
                                   const uint8* v_buf,
                                   uint8* rgb_buf,
                                   int width) {
  // Whole pixel pairs: both pixels of a pair use the same U and V sample.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel16(y_buf[0], *u_buf, *v_buf, rgb_buf,
               7, 3, 3, 3, 15, 10, 5, 0);
    YuvPixel16(y_buf[1], *u_buf, *v_buf, rgb_buf + 2,
               7, 3, 3, 3, 15, 10, 5, 0);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 4;  // Two 16-bit pixels.
  }
  // Odd width: one trailing pixel remains.
  if ((width & 1) != 0) {
    YuvPixel16(*y_buf, *u_buf, *v_buf, rgb_buf,
               7, 3, 3, 3, 15, 10, 5, 0);
  }
}
// Convert `width` pixels of Y with half-width U and V to ARGB4444 (2 bytes
// per pixel): alpha at bit 12, red at bit 8, green at bit 4, blue at bit 0.
void FastConvertYUVToARGB4444Row_C(const uint8* y_buf,
                                   const uint8* u_buf,
                                   const uint8* v_buf,
                                   uint8* rgb_buf,
                                   int width) {
  // Whole pixel pairs: both pixels of a pair use the same U and V sample.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel16(y_buf[0], *u_buf, *v_buf, rgb_buf,
               4, 4, 4, 4, 12, 8, 4, 0);
    YuvPixel16(y_buf[1], *u_buf, *v_buf, rgb_buf + 2,
               4, 4, 4, 4, 12, 8, 4, 0);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 4;  // Two 16-bit pixels.
  }
  // Odd width: one trailing pixel remains.
  if ((width & 1) != 0) {
    YuvPixel16(*y_buf, *u_buf, *v_buf, rgb_buf,
               4, 4, 4, 4, 12, 8, 4, 0);
  }
}
// Convert one Y, U, V sample to three 8-bit channels and store them at the
// byte offsets roffset, goffset and boffset from rgb_buf.
static __inline void YuvPixel24(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
                                int roffset, int goffset, int boffset) {
  const int32 luma = (static_cast<int32>(y) - 16) * YG;
  // Channel sums before the final >> 6 scaling and clamp.
  const int32 blue_sum = static_cast<int32>((u * UB + v * VB) - (BB) + luma);
  const int32 green_sum = static_cast<int32>((u * UG + v * VG) - (BG) + luma);
  const int32 red_sum = static_cast<int32>((u * UR + v * VR) - (BR) + luma);

  const uint32 blue = Clip(blue_sum >> 6);
  const uint32 green = Clip(green_sum >> 6);
  const uint32 red = Clip(red_sum >> 6);

  // Store order is kept as blue, green, red.
  rgb_buf[boffset] = static_cast<uint8>(blue);
  rgb_buf[goffset] = static_cast<uint8>(green);
  rgb_buf[roffset] = static_cast<uint8>(red);
}
// Convert `width` pixels of Y with half-width U and V to RGB24 (3 bytes per
// pixel): blue at byte 0, green at byte 1, red at byte 2.
void FastConvertYUVToRGB24Row_C(const uint8* y_buf,
                                const uint8* u_buf,
                                const uint8* v_buf,
                                uint8* rgb_buf,
                                int width) {
  // Whole pixel pairs: both pixels of a pair use the same U and V sample.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel24(y_buf[0], *u_buf, *v_buf, rgb_buf, 2, 1, 0);
    YuvPixel24(y_buf[1], *u_buf, *v_buf, rgb_buf + 3, 2, 1, 0);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 6;  // Two 24-bit pixels.
  }
  // Odd width: one trailing pixel remains.
  if ((width & 1) != 0) {
    YuvPixel24(*y_buf, *u_buf, *v_buf, rgb_buf, 2, 1, 0);
  }
}
// Convert `width` pixels of Y with half-width U and V to RAW (3 bytes per
// pixel): red at byte 0, green at byte 1, blue at byte 2.
void FastConvertYUVToRAWRow_C(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  // Whole pixel pairs: both pixels of a pair use the same U and V sample.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel24(y_buf[0], *u_buf, *v_buf, rgb_buf, 0, 1, 2);
    YuvPixel24(y_buf[1], *u_buf, *v_buf, rgb_buf + 3, 0, 1, 2);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 6;  // Two 24-bit pixels.
  }
  // Odd width: one trailing pixel remains.
  if ((width & 1) != 0) {
    YuvPixel24(*y_buf, *u_buf, *v_buf, rgb_buf, 0, 1, 2);
  }
}
void FastConvertYToARGBRow_C(const uint8* y_buf,
uint8* rgb_buf,
int width) {
......
......@@ -49,8 +49,8 @@ CONST uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
};
// Shuffle table for converting BG24 to ARGB.
CONST uvec8 kShuffleMaskBG24ToARGB = {
// Shuffle table for converting RGB24 to ARGB.
CONST uvec8 kShuffleMaskRGB24ToARGB = {
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
};
......@@ -143,7 +143,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
);
}
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
"pslld $0x18,%%xmm5 \n"
......@@ -172,10 +172,10 @@ void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
"lea 0x40(%1),%1 \n"
"sub $0x10,%2 \n"
"ja 1b \n"
: "+r"(src_bg24), // %0
: "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
: "m"(kShuffleMaskBG24ToARGB) // %3
: "m"(kShuffleMaskRGB24ToARGB) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
......
......@@ -65,8 +65,8 @@ static const uvec8 kAddUV128 = {
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
};
// Shuffle table for converting BG24 to ARGB.
static const uvec8 kShuffleMaskBG24ToARGB = {
// Shuffle table for converting RGB24 to ARGB.
static const uvec8 kShuffleMaskRGB24ToARGB = {
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
};
......@@ -153,14 +153,14 @@ __asm {
}
__declspec(naked)
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
__asm {
mov eax, [esp + 4] // src_bg24
mov eax, [esp + 4] // src_rgb24
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // pix
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
movdqa xmm4, kShuffleMaskBG24ToARGB
movdqa xmm4, kShuffleMaskRGB24ToARGB
convertloop:
movdqa xmm0, [eax]
......@@ -229,6 +229,7 @@ __asm {
}
}
// TODO(fbarchard): Port ARGB4444ToARGBRow_SSE2 to gcc
__declspec(naked)
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
int pix) {
......@@ -243,8 +244,8 @@ __asm {
mov ecx, [esp + 12] // pix
convertloop:
movq xmm0, qword ptr [eax] // fetch 4 pixels of bgra4444
lea eax, [eax + 8]
movdqa xmm0, qword ptr [eax] // fetch 8 pixels of bgra4444
lea eax, [eax + 16]
movdqa xmm2, xmm0
pand xmm0, xmm4 // mask low nibbles
pand xmm2, xmm5 // mask high nibbles
......@@ -254,10 +255,13 @@ __asm {
psrlw xmm3, 4
por xmm0, xmm1
por xmm2, xmm3
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2
punpckhbw xmm1, xmm2
movdqa [edx], xmm0 // store 4 pixels of ARGB
lea edx, [edx + 16]
sub ecx, 4
movdqa [edx + 16], xmm1 // store next 4 pixels of ARGB
lea edx, [edx + 32]
sub ecx, 8
ja convertloop
ret
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment