Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
aeae5d53
Commit
aeae5d53
authored
Feb 18, 2003
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimize
Originally committed as revision 9455 to
svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent
64094f37
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
77 additions
and
2 deletions
+77
-2
rgb2rgb.c
postproc/rgb2rgb.c
+5
-0
rgb2rgb_template.c
postproc/rgb2rgb_template.c
+72
-2
No files found.
postproc/rgb2rgb.c
View file @
aeae5d53
...
...
@@ -28,6 +28,11 @@ static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFU
/*
 * 64-bit constants for the MMX pixel-format converters.  Every value is
 * 8-byte aligned so it can be used directly as a quadword memory operand.
 */

/* 32-bit pixels, two per quadword: single-byte selectors. */
static const uint64_t mask32g    __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; /* byte 1 of each dword */
static const uint64_t mask32r    __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; /* byte 2 of each dword */
static const uint64_t mask32     __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL; /* low three bytes of each dword */

/*
 * 32 -> 16/15 bit packing helpers.
 * mask3216br keeps the top five bits (0xF8) of the low byte of every 16-bit
 * word; the "g" masks keep the top six (0xFC) or five (0xF8) bits of byte 1
 * of each dword.
 */
static const uint64_t mask3216br __attribute__((aligned(8))) = 0x00F800F800F800F8ULL;
static const uint64_t mask3216g  __attribute__((aligned(8))) = 0x0000FC000000FC00ULL;
static const uint64_t mask3215g  __attribute__((aligned(8))) = 0x0000F8000000F800ULL;

/*
 * Per-dword word pairs (low word, high word): (0x0004, 0x2000) and
 * (0x0008, 0x2000).  NOTE(review): presumably intended as pmaddwd
 * multipliers applied after masking with mask3216br -- confirm against
 * the converters in rgb2rgb_template.c.
 */
static const uint64_t mul3216    __attribute__((aligned(8))) = 0x2000000420000004ULL;
static const uint64_t mul3215    __attribute__((aligned(8))) = 0x2000000820000008ULL;

/* Every-third-byte selectors within a quadword. */
static const uint64_t mask24b    __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL; /* bytes 0, 3, 6 */
static const uint64_t mask24g    __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL; /* bytes 1, 4, 7 */
static const uint64_t mask24r    __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL; /* bytes 2, 5 */
...
...
postproc/rgb2rgb_template.c
View file @
aeae5d53
...
...
@@ -318,12 +318,46 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
uint16_t
*
d
=
(
uint16_t
*
)
dst
;
end
=
s
+
src_size
;
#ifdef HAVE_MMX
mm_end
=
end
-
15
;
#if 1 // is faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it's slightly faster)
asm
volatile
(
"movq %3, %%mm5
\n\t
"
"movq %4, %%mm6
\n\t
"
"movq %5, %%mm7
\n\t
"
".balign 16
\n\t
"
"1:
\n\t
"
PREFETCH
" 32(%1)
\n\t
"
"movd (%1), %%mm0
\n\t
"
"movd 4(%1), %%mm3
\n\t
"
"punpckldq 8(%1), %%mm0
\n\t
"
"punpckldq 12(%1), %%mm3
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm3, %%mm4
\n\t
"
"pand %%mm6, %%mm0
\n\t
"
"pand %%mm6, %%mm3
\n\t
"
"pmaddwd %%mm7, %%mm0
\n\t
"
"pmaddwd %%mm7, %%mm3
\n\t
"
"pand %%mm5, %%mm1
\n\t
"
"pand %%mm5, %%mm4
\n\t
"
"por %%mm1, %%mm0
\n\t
"
"por %%mm4, %%mm3
\n\t
"
"psrld $5, %%mm0
\n\t
"
"pslld $11, %%mm3
\n\t
"
"por %%mm3, %%mm0
\n\t
"
MOVNTQ
" %%mm0, (%0)
\n\t
"
"addl $16, %1
\n\t
"
"addl $8, %0
\n\t
"
"cmpl %2, %1
\n\t
"
" jb 1b
\n\t
"
:
"+r"
(
d
),
"+r"
(
s
)
:
"r"
(
mm_end
),
"m"
(
mask3216g
),
"m"
(
mask3216br
),
"m"
(
mul3216
)
);
#else
__asm
__volatile
(
PREFETCH
" %0"
::
"m"
(
*
src
)
:
"memory"
);
__asm
__volatile
(
"movq %0, %%mm7
\n\t
"
"movq %1, %%mm6
\n\t
"
::
"m"
(
red_16mask
),
"m"
(
green_16mask
));
mm_end
=
end
-
15
;
while
(
s
<
mm_end
)
{
__asm
__volatile
(
...
...
@@ -359,6 +393,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
d
+=
4
;
s
+=
16
;
}
#endif
__asm
__volatile
(
SFENCE
:::
"memory"
);
__asm
__volatile
(
EMMS
:::
"memory"
);
#endif
...
...
@@ -441,12 +476,46 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
uint16_t
*
d
=
(
uint16_t
*
)
dst
;
end
=
s
+
src_size
;
#ifdef HAVE_MMX
mm_end
=
end
-
15
;
#if 1 // is faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it's slightly faster)
asm
volatile
(
"movq %3, %%mm5
\n\t
"
"movq %4, %%mm6
\n\t
"
"movq %5, %%mm7
\n\t
"
".balign 16
\n\t
"
"1:
\n\t
"
PREFETCH
" 32(%1)
\n\t
"
"movd (%1), %%mm0
\n\t
"
"movd 4(%1), %%mm3
\n\t
"
"punpckldq 8(%1), %%mm0
\n\t
"
"punpckldq 12(%1), %%mm3
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm3, %%mm4
\n\t
"
"pand %%mm6, %%mm0
\n\t
"
"pand %%mm6, %%mm3
\n\t
"
"pmaddwd %%mm7, %%mm0
\n\t
"
"pmaddwd %%mm7, %%mm3
\n\t
"
"pand %%mm5, %%mm1
\n\t
"
"pand %%mm5, %%mm4
\n\t
"
"por %%mm1, %%mm0
\n\t
"
"por %%mm4, %%mm3
\n\t
"
"psrld $6, %%mm0
\n\t
"
"pslld $10, %%mm3
\n\t
"
"por %%mm3, %%mm0
\n\t
"
MOVNTQ
" %%mm0, (%0)
\n\t
"
"addl $16, %1
\n\t
"
"addl $8, %0
\n\t
"
"cmpl %2, %1
\n\t
"
" jb 1b
\n\t
"
:
"+r"
(
d
),
"+r"
(
s
)
:
"r"
(
mm_end
),
"m"
(
mask3215g
),
"m"
(
mask3216br
),
"m"
(
mul3215
)
);
#else
__asm
__volatile
(
PREFETCH
" %0"
::
"m"
(
*
src
)
:
"memory"
);
__asm
__volatile
(
"movq %0, %%mm7
\n\t
"
"movq %1, %%mm6
\n\t
"
::
"m"
(
red_15mask
),
"m"
(
green_15mask
));
mm_end
=
end
-
15
;
while
(
s
<
mm_end
)
{
__asm
__volatile
(
...
...
@@ -482,6 +551,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
d
+=
4
;
s
+=
16
;
}
#endif
__asm
__volatile
(
SFENCE
:::
"memory"
);
__asm
__volatile
(
EMMS
:::
"memory"
);
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment