Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
0e730494
Commit
0e730494
authored
Oct 22, 2013
by
Daniel Kang
Committed by
Diego Biurrun
Oct 23, 2013
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter: x86: Port gradfun filter optimizations to yasm
Signed-off-by:
Diego Biurrun
<
diego@biurrun.de
>
parent
2c993e8b
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
207 additions
and
218 deletions
+207
-218
Makefile
libavfilter/x86/Makefile
+2
-1
vf_gradfun.asm
libavfilter/x86/vf_gradfun.asm
+110
-0
vf_gradfun.c
libavfilter/x86/vf_gradfun.c
+0
-217
vf_gradfun_init.c
libavfilter/x86/vf_gradfun_init.c
+95
-0
No files found.
libavfilter/x86/Makefile
View file @
0e730494
OBJS-$(CONFIG_GRADFUN_FILTER)
+=
x86/vf_gradfun.o
OBJS-$(CONFIG_GRADFUN_FILTER)
+=
x86/vf_gradfun
_init
.o
OBJS-$(CONFIG_HQDN3D_FILTER)
+=
x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume_init.o
OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif_init.o
YASM-OBJS-$(CONFIG_GRADFUN_FILTER)
+=
x86/vf_gradfun.o
YASM-OBJS-$(CONFIG_HQDN3D_FILTER)
+=
x86/vf_hqdn3d.o
YASM-OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume.o
YASM-OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif.o
libavfilter/x86/vf_gradfun.asm
0 → 100644
View file @
0e730494
;******************************************************************************
;* x86-optimized functions for gradfun filter
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA

; Eight 16-bit lanes of 0x7F; loaded into m6 by both gradfun_filter_line
; variants and subtracted from the per-pixel strength inside FILTER_LINE.
pw_7f: times 8 dw 0x7F
; Eight 16-bit lanes of 0xFF; used by BLUR_LINE as a mask that keeps the low
; byte of every 16-bit word.
pw_ff: times 8 dw 0xFF

SECTION .text
; FILTER_LINE dither
;
; Processes one register-width group of pixels at byte offset r0.
;   %1 = register holding the dither words to add to the pixels
; Register use (set up by the callers in this file):
;   r0 = running offset, r1 = dst, r2 = src, r3 = dc
;   m5 = thresh broadcast to every word, m6 = pw_7f, m7 = zero
; m0, m1 and m2 are clobbered.
%macro FILTER_LINE 1
    movh       m0, [r2+r0]      ; load source pixels (bytes)
    movh       m1, [r3+r0]      ; load dc words
    punpcklbw  m0, m7           ; zero-extend pixels to 16 bits
    punpcklwd  m1, m1           ; duplicate every dc word: one dc entry per two pixels
    psllw      m0, 7            ; pix <<= 7
    psubw      m1, m0           ; delta = dc - pix
    PABSW      m2, m1
    pmulhuw    m2, m5           ; m = abs(delta) * thresh >> 16
    psubw      m2, m6
    pminsw     m2, m7           ; m = -max(0, 127-m)
    pmullw     m2, m2           ; m = m*m
    psllw      m1, 2
    paddw      m0, %1           ; pix += dither
    pmulhw     m1, m2           ; m = m*m*delta >> 14
    paddw      m0, m1           ; pix += m
    psraw      m0, 7
    packuswb   m0, m0
    movh       [r1+r0], m0      ; dst = clip(pix>>7)
%endmacro
; MMXEXT version of gradfun_filter_line.
; Arguments, in the order of the C prototype in vf_gradfun_init.c:
;   r0 = x (negative byte offset counting up towards 0), r1 = dst, r2 = src,
;   r3 = dc, r4 = thresh, r5 = dithers
; Each FILTER_LINE handles 4 pixels; the loop alternates between the first
; four dither words (m3) and the next four (m4).
; The loop is bottom-tested, so one 4-pixel group is always processed, even
; when r0 is already >= 0 on entry.
INIT_MMX mmxext
cglobal gradfun_filter_line, 6, 6
    movh       m5, r4d
    pxor       m7, m7           ; m7 = 0
    pshufw     m5, m5, 0        ; broadcast thresh to all four words
    mova       m6, [pw_7f]
    mova       m3, [r5]         ; dithers[0..3]
    mova       m4, [r5+8]       ; dithers[4..7]
.loop:
    FILTER_LINE m3
    add        r0, 4
    jge .end                    ; offset reached 0: done after the first half
    FILTER_LINE m4
    add        r0, 4
    jl .loop
.end:
    REP_RET
; SSSE3 version of gradfun_filter_line.
; Arguments, in the order of the C prototype in vf_gradfun_init.c:
;   r0 = x (negative byte offset counting up towards 0), r1 = dst, r2 = src,
;   r3 = dc, r4 = thresh, r5 = dithers
; Each iteration handles 8 pixels using all eight dither words at once.
; The loop is bottom-tested, so one 8-pixel group is always processed.
INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8
    movd       m5, r4d
    pxor       m7, m7           ; m7 = 0
    pshuflw    m5, m5, 0        ; broadcast thresh across the low four words
    mova       m6, [pw_7f]
    punpcklqdq m5, m5           ; ...and copy them into the high four words
    mova       m4, [r5]         ; dithers[0..7]; mova requires an aligned address
.loop:
    FILTER_LINE m4
    add        r0, 8
    jl .loop
    REP_RET
; BLUR_LINE load_insn
;
; Emits gradfun_blur_line_<load_insn>; %1 is the instruction used to load the
; two source rows (movdqa for aligned rows, movdqu for unaligned ones).
; Arguments, in the order of the C prototypes in vf_gradfun_init.c:
;   r0 = x (negative byte offset counting up towards 0), r1 = buf, r2 = buf1,
;   r3 = dc, r4 = src1, r5 = src2
; Each iteration consumes 16 source bytes from each row and produces eight
; 16-bit results. buf, buf1 and dc are always accessed with aligned moves.
; The loop is bottom-tested, so one 16-byte group is always processed.
%macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8
    mova       m7, [pw_ff]
.loop:
    %1         m0, [r4+r0]      ; 16 bytes of the first source row
    %1         m1, [r5+r0]      ; 16 bytes of the second source row
    mova       m2, m0
    mova       m3, m1
    psrlw      m0, 8            ; odd bytes of row 1 as words
    psrlw      m1, 8            ; odd bytes of row 2 as words
    pand       m2, m7           ; even bytes of row 1 as words
    pand       m3, m7           ; even bytes of row 2 as words
    paddw      m0, m1
    paddw      m2, m3
    paddw      m0, m2           ; sum of each 2x2 group of source bytes
    paddw      m0, [r2+r0]      ; + buf1[x]
    mova       m1, [r1+r0]      ; previous buf[x]
    mova       [r1+r0], m0      ; buf[x] = new sum
    psubw      m0, m1
    mova       [r3+r0], m0      ; dc[x] = new sum - previous buf[x]
    add        r0, 16
    jl .loop
    REP_RET
%endmacro

INIT_XMM sse2
BLUR_LINE movdqa
BLUR_LINE movdqu
libavfilter/x86/vf_gradfun.c
deleted
100644 → 0
View file @
2c993e8b
/*
* Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavfilter/gradfun.h"
#if HAVE_INLINE_ASM
/* Eight 16-bit lanes of 0x7F, 16-byte aligned; passed as operand %6 to the
 * filter_line inline-asm kernels below. */
DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
/* Eight 16-bit lanes of 0xFF, 16-byte aligned; passed as operand %6 to the
 * blur_line inline-asm kernel, where it masks the low byte of each word. */
DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
#if HAVE_MMXEXT_INLINE
/*
 * MMXEXT inline-asm implementation of filter_line.
 *
 * The trailing (width & 3) pixels are handled by ff_gradfun_filter_line_c();
 * the remaining multiple-of-4 prefix is processed by the asm loop, 4 pixels
 * per step, alternating between dithers[0..3] (mm3) and dithers[4..7] (mm4).
 * All pointers are pre-advanced by width and indexed with the negative
 * counter x, which counts up towards 0.
 *
 * Asm operands: %0 = x, %1 = dst + width, %2 = src + width,
 * %3 = dc + width / 2, %4 = thresh, %5 = dithers, %6 = pw_7f.
 * The loop body runs before the first test of x, so one 4-pixel group is
 * processed even if the rounded-down width is 0.
 */
static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc,
                                       int width, int thresh,
                                       const uint16_t *dithers)
{
    intptr_t x;
    if (width & 3) {
        /* scalar fallback for the last 1..3 pixels */
        x = width & ~3;
        ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers);
        width = x;
    }
    x = -width;
    __asm__ volatile(
        "movd %4, %%mm5\n"
        "pxor %%mm7, %%mm7\n"
        "pshufw $0, %%mm5, %%mm5\n"
        "movq %6, %%mm6\n"
        "movq (%5), %%mm3\n"
        "movq 8(%5), %%mm4\n"
        "1:\n"
        "movd (%2,%0), %%mm0\n"
        "movd (%3,%0), %%mm1\n"
        "punpcklbw %%mm7, %%mm0\n"
        "punpcklwd %%mm1, %%mm1\n"
        "psllw $7, %%mm0\n"
        "pxor %%mm2, %%mm2\n"
        "psubw %%mm0, %%mm1\n"
        // delta = dc - pix
        "psubw %%mm1, %%mm2\n"
        "pmaxsw %%mm1, %%mm2\n"
        "pmulhuw %%mm5, %%mm2\n"
        // m = abs(delta) * thresh >> 16
        "psubw %%mm6, %%mm2\n"
        "pminsw %%mm7, %%mm2\n"
        // m = -max(0, 127-m)
        "pmullw %%mm2, %%mm2\n"
        "paddw %%mm3, %%mm0\n"
        // pix += dither
        "psllw $2, %%mm1\n"
        // m = m*m*delta >> 14
        "pmulhw %%mm2, %%mm1\n"
        "paddw %%mm1, %%mm0\n"
        // pix += m
        "psraw $7, %%mm0\n"
        "packuswb %%mm0, %%mm0\n"
        "movd %%mm0, (%1,%0)\n"
        // dst = clip(pix>>7)
        "add $4, %0\n"
        "jnl 2f\n"
        /* second half of the unrolled loop: same steps with dithers[4..7] (mm4) */
        "movd (%2,%0), %%mm0\n"
        "movd (%3,%0), %%mm1\n"
        "punpcklbw %%mm7, %%mm0\n"
        "punpcklwd %%mm1, %%mm1\n"
        "psllw $7, %%mm0\n"
        "pxor %%mm2, %%mm2\n"
        "psubw %%mm0, %%mm1\n"
        // delta = dc - pix
        "psubw %%mm1, %%mm2\n"
        "pmaxsw %%mm1, %%mm2\n"
        "pmulhuw %%mm5, %%mm2\n"
        // m = abs(delta) * thresh >> 16
        "psubw %%mm6, %%mm2\n"
        "pminsw %%mm7, %%mm2\n"
        // m = -max(0, 127-m)
        "pmullw %%mm2, %%mm2\n"
        "paddw %%mm4, %%mm0\n"
        // pix += dither
        "psllw $2, %%mm1\n"
        // m = m*m*delta >> 14
        "pmulhw %%mm2, %%mm1\n"
        "paddw %%mm1, %%mm0\n"
        // pix += m
        "psraw $7, %%mm0\n"
        "packuswb %%mm0, %%mm0\n"
        "movd %%mm0, (%1,%0)\n"
        // dst = clip(pix>>7)
        "add $4, %0\n"
        "jl 1b\n"
        "2:\n"
        "emms\n"
        :"+r"(x)
        :"r"(dst + width), "r"(src + width), "r"(dc + width / 2),
         "rm"(thresh), "r"(dithers), "m"(*pw_7f)
        :"memory"
    );
}
#endif
#if HAVE_SSSE3_INLINE
/*
 * SSSE3 inline-asm implementation of filter_line.
 *
 * The trailing (width & 7) pixels are handled by ff_gradfun_filter_line_c();
 * the remaining multiple-of-8 prefix is processed by the asm loop, 8 pixels
 * per step, using all eight dither words (xmm4).
 * All pointers are pre-advanced by width and indexed with the negative
 * counter x, which counts up towards 0.
 *
 * Asm operands: %0 = x, %1 = dst + width, %2 = src + width,
 * %3 = dc + width / 2, %4 = thresh, %5 = *dithers (memory), %6 = pw_7f.
 * dithers is loaded with movdqa, which requires a 16-byte-aligned address.
 * The loop body runs before the first test of x, so one 8-pixel group is
 * processed even if the rounded-down width is 0.
 */
static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
                                      int width, int thresh,
                                      const uint16_t *dithers)
{
    intptr_t x;
    if (width & 7) {
        // could be 10% faster if I somehow eliminated this
        x = width & ~7;
        ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers);
        width = x;
    }
    x = -width;
    __asm__ volatile(
        "movd %4, %%xmm5\n"
        "pxor %%xmm7, %%xmm7\n"
        "pshuflw $0,%%xmm5, %%xmm5\n"
        "movdqa %6, %%xmm6\n"
        "punpcklqdq %%xmm5, %%xmm5\n"
        "movdqa %5, %%xmm4\n"
        "1:\n"
        "movq (%2,%0), %%xmm0\n"
        "movq (%3,%0), %%xmm1\n"
        "punpcklbw %%xmm7, %%xmm0\n"
        "punpcklwd %%xmm1, %%xmm1\n"
        "psllw $7, %%xmm0\n"
        "psubw %%xmm0, %%xmm1\n"
        // delta = dc - pix
        "pabsw %%xmm1, %%xmm2\n"
        "pmulhuw %%xmm5, %%xmm2\n"
        // m = abs(delta) * thresh >> 16
        "psubw %%xmm6, %%xmm2\n"
        "pminsw %%xmm7, %%xmm2\n"
        // m = -max(0, 127-m)
        "pmullw %%xmm2, %%xmm2\n"
        "psllw $2, %%xmm1\n"
        "paddw %%xmm4, %%xmm0\n"
        // pix += dither
        "pmulhw %%xmm2, %%xmm1\n"
        // m = m*m*delta >> 14
        "paddw %%xmm1, %%xmm0\n"
        // pix += m
        "psraw $7, %%xmm0\n"
        "packuswb %%xmm0, %%xmm0\n"
        "movq %%xmm0, (%1,%0)\n"
        // dst = clip(pix>>7)
        "add $8, %0\n"
        "jl 1b\n"
        :"+&r"(x)
        :"r"(dst + width), "r"(src + width), "r"(dc + width / 2),
         "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
        :"memory"
    );
}
#endif
/* HAVE_SSSE3_INLINE */
#if HAVE_SSE2_INLINE
/*
 * SSE2 inline-asm implementation of blur_line.
 *
 * BLURV(load) expands to the complete kernel; `load` is the instruction used
 * to read the two source rows ("movdqa" or "movdqu"). buf, buf1 and dc are
 * always accessed with movdqa / aligned memory operands.
 *
 * Per iteration the kernel reads 16 bytes from each of the two source rows,
 * adds each 2x2 group of bytes into a 16-bit sum, adds buf1[x], stores the
 * result into buf[x] and writes (new buf[x] - old buf[x]) to dc[x].
 *
 * Asm operands: %0 = x (starts at -2*width and counts up by 16 towards 0),
 * %1 = buf + width, %2 = buf1 + width, %3 = dc + width,
 * %4 = src + width*2, %5 = src + width*2 + src_linesize, %6 = pw_ff.
 */
static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                                   uint8_t *src, int src_linesize, int width)
{
#define BLURV(load)\
    intptr_t x = -2*width;\
    __asm__ volatile(\
        "movdqa %6, %%xmm7 \n"\
        "1: \n"\
        load" (%4,%0), %%xmm0 \n"\
        load" (%5,%0), %%xmm1 \n"\
        "movdqa %%xmm0, %%xmm2 \n"\
        "movdqa %%xmm1, %%xmm3 \n"\
        "psrlw $8, %%xmm0 \n"\
        "psrlw $8, %%xmm1 \n"\
        "pand %%xmm7, %%xmm2 \n"\
        "pand %%xmm7, %%xmm3 \n"\
        "paddw %%xmm1, %%xmm0 \n"\
        "paddw %%xmm3, %%xmm2 \n"\
        "paddw %%xmm2, %%xmm0 \n"\
        "paddw (%2,%0), %%xmm0 \n"\
        "movdqa (%1,%0), %%xmm1 \n"\
        "movdqa %%xmm0, (%1,%0) \n"\
        "psubw %%xmm1, %%xmm0 \n"\
        "movdqa %%xmm0, (%3,%0) \n"\
        "add $16, %0 \n"\
        "jl 1b \n"\
        :"+&r"(x)\
        :"r"(buf+width),\
         "r"(buf1+width),\
         "r"(dc+width),\
         "r"(src+width*2),\
         "r"(src+width*2+src_linesize),\
         "m"(*pw_ff)\
        :"memory"\
    );
    /* Use unaligned loads unless both the row pointer and the stride are
     * multiples of 16. */
    if (((intptr_t) src | src_linesize) & 15) {
        BLURV("movdqu");
    } else {
        BLURV("movdqa");
    }
}
#endif
/* HAVE_SSE2_INLINE */
#endif
/* HAVE_INLINE_ASM */
/**
 * Install the x86 inline-assembly implementations into a gradfun context.
 *
 * filter_line is set to the MMXEXT version and then overridden by the SSSE3
 * version when the CPU reports it; blur_line is set to the SSE2 version.
 * Function pointers are left untouched when the matching CPU flag is absent
 * or the implementation was not compiled in.
 *
 * @param gf context whose filter_line / blur_line pointers may be replaced
 */
av_cold void ff_gradfun_init_x86(GradFunContext *gf)
{
    /* cpu_flags is read by all three guarded sections below, so it must be
     * declared whenever any one of them is compiled in, not only when the
     * MMXEXT one is. */
#if HAVE_MMXEXT_INLINE || HAVE_SSSE3_INLINE || HAVE_SSE2_INLINE
    int cpu_flags = av_get_cpu_flags();
#endif

#if HAVE_MMXEXT_INLINE
    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
        gf->filter_line = gradfun_filter_line_mmxext;
#endif
#if HAVE_SSSE3_INLINE
    if (cpu_flags & AV_CPU_FLAG_SSSE3)
        gf->filter_line = gradfun_filter_line_ssse3;
#endif
#if HAVE_SSE2_INLINE
    if (cpu_flags & AV_CPU_FLAG_SSE2)
        gf->blur_line = gradfun_blur_line_sse2;
#endif
}
libavfilter/x86/vf_gradfun_init.c
0 → 100644
View file @
0e730494
/*
* Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/gradfun.h"
#if HAVE_YASM
/* External assembly kernel (vf_gradfun.asm). x is a negative offset that
 * counts up towards 0; dst, src and dc must already be advanced to the end of
 * the span to process. */
void ff_gradfun_filter_line_mmxext(intptr_t x, uint8_t *dst, uint8_t *src,
                                   uint16_t *dc, int thresh,
                                   const uint16_t *dithers);

/**
 * filter_line wrapper for the MMXEXT assembly kernel.
 *
 * The trailing (width & 3) pixels are handled by ff_gradfun_filter_line_c();
 * the multiple-of-4 prefix is handed to the assembly kernel.
 *
 * @param dst     destination row
 * @param src     source row
 * @param dc      dc values, one entry per two pixels
 * @param width   number of pixels to process
 * @param thresh  threshold passed through to the kernels
 * @param dithers dither words passed through to the kernels
 */
static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc,
                                       int width, int thresh,
                                       const uint16_t *dithers)
{
    intptr_t x;

    if (width & 3) {
        /* scalar fallback for the last 1..3 pixels */
        x = width & ~3;
        ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2,
                                 width - x, thresh, dithers);
        width = x;
    }

    /* The assembly loop is bottom-tested: it always loads and stores one
     * 4-pixel group before checking the counter. Calling it with nothing
     * left to do would touch memory past the end of the row. */
    if (width <= 0)
        return;

    x = -width;
    ff_gradfun_filter_line_mmxext(x, dst + width, src + width, dc + width / 2,
                                  thresh, dithers);
}
/* External assembly kernel (vf_gradfun.asm). x is a negative offset that
 * counts up towards 0; dst, src and dc must already be advanced to the end of
 * the span to process. */
void ff_gradfun_filter_line_ssse3(intptr_t x, uint8_t *dst, uint8_t *src,
                                  uint16_t *dc, int thresh,
                                  const uint16_t *dithers);

/**
 * filter_line wrapper for the SSSE3 assembly kernel.
 *
 * The trailing (width & 7) pixels are handled by ff_gradfun_filter_line_c();
 * the multiple-of-8 prefix is handed to the assembly kernel.
 *
 * @param dst     destination row
 * @param src     source row
 * @param dc      dc values, one entry per two pixels
 * @param width   number of pixels to process
 * @param thresh  threshold passed through to the kernels
 * @param dithers dither words passed through to the kernels
 */
static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
                                      int width, int thresh,
                                      const uint16_t *dithers)
{
    intptr_t x;

    if (width & 7) {
        // could be 10% faster if I somehow eliminated this
        x = width & ~7;
        ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2,
                                 width - x, thresh, dithers);
        width = x;
    }

    /* The assembly loop is bottom-tested: it always loads and stores one
     * 8-pixel group before checking the counter. Calling it with nothing
     * left to do would touch memory past the end of the row. */
    if (width <= 0)
        return;

    x = -width;
    ff_gradfun_filter_line_ssse3(x, dst + width, src + width, dc + width / 2,
                                 thresh, dithers);
}
/* External assembly kernels (vf_gradfun.asm); the two differ only in the
 * instruction used to load the source rows (aligned vs. unaligned). */
void ff_gradfun_blur_line_movdqa_sse2(intptr_t x, uint16_t *buf,
                                      uint16_t *buf1, uint16_t *dc,
                                      uint8_t *src1, uint8_t *src2);
void ff_gradfun_blur_line_movdqu_sse2(intptr_t x, uint16_t *buf,
                                      uint16_t *buf1, uint16_t *dc,
                                      uint8_t *src1, uint8_t *src2);

/**
 * blur_line wrapper for the SSE2 assembly kernels.
 *
 * Advances every pointer to the end of the span, passes the negative start
 * offset, and picks the unaligned-load kernel unless both src and
 * src_linesize are multiples of 16.
 */
static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                                   uint8_t *src, int src_linesize, int width)
{
    void (*kernel)(intptr_t, uint16_t *, uint16_t *, uint16_t *,
                   uint8_t *, uint8_t *);
    uint8_t *row_top    = src + width * 2;
    uint8_t *row_bottom = row_top + src_linesize;
    intptr_t start      = -2 * width;

    if (((intptr_t) src | src_linesize) & 15)
        kernel = ff_gradfun_blur_line_movdqu_sse2;
    else
        kernel = ff_gradfun_blur_line_movdqa_sse2;

    kernel(start, buf + width, buf1 + width, dc + width, row_top, row_bottom);
}
#endif
/* HAVE_YASM */
/**
 * Install the external-assembly x86 implementations into a gradfun context.
 *
 * blur_line gets the SSE2 version; filter_line gets the MMXEXT version, which
 * the SSSE3 version then replaces when available. Nothing is changed when the
 * file is built without HAVE_YASM.
 *
 * @param gf context whose filter_line / blur_line pointers may be replaced
 */
av_cold void ff_gradfun_init_x86(GradFunContext *gf)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        gf->blur_line = gradfun_blur_line_sse2;
    }

    /* Order matters: SSSE3 must be checked last so it overrides MMXEXT. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        gf->filter_line = gradfun_filter_line_mmxext;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        gf->filter_line = gradfun_filter_line_ssse3;
    }
#endif /* HAVE_YASM */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment