ffmpeg · Commit 9eb3da2f
authored Jun 27, 2016 by Matthieu Bouron
asm: FF_-prefix internal macros used in inline assembly
See merge commit '39d6d361'.

parent 39d6d361
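The change is mechanical: every internal helper macro that is pasted into inline-assembly strings (REG_a, REG_b, REG_c, REG_d, REG_S, REG_D, REGa, OPSIZE, PTR_SIZE, and friends) is renamed with an FF_ prefix, and every use in the files below is updated to match, so the generated assembly is unchanged. A rough illustration of the pattern follows; it is not code from this commit, and copy_u64() plus the hard-coded "rax" (the 64-bit value the real header selects) are assumptions for the sketch.

    #include <stdint.h>

    /* Simplified stand-in for the libavutil/x86/asm.h macro; the real header
     * picks "rax" or "eax" depending on ARCH_X86_64 / ARCH_X86_32. */
    #define FF_REG_a "rax"

    /* Before this commit the macro was spelled REG_a; only the C-level name
     * changes, the assembler still sees "mov (%rdi), %rax" etc. */
    static inline void copy_u64(const uint64_t *src, uint64_t *dst)
    {
        __asm__ volatile(
            "mov (%0), %%"FF_REG_a"   \n\t"   /* load 8 bytes through rax */
            "mov %%"FF_REG_a", (%1)   \n\t"   /* store them to dst        */
            :
            : "r"(src), "r"(dst)
            : "%"FF_REG_a, "memory");         /* clobber list uses the same macro */
    }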
Showing 17 changed files with 1074 additions and 1074 deletions:
  libavcodec/x86/cabac.h                        +10  -10
  libavcodec/x86/h264_i386.h                     +6   -6
  libavcodec/x86/hpeldsp_rnd_template.c         +28  -28
  libavcodec/x86/me_cmp_init.c                  +22  -22
  libavcodec/x86/mpegvideo.c                    +44  -44
  libavcodec/x86/mpegvideoenc_template.c        +18  -18
  libavcodec/x86/rnd_template.c                 +22  -22
  libavcodec/x86/snowdsp.c                      +88  -88
  libavcodec/x86/vc1dsp_mmx.c                    +3   -3
  libavfilter/x86/vf_noise.c                    +20  -20
  libavutil/x86/asm.h                           +33  -33
  libavutil/x86/cpu.c                            +2   -2
  libpostproc/postprocess_template.c           +316 -316
  libswscale/x86/hscale_fast_bilinear_simd.c    +58  -58
  libswscale/x86/rgb2rgb_template.c            +182 -182
  libswscale/x86/swscale.c                      +15  -15
  libswscale/x86/swscale_template.c            +207 -207
libavcodec/x86/cabac.h

@@ -45,7 +45,7 @@
  #define END_CHECK(end) ""
  #else
  #define END_CHECK(end) \
-     "cmp "end" , %%"REG_c" \n\t"\
+     "cmp "end" , %%"FF_REG_c" \n\t"\
      "jge 1f \n\t"
  #endif

@@ -92,11 +92,11 @@
      "mov "tmpbyte" , "statep" \n\t"\
      "test "lowword" , "lowword" \n\t"\
      "jnz 2f \n\t"\
-     "mov "byte" , %%"REG_c" \n\t"\
+     "mov "byte" , %%"FF_REG_c" \n\t"\
      END_CHECK(end)\
-     "add"OPSIZE" $2 , "byte" \n\t"\
+     "add"FF_OPSIZE" $2 , "byte" \n\t"\
      "1: \n\t"\
-     "movzwl (%%"REG_c") , "tmp" \n\t"\
+     "movzwl (%%"FF_REG_c") , "tmp" \n\t"\
      "lea -1("low") , %%ecx \n\t"\
      "xor "low" , %%ecx \n\t"\
      "shr $15 , %%ecx \n\t"\

@@ -153,11 +153,11 @@
      "mov "tmpbyte" , "statep" \n\t"\
      "test "lowword" , "lowword" \n\t"\
      " jnz 2f \n\t"\
-     "mov "byte" , %%"REG_c" \n\t"\
+     "mov "byte" , %%"FF_REG_c" \n\t"\
      END_CHECK(end)\
-     "add"OPSIZE" $2 , "byte" \n\t"\
+     "add"FF_OPSIZE" $2 , "byte" \n\t"\
      "1: \n\t"\
-     "movzwl (%%"REG_c") , "tmp" \n\t"\
+     "movzwl (%%"FF_REG_c") , "tmp" \n\t"\
      "lea -1("low") , %%ecx \n\t"\
      "xor "low" , %%ecx \n\t"\
      "shr $15 , %%ecx \n\t"\

@@ -203,7 +203,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
        "i"(offsetof(CABACContext, bytestream_end)) TABLES_ARG
        ,"1"(c->low), "2"(c->range)
-     : "%"REG_c, "memory"
+     : "%"FF_REG_c, "memory"
   );
   return bit & 1;
  }

@@ -240,7 +240,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
      "addl %%edx, %%eax \n\t"
      "cmp %c5(%2), %1 \n\t"
      "jge 1f \n\t"
-     "add"OPSIZE" $2, %c4(%2) \n\t"
+     "add"FF_OPSIZE" $2, %c4(%2) \n\t"
  #endif
      "1: \n\t"
      "movl %%eax, %c3(%2) \n\t"

@@ -281,7 +281,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
      "addl %%ecx, %%eax \n\t"
      "cmp %c5(%2), %1 \n\t"
      "jge 1f \n\t"
-     "add"OPSIZE" $2, %c4(%2) \n\t"
+     "add"FF_OPSIZE" $2, %c4(%2) \n\t"
      "1: \n\t"
      "movl %%eax, %c3(%2) \n\t"
libavcodec/x86/h264_i386.h

@@ -91,13 +91,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
      "sub %10, %1 \n\t"
      "mov %2, %0 \n\t"
      "movl %7, %%ecx \n\t"
-     "add %1, %%"REG_c" \n\t"
+     "add %1, %%"FF_REG_c" \n\t"
      "movl %%ecx, (%0) \n\t"
      "test $1, %4 \n\t"
      " jnz 5f \n\t"
-     "add"OPSIZE" $4, %2 \n\t"
+     "add"FF_OPSIZE" $4, %2 \n\t"
      "4: \n\t"
      "add $1, %1 \n\t"

@@ -105,7 +105,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
      " jb 3b \n\t"
      "mov %2, %0 \n\t"
      "movl %7, %%ecx \n\t"
-     "add %1, %%"REG_c" \n\t"
+     "add %1, %%"FF_REG_c" \n\t"
      "movl %%ecx, (%0) \n\t"
      "5: \n\t"
      "add %9, %k0 \n\t"

@@ -116,7 +116,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
        "i"(offsetof(CABACContext, bytestream)),
        "i"(offsetof(CABACContext, bytestream_end)) TABLES_ARG
-     : "%"REG_c, "memory"
+     : "%"FF_REG_c, "memory"
   );
   return coeff_count;
  }

@@ -183,7 +183,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
      "test $1, %4 \n\t"
      " jnz 5f \n\t"
-     "add"OPSIZE" $4, %2 \n\t"
+     "add"FF_OPSIZE" $4, %2 \n\t"
      "4: \n\t"
      "add $1, %6 \n\t"

@@ -202,7 +202,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
        "i"(offsetof(CABACContext, bytestream)),
        "i"(offsetof(CABACContext, bytestream_end)),
        "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
-     : "%"REG_c, "memory"
+     : "%"FF_REG_c, "memory"
   );
   return coeff_count;
  }
libavcodec/x86/hpeldsp_rnd_template.c

@@ -32,7 +32,7 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
  {
   MOVQ_BFE(mm6);
   __asm__ volatile(
-     "lea (%3, %3), %%"REG_a" \n\t"
+     "lea (%3, %3), %%"FF_REG_a" \n\t"
      ".p2align 3 \n\t"
      "1: \n\t"
      "movq (%1), %%mm0 \n\t"

@@ -42,8 +42,8 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
      PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
      "movq %%mm4, (%2) \n\t"
      "movq %%mm5, (%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "movq (%1), %%mm0 \n\t"
      "movq 1(%1), %%mm1 \n\t"
      "movq (%1, %3), %%mm2 \n\t"

@@ -51,20 +51,20 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
      PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
      "movq %%mm4, (%2) \n\t"
      "movq %%mm5, (%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "subl $4, %0 \n\t"
      "jnz 1b \n\t"
      :"+g"(h), "+S"(pixels), "+D"(block)
      :"r"((x86_reg)line_size)
-     :REG_a, "memory");
+     :FF_REG_a, "memory");
  }

  av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
  {
   MOVQ_BFE(mm6);
   __asm__ volatile(
-     "lea (%3, %3), %%"REG_a" \n\t"
+     "lea (%3, %3), %%"FF_REG_a" \n\t"
      ".p2align 3 \n\t"
      "1: \n\t"
      "movq (%1), %%mm0 \n\t"

@@ -81,8 +81,8 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
      PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
      "movq %%mm4, 8(%2) \n\t"
      "movq %%mm5, 8(%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "movq (%1), %%mm0 \n\t"
      "movq 1(%1), %%mm1 \n\t"
      "movq (%1, %3), %%mm2 \n\t"

@@ -97,42 +97,42 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
      PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
      "movq %%mm4, 8(%2) \n\t"
      "movq %%mm5, 8(%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "subl $4, %0 \n\t"
      "jnz 1b \n\t"
      :"+g"(h), "+S"(pixels), "+D"(block)
      :"r"((x86_reg)line_size)
-     :REG_a, "memory");
+     :FF_REG_a, "memory");
  }

  av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
  {
   MOVQ_BFE(mm6);
   __asm__ volatile(
-     "lea (%3, %3), %%"REG_a" \n\t"
+     "lea (%3, %3), %%"FF_REG_a" \n\t"
      "movq (%1), %%mm0 \n\t"
      ".p2align 3 \n\t"
      "1: \n\t"
      "movq (%1, %3), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"),%%mm2 \n\t"
+     "movq (%1, %%"FF_REG_a"),%%mm2 \n\t"
      PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
      "movq %%mm4, (%2) \n\t"
      "movq %%mm5, (%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "movq (%1, %3), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"),%%mm0 \n\t"
+     "movq (%1, %%"FF_REG_a"),%%mm0 \n\t"
      PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
      "movq %%mm4, (%2) \n\t"
      "movq %%mm5, (%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "subl $4, %0 \n\t"
      "jnz 1b \n\t"
      :"+g"(h), "+S"(pixels), "+D"(block)
      :"r"((x86_reg)line_size)
-     :REG_a, "memory");
+     :FF_REG_a, "memory");
  }

  av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)

@@ -166,12 +166,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
  {
   MOVQ_BFE(mm6);
   __asm__ volatile(
-     "lea (%3, %3), %%"REG_a" \n\t"
+     "lea (%3, %3), %%"FF_REG_a" \n\t"
      "movq (%1), %%mm0 \n\t"
      ".p2align 3 \n\t"
      "1: \n\t"
      "movq (%1, %3), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"), %%mm2 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
      PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
      "movq (%2), %%mm3 \n\t"
      PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6)

@@ -179,11 +179,11 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
      PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
      "movq %%mm0, (%2) \n\t"
      "movq %%mm1, (%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "movq (%1, %3), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"), %%mm0 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
      PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
      "movq (%2), %%mm3 \n\t"
      PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6)

@@ -191,12 +191,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
      PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
      "movq %%mm2, (%2) \n\t"
      "movq %%mm1, (%2, %3) \n\t"
-     "add %%"REG_a", %1 \n\t"
-     "add %%"REG_a", %2 \n\t"
+     "add %%"FF_REG_a", %1 \n\t"
+     "add %%"FF_REG_a", %2 \n\t"
      "subl $4, %0 \n\t"
      "jnz 1b \n\t"
      :"+g"(h), "+S"(pixels), "+D"(block)
      :"r"((x86_reg)line_size)
-     :REG_a, "memory");
+     :FF_REG_a, "memory");
  }
libavcodec/x86/me_cmp_init.c

@@ -283,15 +283,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
   __asm__ volatile (
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%1, %%"REG_a"), %%mm0 \n\t"
-     "movq (%2, %%"REG_a"), %%mm2 \n\t"
-     "movq (%2, %%"REG_a"), %%mm4 \n\t"
-     "add %3, %%"REG_a" \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm2 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
+     "add %3, %%"FF_REG_a" \n\t"
      "psubusb %%mm0, %%mm2 \n\t"
      "psubusb %%mm4, %%mm0 \n\t"
-     "movq (%1, %%"REG_a"), %%mm1 \n\t"
-     "movq (%2, %%"REG_a"), %%mm3 \n\t"
-     "movq (%2, %%"REG_a"), %%mm5 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm5 \n\t"
      "psubusb %%mm1, %%mm3 \n\t"
      "psubusb %%mm5, %%mm1 \n\t"
      "por %%mm2, %%mm0 \n\t"

@@ -306,7 +306,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
      "paddw %%mm3, %%mm2 \n\t"
      "paddw %%mm2, %%mm0 \n\t"
      "paddw %%mm0, %%mm6 \n\t"
-     "add %3, %%"REG_a" \n\t"
+     "add %3, %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      : "+a" (len)
      : "r" (blk1 - len), "r" (blk2 - len), "r" (stride));

@@ -319,18 +319,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
   __asm__ volatile (
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%1, %%"REG_a"), %%mm0 \n\t"
-     "movq (%2, %%"REG_a"), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"), %%mm2 \n\t"
-     "movq (%2, %%"REG_a"), %%mm3 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm1 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
      "punpcklbw %%mm7, %%mm0 \n\t"
      "punpcklbw %%mm7, %%mm1 \n\t"
      "punpckhbw %%mm7, %%mm2 \n\t"
      "punpckhbw %%mm7, %%mm3 \n\t"
      "paddw %%mm0, %%mm1 \n\t"
      "paddw %%mm2, %%mm3 \n\t"
-     "movq (%3, %%"REG_a"), %%mm4 \n\t"
-     "movq (%3, %%"REG_a"), %%mm2 \n\t"
+     "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
+     "movq (%3, %%"FF_REG_a"), %%mm2 \n\t"
      "paddw %%mm5, %%mm1 \n\t"
      "paddw %%mm5, %%mm3 \n\t"
      "psrlw $1, %%mm1 \n\t"

@@ -344,7 +344,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
      "punpckhbw %%mm7, %%mm1 \n\t"
      "paddw %%mm1, %%mm0 \n\t"
      "paddw %%mm0, %%mm6 \n\t"
-     "add %4, %%"REG_a" \n\t"
+     "add %4, %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      : "+a" (len)
      : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len),

@@ -356,8 +356,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
  {
   x86_reg len = -stride * h;
   __asm__ volatile (
-     "movq (%1, %%"REG_a"), %%mm0 \n\t"
-     "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t"
      "movq %%mm0, %%mm1 \n\t"
      "movq %%mm2, %%mm3 \n\t"
      "punpcklbw %%mm7, %%mm0 \n\t"

@@ -368,8 +368,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
      "paddw %%mm3, %%mm1 \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%2, %%"REG_a"), %%mm2 \n\t"
-     "movq 1(%2, %%"REG_a"), %%mm4 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm2 \n\t"
+     "movq 1(%2, %%"FF_REG_a"), %%mm4 \n\t"
      "movq %%mm2, %%mm3 \n\t"
      "movq %%mm4, %%mm5 \n\t"
      "punpcklbw %%mm7, %%mm2 \n\t"

@@ -383,8 +383,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
      "paddw %%mm3, %%mm1 \n\t"
      "paddw %%mm5, %%mm0 \n\t"
      "paddw %%mm5, %%mm1 \n\t"
-     "movq (%3, %%"REG_a"), %%mm4 \n\t"
-     "movq (%3, %%"REG_a"), %%mm5 \n\t"
+     "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
+     "movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
      "psrlw $2, %%mm0 \n\t"
      "psrlw $2, %%mm1 \n\t"
      "packuswb %%mm1, %%mm0 \n\t"

@@ -398,7 +398,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
      "paddw %%mm4, %%mm6 \n\t"
      "movq %%mm2, %%mm0 \n\t"
      "movq %%mm3, %%mm1 \n\t"
-     "add %4, %%"REG_a" \n\t"
+     "add %4, %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      : "+a" (len)
      : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
libavcodec/x86/mpegvideo.c

@@ -188,13 +188,13 @@ __asm__ volatile(
      "movd %2, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
-     "mov %3, %%"REG_a" \n\t"
+     "mov %3, %%"FF_REG_a" \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%0, %%"REG_a"), %%mm0 \n\t"
-     "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"), %%mm4 \n\t"
-     "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
+     "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq 8(%0, %%"FF_REG_a"), %%mm1 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+     "movq 8(%1, %%"FF_REG_a"), %%mm5 \n\t"
      "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
      "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
      "pxor %%mm2, %%mm2 \n\t"

@@ -209,8 +209,8 @@ __asm__ volatile(
      "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
      "pxor %%mm4, %%mm4 \n\t"
      "pxor %%mm5, %%mm5 \n\t" // FIXME slow
-     "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-     "pcmpeqw 8(%0, %%"REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
      "psraw $3, %%mm0 \n\t"
      "psraw $3, %%mm1 \n\t"
      "psubw %%mm7, %%mm0 \n\t"

@@ -223,13 +223,13 @@ __asm__ volatile(
      "psubw %%mm3, %%mm1 \n\t"
      "pandn %%mm0, %%mm4 \n\t"
      "pandn %%mm1, %%mm5 \n\t"
-     "movq %%mm4, (%0, %%"REG_a") \n\t"
-     "movq %%mm5, 8(%0, %%"REG_a") \n\t"
+     "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+     "movq %%mm5, 8(%0, %%"FF_REG_a") \n\t"
-     "add $16, %%"REG_a" \n\t"
+     "add $16, %%"FF_REG_a" \n\t"
      "js 1b \n\t"
      ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
-     : "%"REG_a, "memory"
+     : "%"FF_REG_a, "memory"
   );
   block[0] = block0;
  }

@@ -251,13 +251,13 @@ __asm__ volatile(
      "movd %2, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
-     "mov %3, %%"REG_a" \n\t"
+     "mov %3, %%"FF_REG_a" \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%0, %%"REG_a"), %%mm0 \n\t"
-     "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"), %%mm4 \n\t"
-     "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
+     "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq 8(%0, %%"FF_REG_a"), %%mm1 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+     "movq 8(%1, %%"FF_REG_a"), %%mm5 \n\t"
      "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
      "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
      "pxor %%mm2, %%mm2 \n\t"

@@ -276,8 +276,8 @@ __asm__ volatile(
      "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
      "pxor %%mm4, %%mm4 \n\t"
      "pxor %%mm5, %%mm5 \n\t" // FIXME slow
-     "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-     "pcmpeqw 8(%0, %%"REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
      "psraw $4, %%mm0 \n\t"
      "psraw $4, %%mm1 \n\t"
      "psubw %%mm7, %%mm0 \n\t"

@@ -290,13 +290,13 @@ __asm__ volatile(
      "psubw %%mm3, %%mm1 \n\t"
      "pandn %%mm0, %%mm4 \n\t"
      "pandn %%mm1, %%mm5 \n\t"
-     "movq %%mm4, (%0, %%"REG_a") \n\t"
-     "movq %%mm5, 8(%0, %%"REG_a") \n\t"
+     "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+     "movq %%mm5, 8(%0, %%"FF_REG_a") \n\t"
-     "add $16, %%"REG_a" \n\t"
+     "add $16, %%"FF_REG_a" \n\t"
      "js 1b \n\t"
      ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
-     : "%"REG_a, "memory"
+     : "%"FF_REG_a, "memory"
   );
  }

@@ -326,13 +326,13 @@ __asm__ volatile(
      "movd %2, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
-     "mov %3, %%"REG_a" \n\t"
+     "mov %3, %%"FF_REG_a" \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%0, %%"REG_a"), %%mm0 \n\t"
-     "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"), %%mm4 \n\t"
-     "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
+     "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq 8(%0, %%"FF_REG_a"), %%mm1 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+     "movq 8(%1, %%"FF_REG_a"), %%mm5 \n\t"
      "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
      "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
      "pxor %%mm2, %%mm2 \n\t"

@@ -347,8 +347,8 @@ __asm__ volatile(
      "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
      "pxor %%mm4, %%mm4 \n\t"
      "pxor %%mm5, %%mm5 \n\t" // FIXME slow
-     "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-     "pcmpeqw 8(%0, %%"REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
      "psraw $4, %%mm0 \n\t"
      "psraw $4, %%mm1 \n\t"
      "pxor %%mm2, %%mm0 \n\t"

@@ -357,13 +357,13 @@ __asm__ volatile(
      "psubw %%mm3, %%mm1 \n\t"
      "pandn %%mm0, %%mm4 \n\t"
      "pandn %%mm1, %%mm5 \n\t"
-     "movq %%mm4, (%0, %%"REG_a") \n\t"
-     "movq %%mm5, 8(%0, %%"REG_a") \n\t"
+     "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+     "movq %%mm5, 8(%0, %%"FF_REG_a") \n\t"
-     "add $16, %%"REG_a" \n\t"
+     "add $16, %%"FF_REG_a" \n\t"
      "jng 1b \n\t"
      ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
-     : "%"REG_a, "memory"
+     : "%"FF_REG_a, "memory"
   );
   block[0] = block0;
   //Note, we do not do mismatch control for intra as errors cannot accumulate

@@ -390,13 +390,13 @@ __asm__ volatile(
      "movd %2, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
      "packssdw %%mm6, %%mm6 \n\t"
-     "mov %3, %%"REG_a" \n\t"
+     "mov %3, %%"FF_REG_a" \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%0, %%"REG_a"), %%mm0 \n\t"
-     "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
-     "movq (%1, %%"REG_a"), %%mm4 \n\t"
-     "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
+     "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq 8(%0, %%"FF_REG_a"), %%mm1 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+     "movq 8(%1, %%"FF_REG_a"), %%mm5 \n\t"
      "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
      "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
      "pxor %%mm2, %%mm2 \n\t"

@@ -415,8 +415,8 @@ __asm__ volatile(
      "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
      "pxor %%mm4, %%mm4 \n\t"
      "pxor %%mm5, %%mm5 \n\t" // FIXME slow
-     "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-     "pcmpeqw 8(%0, %%"REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+     "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
      "psrlw $5, %%mm0 \n\t"
      "psrlw $5, %%mm1 \n\t"
      "pxor %%mm2, %%mm0 \n\t"

@@ -427,10 +427,10 @@ __asm__ volatile(
      "pandn %%mm1, %%mm5 \n\t"
      "pxor %%mm4, %%mm7 \n\t"
      "pxor %%mm5, %%mm7 \n\t"
-     "movq %%mm4, (%0, %%"REG_a") \n\t"
-     "movq %%mm5, 8(%0, %%"REG_a") \n\t"
+     "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+     "movq %%mm5, 8(%0, %%"FF_REG_a") \n\t"
-     "add $16, %%"REG_a" \n\t"
+     "add $16, %%"FF_REG_a" \n\t"
      "jng 1b \n\t"
      "movd 124(%0, %3), %%mm0 \n\t"
      "movq %%mm7, %%mm6 \n\t"

@@ -445,7 +445,7 @@ __asm__ volatile(
      "movd %%mm0, 124(%0, %3) \n\t"
      ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
-     : "%"REG_a, "memory"
+     : "%"FF_REG_a, "memory"
   );
  }
libavcodec/x86/mpegvideoenc_template.c

@@ -150,32 +150,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
   if ((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant == 0) {
      __asm__ volatile(
-     "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
+     "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1
      SPREADW(MM"3")
      "pxor "MM"7, "MM"7 \n\t" // 0
      "pxor "MM"4, "MM"4 \n\t" // 0
      MOVQ" (%2), "MM"5 \n\t" // qmat[0]
      "pxor "MM"6, "MM"6 \n\t"
      "psubw (%3), "MM"6 \n\t" // -bias[0]
-     "mov $-128, %%"REG_a" \n\t"
+     "mov $-128, %%"FF_REG_a" \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i]
+     MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i]
      SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
      "psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0]
      "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
      "por "MM"0, "MM"4 \n\t"
      RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-     MOVQ" "MM"0, (%5, %%"REG_a") \n\t"
+     MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t"
      "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00
-     MOVQ" (%4, %%"REG_a"), "MM"1 \n\t"
-     MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0
+     MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t"
+     MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0
      "pandn "MM"1, "MM"0 \n\t"
      PMAXW(MM"0", MM"3")
-     "add $"MMREG_WIDTH", %%"REG_a" \n\t"
+     "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      PMAX(MM"3", MM"0")
-     "movd "MM"3, %%"REG_a" \n\t"
+     "movd "MM"3, %%"FF_REG_a" \n\t"
      "movzbl %%al, %%eax \n\t" // last_non_zero_p1
      : "+a" (last_non_zero_p1)
      : "r" (block+64), "r" (qmat), "r" (bias),

@@ -185,31 +185,31 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
      );
   } else { // FMT_H263
      __asm__ volatile(
-     "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
+     "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1
      SPREADW(MM"3")
      "pxor "MM"7, "MM"7 \n\t" // 0
      "pxor "MM"4, "MM"4 \n\t" // 0
-     "mov $-128, %%"REG_a" \n\t"
+     "mov $-128, %%"FF_REG_a" \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i]
+     MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i]
      SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
-     MOVQ" (%3, %%"REG_a"), "MM"6 \n\t" // bias[0]
+     MOVQ" (%3, %%"FF_REG_a"), "MM"6 \n\t" // bias[0]
      "paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0]
-     MOVQ" (%2, %%"REG_a"), "MM"5 \n\t" // qmat[i]
+     MOVQ" (%2, %%"FF_REG_a"), "MM"5 \n\t" // qmat[i]
      "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
      "por "MM"0, "MM"4 \n\t"
      RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-     MOVQ" "MM"0, (%5, %%"REG_a") \n\t"
+     MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t"
      "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00
-     MOVQ" (%4, %%"REG_a"), "MM"1 \n\t"
-     MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0
+     MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t"
+     MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0
      "pandn "MM"1, "MM"0 \n\t"
      PMAXW(MM"0", MM"3")
-     "add $"MMREG_WIDTH", %%"REG_a" \n\t"
+     "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      PMAX(MM"3", MM"0")
-     "movd "MM"3, %%"REG_a" \n\t"
+     "movd "MM"3, %%"FF_REG_a" \n\t"
      "movzbl %%al, %%eax \n\t" // last_non_zero_p1
      : "+a" (last_non_zero_p1)
      : "r" (block+64), "r" (qmat+64), "r" (bias+64),
libavcodec/x86/rnd_template.c

@@ -46,12 +46,12 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
      "punpckhbw %%mm7, %%mm5 \n\t"
      "paddusw %%mm0, %%mm4 \n\t"
      "paddusw %%mm1, %%mm5 \n\t"
-     "xor %%"REG_a", %%"REG_a" \n\t"
+     "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
      "add %3, %1 \n\t"
      ".p2align 3 \n\t"
      "1: \n\t"
-     "movq (%1, %%"REG_a"), %%mm0 \n\t"
-     "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t"
      "movq %%mm0, %%mm1 \n\t"
      "movq %%mm2, %%mm3 \n\t"
      "punpcklbw %%mm7, %%mm0 \n\t"

@@ -67,11 +67,11 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
      "psrlw $2, %%mm4 \n\t"
      "psrlw $2, %%mm5 \n\t"
      "packuswb %%mm5, %%mm4 \n\t"
-     "movq %%mm4, (%2, %%"REG_a") \n\t"
-     "add %3, %%"REG_a" \n\t"
+     "movq %%mm4, (%2, %%"FF_REG_a") \n\t"
+     "add %3, %%"FF_REG_a" \n\t"
-     "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2   1 <-> 3
-     "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2   1 <-> 3
+     "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t"
      "movq %%mm2, %%mm3 \n\t"
      "movq %%mm4, %%mm5 \n\t"
      "punpcklbw %%mm7, %%mm2 \n\t"

@@ -87,14 +87,14 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
      "psrlw $2, %%mm0 \n\t"
      "psrlw $2, %%mm1 \n\t"
      "packuswb %%mm1, %%mm0 \n\t"
-     "movq %%mm0, (%2, %%"REG_a") \n\t"
-     "add %3, %%"REG_a" \n\t"
+     "movq %%mm0, (%2, %%"FF_REG_a") \n\t"
+     "add %3, %%"FF_REG_a" \n\t"
      "subl $2, %0 \n\t"
      "jnz 1b \n\t"
      :"+g"(h), "+S"(pixels)
      :"D"(block), "r"((x86_reg)line_size)
-     :REG_a, "memory");
+     :FF_REG_a, "memory");
  }

  // avg_pixels

@@ -115,12 +115,12 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
      "punpckhbw %%mm7, %%mm5 \n\t"
      "paddusw %%mm0, %%mm4 \n\t"
      "paddusw %%mm1, %%mm5 \n\t"
-     "xor %%"REG_a", %%"REG_a" \n\t"
+     "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
      "add %3, %1 \n\t"
      ".p2align 3 \n\t"
      "1: \n\t"
-     "movq (%1, %%"REG_a"), %%mm0 \n\t"
-     "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t"
      "movq %%mm0, %%mm1 \n\t"
      "movq %%mm2, %%mm3 \n\t"
      "punpcklbw %%mm7, %%mm0 \n\t"

@@ -135,16 +135,16 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
      "paddusw %%mm1, %%mm5 \n\t"
      "psrlw $2, %%mm4 \n\t"
      "psrlw $2, %%mm5 \n\t"
-     "movq (%2, %%"REG_a"), %%mm3 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
      "packuswb %%mm5, %%mm4 \n\t"
      "pcmpeqd %%mm2, %%mm2 \n\t"
      "paddb %%mm2, %%mm2 \n\t"
      PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2)
-     "movq %%mm5, (%2, %%"REG_a") \n\t"
-     "add %3, %%"REG_a" \n\t"
+     "movq %%mm5, (%2, %%"FF_REG_a") \n\t"
+     "add %3, %%"FF_REG_a" \n\t"
-     "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2   1 <-> 3
-     "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2   1 <-> 3
+     "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t"
      "movq %%mm2, %%mm3 \n\t"
      "movq %%mm4, %%mm5 \n\t"
      "punpcklbw %%mm7, %%mm2 \n\t"

@@ -159,17 +159,17 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
      "paddusw %%mm5, %%mm1 \n\t"
      "psrlw $2, %%mm0 \n\t"
      "psrlw $2, %%mm1 \n\t"
-     "movq (%2, %%"REG_a"), %%mm3 \n\t"
+     "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
      "packuswb %%mm1, %%mm0 \n\t"
      "pcmpeqd %%mm2, %%mm2 \n\t"
      "paddb %%mm2, %%mm2 \n\t"
      PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2)
-     "movq %%mm1, (%2, %%"REG_a") \n\t"
-     "add %3, %%"REG_a" \n\t"
+     "movq %%mm1, (%2, %%"FF_REG_a") \n\t"
+     "add %3, %%"FF_REG_a" \n\t"
      "subl $2, %0 \n\t"
      "jnz 1b \n\t"
      :"+g"(h), "+S"(pixels)
      :"D"(block), "r"((x86_reg)line_size)
-     :REG_a, "memory");
+     :FF_REG_a, "memory");
  }
libavcodec/x86/snowdsp.c

@@ -390,10 +390,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
  #if HAVE_7REGS
  #define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\
-     ""op" ("r",%%"REG_d"), %%"t0" \n\t"\
-     ""op" 16("r",%%"REG_d"), %%"t1" \n\t"\
-     ""op" 32("r",%%"REG_d"), %%"t2" \n\t"\
-     ""op" 48("r",%%"REG_d"), %%"t3" \n\t"
+     ""op" ("r",%%"FF_REG_d"), %%"t0" \n\t"\
+     ""op" 16("r",%%"FF_REG_d"), %%"t1" \n\t"\
+     ""op" 32("r",%%"FF_REG_d"), %%"t2" \n\t"\
+     ""op" 48("r",%%"FF_REG_d"), %%"t3" \n\t"
  #define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
      snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)

@@ -408,10 +408,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
      "psubw %%"s3", %%"t3" \n\t"
  #define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
-     "movdqa %%"s0", ("w",%%"REG_d") \n\t"\
-     "movdqa %%"s1", 16("w",%%"REG_d") \n\t"\
-     "movdqa %%"s2", 32("w",%%"REG_d") \n\t"\
-     "movdqa %%"s3", 48("w",%%"REG_d") \n\t"
+     "movdqa %%"s0", ("w",%%"FF_REG_d") \n\t"\
+     "movdqa %%"s1", 16("w",%%"FF_REG_d") \n\t"\
+     "movdqa %%"s2", 32("w",%%"FF_REG_d") \n\t"\
+     "movdqa %%"s3", 48("w",%%"FF_REG_d") \n\t"
  #define snow_vertical_compose_sra(n,t0,t1,t2,t3)\
      "psraw $"n", %%"t0" \n\t"\

@@ -477,14 +477,14 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
      "psrlw $13, %%xmm5 \n\t"
      "paddw %%xmm7, %%xmm5 \n\t"
      snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6")
-     "movq (%2,%%"REG_d"), %%xmm1 \n\t"
-     "movq 8(%2,%%"REG_d"), %%xmm3 \n\t"
+     "movq (%2,%%"FF_REG_d"), %%xmm1 \n\t"
+     "movq 8(%2,%%"FF_REG_d"), %%xmm3 \n\t"
      "paddw %%xmm7, %%xmm1 \n\t"
      "paddw %%xmm7, %%xmm3 \n\t"
      "pavgw %%xmm1, %%xmm0 \n\t"
      "pavgw %%xmm3, %%xmm2 \n\t"
-     "movq 16(%2,%%"REG_d"), %%xmm1 \n\t"
-     "movq 24(%2,%%"REG_d"), %%xmm3 \n\t"
+     "movq 16(%2,%%"FF_REG_d"), %%xmm1 \n\t"
+     "movq 24(%2,%%"FF_REG_d"), %%xmm3 \n\t"
      "paddw %%xmm7, %%xmm1 \n\t"
      "paddw %%xmm7, %%xmm3 \n\t"
      "pavgw %%xmm1, %%xmm4 \n\t"

@@ -504,17 +504,17 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
      snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6")
      "2: \n\t"
-     "sub $64, %%"REG_d" \n\t"
+     "sub $64, %%"FF_REG_d" \n\t"
      "jge 1b \n\t"
      :"+d"(i)
      :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
  }

  #define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
-     ""op" ("r",%%"REG_d"), %%"t0" \n\t"\
-     ""op" 8("r",%%"REG_d"), %%"t1" \n\t"\
-     ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\
-     ""op" 24("r",%%"REG_d"), %%"t3" \n\t"
+     ""op" ("r",%%"FF_REG_d"), %%"t0" \n\t"\
+     ""op" 8("r",%%"FF_REG_d"), %%"t1" \n\t"\
+     ""op" 16("r",%%"FF_REG_d"), %%"t2" \n\t"\
+     ""op" 24("r",%%"FF_REG_d"), %%"t3" \n\t"
  #define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
      snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)

@@ -523,10 +523,10 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
      snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
  #define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
-     "movq %%"s0", ("w",%%"REG_d") \n\t"\
-     "movq %%"s1", 8("w",%%"REG_d") \n\t"\
-     "movq %%"s2", 16("w",%%"REG_d") \n\t"\
-     "movq %%"s3", 24("w",%%"REG_d") \n\t"
+     "movq %%"s0", ("w",%%"FF_REG_d") \n\t"\
+     "movq %%"s1", 8("w",%%"FF_REG_d") \n\t"\
+     "movq %%"s2", 16("w",%%"FF_REG_d") \n\t"\
+     "movq %%"s3", 24("w",%%"FF_REG_d") \n\t"
  #define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
      "movq %%"s0", %%"t0" \n\t"\

@@ -571,14 +571,14 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
      "psrlw $13, %%mm5 \n\t"
      "paddw %%mm7, %%mm5 \n\t"
      snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6")
-     "movq (%2,%%"REG_d"), %%mm1 \n\t"
-     "movq 8(%2,%%"REG_d"), %%mm3 \n\t"
+     "movq (%2,%%"FF_REG_d"), %%mm1 \n\t"
+     "movq 8(%2,%%"FF_REG_d"), %%mm3 \n\t"
      "paddw %%mm7, %%mm1 \n\t"
      "paddw %%mm7, %%mm3 \n\t"
      "pavgw %%mm1, %%mm0 \n\t"
      "pavgw %%mm3, %%mm2 \n\t"
-     "movq 16(%2,%%"REG_d"), %%mm1 \n\t"
-     "movq 24(%2,%%"REG_d"), %%mm3 \n\t"
+     "movq 16(%2,%%"FF_REG_d"), %%mm1 \n\t"
+     "movq 24(%2,%%"FF_REG_d"), %%mm3 \n\t"
      "paddw %%mm7, %%mm1 \n\t"
      "paddw %%mm7, %%mm3 \n\t"
      "pavgw %%mm1, %%mm4 \n\t"

@@ -598,7 +598,7 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
      snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6")
      "2: \n\t"
-     "sub $32, %%"REG_d" \n\t"
+     "sub $32, %%"FF_REG_d" \n\t"
      "jge 1b \n\t"
      :"+d"(i)
      :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));

@@ -610,39 +610,39 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
      IDWTELEM * * dst_array = sb->line + src_y;\
      x86_reg tmp;\
      __asm__ volatile(\
-     "mov %7, %%"REG_c" \n\t"\
+     "mov %7, %%"FF_REG_c" \n\t"\
      "mov %6, %2 \n\t"\
-     "mov %4, %%"REG_S" \n\t"\
+     "mov %4, %%"FF_REG_S" \n\t"\
      "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\
      "pcmpeqd %%xmm3, %%xmm3 \n\t"\
      "psllw $15, %%xmm3 \n\t"\
      "psrlw $12, %%xmm3 \n\t" /* FRAC_BITS >> 1 */\
      "1: \n\t"\
-     "mov %1, %%"REG_D" \n\t"\
-     "mov (%%"REG_D"), %%"REG_D" \n\t"\
-     "add %3, %%"REG_D" \n\t"
+     "mov %1, %%"FF_REG_D" \n\t"\
+     "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\
+     "add %3, %%"FF_REG_D" \n\t"

  #define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
-     "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-     "movq (%%"REG_d"), %%"out_reg1" \n\t"\
-     "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
+     "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+     "movq (%%"FF_REG_d"), %%"out_reg1" \n\t"\
+     "movq (%%"FF_REG_d", %%"FF_REG_c"), %%"out_reg2" \n\t"\
      "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
      "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
-     "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
-     "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
+     "movq "s_offset"(%%"FF_REG_S"), %%xmm0 \n\t"\
+     "movq "s_offset"+16(%%"FF_REG_S"), %%xmm4 \n\t"\
      "punpcklbw %%xmm7, %%xmm0 \n\t"\
      "punpcklbw %%xmm7, %%xmm4 \n\t"\
      "pmullw %%xmm0, %%"out_reg1" \n\t"\
      "pmullw %%xmm4, %%"out_reg2" \n\t"

  #define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
-     "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-     "movq (%%"REG_d"), %%"out_reg1" \n\t"\
-     "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
+     "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+     "movq (%%"FF_REG_d"), %%"out_reg1" \n\t"\
+     "movq 8(%%"FF_REG_d"), %%"out_reg2" \n\t"\
      "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
      "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
-     "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
-     "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
+     "movq "s_offset"(%%"FF_REG_S"), %%xmm0 \n\t"\
+     "movq "s_offset"+8(%%"FF_REG_S"), %%xmm4 \n\t"\
      "punpcklbw %%xmm7, %%xmm0 \n\t"\
      "punpcklbw %%xmm7, %%xmm4 \n\t"\
      "pmullw %%xmm0, %%"out_reg1" \n\t"\

@@ -659,12 +659,12 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
      "paddusw %%xmm6, %%xmm5 \n\t"
  #define snow_inner_add_yblock_sse2_end_common1\
-     "add $32, %%"REG_S" \n\t"\
-     "add %%"REG_c", %0 \n\t"\
-     "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a"); \n\t"\
-     "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a"); \n\t"\
-     "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a"); \n\t"\
-     "add %%"REG_c", (%%"REG_a") \n\t"
+     "add $32, %%"FF_REG_S" \n\t"\
+     "add %%"FF_REG_c", %0 \n\t"\
+     "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\
+     "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\
+     "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\
+     "add %%"FF_REG_c", (%%"FF_REG_a") \n\t"
  #define snow_inner_add_yblock_sse2_end_common2\
      "jnz 1b \n\t"\

@@ -672,18 +672,18 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
      :\
      "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
      XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\
-     "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+     "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d"");
  #define snow_inner_add_yblock_sse2_end_8\
-     "sal $1, %%"REG_c" \n\t"\
-     "add"OPSIZE" $"PTR_SIZE"*2, %1 \n\t"\
+     "sal $1, %%"FF_REG_c" \n\t"\
+     "add"FF_OPSIZE" $"FF_PTR_SIZE"*2, %1 \n\t"\
      snow_inner_add_yblock_sse2_end_common1\
-     "sar $1, %%"REG_c" \n\t"\
+     "sar $1, %%"FF_REG_c" \n\t"\
      "sub $2, %2 \n\t"\
      snow_inner_add_yblock_sse2_end_common2
  #define snow_inner_add_yblock_sse2_end_16\
-     "add"OPSIZE" $"PTR_SIZE"*1, %1 \n\t"\
+     "add"FF_OPSIZE" $"FF_PTR_SIZE"*1, %1 \n\t"\
      snow_inner_add_yblock_sse2_end_common1\
      "dec %2 \n\t"\
      snow_inner_add_yblock_sse2_end_common2

@@ -696,28 +696,28 @@ snow_inner_add_yblock_sse2_accum_8("2", "8")
  snow_inner_add_yblock_sse2_accum_8("1", "128")
  snow_inner_add_yblock_sse2_accum_8("0", "136")
-     "mov %0, %%"REG_d" \n\t"
-     "movdqa (%%"REG_D"), %%xmm0 \n\t"
+     "mov %0, %%"FF_REG_d" \n\t"
+     "movdqa (%%"FF_REG_D"), %%xmm0 \n\t"
      "movdqa %%xmm1, %%xmm2 \n\t"
      "punpckhwd %%xmm7, %%xmm1 \n\t"
      "punpcklwd %%xmm7, %%xmm2 \n\t"
      "paddd %%xmm2, %%xmm0 \n\t"
-     "movdqa 16(%%"REG_D"), %%xmm2 \n\t"
+     "movdqa 16(%%"FF_REG_D"), %%xmm2 \n\t"
      "paddd %%xmm1, %%xmm2 \n\t"
      "paddd %%xmm3, %%xmm0 \n\t"
      "paddd %%xmm3, %%xmm2 \n\t"
-     "mov %1, %%"REG_D" \n\t"
-     "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D"; \n\t"
-     "add %3, %%"REG_D" \n\t"
+     "mov %1, %%"FF_REG_D" \n\t"
+     "mov "FF_PTR_SIZE"(%%"FF_REG_D"), %%"FF_REG_D"; \n\t"
+     "add %3, %%"FF_REG_D" \n\t"
-     "movdqa (%%"REG_D"), %%xmm4 \n\t"
+     "movdqa (%%"FF_REG_D"), %%xmm4 \n\t"
      "movdqa %%xmm5, %%xmm6 \n\t"
      "punpckhwd %%xmm7, %%xmm5 \n\t"
      "punpcklwd %%xmm7, %%xmm6 \n\t"
      "paddd %%xmm6, %%xmm4 \n\t"
-     "movdqa 16(%%"REG_D"), %%xmm6 \n\t"
+     "movdqa 16(%%"FF_REG_D"), %%xmm6 \n\t"
      "paddd %%xmm5, %%xmm6 \n\t"
      "paddd %%xmm3, %%xmm4 \n\t"
      "paddd %%xmm3, %%xmm6 \n\t"

@@ -726,13 +726,13 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
      "psrad $8, %%xmm2 \n\t" /* FRAC_BITS. */
      "packssdw %%xmm2, %%xmm0 \n\t"
      "packuswb %%xmm7, %%xmm0 \n\t"
-     "movq %%xmm0, (%%"REG_d") \n\t"
+     "movq %%xmm0, (%%"FF_REG_d") \n\t"
      "psrad $8, %%xmm4 \n\t" /* FRAC_BITS. */
      "psrad $8, %%xmm6 \n\t" /* FRAC_BITS. */
      "packssdw %%xmm6, %%xmm4 \n\t"
      "packuswb %%xmm7, %%xmm4 \n\t"
-     "movq %%xmm4, (%%"REG_d",%%"REG_c"); \n\t"
+     "movq %%xmm4, (%%"FF_REG_d",%%"FF_REG_c"); \n\t"
  snow_inner_add_yblock_sse2_end_8
  }

@@ -744,18 +744,18 @@ snow_inner_add_yblock_sse2_accum_16("2", "16")
  snow_inner_add_yblock_sse2_accum_16("1", "512")
  snow_inner_add_yblock_sse2_accum_16("0", "528")
-     "mov %0, %%"REG_d" \n\t"
+     "mov %0, %%"FF_REG_d" \n\t"
      "psrlw $4, %%xmm1 \n\t"
      "psrlw $4, %%xmm5 \n\t"
-     "paddw (%%"REG_D"), %%xmm1 \n\t"
-     "paddw 16(%%"REG_D"), %%xmm5 \n\t"
+     "paddw (%%"FF_REG_D"), %%xmm1 \n\t"
+     "paddw 16(%%"FF_REG_D"), %%xmm5 \n\t"
      "paddw %%xmm3, %%xmm1 \n\t"
      "paddw %%xmm3, %%xmm5 \n\t"
      "psraw $4, %%xmm1 \n\t" /* FRAC_BITS. */
      "psraw $4, %%xmm5 \n\t" /* FRAC_BITS. */
      "packuswb %%xmm5, %%xmm1 \n\t"
-     "movdqu %%xmm1, (%%"REG_d") \n\t"
+     "movdqu %%xmm1, (%%"FF_REG_d") \n\t"
  snow_inner_add_yblock_sse2_end_16
  }

@@ -764,26 +764,26 @@ snow_inner_add_yblock_sse2_end_16
      IDWTELEM * * dst_array = sb->line + src_y;\
      x86_reg tmp;\
      __asm__ volatile(\
-     "mov %7, %%"REG_c" \n\t"\
+     "mov %7, %%"FF_REG_c" \n\t"\
      "mov %6, %2 \n\t"\
-     "mov %4, %%"REG_S" \n\t"\
+     "mov %4, %%"FF_REG_S" \n\t"\
      "pxor %%mm7, %%mm7 \n\t" /* 0 */\
      "pcmpeqd %%mm3, %%mm3 \n\t"\
      "psllw $15, %%mm3 \n\t"\
      "psrlw $12, %%mm3 \n\t" /* FRAC_BITS >> 1 */\
      "1: \n\t"\
-     "mov %1, %%"REG_D" \n\t"\
-     "mov (%%"REG_D"), %%"REG_D" \n\t"\
-     "add %3, %%"REG_D" \n\t"
+     "mov %1, %%"FF_REG_D" \n\t"\
+     "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\
+     "add %3, %%"FF_REG_D" \n\t"

  #define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
-     "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-     "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\
-     "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\
+     "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+     "movd "d_offset"(%%"FF_REG_d"), %%"out_reg1" \n\t"\
+     "movd "d_offset"+4(%%"FF_REG_d"), %%"out_reg2" \n\t"\
      "punpcklbw %%mm7, %%"out_reg1" \n\t"\
      "punpcklbw %%mm7, %%"out_reg2" \n\t"\
-     "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\
-     "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
+     "movd "s_offset"(%%"FF_REG_S"), %%mm0 \n\t"\
+     "movd "s_offset"+4(%%"FF_REG_S"), %%mm4 \n\t"\
      "punpcklbw %%mm7, %%mm0 \n\t"\
      "punpcklbw %%mm7, %%mm4 \n\t"\
      "pmullw %%mm0, %%"out_reg1" \n\t"\

@@ -795,32 +795,32 @@ snow_inner_add_yblock_sse2_end_16
      "paddusw %%mm6, %%mm5 \n\t"
  #define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\
-     "mov %0, %%"REG_d" \n\t"\
+     "mov %0, %%"FF_REG_d" \n\t"\
      "psrlw $4, %%mm1 \n\t"\
      "psrlw $4, %%mm5 \n\t"\
-     "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\
-     "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\
+     "paddw "read_offset"(%%"FF_REG_D"), %%mm1 \n\t"\
+     "paddw "read_offset"+8(%%"FF_REG_D"), %%mm5 \n\t"\
      "paddw %%mm3, %%mm1 \n\t"\
      "paddw %%mm3, %%mm5 \n\t"\
      "psraw $4, %%mm1 \n\t"\
      "psraw $4, %%mm5 \n\t"\
      "packuswb %%mm5, %%mm1 \n\t"\
-     "movq %%mm1, "write_offset"(%%"REG_d") \n\t"
+     "movq %%mm1, "write_offset"(%%"FF_REG_d") \n\t"
  #define snow_inner_add_yblock_mmx_end(s_step)\
-     "add $"s_step", %%"REG_S" \n\t"\
-     "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a"); \n\t"\
-     "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a"); \n\t"\
-     "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a"); \n\t"\
-     "add %%"REG_c", (%%"REG_a") \n\t"\
-     "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t"\
-     "add %%"REG_c", %0 \n\t"\
+     "add $"s_step", %%"FF_REG_S" \n\t"\
+     "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\
+     "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\
+     "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\
+     "add %%"FF_REG_c", (%%"FF_REG_a") \n\t"\
+     "add"FF_OPSIZE " $"FF_PTR_SIZE"*1, %1 \n\t"\
+     "add %%"FF_REG_c", %0 \n\t"\
      "dec %2 \n\t"\
      "jnz 1b \n\t"\
      :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
      :\
      "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
-     "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+     "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d"");

  static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
libavcodec/x86/vc1dsp_mmx.c

@@ -84,7 +84,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
  {\
      rnd = 8-rnd;\
      __asm__ volatile(\
-     "mov $8, %%"REG_c" \n\t"\
+     "mov $8, %%"FF_REG_c" \n\t"\
      LOAD_ROUNDER_MMX("%5")\
      "movq "MANGLE(ff_pw_9)", %%mm6\n\t"\
      "1: \n\t"\

@@ -119,13 +119,13 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
      "movq %%mm3, (%1) \n\t"\
      "add %6, %0 \n\t"\
      "add %4, %1 \n\t"\
-     "dec %%"REG_c" \n\t"\
+     "dec %%"FF_REG_c" \n\t"\
      "jnz 1b \n\t"\
      : "+r"(src), "+r"(dst)\
      : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
        "g"(stride-offset)\
        NAMED_CONSTRAINTS_ADD(ff_pw_9)\
-     : "%"REG_c, "memory"\
+     : "%"FF_REG_c, "memory"\
      );\
  }
libavfilter/x86/vf_noise.c

@@ -32,22 +32,22 @@ static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
   noise += shift;
   __asm__ volatile(
-     "mov %3, %%"REG_a" \n\t"
+     "mov %3, %%"FF_REG_a" \n\t"
      "pcmpeqb %%mm7, %%mm7 \n\t"
      "psllw $15, %%mm7 \n\t"
      "packsswb %%mm7, %%mm7 \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%0, %%"REG_a"), %%mm0 \n\t"
-     "movq (%1, %%"REG_a"), %%mm1 \n\t"
+     "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
      "pxor %%mm7, %%mm0 \n\t"
      "paddsb %%mm1, %%mm0 \n\t"
      "pxor %%mm7, %%mm0 \n\t"
-     "movq %%mm0, (%2, %%"REG_a") \n\t"
-     "add $8, %%"REG_a" \n\t"
+     "movq %%mm0, (%2, %%"FF_REG_a") \n\t"
+     "add $8, %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-     : "%"REG_a
+     : "%"FF_REG_a
   );
   if (mmx_len != len)
      ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);

@@ -60,13 +60,13 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
   x86_reg mmx_len = len & (~7);
   __asm__ volatile(
-     "mov %5, %%"REG_a" \n\t"
+     "mov %5, %%"FF_REG_a" \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%1, %%"REG_a"), %%mm1 \n\t"
-     "movq (%0, %%"REG_a"), %%mm0 \n\t"
-     "paddb (%2, %%"REG_a"), %%mm1 \n\t"
-     "paddb (%3, %%"REG_a"), %%mm1 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
+     "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+     "paddb (%2, %%"FF_REG_a"), %%mm1 \n\t"
+     "paddb (%3, %%"FF_REG_a"), %%mm1 \n\t"
      "movq %%mm0, %%mm2 \n\t"
      "movq %%mm1, %%mm3 \n\t"
      "punpcklbw %%mm0, %%mm0 \n\t"

@@ -82,12 +82,12 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
      "psrlw $8, %%mm1 \n\t"
      "psrlw $8, %%mm3 \n\t"
      "packuswb %%mm3, %%mm1 \n\t"
-     "movq %%mm1, (%4, %%"REG_a") \n\t"
-     "add $8, %%"REG_a" \n\t"
+     "movq %%mm1, (%4, %%"FF_REG_a") \n\t"
+     "add $8, %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
         "r" (dst+mmx_len), "g" (-mmx_len)
-     : "%"REG_a
+     : "%"FF_REG_a
   );
   if (mmx_len != len){

@@ -104,22 +104,22 @@ static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
   noise += shift;
   __asm__ volatile(
-     "mov %3, %%"REG_a" \n\t"
+     "mov %3, %%"FF_REG_a" \n\t"
      "pcmpeqb %%mm7, %%mm7 \n\t"
      "psllw $15, %%mm7 \n\t"
      "packsswb %%mm7, %%mm7 \n\t"
      ".p2align 4 \n\t"
      "1: \n\t"
-     "movq (%0, %%"REG_a"), %%mm0 \n\t"
-     "movq (%1, %%"REG_a"), %%mm1 \n\t"
+     "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+     "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
      "pxor %%mm7, %%mm0 \n\t"
      "paddsb %%mm1, %%mm0 \n\t"
      "pxor %%mm7, %%mm0 \n\t"
-     "movntq %%mm0, (%2, %%"REG_a") \n\t"
-     "add $8, %%"REG_a" \n\t"
+     "movntq %%mm0, (%2, %%"FF_REG_a") \n\t"
+     "add $8, %%"FF_REG_a" \n\t"
      " js 1b \n\t"
      :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-     : "%"REG_a
+     : "%"FF_REG_a
   );
   if (mmx_len != len)
      ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
libavutil/x86/asm.h

@@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
  typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;

  #if ARCH_X86_64
-#    define OPSIZE "q"
-#    define REG_a "rax"
-#    define REG_b "rbx"
-#    define REG_c "rcx"
-#    define REG_d "rdx"
-#    define REG_D "rdi"
-#    define REG_S "rsi"
-#    define PTR_SIZE "8"
+#    define FF_OPSIZE "q"
+#    define FF_REG_a "rax"
+#    define FF_REG_b "rbx"
+#    define FF_REG_c "rcx"
+#    define FF_REG_d "rdx"
+#    define FF_REG_D "rdi"
+#    define FF_REG_S "rsi"
+#    define FF_PTR_SIZE "8"
  typedef int64_t x86_reg;

-/* REG_SP is defined in Solaris sys headers, so use REG_sp */
-#    define REG_sp "rsp"
-#    define REG_BP "rbp"
-#    define REGBP rbp
-#    define REGa rax
-#    define REGb rbx
-#    define REGc rcx
-#    define REGd rdx
-#    define REGSP rsp
+/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */
+#    define FF_REG_sp "rsp"
+#    define FF_REG_BP "rbp"
+#    define FF_REGBP rbp
+#    define FF_REGa rax
+#    define FF_REGb rbx
+#    define FF_REGc rcx
+#    define FF_REGd rdx
+#    define FF_REGSP rsp

  #elif ARCH_X86_32
-#    define OPSIZE "l"
-#    define REG_a "eax"
-#    define REG_b "ebx"
-#    define REG_c "ecx"
-#    define REG_d "edx"
-#    define REG_D "edi"
-#    define REG_S "esi"
-#    define PTR_SIZE "4"
+#    define FF_OPSIZE "l"
+#    define FF_REG_a "eax"
+#    define FF_REG_b "ebx"
+#    define FF_REG_c "ecx"
+#    define FF_REG_d "edx"
+#    define FF_REG_D "edi"
+#    define FF_REG_S "esi"
+#    define FF_PTR_SIZE "4"
  typedef int32_t x86_reg;

-#    define REG_sp "esp"
-#    define REG_BP "ebp"
-#    define REGBP ebp
-#    define REGa eax
-#    define REGb ebx
-#    define REGc ecx
-#    define REGd edx
-#    define REGSP esp
+#    define FF_REG_sp "esp"
+#    define FF_REG_BP "ebp"
+#    define FF_REGBP ebp
+#    define FF_REGa eax
+#    define FF_REGb ebx
+#    define FF_REGc ecx
+#    define FF_REGd edx
+#    define FF_REGSP esp
  #else
  typedef int x86_reg;
  #endif
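Since asm.h only renames these string macros, their expansion behaviour is unchanged: the preprocessor still pastes them into the surrounding assembly template. A minimal sketch of that concatenation, assuming the ARCH_X86_64 branch of the table above; zero_block() is an illustrative helper, not part of FFmpeg:

    #include <stdint.h>

    #define FF_OPSIZE "q"      /* x86_64 values from the table above */
    #define FF_REG_a  "rax"

    /* Zero 64 bytes, 8 at a time, indexing with the macro-named register.
     * String pasting means the assembler sees e.g. "addq $8, %rax",
     * exactly what the pre-rename OPSIZE/REG_a spelling produced. */
    static void zero_block(uint64_t *p)
    {
        __asm__ volatile(
            "xor  %%"FF_REG_a", %%"FF_REG_a"        \n\t"
            "1:                                     \n\t"
            "mov"FF_OPSIZE" $0, (%0, %%"FF_REG_a")  \n\t"
            "add"FF_OPSIZE" $8, %%"FF_REG_a"        \n\t"
            "cmp"FF_OPSIZE" $64, %%"FF_REG_a"       \n\t"
            "jne  1b                                \n\t"
            :
            : "r"(p)
            : "%"FF_REG_a, "memory");
    }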
libavutil/x86/cpu.c

@@ -41,9 +41,9 @@
  /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
  #define cpuid(index, eax, ebx, ecx, edx) \
      __asm__ volatile ( \
-         "mov %%"REG_b", %%"REG_S" \n\t" \
+         "mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \
          "cpuid \n\t" \
-         "xchg %%"REG_b", %%"REG_S \
+         "xchg %%"FF_REG_b", %%"FF_REG_S \
          : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
          : "0" (index), "2"(0))
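The cpuid wrapper keeps its shape: %rbx/%ebx is parked in FF_REG_S around the cpuid instruction because that register can hold the PIC pointer. A hedged usage sketch built on the same macro body; get_vendor() and the hard-coded 64-bit register strings are assumptions for the sketch, not FFmpeg API:

    #include <string.h>

    #define FF_REG_b "rbx"   /* x86_64 values from libavutil/x86/asm.h */
    #define FF_REG_S "rsi"

    /* Same shape as the macro in the diff: stash rbx in rsi around cpuid,
     * then swap back, so the "=S" output carries cpuid's ebx result. */
    #define cpuid(index, eax, ebx, ecx, edx)                      \
        __asm__ volatile (                                        \
            "mov %%"FF_REG_b", %%"FF_REG_S" \n\t"                 \
            "cpuid                          \n\t"                 \
            "xchg %%"FF_REG_b", %%"FF_REG_S                       \
            : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)      \
            : "0" (index), "2" (0))

    /* Leaf 0 returns the vendor string in EBX, EDX, ECX order. */
    static void get_vendor(char vendor[13])
    {
        int eax, ebx, ecx, edx;
        cpuid(0, eax, ebx, ecx, edx);
        memcpy(vendor + 0, &ebx, 4);
        memcpy(vendor + 4, &edx, 4);
        memcpy(vendor + 8, &ecx, 4);
        vendor[12] = '\0';
    }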
libpostproc/postprocess_template.c
View file @
9eb3da2f
...
...
@@ -118,12 +118,12 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
);
__asm__
volatile
(
"lea (%2, %3), %%"
REG_a
"
\n\t
"
"lea (%2, %3), %%"
FF_REG_a
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
"movq (%2), %%mm0
\n\t
"
"movq (%%"
REG_a
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
"movq %%mm0, %%mm3
\n\t
"
"movq %%mm0, %%mm4
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
...
...
@@ -132,7 +132,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"paddb %%mm7, %%mm0
\n\t
"
"pcmpgtb %%mm6, %%mm0
\n\t
"
"movq (%%"
REG_a
",%3), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
",%3), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
PMINUB
(
%%
mm2
,
%%
mm3
,
%%
mm5
)
"psubb %%mm2, %%mm1
\n\t
"
...
...
@@ -140,7 +140,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3, 2), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %3, 2), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -148,7 +148,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm2
\n\t
"
"paddb %%mm2, %%mm0
\n\t
"
"lea (%%"
REG_a
", %3, 4), %%"
REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %3, 4), %%"
FF_REG_a
"
\n\t
"
"movq (%2, %3, 4), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
...
...
@@ -158,7 +158,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -166,7 +166,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm2
\n\t
"
"paddb %%mm2, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
", %3), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
PMINUB
(
%%
mm2
,
%%
mm3
,
%%
mm5
)
"psubb %%mm2, %%mm1
\n\t
"
...
...
@@ -174,7 +174,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3, 2), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %3, 2), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -207,7 +207,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
:
"=r"
(
numEq
),
"=r"
(
dcOk
)
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"m"
(
c
->
pQPb
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
numEq
=
(
-
numEq
)
&
0xFF
;
...
...
@@ -248,9 +248,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
"por %%mm2, %%mm6
\n\t
"
// First Line to Filter
"movq (%0, %1, 8), %%mm5
\n\t
"
"lea (%0, %1, 4), %%"
REG_a
"
\n\t
"
"lea (%0, %1, 8), %%"
REG_c
"
\n\t
"
"sub %1, %%"
REG_c
"
\n\t
"
"lea (%0, %1, 4), %%"
FF_REG_a
"
\n\t
"
"lea (%0, %1, 8), %%"
FF_REG_c
"
\n\t
"
"sub %1, %%"
FF_REG_c
"
\n\t
"
"add %1, %0
\n\t
"
// %0 points to line 1 not 0
"movq (%0, %1, 8), %%mm7
\n\t
"
"movq %%mm5, %%mm1
\n\t
"
...
...
@@ -279,7 +279,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
"movq (%0, %1, 4), %%mm2
\n\t
"
// 1
"movq %%mm2, %%mm5
\n\t
"
// 1
PAVGB
((
%%
REGa
),
%%
mm2
)
// 11 /2
PAVGB
((
%%
FF_REGa
),
%%
mm2
)
// 11 /2
PAVGB
((
%
0
,
%
1
,
2
),
%%
mm2
)
// 211 /4
"movq %%mm2, %%mm3
\n\t
"
// 211 /4
"movq (%0), %%mm4
\n\t
"
// 1
...
...
@@ -291,15 +291,15 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
PAVGB
(
%%
mm6
,
%%
mm0
)
//1 1 /2
"movq %%mm4, %%mm3
\n\t
"
// 1
PAVGB
((
%
0
,
%
1
,
2
),
%%
mm3
)
// 1 1 /2
PAVGB
((
%%
REGa
,
%
1
,
2
),
%%
mm5
)
// 11 /2
PAVGB
((
%%
REGa
),
%%
mm5
)
// 211 /4
PAVGB
((
%%
FF_REGa
,
%
1
,
2
),
%%
mm5
)
// 11 /2
PAVGB
((
%%
FF_REGa
),
%%
mm5
)
// 211 /4
PAVGB
(
%%
mm5
,
%%
mm3
)
// 2 2211 /8
PAVGB
(
%%
mm0
,
%%
mm3
)
//4242211 /16
"movq %%mm3, (%0,%1)
\n\t
"
// X
// mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
PAVGB
(
%%
mm4
,
%%
mm6
)
//11 /2
"movq (%%"
REG_c
"), %%mm0
\n\t
"
// 1
PAVGB
((
%%
REGa
,
%
1
,
2
),
%%
mm0
)
// 11/2
"movq (%%"
FF_REG_c
"), %%mm0
\n\t
"
// 1
PAVGB
((
%%
FF_REGa
,
%
1
,
2
),
%%
mm0
)
// 11/2
"movq %%mm0, %%mm3
\n\t
"
// 11/2
PAVGB
(
%%
mm1
,
%%
mm0
)
// 2 11/4
PAVGB
(
%%
mm6
,
%%
mm0
)
//222 11/8
...
...
@@ -307,17 +307,17 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
"movq (%0, %1, 2), %%mm2
\n\t
"
// 1
"movq %%mm0, (%0, %1, 2)
\n\t
"
// X
// mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
"movq (%%"
REG_a
", %1, 4), %%mm0
\n\t
"
// 1
PAVGB
((
%%
REGc
),
%%
mm0
)
// 11 /2
"movq (%%"
FF_REG_a
", %1, 4), %%mm0
\n\t
"
// 1
PAVGB
((
%%
FF_REGc
),
%%
mm0
)
// 11 /2
PAVGB
(
%%
mm0
,
%%
mm6
)
//11 11 /4
PAVGB
(
%%
mm1
,
%%
mm4
)
// 11 /2
PAVGB
(
%%
mm2
,
%%
mm1
)
// 11 /2
PAVGB
(
%%
mm1
,
%%
mm6
)
//1122 11 /8
PAVGB
(
%%
mm5
,
%%
mm6
)
//112242211 /16
"movq (%%"
REG_a
"), %%mm5
\n\t
"
// 1
"movq %%mm6, (%%"
REG_a
")
\n\t
"
// X
"movq (%%"
FF_REG_a
"), %%mm5
\n\t
"
// 1
"movq %%mm6, (%%"
FF_REG_a
")
\n\t
"
// X
// mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
"movq (%%"
REG_a
", %1, 4), %%mm6
\n\t
"
// 1
"movq (%%"
FF_REG_a
", %1, 4), %%mm6
\n\t
"
// 1
PAVGB
(
%%
mm7
,
%%
mm6
)
// 11 /2
PAVGB
(
%%
mm4
,
%%
mm6
)
// 11 11 /4
PAVGB
(
%%
mm3
,
%%
mm6
)
// 11 2211 /8
...
...
@@ -330,29 +330,29 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
        PAVGB(%%mm7, %%mm1)                   // 11     2 /4
        PAVGB(%%mm4, %%mm5)                   //    11    /2
        PAVGB(%%mm5, %%mm0)                   //    11 11 /4
-       "movq (%%"REG_a", %1, 2), %%mm6 \n\t" //       1
+       "movq (%%"FF_REG_a", %1, 2), %%mm6 \n\t" //       1
        PAVGB(%%mm6, %%mm1)                   // 11     4 2 /8
        PAVGB(%%mm0, %%mm1)                   // 11224222  /16
-       "movq %%mm1, (%%"REG_a", %1, 2) \n\t" //  X
+       "movq %%mm1, (%%"FF_REG_a", %1, 2) \n\t" //  X
        // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
-       PAVGB((%%REGc), %%mm2)                // 112 4     /8
-       "movq (%%"REG_a", %1, 4), %%mm0 \n\t" //        1
+       PAVGB((%%FF_REGc), %%mm2)             // 112 4     /8
+       "movq (%%"FF_REG_a", %1, 4), %%mm0 \n\t" //        1
        PAVGB(%%mm0, %%mm6)                   //      1 1  /2
        PAVGB(%%mm7, %%mm6)                   //      1 12 /4
        PAVGB(%%mm2, %%mm6)                   // 1122424   /4
-       "movq %%mm6, (%%"REG_c")        \n\t" //  X
+       "movq %%mm6, (%%"FF_REG_c")     \n\t" //  X
        // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
        PAVGB(%%mm7, %%mm5)                   //    11   2 /4
        PAVGB(%%mm7, %%mm5)                   //    11   6 /8
        PAVGB(%%mm3, %%mm0)                   //      112  /4
        PAVGB(%%mm0, %%mm5)                   //    112246 /16
-       "movq %%mm5, (%%"REG_a", %1, 4) \n\t" //  X
+       "movq %%mm5, (%%"FF_REG_a", %1, 4) \n\t" //  X
        "sub %1, %0                     \n\t"
        :
        : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
-       : "%"REG_a, "%"REG_c
+       : "%"FF_REG_a, "%"FF_REG_c
        );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
        const int l1 = stride;
@@ -411,18 +411,18 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
    __asm__ volatile(
        "pxor %%mm7, %%mm7                      \n\t" // 0
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c" \n\t"
        //      0       1       2       3       4       5       6       7       8       9
        //      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
-       "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
+       "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t" // line 3
        "movq (%0, %1, 4), %%mm1                \n\t" // line 4
        "por %%mm1, %%mm0                       \n\t" // |l2 - l3|
-       "movq (%%"REG_c"), %%mm3                \n\t" // line 5
-       "movq (%%"REG_c", %1), %%mm4            \n\t" // line 6
+       "movq (%%"FF_REG_c"), %%mm3             \n\t" // line 5
+       "movq (%%"FF_REG_c", %1), %%mm4         \n\t" // line 6
@@ -454,44 +454,44 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
        "movq %%mm0, (%0, %1, 4)                \n\t" // line 4
-       "movq (%%"REG_c"), %%mm0                \n\t" // line 5
+       "movq (%%"FF_REG_c"), %%mm0             \n\t" // line 5
-       "movq %%mm0, (%%"REG_c")                \n\t" // line 5
+       "movq %%mm0, (%%"FF_REG_c")             \n\t" // line 5
        PAVGB(%%mm7, %%mm1)                     // d/4
-       "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
+       "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t" // line 3
-       "movq %%mm0, (%%"REG_a", %1, 2)         \n\t" // line 3
-       "movq (%%"REG_c", %1), %%mm0            \n\t" // line 6
+       "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t" // line 3
+       "movq (%%"FF_REG_c", %1), %%mm0         \n\t" // line 6
-       "movq %%mm0, (%%"REG_c", %1)            \n\t" // line 6
+       "movq %%mm0, (%%"FF_REG_c", %1)         \n\t" // line 6
        PAVGB(%%mm7, %%mm1)                     // d/8
-       "movq (%%"REG_a", %1), %%mm0            \n\t" // line 2
+       "movq (%%"FF_REG_a", %1), %%mm0         \n\t" // line 2
-       "movq %%mm0, (%%"REG_a", %1)            \n\t" // line 2
-       "movq (%%"REG_c", %1, 2), %%mm0         \n\t" // line 7
+       "movq %%mm0, (%%"FF_REG_a", %1)         \n\t" // line 2
+       "movq (%%"FF_REG_c", %1, 2), %%mm0      \n\t" // line 7
-       "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
+       "movq %%mm0, (%%"FF_REG_c", %1, 2)      \n\t" // line 7
        :
        : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) NAMED_CONSTRAINTS_ADD(b01)
-       : "%"REG_a, "%"REG_c
+       : "%"FF_REG_a, "%"FF_REG_c
        );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
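Every hunk in this file follows the same two-sided pattern: the register-name macro is spliced into the assembly template by C string pasting, and the very same macro is reused in the clobber list, so both places have to move to the FF_-prefixed name together. Below is a stand-alone, x86-only editorial sketch of that mechanism; the FF_REG_a definition here is a local stand-in for this example, not the real libavutil header.

    /* Sketch only: shows how a prefixed register-name string macro ends up
     * in both the template and the clobber list of an __asm__ statement. */
    #if defined(__x86_64__)
    #   define FF_REG_a "rax"           /* local stand-in for the macro */
    #else
    #   define FF_REG_a "eax"
    #endif

    static void clear_first_byte(unsigned char *p)
    {
        __asm__ volatile(
            "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"  /* pastes to "xor %%rax, %%rax" */
            "mov %%al, (%0)                 \n\t"
            :
            : "r"(p)
            : "%"FF_REG_a, "memory");              /* clobber pastes to "%rax" */
    }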
@@ -553,8 +553,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
#if 0 //slightly more accurate and slightly slower
        "pxor %%mm7, %%mm7                      \n\t" // 0
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c" \n\t"
@@ -567,8 +567,8 @@
        PAVGB(%%mm1, %%mm0)                           // ~(l2 + 2l0)/4
        PAVGB(%%mm2, %%mm0)                           // ~(5l2 + 2l0)/8
-       "movq (%%"REG_a"), %%mm1                \n\t" // l1
-       "movq (%%"REG_a", %1, 2), %%mm3         \n\t" // l3
+       "movq (%%"FF_REG_a"), %%mm1             \n\t" // l1
+       "movq (%%"FF_REG_a", %1, 2), %%mm3      \n\t" // l3
@@ -586,7 +586,7 @@
-       "movq (%%"REG_c"), %%mm2                \n\t" // l5
+       "movq (%%"FF_REG_c"), %%mm2             \n\t" // l5
@@ -599,13 +599,13 @@
-       "movq (%%"REG_c", %1), %%mm6            \n\t" // l6
+       "movq (%%"FF_REG_c", %1), %%mm6         \n\t" // l6
-       "movq (%%"REG_c", %1, 2), %%mm5         \n\t" // l7
+       "movq (%%"FF_REG_c", %1, 2), %%mm5      \n\t" // l7
@@ -632,7 +632,7 @@
-       "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
+       "movq (%%"FF_REG_a", %1, 2), %%mm6      \n\t" //l3
@@ -646,7 +646,7 @@
-       "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
@@ -654,36 +654,36 @@
-       "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
+       "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t"
        "movq %%mm2, (%0, %1, 4)                \n\t"
#endif //0
-       "lea (%0, %1), %%"REG_a"                \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
        "pcmpeqb %%mm6, %%mm6                   \n\t" // -1
-       "movq (%%"REG_a", %1, 2), %%mm1         \n\t" // l3
+       "movq (%%"FF_REG_a", %1, 2), %%mm1      \n\t" // l3
        "movq (%0, %1, 4), %%mm0                \n\t" // l4
-       "movq (%%"REG_a", %1, 4), %%mm2         \n\t" // l5
-       "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
+       "movq (%%"FF_REG_a", %1, 4), %%mm2      \n\t" // l5
+       "movq (%%"FF_REG_a", %1), %%mm3         \n\t" // l2
        "movq "MANGLE(b80)", %%mm4              \n\t" // 128
-       "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c" \n\t"
-       "movq (%%"REG_a"), %%mm2                \n\t" // l1
+       "movq (%%"FF_REG_a"), %%mm2             \n\t" // l1
@@ -693,8 +693,8 @@
-       PAVGB((%%REGc, %1), %%mm5)              // (l6-l5+256)/2
-       "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
+       PAVGB((%%FF_REGc, %1), %%mm5)           // (l6-l5+256)/2
+       "movq (%%"FF_REG_c", %1, 2), %%mm1      \n\t" // l7
@@ -743,7 +743,7 @@
-       "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
@@ -751,13 +751,13 @@
-       "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
+       "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t"
        "movq %%mm2, (%0, %1, 4)                \n\t"
        :
        : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) NAMED_CONSTRAINTS_ADD(b80,b00,b01)
-       : "%"REG_a, "%"REG_c
+       : "%"FF_REG_a, "%"FF_REG_c
        );
/*
@@ -830,12 +830,12 @@
-       "lea (%0, %1, 2), %%"REG_a"             \n\t"
+       "lea (%0, %1, 2), %%"FF_REG_a"          \n\t"
-       "movq (%%"REG_a"), %%mm4                \n\t"
+       "movq (%%"FF_REG_a"), %%mm4             \n\t"
@@ -852,7 +852,7 @@
-       "movq (%%"REG_a", %1), %%mm2            \n\t"
+       "movq (%%"FF_REG_a", %1), %%mm2         \n\t"
@@ -864,7 +864,7 @@
-       "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
@@ -878,7 +878,7 @@
-       "lea (%%"REG_a", %1), %0                \n\t"
+       "lea (%%"FF_REG_a", %1), %0             \n\t"
@@ -893,10 +893,10 @@
-       "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+       "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
-       "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+       "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
@@ -1045,7 +1045,7 @@
        : "+r" (src)
        : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) NAMED_CONSTRAINTS_ADD(w05,w20)
-       : "%"REG_a
+       : "%"FF_REG_a
        );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
        const int l1 = stride;
@@ -1104,8 +1104,8 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
        "movq %%mm0, %3                         \n\t"
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
@@ -1128,13 +1128,13 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
#define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
-FIND_MIN_MAX((%%REGa))
-FIND_MIN_MAX((%%REGa, %1))
-FIND_MIN_MAX((%%REGa, %1, 2))
+FIND_MIN_MAX((%%FF_REGa))
+FIND_MIN_MAX((%%FF_REGa, %1))
+FIND_MIN_MAX((%%FF_REGa, %1, 2))
FIND_MIN_MAX((%0, %1, 4))
-FIND_MIN_MAX((%%REGd))
-FIND_MIN_MAX((%%REGd, %1))
-FIND_MIN_MAX((%%REGd, %1, 2))
+FIND_MIN_MAX((%%FF_REGd))
+FIND_MIN_MAX((%%FF_REGd, %1))
+FIND_MIN_MAX((%%FF_REGd, %1, 2))
FIND_MIN_MAX((%0, %1, 8))
@@ -1218,13 +1218,13 @@ FIND_MIN_MAX((%0, %1, 8))
-       "movq (%%"REG_a"), %%mm2                \n\t" // L11
+       "movq (%%"FF_REG_a"), %%mm2             \n\t" // L11
-       "movd -4(%%"REG_a"), %%mm5              \n\t"
-       "movd 8(%%"REG_a"), %%mm6               \n\t"
+       "movd -4(%%"FF_REG_a"), %%mm5           \n\t"
+       "movd 8(%%"FF_REG_a"), %%mm6            \n\t"
@@ -1305,19 +1305,19 @@ FIND_MIN_MAX((%0, %1, 8))
//DERING_CORE(dst ,src ,ppsx ,psx ,sx ,pplx ,plx ,lx ,t0 ,t1)
-DERING_CORE((%%REGa)       ,(%%REGa, %1)      ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1)   ,(%%REGa, %1, 2)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1, 2),(%0, %1, 4)       ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 4)    ,(%%REGd)          ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGd)       ,(%%REGd, %1)      ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1)   ,(%%REGd, %1, 2)   ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1, 2),(%0, %1, 8)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGa)       ,(%%FF_REGa, %1)      ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGa, %1)   ,(%%FF_REGa, %1, 2)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGa, %1, 2),(%0, %1, 4)          ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 4)       ,(%%FF_REGd)          ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGd)       ,(%%FF_REGd, %1)      ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGd, %1)   ,(%%FF_REGd, %1, 2)   ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGd, %1, 2),(%0, %1, 8)          ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 8)       ,(%%FF_REGd, %1, 4)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
        "1:                             \n\t"
        : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08)
-       : "%"REG_a, "%"REG_d, "%"REG_sp
+       : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_sp
        );
#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
        int y;
@@ -1452,27 +1452,27 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
    src+= 4*stride;
    __asm__ volatile(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c" \n\t"
        "movq (%0), %%mm0                       \n\t"
-       "movq (%%"REG_a", %1), %%mm1            \n\t"
+       "movq (%%"FF_REG_a", %1), %%mm1         \n\t"
        PAVGB(%%mm1, %%mm0)
-       "movq %%mm0, (%%"REG_a")                \n\t"
+       "movq %%mm0, (%%"FF_REG_a")             \n\t"
        "movq (%0, %1, 4), %%mm0                \n\t"
        PAVGB(%%mm0, %%mm1)
-       "movq %%mm1, (%%"REG_a", %1, 2)         \n\t"
-       "movq (%%"REG_c", %1), %%mm1            \n\t"
+       "movq %%mm1, (%%"FF_REG_a", %1, 2)      \n\t"
+       "movq (%%"FF_REG_c", %1), %%mm1         \n\t"
        PAVGB(%%mm1, %%mm0)
-       "movq %%mm0, (%%"REG_c")                \n\t"
+       "movq %%mm0, (%%"FF_REG_c")             \n\t"
        "movq (%0, %1, 8), %%mm0                \n\t"
        PAVGB(%%mm0, %%mm1)
-       "movq %%mm1, (%%"REG_c", %1, 2)         \n\t"
+       "movq %%mm1, (%%"FF_REG_c", %1, 2)      \n\t"
        : : "r" (src), "r" ((x86_reg)stride)
-       : "%"REG_a, "%"REG_c
+       : "%"FF_REG_a, "%"FF_REG_c
        );
#else
        int a, b, x;
@@ -1505,10 +1505,10 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
#if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
    src+= stride*3;
    __asm__ volatile(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
-       "lea (%%"REG_d", %1, 4), %%"REG_c"      \n\t"
-       "add %1, %%"REG_c"                      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
+       "lea (%%"FF_REG_d", %1, 4), %%"FF_REG_c" \n\t"
+       "add %1, %%"FF_REG_c"                   \n\t"
#if TEMPLATE_PP_SSE2
        "pxor %%xmm7, %%xmm7                    \n\t"
@@ -1554,17 +1554,17 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
#endif //TEMPLATE_PP_SSE2
#define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
-DEINT_CUBIC((%0)        , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
-DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%0, %1, 8))
-DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
-DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc, %1, 2))
+DEINT_CUBIC((%0)           , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)    , (%%FF_REGd, %1))
+DEINT_CUBIC((%%FF_REGa, %1), (%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1), (%0, %1, 8))
+DEINT_CUBIC((%0, %1, 4)    , (%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)    , (%%FF_REGc))
+DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8)    , (%%FF_REGd, %1, 4), (%%FF_REGc)    , (%%FF_REGc, %1, 2))
        : : "r" (src), "r" ((x86_reg)stride)
        :
#if TEMPLATE_PP_SSE2
        XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",)
#endif
-       "%"REG_a, "%"REG_d, "%"REG_c
+       "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c
        );
#undef REAL_DEINT_CUBIC
#else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -1592,8 +1592,8 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
    src+= stride*4;
    __asm__ volatile(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
        "pxor %%mm7, %%mm7                      \n\t"
        "movq (%2), %%mm0                       \n\t"
@@ -1629,14 +1629,14 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp
#define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
-DEINT_FF((%0)        , (%%REGa)       , (%%REGa, %1), (%%REGa, %1, 2))
-DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd)       )
-DEINT_FF((%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%%REGd, %1, 2))
-DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
+DEINT_FF((%0)           , (%%FF_REGa)       , (%%FF_REGa, %1), (%%FF_REGa, %1, 2))
+DEINT_FF((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)    , (%%FF_REGd)       )
+DEINT_FF((%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1), (%%FF_REGd, %1, 2))
+DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)    , (%%FF_REGd, %1, 4))
        "movq %%mm0, (%2)                       \n\t"
        : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
-       : "%"REG_a, "%"REG_d
+       : "%"FF_REG_a, "%"FF_REG_d
        );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
        int x;
@@ -1671,8 +1671,8 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp
#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
    src+= stride*4;
    __asm__ volatile(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
        "pxor %%mm7, %%mm7                      \n\t"
        "movq (%2), %%mm0                       \n\t"
        "movq (%3), %%mm1                       \n\t"
@@ -1714,19 +1714,19 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp
#define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
-DEINT_L5(%%mm0, %%mm1, (%0)           , (%%REGa)       , (%%REGa, %1)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGa)       , (%%REGa, %1)   , (%%REGa, %1, 2))
-DEINT_L5(%%mm0, %%mm1, (%%REGa, %1)   , (%%REGa, %1, 2), (%0, %1, 4)    )
-DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4)    , (%%REGd)       )
-DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)    , (%%REGd)       , (%%REGd, %1)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGd)       , (%%REGd, %1)   , (%%REGd, %1, 2))
-DEINT_L5(%%mm0, %%mm1, (%%REGd, %1)   , (%%REGd, %1, 2), (%0, %1, 8)    )
-DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
+DEINT_L5(%%mm0, %%mm1, (%0)              , (%%FF_REGa)       , (%%FF_REGa, %1)   )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGa)       , (%%FF_REGa, %1)   , (%%FF_REGa, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%FF_REGa, %1)   , (%%FF_REGa, %1, 2), (%0, %1, 4)       )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGa, %1, 2), (%0, %1, 4)       , (%%FF_REGd)       )
+DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)       , (%%FF_REGd)       , (%%FF_REGd, %1)   )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGd)       , (%%FF_REGd, %1)   , (%%FF_REGd, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%FF_REGd, %1)   , (%%FF_REGd, %1, 2), (%0, %1, 8)       )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8)       , (%%FF_REGd, %1, 4))
        "movq %%mm0, (%2)                       \n\t"
        "movq %%mm1, (%3)                       \n\t"
        : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
-       : "%"REG_a, "%"REG_d
+       : "%"FF_REG_a, "%"FF_REG_d
        );
#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
        int x;
@@ -1772,49 +1772,49 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
    src+= 4*stride;
    __asm__ volatile(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
        "movq (%2), %%mm0                       \n\t" // L0
-       "movq (%%"REG_a"), %%mm1                \n\t" // L2
+       "movq (%%"FF_REG_a"), %%mm1             \n\t" // L2
        PAVGB(%%mm1, %%mm0)                           // L0+L2
        "movq %%mm0, (%0)                       \n\t"
-       "movq (%%"REG_a", %1), %%mm0            \n\t" // L3
+       "movq (%%"FF_REG_a", %1), %%mm0         \n\t" // L3
        PAVGB(%%mm1, %%mm2)                           // 2L2 + L1 + L3
-       "movq %%mm2, (%%"REG_a")                \n\t"
-       "movq (%%"REG_a", %1, 2), %%mm2         \n\t" // L4
+       "movq %%mm2, (%%"FF_REG_a")             \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm2      \n\t" // L4
-       "movq %%mm1, (%%"REG_a", %1)            \n\t"
+       "movq %%mm1, (%%"FF_REG_a", %1)         \n\t"
        "movq (%0, %1, 4), %%mm1                \n\t" // L5
-       "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
-       "movq (%%"REG_d"), %%mm0                \n\t" // L6
+       "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t"
+       "movq (%%"FF_REG_d"), %%mm0             \n\t" // L6
        "movq %%mm2, (%0, %1, 4)                \n\t"
-       "movq (%%"REG_d", %1), %%mm2            \n\t" // L7
+       "movq (%%"FF_REG_d", %1), %%mm2         \n\t" // L7
-       "movq %%mm1, (%%"REG_d")                \n\t"
-       "movq (%%"REG_d", %1, 2), %%mm1         \n\t" // L8
+       "movq %%mm1, (%%"FF_REG_d")             \n\t"
+       "movq (%%"FF_REG_d", %1, 2), %%mm1      \n\t" // L8
-       "movq %%mm0, (%%"REG_d", %1)            \n\t"
+       "movq %%mm0, (%%"FF_REG_d", %1)         \n\t"
        "movq (%0, %1, 8), %%mm0                \n\t" // L9
-       "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
+       "movq %%mm2, (%%"FF_REG_d", %1, 2)      \n\t"
        "movq %%mm1, (%2)                       \n\t"
        : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
-       : "%"REG_a, "%"REG_d
+       : "%"FF_REG_a, "%"FF_REG_d
        );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
        int a, b, c, x;
@@ -1874,57 +1874,57 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
    src+= 4*stride;
#if TEMPLATE_PP_MMXEXT
    __asm__ volatile(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
        "movq (%0), %%mm0                       \n\t"
-       "movq (%%"REG_a", %1), %%mm2            \n\t"
-       "movq (%%"REG_a"), %%mm1                \n\t"
+       "movq (%%"FF_REG_a", %1), %%mm2         \n\t"
+       "movq (%%"FF_REG_a"), %%mm1             \n\t"
-       "movq %%mm0, (%%"REG_a")                \n\t"
+       "movq %%mm0, (%%"FF_REG_a")             \n\t"
        "movq (%0, %1, 4), %%mm0                \n\t"
-       "movq (%%"REG_a", %1, 2), %%mm1         \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm1      \n\t"
-       "movq %%mm2, (%%"REG_a", %1, 2)         \n\t"
+       "movq %%mm2, (%%"FF_REG_a", %1, 2)      \n\t"
-       "movq (%%"REG_d"), %%mm2                \n\t"
-       "movq (%%"REG_d", %1), %%mm1            \n\t"
+       "movq (%%"FF_REG_d"), %%mm2             \n\t"
+       "movq (%%"FF_REG_d", %1), %%mm1         \n\t"
-       "movq %%mm2, (%%"REG_d")                \n\t"
+       "movq %%mm2, (%%"FF_REG_d")             \n\t"
-       "movq (%%"REG_d", %1, 2), %%mm2         \n\t"
+       "movq (%%"FF_REG_d", %1, 2), %%mm2      \n\t"
        "movq (%0, %1, 8), %%mm0                \n\t"
-       "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
+       "movq %%mm2, (%%"FF_REG_d", %1, 2)      \n\t"
        : : "r" (src), "r" ((x86_reg)stride)
-       : "%"REG_a, "%"REG_d
+       : "%"FF_REG_a, "%"FF_REG_d
        );
#else // MMX without MMX2
    __asm__ volatile(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
        "pxor %%mm7, %%mm7                      \n\t"
@@ -1954,13 +1954,13 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
#define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
-MEDIAN((%0)        , (%%REGa)       , (%%REGa, %1))
-MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
-MEDIAN((%0, %1, 4) , (%%REGd)       , (%%REGd, %1))
-MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
+MEDIAN((%0)           , (%%FF_REGa)       , (%%FF_REGa, %1))
+MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4))
+MEDIAN((%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1))
+MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8))
        : : "r" (src), "r" ((x86_reg)stride)
-       : "%"REG_a, "%"REG_d
+       : "%"FF_REG_a, "%"FF_REG_d
        );
#endif //TEMPLATE_PP_MMXEXT
#else //TEMPLATE_PP_MMX
@@ -1992,17 +1992,17 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_t *src, int srcStride)
{
    __asm__(
-       "lea (%0, %1), %%"REG_a"                \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
        "movq (%0), %%mm0                       \n\t" // 12345678
-       "movq (%%"REG_a"), %%mm1                \n\t" // abcdefgh
+       "movq (%%"FF_REG_a"), %%mm1             \n\t" // abcdefgh
-       "movq (%%"REG_a", %1), %%mm1            \n\t"
-       "movq (%%"REG_a", %1, 2), %%mm3         \n\t"
+       "movq (%%"FF_REG_a", %1), %%mm1         \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm3      \n\t"
@@ -2029,16 +2029,16 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
        "movd %%mm1, 112(%3)                    \n\t"
-       "lea (%%"REG_a", %1, 4), %%"REG_a"      \n\t"
+       "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_a" \n\t"
        "movq (%0, %1, 4), %%mm0                \n\t" // 12345678
-       "movq (%%"REG_a"), %%mm1                \n\t" // abcdefgh
+       "movq (%%"FF_REG_a"), %%mm1             \n\t" // abcdefgh
-       "movq (%%"REG_a", %1), %%mm1            \n\t"
-       "movq (%%"REG_a", %1, 2), %%mm3         \n\t"
+       "movq (%%"FF_REG_a", %1), %%mm1         \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm3      \n\t"
@@ -2067,7 +2067,7 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
        :: "r" (src), "r" ((x86_reg)srcStride), "r"(dst1), "r"(dst2)
-       : "%"REG_a
+       : "%"FF_REG_a
        );
}
@@ -2077,8 +2077,8 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t *src)
{
    __asm__(
-       "lea (%0, %1), %%"REG_a"                \n\t"
-       "lea (%%"REG_a",%1,4), %%"REG_d"        \n\t"
+       "lea (%0, %1), %%"FF_REG_a"             \n\t"
+       "lea (%%"FF_REG_a",%1,4), %%"FF_REG_d"  \n\t"
        "movq (%2), %%mm0                       \n\t" // 12345678
@@ -2102,16 +2102,16 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t
        "movd %%mm0, (%0)                       \n\t"
-       "movd %%mm0, (%%"REG_a")                \n\t"
-       "movd %%mm3, (%%"REG_a", %1)            \n\t"
+       "movd %%mm0, (%%"FF_REG_a")             \n\t"
+       "movd %%mm3, (%%"FF_REG_a", %1)         \n\t"
-       "movd %%mm3, (%%"REG_a", %1, 2)         \n\t"
+       "movd %%mm3, (%%"FF_REG_a", %1, 2)      \n\t"
        "movd %%mm2, (%0, %1, 4)                \n\t"
-       "movd %%mm2, (%%"REG_d")                \n\t"
-       "movd %%mm1, (%%"REG_d", %1)            \n\t"
+       "movd %%mm2, (%%"FF_REG_d")             \n\t"
+       "movd %%mm1, (%%"FF_REG_d", %1)         \n\t"
-       "movd %%mm1, (%%"REG_d", %1, 2)         \n\t"
+       "movd %%mm1, (%%"FF_REG_d", %1, 2)      \n\t"
        "movq 64(%2), %%mm0                     \n\t" // 12345678
@@ -2135,19 +2135,19 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t
        "movd %%mm0, 4(%0)                      \n\t"
-       "movd %%mm0, 4(%%"REG_a")               \n\t"
-       "movd %%mm3, 4(%%"REG_a", %1)           \n\t"
+       "movd %%mm0, 4(%%"FF_REG_a")            \n\t"
+       "movd %%mm3, 4(%%"FF_REG_a", %1)        \n\t"
-       "movd %%mm3, 4(%%"REG_a", %1, 2)        \n\t"
+       "movd %%mm3, 4(%%"FF_REG_a", %1, 2)     \n\t"
        "movd %%mm2, 4(%0, %1, 4)               \n\t"
-       "movd %%mm2, 4(%%"REG_d")               \n\t"
-       "movd %%mm1, 4(%%"REG_d", %1)           \n\t"
+       "movd %%mm2, 4(%%"FF_REG_d")            \n\t"
+       "movd %%mm1, 4(%%"FF_REG_d", %1)        \n\t"
-       "movd %%mm1, 4(%%"REG_d", %1, 2)        \n\t"
+       "movd %%mm1, 4(%%"FF_REG_d", %1, 2)     \n\t"
        :: "r" (dst), "r" ((x86_reg)dstStride), "r"(src)
-       : "%"REG_a, "%"REG_d
+       : "%"FF_REG_a, "%"FF_REG_d
        );
}
#endif //TEMPLATE_PP_MMX
@@ -2166,9 +2166,9 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
//#define L1_DIFF //u should change the thresholds too if u try that one
#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
    __asm__ volatile(
-       "lea (%2, %2, 2), %%"REG_a"             \n\t" // 3*stride
-       "lea (%2, %2, 4), %%"REG_d"             \n\t" // 5*stride
-       "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+       "lea (%2, %2, 2), %%"FF_REG_a"          \n\t" // 3*stride
+       "lea (%2, %2, 4), %%"FF_REG_d"          \n\t" // 5*stride
+       "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c" \n\t" // 7*stride
@@ -2179,21 +2179,21 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
-       "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
-       "psadbw (%1, %%"REG_a"), %%mm3          \n\t" // |L3-R3|
+       "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
+       "psadbw (%1, %%"FF_REG_a"), %%mm3       \n\t" // |L3-R3|
-       "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
+       "movq (%0, %%"FF_REG_d"), %%mm5         \n\t" // L5
-       "psadbw (%1, %%"REG_d"), %%mm5          \n\t" // |L5-R5|
-       "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
+       "psadbw (%1, %%"FF_REG_d"), %%mm5       \n\t" // |L5-R5|
+       "movq (%0, %%"FF_REG_a", 2), %%mm6      \n\t" // L6
-       "psadbw (%1, %%"REG_a", 2), %%mm6       \n\t" // |L6-R6|
-       "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
+       "psadbw (%1, %%"FF_REG_a", 2), %%mm6    \n\t" // |L6-R6|
+       "movq (%0, %%"FF_REG_c"), %%mm7         \n\t" // L7
-       "psadbw (%1, %%"REG_c"), %%mm7          \n\t" // |L7-R7|
+       "psadbw (%1, %%"FF_REG_c"), %%mm7       \n\t" // |L7-R7|
@@ -2242,11 +2242,11 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
-L2_DIFF_CORE((%0, %%REGa)   , (%1, %%REGa))
+L2_DIFF_CORE((%0, %%FF_REGa)   , (%1, %%FF_REGa))
-L2_DIFF_CORE((%0, %%REGd)   , (%1, %%REGd))
-L2_DIFF_CORE((%0, %%REGa, 2), (%1, %%REGa, 2))
-L2_DIFF_CORE((%0, %%REGc)   , (%1, %%REGc))
+L2_DIFF_CORE((%0, %%FF_REGd)   , (%1, %%FF_REGd))
+L2_DIFF_CORE((%0, %%FF_REGa, 2), (%1, %%FF_REGa, 2))
+L2_DIFF_CORE((%0, %%FF_REGc)   , (%1, %%FF_REGc))
#endif //L1_DIFF
@@ -2255,94 +2255,94 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
-       "mov %3, %%"REG_d"                      \n\t"
-       "addl -4(%%"REG_d"), %%ecx              \n\t"
-       "addl 4(%%"REG_d"), %%ecx               \n\t"
-       "addl -1024(%%"REG_d"), %%ecx           \n\t"
+       "mov %3, %%"FF_REG_d"                   \n\t"
+       "addl -4(%%"FF_REG_d"), %%ecx           \n\t"
+       "addl 4(%%"FF_REG_d"), %%ecx            \n\t"
+       "addl -1024(%%"FF_REG_d"), %%ecx        \n\t"
-       "addl 1024(%%"REG_d"), %%ecx            \n\t"
+       "addl 1024(%%"FF_REG_d"), %%ecx         \n\t"
-       "movl %%ecx, (%%"REG_d")                \n\t"
+       "movl %%ecx, (%%"FF_REG_d")             \n\t"
-//    "mov %3, %%"REG_c"                       \n\t"
-//    "mov %%"REG_c", test                     \n\t"
+//    "mov %3, %%"FF_REG_c"                    \n\t"
+//    "mov %%"FF_REG_c", test                  \n\t"
-       "cmpl 512(%%"REG_d"), %%ecx             \n\t"
+       "cmpl 512(%%"FF_REG_d"), %%ecx          \n\t"
-       "cmpl 516(%%"REG_d"), %%ecx             \n\t"
+       "cmpl 516(%%"FF_REG_d"), %%ecx          \n\t"
-       "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-       "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+       "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d" \n\t" // 5*stride
+       "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c" \n\t" // 7*stride
-       "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+       "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
-       "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
-       "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
-       "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
+       "movq (%0, %%"FF_REG_d"), %%mm5         \n\t" // L5
+       "movq (%0, %%"FF_REG_a", 2), %%mm6      \n\t" // L6
+       "movq (%0, %%"FF_REG_c"), %%mm7         \n\t" // L7
-       "movq %%mm3, (%1, %%"REG_a")            \n\t" // L3
+       "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // L3
-       "movq %%mm5, (%1, %%"REG_d")            \n\t" // L5
-       "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" // L6
-       "movq %%mm7, (%1, %%"REG_c")            \n\t" // L7
+       "movq %%mm5, (%1, %%"FF_REG_d")         \n\t" // L5
+       "movq %%mm6, (%1, %%"FF_REG_a", 2)      \n\t" // L6
+       "movq %%mm7, (%1, %%"FF_REG_c")         \n\t" // L7
        "1:                             \n\t"
-       "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-       "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+       "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d" \n\t" // 5*stride
+       "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c" \n\t" // 7*stride
-       "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
-       PAVGB((%1, %%REGa), %%mm3)                    // L3
+       "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
+       PAVGB((%1, %%FF_REGa), %%mm3)                 // L3
-       "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
-       PAVGB((%1, %%REGd), %%mm5)                    // L5
-       "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
-       PAVGB((%1, %%REGa, 2), %%mm6)                 // L6
-       "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
-       PAVGB((%1, %%REGc), %%mm7)                    // L7
+       "movq (%0, %%"FF_REG_d"), %%mm5         \n\t" // L5
+       PAVGB((%1, %%FF_REGd), %%mm5)                 // L5
+       "movq (%0, %%"FF_REG_a", 2), %%mm6      \n\t" // L6
+       PAVGB((%1, %%FF_REGa, 2), %%mm6)              // L6
+       "movq (%0, %%"FF_REG_c"), %%mm7         \n\t" // L7
+       PAVGB((%1, %%FF_REGc), %%mm7)                 // L7
-       "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+       "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // R3
-       "movq %%mm5, (%1, %%"REG_d")            \n\t" // R5
-       "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" // R6
-       "movq %%mm7, (%1, %%"REG_c")            \n\t" // R7
+       "movq %%mm5, (%1, %%"FF_REG_d")         \n\t" // R5
+       "movq %%mm6, (%1, %%"FF_REG_a", 2)      \n\t" // R6
+       "movq %%mm7, (%1, %%"FF_REG_c")         \n\t" // R7
-       "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+       "movq %%mm3, (%0, %%"FF_REG_a")         \n\t" // L3
-       "movq %%mm5, (%0, %%"REG_d")            \n\t" // L5
-       "movq %%mm6, (%0, %%"REG_a", 2)         \n\t" // L6
-       "movq %%mm7, (%0, %%"REG_c")            \n\t" // L7
+       "movq %%mm5, (%0, %%"FF_REG_d")         \n\t" // L5
+       "movq %%mm6, (%0, %%"FF_REG_a", 2)      \n\t" // L6
+       "movq %%mm7, (%0, %%"FF_REG_c")         \n\t" // L7
        "2:                             \n\t"
-       "cmpl 508(%%"REG_d"), %%ecx             \n\t"
+       "cmpl 508(%%"FF_REG_d"), %%ecx          \n\t"
-       "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-       "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+       "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d" \n\t" // 5*stride
+       "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c" \n\t" // 7*stride
-       "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+       "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
-       "movq (%1, %%"REG_a"), %%mm7            \n\t" // R3
+       "movq (%1, %%"FF_REG_a"), %%mm7         \n\t" // R3
@@ -2354,20 +2354,20 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
-       "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+       "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // R3
-       "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+       "movq %%mm3, (%0, %%"FF_REG_a")         \n\t" // L3
-       "movq (%0, %%"REG_d"), %%mm1            \n\t" // L5
-       "movq (%0, %%"REG_a", 2), %%mm2         \n\t" // L6
-       "movq (%0, %%"REG_c"), %%mm3            \n\t" // L7
+       "movq (%0, %%"FF_REG_d"), %%mm1         \n\t" // L5
+       "movq (%0, %%"FF_REG_a", 2), %%mm2      \n\t" // L6
+       "movq (%0, %%"FF_REG_c"), %%mm3         \n\t" // L7
-       "movq (%1, %%"REG_d"), %%mm5            \n\t" // R5
-       "movq (%1, %%"REG_a", 2), %%mm6         \n\t" // R6
-       "movq (%1, %%"REG_c"), %%mm7            \n\t" // R7
+       "movq (%1, %%"FF_REG_d"), %%mm5         \n\t" // R5
+       "movq (%1, %%"FF_REG_a", 2), %%mm6      \n\t" // R6
+       "movq (%1, %%"FF_REG_c"), %%mm7         \n\t" // R7
@@ -2377,26 +2377,26 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
-       "movq %%mm1, (%1, %%"REG_d")            \n\t" // R5
-       "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" // R6
-       "movq %%mm3, (%1, %%"REG_c")            \n\t" // R7
+       "movq %%mm1, (%1, %%"FF_REG_d")         \n\t" // R5
+       "movq %%mm2, (%1, %%"FF_REG_a", 2)      \n\t" // R6
+       "movq %%mm3, (%1, %%"FF_REG_c")         \n\t" // R7
-       "movq %%mm1, (%0, %%"REG_d")            \n\t" // L5
-       "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" // L6
-       "movq %%mm3, (%0, %%"REG_c")            \n\t" // L7
+       "movq %%mm1, (%0, %%"FF_REG_d")         \n\t" // L5
+       "movq %%mm2, (%0, %%"FF_REG_a", 2)      \n\t" // L6
+       "movq %%mm3, (%0, %%"FF_REG_c")         \n\t" // L7
        "3:                             \n\t"
-       "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-       "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+       "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d" \n\t" // 5*stride
+       "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c" \n\t" // 7*stride
-       "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+       "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
-       "movq (%1, %%"REG_a"), %%mm7            \n\t" // R3
+       "movq (%1, %%"FF_REG_a"), %%mm7         \n\t" // R3
@@ -2412,20 +2412,20 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
-       "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+       "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // R3
-       "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+       "movq %%mm3, (%0, %%"FF_REG_a")         \n\t" // L3
-       "movq (%0, %%"REG_d"), %%mm1            \n\t" // L5
-       "movq (%0, %%"REG_a", 2), %%mm2         \n\t" // L6
-       "movq (%0, %%"REG_c"), %%mm3            \n\t" // L7
+       "movq (%0, %%"FF_REG_d"), %%mm1         \n\t" // L5
+       "movq (%0, %%"FF_REG_a", 2), %%mm2      \n\t" // L6
+       "movq (%0, %%"FF_REG_c"), %%mm3         \n\t" // L7
-       "movq (%1, %%"REG_d"), %%mm5            \n\t" // R5
-       "movq (%1, %%"REG_a", 2), %%mm6         \n\t" // R6
-       "movq (%1, %%"REG_c"), %%mm7            \n\t" // R7
+       "movq (%1, %%"FF_REG_d"), %%mm5         \n\t" // R5
+       "movq (%1, %%"FF_REG_a", 2), %%mm6      \n\t" // R6
+       "movq (%1, %%"FF_REG_c"), %%mm7         \n\t" // R7
@@ -2439,19 +2439,19 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
-       "movq %%mm1, (%1, %%"REG_d")            \n\t" // R5
-       "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" // R6
-       "movq %%mm3, (%1, %%"REG_c")            \n\t" // R7
+       "movq %%mm1, (%1, %%"FF_REG_d")         \n\t" // R5
+       "movq %%mm2, (%1, %%"FF_REG_a", 2)      \n\t" // R6
+       "movq %%mm3, (%1, %%"FF_REG_c")         \n\t" // R7
-       "movq %%mm1, (%0, %%"REG_d")            \n\t" // L5
-       "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" // L6
-       "movq %%mm3, (%0, %%"REG_c")            \n\t" // L7
+       "movq %%mm1, (%0, %%"FF_REG_d")         \n\t" // L5
+       "movq %%mm2, (%0, %%"FF_REG_a", 2)      \n\t" // L6
+       "movq %%mm3, (%0, %%"FF_REG_c")         \n\t" // L7
        "4:                             \n\t"
        :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) NAMED_CONSTRAINTS_ADD(b80)
-       : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
+       : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c, "memory"
        );
#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
{
@@ -2556,19 +2556,19 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
        );
    __asm__ volatile(
-       "lea (%2, %3), %%"REG_a"                \n\t"
+       "lea (%2, %3), %%"FF_REG_a"             \n\t"
        "movq (%2), %%mm0                       \n\t"
-       "movq (%%"REG_a"), %%mm1                \n\t"
+       "movq (%%"FF_REG_a"), %%mm1             \n\t"
-       "movq (%%"REG_a",%3), %%mm2             \n\t"
+       "movq (%%"FF_REG_a",%3), %%mm2          \n\t"
@@ -2576,7 +2576,7 @@
-       "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
+       "movq (%%"FF_REG_a", %3, 2), %%mm1      \n\t"
@@ -2584,7 +2584,7 @@
-       "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
+       "lea (%%"FF_REG_a", %3, 4), %%"FF_REG_a" \n\t"
@@ -2594,7 +2594,7 @@
-       "movq (%%"REG_a"), %%mm1                \n\t"
+       "movq (%%"FF_REG_a"), %%mm1             \n\t"
@@ -2602,7 +2602,7 @@
-       "movq (%%"REG_a", %3), %%mm2            \n\t"
+       "movq (%%"FF_REG_a", %3), %%mm2         \n\t"
@@ -2610,7 +2610,7 @@
-       "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
+       "movq (%%"FF_REG_a", %3, 2), %%mm1      \n\t"
@@ -2626,7 +2626,7 @@
-       "movq (%%"REG_a", %3, 4), %%mm1         \n\t"
+       "movq (%%"FF_REG_a", %3, 4), %%mm1      \n\t"
@@ -2651,7 +2651,7 @@
        : "=m" (eq_mask), "=m" (dc_mask)
        : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
-       : "%"REG_a
+       : "%"FF_REG_a
        );
        both_masks = dc_mask & eq_mask;
@@ -2851,12 +2851,12 @@
        "movq (%0, %1), %%mm2                   \n\t"
-       "lea (%0, %1, 2), %%"REG_a"             \n\t"
+       "lea (%0, %1, 2), %%"FF_REG_a"          \n\t"
-       "movq (%%"REG_a"), %%mm4                \n\t"
+       "movq (%%"FF_REG_a"), %%mm4             \n\t"
@@ -2873,7 +2873,7 @@
-       "movq (%%"REG_a", %1), %%mm2            \n\t"
+       "movq (%%"FF_REG_a", %1), %%mm2         \n\t"
@@ -2885,7 +2885,7 @@
-       "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+       "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
@@ -2899,7 +2899,7 @@
-       "lea (%%"REG_a", %1), %0                \n\t"
+       "lea (%%"FF_REG_a", %1), %0             \n\t"
@@ -2914,10 +2914,10 @@
-       "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+       "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
-       "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+       "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
@@ -3068,7 +3068,7 @@
        : "+r" (temp_src)
        : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp) NAMED_CONSTRAINTS_ADD(w05,w20)
-       : "%"REG_a
+       : "%"FF_REG_a
        );
    }
/*if(step==16){
@@ -3099,10 +3099,10 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
    if (levelFix){
#if TEMPLATE_PP_MMX && HAVE_6REGS
    __asm__ volatile(
-       "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset
-       "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale
-       "lea (%2,%4), %%"REG_a"         \n\t"
-       "lea (%3,%5), %%"REG_d"         \n\t"
+       "movq (%%"FF_REG_a"), %%mm2     \n\t" // packedYOffset
+       "movq 8(%%"FF_REG_a"), %%mm3    \n\t" // packedYScale
+       "lea (%2,%4), %%"FF_REG_a"      \n\t"
+       "lea (%3,%5), %%"FF_REG_d"      \n\t"
        "pxor %%mm4, %%mm4              \n\t"
#if TEMPLATE_PP_MMXEXT
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
@@ -3159,11 +3159,11 @@
REAL_SCALED_CPY(src1, src2, dst1, dst2)
SCALED_CPY((%2)       , (%2, %4)          , (%3)       , (%3, %5))
-SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
-SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
-       "lea (%%"REG_a",%4,4), %%"REG_a"        \n\t"
-       "lea (%%"REG_d",%5,4), %%"REG_d"        \n\t"
-SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
+SCALED_CPY((%2, %4, 2), (%%FF_REGa, %4, 2), (%3, %5, 2), (%%FF_REGd, %5, 2))
+SCALED_CPY((%2, %4, 4), (%%FF_REGa, %4, 4), (%3, %5, 4), (%%FF_REGd, %5, 4))
+       "lea (%%"FF_REG_a",%4,4), %%"FF_REG_a"  \n\t"
+       "lea (%%"FF_REG_d",%5,4), %%"FF_REG_d"  \n\t"
+SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), (%%FF_REGd, %5), (%%FF_REGd, %5, 2))
        : "=&a" (packedOffsetAndScale)
@@ -3172,7 +3172,7 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
        "r" (dst), "r" ((x86_reg)srcStride), "r" ((x86_reg)dstStride)
-       : "%"REG_d
+       : "%"FF_REG_d
        );
#else //TEMPLATE_PP_MMX && HAVE_6REGS
        for (i = 0; i < 8; i++)
@@ -3182,8 +3182,8 @@
    }else{
#if TEMPLATE_PP_MMX && HAVE_6REGS
    __asm__ volatile(
-       "lea (%0,%2), %%"REG_a"                 \n\t"
-       "lea (%1,%3), %%"REG_d"                 \n\t"
+       "lea (%0,%2), %%"FF_REG_a"              \n\t"
+       "lea (%1,%3), %%"FF_REG_d"              \n\t"
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
        "movq " #src1 ", %%mm0          \n\t"\
@@ -3195,17 +3195,17 @@
REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
SIMPLE_CPY((%0)       , (%0, %2)          , (%1)       , (%1, %3))
-SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
-SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
-       "lea (%%"REG_a",%2,4), %%"REG_a"        \n\t"
-       "lea (%%"REG_d",%3,4), %%"REG_d"        \n\t"
-SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
+SIMPLE_CPY((%0, %2, 2), (%%FF_REGa, %2, 2), (%1, %3, 2), (%%FF_REGd, %3, 2))
+SIMPLE_CPY((%0, %2, 4), (%%FF_REGa, %2, 4), (%1, %3, 4), (%%FF_REGd, %3, 4))
+       "lea (%%"FF_REG_a",%2,4), %%"FF_REG_a"  \n\t"
+       "lea (%%"FF_REG_d",%3,4), %%"FF_REG_d"  \n\t"
+SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), (%%FF_REGd, %3), (%%FF_REGd, %3, 2))
        : : "r" (src), "r" (dst), "r" ((x86_reg)srcStride), "r" ((x86_reg)dstStride)
-       : "%"REG_a, "%"REG_d
+       : "%"FF_REG_a, "%"FF_REG_d
        );
#else //TEMPLATE_PP_MMX && HAVE_6REGS
        for (i = 0; i < 8; i++)
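For reference while reading the second file below: the string macros being renamed are the word-size register names, together with the bare-token REGa/REGd forms used inside macro arguments such as SCALED_CPY above. The following is only an assumed sketch of the prefixed definitions; the authoritative versions live in libavutil/x86/asm.h.

    /* Assumed shape only -- see libavutil/x86/asm.h for the real definitions,
     * which also cover FF_REG_b/c/d/S/D, FF_REG_sp, the other bare-token
     * FF_REGb/FF_REGc/FF_REGd forms and FF_OPSIZE. */
    #if defined(__x86_64__)            /* the real header keys off ARCH_X86_64 */
    #    define FF_OPSIZE "q"
    #    define FF_REG_a  "rax"
    #    define FF_REGa   rax          /* bare token, used as a macro argument */
    #else
    #    define FF_OPSIZE "l"
    #    define FF_REG_a  "eax"
    #    define FF_REGa   eax
    #endif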
libswscale/x86/hscale_fast_bilinear_simd.c
@@ -55,9 +55,9 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
        // Begin
        "0:                             \n\t"
-       "movq (%%"REG_d", %%"REG_a"), %%mm3     \n\t"
-       "movd (%%"REG_c", %%"REG_S"), %%mm0     \n\t"
-       "movd 1(%%"REG_c", %%"REG_S"), %%mm1    \n\t"
+       "movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3  \n\t"
+       "movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0  \n\t"
+       "movd 1(%%"FF_REG_c", %%"FF_REG_S"), %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm1         \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
@@ -65,14 +65,14 @@
        "psubw %%mm1, %%mm0             \n\t"
-       "movl 8(%%"REG_b", %%"REG_a"), %%esi    \n\t"
+       "movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"
        "pmullw %%mm3, %%mm0            \n\t"
-       "movq %%mm0, (%%"REG_D", %%"REG_a")     \n\t"
+       "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a")  \n\t"
-       "add $8, %%"REG_a"              \n\t"
+       "add $8, %%"FF_REG_a"           \n\t"
        // End
        "9:                             \n\t"
@@ -94,22 +94,22 @@
        "0:                             \n\t"
-       "movq (%%"REG_d", %%"REG_a"), %%mm3     \n\t"
-       "movd (%%"REG_c", %%"REG_S"), %%mm0     \n\t"
+       "movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3  \n\t"
+       "movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0  \n\t"
        "psubw %%mm1, %%mm0             \n\t"
-       "movl 8(%%"REG_b", %%"REG_a"), %%esi    \n\t"
+       "movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"
        "pmullw %%mm3, %%mm0            \n\t"
-       "movq %%mm0, (%%"REG_D", %%"REG_a")     \n\t"
+       "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a")  \n\t"
-       "add $8, %%"REG_a"              \n\t"
+       "add $8, %%"FF_REG_a"           \n\t"
        // End
        "9:                             \n\t"
@@ -206,39 +206,39 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
    __asm__ volatile(
#if ARCH_X86_64
-       "mov -8(%%rsp), %%"REG_a"       \n\t"
-       "mov %%"REG_a", %5              \n\t" // retsave
+       "mov -8(%%rsp), %%"FF_REG_a"    \n\t"
+       "mov %%"FF_REG_a", %5           \n\t" // retsave
#else
#if defined(PIC)
-       "mov %%"REG_b", %5              \n\t" // ebxsave
+       "mov %%"FF_REG_b", %5           \n\t" // ebxsave
#endif
#endif
        "pxor %%mm7, %%mm7              \n\t"
-       "mov %0, %%"REG_c"              \n\t"
-       "mov %1, %%"REG_D"              \n\t"
-       "mov %2, %%"REG_d"              \n\t"
-       "mov %3, %%"REG_b"              \n\t"
-       "xor %%"REG_a", %%"REG_a"       \n\t" // i
-       PREFETCH" (%%"REG_c")           \n\t"
-       PREFETCH" 32(%%"REG_c")         \n\t"
-       PREFETCH" 64(%%"REG_c")         \n\t"
+       "mov %0, %%"FF_REG_c"           \n\t"
+       "mov %1, %%"FF_REG_D"           \n\t"
+       "mov %2, %%"FF_REG_d"           \n\t"
+       "mov %3, %%"FF_REG_b"           \n\t"
+       "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
+       PREFETCH" (%%"FF_REG_c")        \n\t"
+       PREFETCH" 32(%%"FF_REG_c")      \n\t"
+       PREFETCH" 64(%%"FF_REG_c")      \n\t"
#if ARCH_X86_64
#define CALL_MMXEXT_FILTER_CODE \
-   "movl            (%%"REG_b"), %%esi        \n\t"\
+   "movl            (%%"FF_REG_b"), %%esi     \n\t"\
    "call *%4                                  \n\t"\
-   "movl (%%"REG_b", %%"REG_a"), %%esi        \n\t"\
-   "add               %%"REG_S", %%"REG_c"    \n\t"\
-   "add               %%"REG_a", %%"REG_D"    \n\t"\
-   "xor               %%"REG_a", %%"REG_a"    \n\t"\
+   "movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi  \n\t"\
+   "add            %%"FF_REG_S", %%"FF_REG_c" \n\t"\
+   "add            %%"FF_REG_a", %%"FF_REG_D" \n\t"\
+   "xor            %%"FF_REG_a", %%"FF_REG_a" \n\t"\
#else
#define CALL_MMXEXT_FILTER_CODE \
-   "movl (%%"REG_b"), %%esi                   \n\t"\
+   "movl (%%"FF_REG_b"), %%esi                \n\t"\
    "call *%4                                  \n\t"\
-   "addl (%%"REG_b", %%"REG_a"), %%"REG_c"    \n\t"\
-   "add               %%"REG_a", %%"REG_D"    \n\t"\
-   "xor               %%"REG_a", %%"REG_a"    \n\t"\
+   "addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\
+   "add            %%"FF_REG_a", %%"FF_REG_D" \n\t"\
+   "xor            %%"FF_REG_a", %%"FF_REG_a" \n\t"\
#endif /* ARCH_X86_64 */
@@ -252,11 +252,11 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
    CALL_MMXEXT_FILTER_CODE
#if ARCH_X86_64
-       "mov %5, %%"REG_a"              \n\t"
-       "mov %%"REG_a", -8(%%rsp)       \n\t"
+       "mov %5, %%"FF_REG_a"           \n\t"
+       "mov %%"FF_REG_a", -8(%%rsp)    \n\t"
#else
#if defined(PIC)
-       "mov %5, %%"REG_b"              \n\t"
+       "mov %5, %%"FF_REG_b"           \n\t"
#endif
#endif
        :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
@@ -268,9 +268,9 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
        ,"m" (ebxsave)
#endif
#endif
-       : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+       : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
#if ARCH_X86_64 || !defined(PIC)
-       ,"%"REG_b
+       ,"%"FF_REG_b
#endif
        );
@@ -295,33 +295,33 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
#endif
    __asm__ volatile(
#if ARCH_X86_64
-       "mov -8(%%rsp), %%"REG_a"       \n\t"
-       "mov %%"REG_a", %7              \n\t" // retsave
+       "mov -8(%%rsp), %%"FF_REG_a"    \n\t"
+       "mov %%"FF_REG_a", %7           \n\t" // retsave
#else
#if defined(PIC)
-       "mov %%"REG_b", %7              \n\t" // ebxsave
+       "mov %%"FF_REG_b", %7           \n\t" // ebxsave
#endif
#endif
        "pxor %%mm7, %%mm7              \n\t"
-       "mov %0, %%"REG_c"              \n\t"
-       "mov %1, %%"REG_D"              \n\t"
-       "mov %2, %%"REG_d"              \n\t"
-       "mov %3, %%"REG_b"              \n\t"
-       "xor %%"REG_a", %%"REG_a"       \n\t" // i
-       PREFETCH" (%%"REG_c")           \n\t"
-       PREFETCH" 32(%%"REG_c")         \n\t"
-       PREFETCH" 64(%%"REG_c")         \n\t"
+       "mov %0, %%"FF_REG_c"           \n\t"
+       "mov %1, %%"FF_REG_D"           \n\t"
+       "mov %2, %%"FF_REG_d"           \n\t"
+       "mov %3, %%"FF_REG_b"           \n\t"
+       "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
+       PREFETCH" (%%"FF_REG_c")        \n\t"
+       PREFETCH" 32(%%"FF_REG_c")      \n\t"
+       PREFETCH" 64(%%"FF_REG_c")      \n\t"
    CALL_MMXEXT_FILTER_CODE
    CALL_MMXEXT_FILTER_CODE
    CALL_MMXEXT_FILTER_CODE
    CALL_MMXEXT_FILTER_CODE
-       "xor %%"REG_a", %%"REG_a"       \n\t" // i
-       "mov %5, %%"REG_c"              \n\t" // src2
-       "mov %6, %%"REG_D"              \n\t" // dst2
-       PREFETCH" (%%"REG_c")           \n\t"
-       PREFETCH" 32(%%"REG_c")         \n\t"
-       PREFETCH" 64(%%"REG_c")         \n\t"
+       "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
+       "mov %5, %%"FF_REG_c"           \n\t" // src2
+       "mov %6, %%"FF_REG_D"           \n\t" // dst2
+       PREFETCH" (%%"FF_REG_c")        \n\t"
+       PREFETCH" 32(%%"FF_REG_c")      \n\t"
+       PREFETCH" 64(%%"FF_REG_c")      \n\t"
    CALL_MMXEXT_FILTER_CODE
    CALL_MMXEXT_FILTER_CODE
@@ -329,11 +329,11 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
    CALL_MMXEXT_FILTER_CODE
#if ARCH_X86_64
-       "mov %7, %%"REG_a"              \n\t"
-       "mov %%"REG_a", -8(%%rsp)       \n\t"
+       "mov %7, %%"FF_REG_a"           \n\t"
+       "mov %%"FF_REG_a", -8(%%rsp)    \n\t"
#else
#if defined(PIC)
"mov %7, %%"
REG_b
"
\n\t
"
"mov %7, %%"
FF_
REG_b
"
\n\t
"
#endif
#endif
::
"m"
(
src1
),
"m"
(
dst1
),
"m"
(
filter
),
"m"
(
filterPos
),
...
...
@@ -345,9 +345,9 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
,
"m"
(
ebxsave
)
#endif
#endif
:
"%"
REG_a
,
"%"
REG_c
,
"%"
REG_d
,
"%"
REG_S
,
"%"
REG_D
:
"%"
FF_REG_a
,
"%"
FF_REG_c
,
"%"
FF_REG_d
,
"%"
FF_REG_S
,
"%"
FF_
REG_D
#if ARCH_X86_64 || !defined(PIC)
,
"%"
REG_b
,
"%"
FF_
REG_b
#endif
);
...
...
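ff_hyscale_fast_mmxext() and ff_hcscale_fast_mmxext() above spill the b register to ebxsave on 32-bit PIC builds (and the saved return address on x86-64) instead of declaring it clobbered, because %ebx holds the GOT pointer there. A stripped-down sketch of that save/restore idiom, assuming FF_REG_b expands to "ebx"/"rbx"; the helper and its operands are invented for illustration, not taken from this file.

```c
/* Sketch of the ebxsave pattern; not code from this commit. */
#if defined(__x86_64__)
#   define FF_REG_b "rbx"
#else
#   define FF_REG_b "ebx"
#endif

static void copy_via_reg_b(long *dst, long value)
{
    long ebxsave;                       /* slot standing in for the saved GOT pointer */
    __asm__ volatile (
        "mov %%"FF_REG_b", %1 \n\t"     /* save ebx/rbx instead of clobbering it */
        "mov %2, %%"FF_REG_b" \n\t"     /* use it as a scratch register          */
        "mov %%"FF_REG_b", %0 \n\t"
        "mov %1, %%"FF_REG_b" \n\t"     /* restore it before leaving the block   */
        : "=m"(*dst), "=m"(ebxsave)
        : "r"(value));
}
```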
libswscale/x86/rgb2rgb_template.c
@@ -1101,43 +1101,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
     unsigned i;
     x86_reg mmx_size = 23 - src_size;
     __asm__ volatile (
-        "test %%"REG_a", %%"REG_a"              \n\t"
+        "test %%"FF_REG_a", %%"FF_REG_a"        \n\t"
         "jns 2f                                 \n\t"
         "movq "MANGLE(mask24r)", %%mm5          \n\t"
         ...
         "1:                                     \n\t"
-        PREFETCH" 32(%1, %%"REG_a")             \n\t"
-        "movq  (%1, %%"REG_a"), %%mm0           \n\t" // BGR BGR BG
-        "movq  (%1, %%"REG_a"), %%mm1           \n\t" // BGR BGR BG
-        "movq 2(%1, %%"REG_a"), %%mm2           \n\t" // R BGR BGR B
+        PREFETCH" 32(%1, %%"FF_REG_a")          \n\t"
+        "movq  (%1, %%"FF_REG_a"), %%mm0        \n\t" // BGR BGR BG
+        "movq  (%1, %%"FF_REG_a"), %%mm1        \n\t" // BGR BGR BG
+        "movq 2(%1, %%"FF_REG_a"), %%mm2        \n\t" // R BGR BGR B
         ...
-        MOVNTQ" %%mm1, 16(%2, %%"REG_a")        \n\t"
-        "add $24, %%"REG_a"                     \n\t"
+        MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a")     \n\t"
+        "add $24, %%"FF_REG_a"                  \n\t"
         " js 1b                                 \n\t"
         "2:                                     \n\t"
         : "+a" (mmx_size)
         ...
@@ -1173,20 +1173,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, ...
         __asm__ volatile (
-            "xor %%"REG_a", %%"REG_a"           \n\t"
+            "xor %%"FF_REG_a", %%"FF_REG_a"     \n\t"
             ".p2align 4                         \n\t"
             "1:                                 \n\t"
-            PREFETCH" 32(%1, %%"REG_a", 2)      \n\t"
-            "movq  (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
-            "movq  (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
-            "movq  (%1, %%"REG_a",2), %%mm3     \n\t" // Y(0)
-            "movq 8(%1, %%"REG_a",2), %%mm5     \n\t" // Y(8)
+            PREFETCH" 32(%1, %%"FF_REG_a", 2)   \n\t"
+            "movq  (%2, %%"FF_REG_a"), %%mm0    \n\t" // U(0)
+            "movq  (%3, %%"FF_REG_a"), %%mm1    \n\t" // V(0)
+            "movq  (%1, %%"FF_REG_a",2), %%mm3  \n\t" // Y(0)
+            "movq 8(%1, %%"FF_REG_a",2), %%mm5  \n\t" // Y(8)
             ...
@@ -1194,16 +1194,16 @@
-            MOVNTQ" %%mm3, (%0, %%"REG_a", 4)   \n\t"
-            "add $8, %%"REG_a"                  \n\t"
-            "cmp %4, %%"REG_a"                  \n\t"
+            MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t"
+            "add $8, %%"FF_REG_a"               \n\t"
+            "cmp %4, %%"FF_REG_a"               \n\t"
             " jb 1b                             \n\t"
             :: "r" (dst), "r" (ysrc), "r" (usrc), "r" (vsrc), "g" (chromWidth)
-            : "%"REG_a
+            : "%"FF_REG_a
             );
@@ -1238,20 +1238,20 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, ...
@@ -1259,16 +1259,16 @@
-            "xor %%"REG_a", %%"REG_a"           \n\t"
+            "xor %%"FF_REG_a", %%"FF_REG_a"     \n\t"
             ...
-            : "%"REG_a
+            : "%"FF_REG_a
             );
@@ -1326,14 +1326,14 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ...
@@ -1343,10 +1343,10 @@
@@ -1356,7 +1356,7 @@
@@ -1367,28 +1367,28 @@
@@ -1396,15 +1396,15 @@
-            PREFETCH" 64(%0, %%"REG_a", 4)      \n\t"
-            "movq (%0, %%"REG_a", 4), %%mm0     \n\t" // YUYV YUYV(0)
+            PREFETCH" 64(%0, %%"FF_REG_a", 4)   \n\t"
+            "movq (%0, %%"FF_REG_a", 4), %%mm0  \n\t" // YUYV YUYV(0)
             ...
-            MOVNTQ" %%mm0, (%3, %%"REG_a")      \n\t"
-            MOVNTQ" %%mm2, (%2, %%"REG_a")      \n\t"
-            "add $8, %%"REG_a"                  \n\t"
-            "cmp %4, %%"REG_a"                  \n\t"
+            MOVNTQ" %%mm0, (%3, %%"FF_REG_a")   \n\t"
+            MOVNTQ" %%mm2, (%2, %%"FF_REG_a")   \n\t"
+            "add $8, %%"FF_REG_a"               \n\t"
+            "cmp %4, %%"FF_REG_a"               \n\t"
             " jb 1b                             \n\t"
             :: "r" (src), "r" (ydst), "r" (udst), "r" (vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
             );
@@ -1438,23 +1438,23 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, ...
@@ -1469,19 +1469,19 @@
-            "mov %4, %%"REG_a"                  \n\t"
-            "movq (%0, %%"REG_a"), %%mm4        \n\t"
+            "mov %4, %%"FF_REG_a"               \n\t"
+            "movq (%0, %%"FF_REG_a"), %%mm4     \n\t"
             ...
-            MOVNTQ" %%mm5, (%2, %%"REG_a", 2)   \n\t"
-            "add $8, %%"REG_a"                  \n\t"
-            "movq -1(%0, %%"REG_a"), %%mm4      \n\t"
+            MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t"
+            "add $8, %%"FF_REG_a"               \n\t"
+            "movq -1(%0, %%"FF_REG_a"), %%mm4   \n\t"
             " js 1b                             \n\t"
             :: "r" (src + mmxSize), "r" (src + srcStride + mmxSize),
                "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), "g" (-mmxSize)
                NAMED_CONSTRAINTS_ADD(mmx_ff)
-            : "%"REG_a
+            : "%"FF_REG_a
             );
@@ -1532,14 +1532,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ...
@@ -1549,10 +1549,10 @@
@@ -1562,7 +1562,7 @@
@@ -1573,28 +1573,28 @@
@@ -1602,15 +1602,15 @@
-            "movq (%0, %%"REG_a", 4), %%mm0     \n\t" // UYVY UYVY(0)
-            MOVNTQ" %%mm2, (%1, %%"REG_a", 2)   \n\t"
+            "movq (%0, %%"FF_REG_a", 4), %%mm0  \n\t" // UYVY UYVY(0)
+            MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
             ...
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
             );
@@ -1655,20 +1655,20 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ...
-        "mov %2, %%"REG_a"                      \n\t"
-        "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
-        PREFETCH" 64(%0, %%"REG_d")             \n\t"
-        "movd  (%0, %%"REG_d"), %%mm0           \n\t"
-        "movd 3(%0, %%"REG_d"), %%mm1           \n\t"
+        "mov %2, %%"FF_REG_a"                   \n\t"
+        "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
+        PREFETCH" 64(%0, %%"FF_REG_d")          \n\t"
+        "movd  (%0, %%"FF_REG_d"), %%mm0        \n\t"
+        "movd 3(%0, %%"FF_REG_d"), %%mm1        \n\t"
         ...
@@ -1686,12 +1686,12 @@
@@ -1706,40 +1706,40 @@
-        "add $24, %%"REG_d"                     \n\t"
-        MOVNTQ" %%mm0, (%1, %%"REG_a")          \n\t"
-        "add $8, %%"REG_a"                      \n\t"
+        "add $24, %%"FF_REG_d"                  \n\t"
+        MOVNTQ" %%mm0, (%1, %%"FF_REG_a")       \n\t"
+        "add $8, %%"FF_REG_a"                   \n\t"
         " js 1b                                 \n\t"
         : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
           NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
         );
         ...
-    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-    "add %%"REG_d", %%"REG_d"                   \n\t"
+    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
+    "add %%"FF_REG_d", %%"FF_REG_d"             \n\t"
     ...
@@ -1751,10 +1751,10 @@
@@ -1762,10 +1762,10 @@
@@ -1795,10 +1795,10 @@
@@ -1810,10 +1810,10 @@
@@ -1821,10 +1821,10 @@
@@ -1851,7 +1851,7 @@
@@ -1860,14 +1860,14 @@
-        "movd %%mm0, (%2, %%"REG_a")            \n\t"
-        "movd %%mm0, (%3, %%"REG_a")            \n\t"
-        "add $4, %%"REG_a"                      \n\t"
+        "movd %%mm0, (%2, %%"FF_REG_a")         \n\t"
+        "movd %%mm0, (%3, %%"FF_REG_a")         \n\t"
+        "add $4, %%"FF_REG_a"                   \n\t"
         " js 1b                                 \n\t"
         : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6),
             "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
           NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
         );
@@ -1898,49 +1898,49 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, ...
#if COMPILE_TEMPLATE_SSE2
     if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
         __asm__(
-            "xor %%"REG_a", %%"REG_a"           \n\t"
+            "xor %%"FF_REG_a", %%"FF_REG_a"     \n\t"
             "1:                                 \n\t"
-            PREFETCH" 64(%1, %%"REG_a")         \n\t"
-            "movdqa (%1, %%"REG_a"), %%xmm0     \n\t"
-            "movdqa (%2, %%"REG_a"), %%xmm2     \n\t"
+            PREFETCH" 64(%1, %%"FF_REG_a")      \n\t"
+            "movdqa (%1, %%"FF_REG_a"), %%xmm0  \n\t"
+            "movdqa (%2, %%"FF_REG_a"), %%xmm2  \n\t"
             ...
-            "movntdq %%xmm0,   (%0, %%"REG_a", 2) \n\t"
-            "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
-            "add $16, %%"REG_a"                 \n\t"
-            "cmp %3, %%"REG_a"                  \n\t"
+            "movntdq %%xmm0,   (%0, %%"FF_REG_a", 2) \n\t"
+            "movntdq %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t"
+            "add $16, %%"FF_REG_a"              \n\t"
+            "cmp %3, %%"FF_REG_a"               \n\t"
             " jb 1b                             \n\t"
             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"REG_a
+            : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a
             );
     } else
#endif
         __asm__(
-            "xor %%"REG_a", %%"REG_a"           \n\t"
-            "movq  (%1, %%"REG_a"), %%mm0       \n\t"
-            "movq  (%2, %%"REG_a"), %%mm4       \n\t"
+            "xor %%"FF_REG_a", %%"FF_REG_a"     \n\t"
+            "movq  (%1, %%"FF_REG_a"), %%mm0    \n\t"
+            "movq  (%2, %%"FF_REG_a"), %%mm4    \n\t"
             ...
-            MOVNTQ" %%mm0, (%0, %%"REG_a", 2)   \n\t"
-            "add $16, %%"REG_a"                 \n\t"
-            "cmp %3, %%"REG_a"                  \n\t"
+            MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 2) \n\t"
+            "add $16, %%"FF_REG_a"              \n\t"
+            "cmp %3, %%"FF_REG_a"               \n\t"
             " jb 1b                             \n\t"
             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
             );
}
...
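The yuy2toyv12 and uyvytoyv12 loops above split packed 4:2:2 input into planar output with MMX pack/unpack instructions; a scalar reference of one YUY2 row split may help when reading the register usage. This is a sketch for orientation only, not code from the file.

```c
#include <stdint.h>

/* Split one packed YUYV (Y0 U Y1 V) row into planar Y plus one U/V
 * sample per two pixels -- the scalar equivalent of the MMX loop body. */
static void yuy2_to_yv12_row(const uint8_t *src, uint8_t *ydst,
                             uint8_t *udst, uint8_t *vdst, int width)
{
    for (int x = 0; x < width / 2; x++) {
        ydst[2 * x]     = src[4 * x];     /* Y0 */
        udst[x]         = src[4 * x + 1]; /* U  */
        ydst[2 * x + 1] = src[4 * x + 2]; /* Y1 */
        vdst[x]         = src[4 * x + 3]; /* V  */
    }
}
```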
libswscale/x86/swscale.c
@@ -220,16 +220,16 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
        "movdqa %%xmm3, %%xmm4                      \n\t" \
        "movdqa %%xmm3, %%xmm7                      \n\t" \
        "movl %3, %%ecx                             \n\t" \
-        "mov %0, %%"REG_d"                          \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"                 \n\t"\
+        "mov %0, %%"FF_REG_d"                       \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"           \n\t"\
        ".p2align 4                                 \n\t" /* FIXME Unroll? */\
        "1:                                         \n\t"\
-        "movddup 8(%%"REG_d"), %%xmm0               \n\t" /* filterCoeff */\
-        "movdqa   (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\
-        "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\
-        "add $16, %%"REG_d"                         \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"                 \n\t"\
-        "test %%"REG_S", %%"REG_S"                  \n\t"\
+        "movddup 8(%%"FF_REG_d"), %%xmm0                  \n\t" /* filterCoeff */\
+        "movdqa   (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
+        "movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
+        "add $16, %%"FF_REG_d"                            \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"                 \n\t"\
+        "test %%"FF_REG_S", %%"FF_REG_S"                  \n\t"\
        "pmulhw %%xmm0, %%xmm2                      \n\t"\
        "pmulhw %%xmm0, %%xmm5                      \n\t"\
        "paddw %%xmm2, %%xmm3                       \n\t"\
@@ -238,13 +238,13 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
        "psraw $3, %%xmm3                           \n\t"\
        "psraw $3, %%xmm4                           \n\t"\
        "packuswb %%xmm4, %%xmm3                    \n\t"\
-        "movntdq %%xmm3, (%1, %%"REG_c")            \n\t"\
-        "add $16, %%"REG_c"                         \n\t"\
-        "cmp %2, %%"REG_c"                          \n\t"\
+        "movntdq %%xmm3, (%1, %%"FF_REG_c")         \n\t"\
+        "add $16, %%"FF_REG_c"                      \n\t"\
+        "cmp %2, %%"FF_REG_c"                       \n\t"\
        "movdqa %%xmm7, %%xmm3                      \n\t" \
        "movdqa %%xmm7, %%xmm4                      \n\t" \
-        "mov %0, %%"REG_d"                          \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"                 \n\t"\
+        "mov %0, %%"FF_REG_d"                       \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"           \n\t"\
        "jb 1b                                      \n\t"
    if (offset) {
        ...
@@ -259,7 +259,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
           "m" (filterSize), "m" (((uint64_t *) dither)[0])
        : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm7",)
-          "%"REG_d, "%"REG_S, "%"REG_c
+          "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
        );
    } else {
        __asm__ volatile(
        ...
@@ -269,7 +269,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
           "m" (filterSize), "m" (((uint64_t *) dither)[0])
        : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm7",)
-          "%"REG_d, "%"REG_S, "%"REG_c
+          "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
        );
    }
}
...
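yuv2yuvX_sse3() above walks a list of (coefficient, source line) entries and accumulates them per output pixel before packing to bytes. A scalar sketch of that vertical filter follows; the rounding constants are an assumption chosen to match the 19-bit downscale implied by pmulhw (>>16) plus psraw $3, not values read from this file.

```c
#include <stdint.h>

/* Scalar sketch of the vertical filter: sum filterSize source lines
 * weighted by 16-bit coefficients, then scale down and clip to 8 bits. */
static void yuv2yuvX_ref(const int16_t *filter, int filterSize,
                         const int16_t **src, uint8_t *dest, int dstW)
{
    for (int i = 0; i < dstW; i++) {
        int val = 1 << 18;                   /* rounding term (assumption)        */
        for (int j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];
        val >>= 19;                          /* matches pmulhw (>>16) + psraw $3  */
        dest[i] = val < 0 ? 0 : (val > 255 ? 255 : val);
    }
}
```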
libswscale/x86/swscale_template.c
@@ -88,16 +88,16 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
        "movq %%mm3, %%mm6                  \n\t"
        "movq %%mm4, %%mm7                  \n\t"
        "movl %3, %%ecx                     \n\t"
-        "mov %0, %%"REG_d"                  \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"         \n\t"\
+        "mov %0, %%"FF_REG_d"               \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\
        ".p2align 4                         \n\t" /* FIXME Unroll? */\
        "1:                                 \n\t"\
-        "movq 8(%%"REG_d"), %%mm0               \n\t" /* filterCoeff */\
-        "movq  (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\
-        "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\
-        "add $16, %%"REG_d"                     \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"             \n\t"\
-        "test %%"REG_S", %%"REG_S"              \n\t"\
+        "movq 8(%%"FF_REG_d"), %%mm0                  \n\t" /* filterCoeff */\
+        "movq  (%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\
+        "movq 8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\
+        "add $16, %%"FF_REG_d"                        \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"             \n\t"\
+        "test %%"FF_REG_S", %%"FF_REG_S"              \n\t"\
        "pmulhw %%mm0, %%mm2                \n\t"\
        ...
@@ -106,62 +106,62 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
-        MOVNTQ2 " %%mm3, (%1, %%"REG_c")    \n\t"
-        "add $8, %%"REG_c"                  \n\t"\
-        "cmp %2, %%"REG_c"                  \n\t"\
+        MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c") \n\t"
+        "add $8, %%"FF_REG_c"               \n\t"\
+        "cmp %2, %%"FF_REG_c"               \n\t"\
        "movq %%mm6, %%mm3                  \n\t"
        "movq %%mm7, %%mm4                  \n\t"
-        "mov %0, %%"REG_d"                  \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"         \n\t"\
+        "mov %0, %%"FF_REG_d"               \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\
        "jb 1b                              \n\t"\
        :: "g" (filter),
           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
-        : "%"REG_d, "%"REG_S, "%"REG_c
+        : "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
        );
}

#define YSCALEYUV2PACKEDX_UV \
    __asm__ volatile(\
-        "xor %%"REG_a", %%"REG_a"                   \n\t"\
+        "xor %%"FF_REG_a", %%"FF_REG_a"             \n\t"\
        ".p2align 4                                 \n\t"\
        "nop                                        \n\t"\
        "1:                                         \n\t"\
-        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"                 \n\t"\
+        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"           \n\t"\
        "movq "VROUNDER_OFFSET"(%0), %%mm3          \n\t"\
        "movq %%mm3, %%mm4                          \n\t"\
        ".p2align 4                                 \n\t"\
        "2:                                         \n\t"\
-        "movq 8(%%"REG_d"), %%mm0                   \n\t" /* filterCoeff */\
-        "movq (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
-        "add %6, %%"REG_S"                          \n\t" \
-        "movq (%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
-        "add $16, %%"REG_d"                         \n\t"\
-        "mov (%%"REG_d"), %%"REG_S"                 \n\t"\
+        "movq 8(%%"FF_REG_d"), %%mm0                \n\t" /* filterCoeff */\
+        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2   \n\t" /* UsrcData */\
+        "add %6, %%"FF_REG_S"                       \n\t" \
+        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5   \n\t" /* VsrcData */\
+        "add $16, %%"FF_REG_d"                      \n\t"\
+        "mov (%%"FF_REG_d"), %%"FF_REG_S"           \n\t"\
        "pmulhw %%mm0, %%mm2                        \n\t"\
        "pmulhw %%mm0, %%mm5                        \n\t"\
        "paddw %%mm2, %%mm3                         \n\t"\
        "paddw %%mm5, %%mm4                         \n\t"\
-        "test %%"REG_S", %%"REG_S"                  \n\t"\
+        "test %%"FF_REG_S", %%"FF_REG_S"            \n\t"\
        " jnz 2b                                    \n\t"\

#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
-    "lea "offset"(%0), %%"REG_d"                    \n\t"\
-    "mov (%%"REG_d"), %%"REG_S"                     \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
+    "lea "offset"(%0), %%"FF_REG_d"                 \n\t"\
+    "mov (%%"FF_REG_d"), %%"FF_REG_S"               \n\t"\
+    "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
+    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
    ...
@@ -173,41 +173,41 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
        "r" (dest), "m" (dstW_reg), "m"(uv_off) \
        NAMED_CONSTRAINTS_ADD(bF8,bFC) \
-    : "%"REG_a, "%"REG_d, "%"REG_S \
+    : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
    );
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
-        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
-        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1     \n\t" /* filterCoeff */\
-        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
-        "add $"STR(APCK_SIZE)", %%"REG_d"           \n\t"\
+        "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
+        "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1        \n\t" /* filterCoeff */\
+        "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
+        "add $"STR(APCK_SIZE)", %%"FF_REG_d"              \n\t"\
        ...
@@ -229,30 +229,30 @@ #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
        ...
@@ -359,13 +359,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
-    WRITEBGR32(%4, "%5", %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
+    WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
    ...
-    WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+    WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
@@ -388,13 +388,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
@@ -417,13 +417,13 @@ static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
    ...
@@ -476,7 +476,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
@@ -500,7 +500,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
-    WRITERGB16(%4, "%5", %%REGa)
+    WRITERGB16(%4, "%5", %%FF_REGa)
@@ -553,7 +553,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
@@ -577,7 +577,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
-    WRITERGB15(%4, "%5", %%REGa)
+    WRITERGB15(%4, "%5", %%FF_REGa)
@@ -705,14 +705,14 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
@@ -730,14 +730,14 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
-    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"       \n\t" //FIXME optimize
-    "add %4, %%"REG_c"                              \n\t"
-    WRITEBGR24(%%REGc, "%5", %%REGa)
+    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize
+    "add %4, %%"FF_REG_c"                           \n\t"
+    WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
    :: "r" (&c->redDither), "m" (dummy), "m" (dummy), "m" (dummy),
       "r" (dest), "m" (dstW_reg), "m"(uv_off)
       NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
-    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+    : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
    );
#endif /* HAVE_6REGS */
@@ -776,7 +776,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
@@ -797,7 +797,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
-    WRITEYUY2(%4, "%5", %%REGa)
+    WRITEYUY2(%4, "%5", %%FF_REGa)
@@ -908,37 +908,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
    c->u_temp=(intptr_t)abuf0;
    c->v_temp=(intptr_t)abuf1;
    __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)            \n\t"
-        "mov %4, %%"REG_b"                          \n\t"
-        "push %%"REG_BP"                            \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)         \n\t"
+        "mov %4, %%"FF_REG_b"                       \n\t"
+        "push %%"FF_REG_BP"                         \n\t"
+        YSCALEYUV2RGB(%%FF_REGBP, %5)
        "push %0                                    \n\t"
        "push %1                                    \n\t"
        "mov "U_TEMP"(%5), %0                       \n\t"
        "mov "V_TEMP"(%5), %1                       \n\t"
-        YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
+        YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
        "psraw $3, %%mm1                            \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
        "psraw $3, %%mm7                            \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
        "packuswb %%mm7, %%mm1                      \n\t"
        "pop %1                                     \n\t"
        "pop %0                                     \n\t"
-        WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-        "pop %%"REG_BP"                             \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"            \n\t"
+        WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+        "pop %%"FF_REG_BP"                          \n\t"
+        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"         \n\t"
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither)
        );
#endif
    } else {
        __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)            \n\t"
-        "mov %4, %%"REG_b"                          \n\t"
-        "push %%"REG_BP"                            \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)         \n\t"
+        "mov %4, %%"FF_REG_b"                       \n\t"
+        "push %%"FF_REG_BP"                         \n\t"
+        YSCALEYUV2RGB(%%FF_REGBP, %5)
        "pcmpeqd %%mm7, %%mm7                       \n\t"
-        WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-        "pop %%"REG_BP"                             \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"            \n\t"
+        WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+        "pop %%"FF_REG_BP"                          \n\t"
+        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"         \n\t"
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither)
        );
        ...
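The WRITEBGR32/WRITEBGR24 macros invoked above interleave the separate B, G, R (and alpha) MMX registers into packed pixels at the destination. A scalar sketch of the 32-bit store follows; the byte order in the asm depends on which registers the caller passes, so the B,G,R,A order here is illustrative only.

```c
#include <stdint.h>

/* Scalar sketch of a WRITEBGR32-style store: interleave four planes
 * into packed 4-byte pixels. Byte order is caller-defined in the asm. */
static void write_bgr32_ref(uint8_t *dst, const uint8_t *b, const uint8_t *g,
                            const uint8_t *r, const uint8_t *a, int n)
{
    for (int i = 0; i < n; i++) {
        dst[4 * i + 0] = b[i];
        dst[4 * i + 1] = g[i];
        dst[4 * i + 2] = r[i];
        dst[4 * i + 3] = a ? a[i] : 0xFF;  /* opaque when no alpha plane */
    }
}
```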
@@ -954,14 +954,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR24
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
ff_M24A
,
ff_M24C
,
ff_M24B
)
...
...
@@ -977,10 +977,10 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -988,9 +988,9 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB15
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB15
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
)
...
...
@@ -1006,10 +1006,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1017,9 +1017,9 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB16
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB16
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
,
bFC
)
...
...
@@ -1075,13 +1075,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED
(
%%
REGBP
,
%
5
)
WRITEYUY2
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2PACKED
(
%%
FF_
REGBP
,
%
5
)
WRITEYUY2
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
...
...
@@ -1217,27 +1217,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
const
int16_t
*
ubuf1
=
ubuf
[
0
];
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
needAlpha
)
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
FF_
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
FF_
REGBP
)
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
FF_
REGBP
,
%
5
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
...
...
@@ -1246,27 +1246,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
const int16_t *ubuf1 = ubuf[1];
    if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1b(%%REGBP, %5)
            YSCALEYUV2RGB1_ALPHA(%%REGBP)
            WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
            YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
            WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
        );
    } else {
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1b(%%REGBP, %5)
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
            "pcmpeqd %%mm7, %%mm7                   \n\t"
            WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
        );
...
...
@@ -1285,14 +1285,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        const int16_t *ubuf1 = ubuf[0];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1(%%REGBP, %5)
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1(%%FF_REGBP, %5)
            "pxor %%mm7, %%mm7                      \n\t"
            WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
               NAMED_CONSTRAINTS_ADD(ff_M24A, ff_M24C, ff_M24B)
...
...
@@ -1300,14 +1300,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
} else {
        const int16_t *ubuf1 = ubuf[1];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1b(%%REGBP, %5)
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
            "pxor %%mm7, %%mm7                      \n\t"
            WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
               NAMED_CONSTRAINTS_ADD(ff_M24A, ff_M24C, ff_M24B)
...
...
@@ -1326,10 +1326,10 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        const int16_t *ubuf1 = ubuf[0];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1(%%REGBP, %5)
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1(%%FF_REGBP, %5)
            "pxor %%mm7, %%mm7                      \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1337,9 +1337,9 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
#endif
            WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
               NAMED_CONSTRAINTS_ADD(bF8)
...
...
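In the 15/16 bpp paths above, the paddusb lines add a per-context dither bias to each colour channel before WRITERGB15/WRITERGB16 truncate it to 5 or 6 bits. A scalar sketch of that step for a single RGB555 green sample is shown below; the dither value and layout are illustrative only, not the actual tables stored in SwsContext.

#include <stdint.h>

/* Saturating byte add (what paddusb does per lane), then keep the top
 * 5 bits and place them in the RGB555 green position. */
static inline uint16_t dither_green_555(uint8_t g, uint8_t dither)
{
    unsigned sum = g + dither;
    if (sum > 255)
        sum = 255;                  /* paddusb saturates instead of wrapping */
    return (uint16_t)((sum >> 3) << 5);
}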
@@ -1347,10 +1347,10 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
} else {
        const int16_t *ubuf1 = ubuf[1];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1b(%%REGBP, %5)
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
            "pxor %%mm7, %%mm7                      \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1358,9 +1358,9 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
#endif
            WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
               NAMED_CONSTRAINTS_ADD(bF8)
...
...
@@ -1379,10 +1379,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        const int16_t *ubuf1 = ubuf[0];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1(%%REGBP, %5)
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1(%%FF_REGBP, %5)
            "pxor %%mm7, %%mm7                      \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1390,9 +1390,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
#endif
            WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
               NAMED_CONSTRAINTS_ADD(bF8, bFC)
...
...
@@ -1400,10 +1400,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
} else {
        const int16_t *ubuf1 = ubuf[1];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2RGB1b(%%REGBP, %5)
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
            "pxor %%mm7, %%mm7                      \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1411,9 +1411,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
#endif
            WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
               NAMED_CONSTRAINTS_ADD(bF8, bFC)
...
...
@@ -1469,26 +1469,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        const int16_t *ubuf1 = ubuf[0];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2PACKED1(%%REGBP, %5)
            WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2PACKED1(%%FF_REGBP, %5)
            WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
        );
    } else {
        const int16_t *ubuf1 = ubuf[1];
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
            YSCALEYUV2PACKED1b(%%REGBP, %5)
            WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
            "mov        %4, %%"FF_REG_b"            \n\t"
            "push %%"FF_REG_BP"                     \n\t"
            YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
            WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            "pop %%"FF_REG_BP"                      \n\t"
            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
               "a" (&c->redDither)
        );
...
...
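A note on the operand lists that close every block: "c", "d", "S", "D" and "a" pin the inputs to ecx/rcx, edx/rdx, esi/rsi, edi/rdi and eax/rax, which is what lets the templates refer to them through the FF_REG_* strings, while "m" (dest) stays a plain memory operand. A minimal, self-contained sketch of such pinned constraints follows; the function and names are made up for illustration.

/* Illustrative only: adds two ints with the operands pinned to the same
 * registers the swscale blocks rely on ("c" -> ecx, "d" -> edx, "a" -> eax). */
static inline int add_pinned(int x, int y)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    int r;
    __asm__ ("mov %1, %0  \n\t"
             "add %2, %0  \n\t"
             : "=a" (r)             /* result comes back in eax  */
             : "c" (x), "d" (y));   /* x pinned to ecx, y to edx */
    return r;
#else
    return x + y;
#endif
}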