Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
3178ee4c
Commit
3178ee4c
authored
Jan 05, 2003
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
qpel in b frames bugfixes
Originally committed as revision 1398 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
3643bd9c
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
152 additions
and
149 deletions
+152
-149
dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+146
-143
dsputil_mmx_rnd.h
libavcodec/i386/dsputil_mmx_rnd.h
+6
-6
No files found.
libavcodec/i386/dsputil_mmx.c
View file @
3178ee4c
...
@@ -651,6 +651,9 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride)
...
@@ -651,6 +651,9 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride)
WARPER88_1616
(
hadamard8_diff_mmx
,
hadamard8_diff16_mmx
)
WARPER88_1616
(
hadamard8_diff_mmx
,
hadamard8_diff16_mmx
)
#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
#define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d)
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
"paddw " #m4 ", " #m3 " \n\t"
/* x1 */
\
"paddw " #m4 ", " #m3 " \n\t"
/* x1 */
\
"movq " #pw_20 ", %%mm4 \n\t"
/* 20 */
\
"movq " #pw_20 ", %%mm4 \n\t"
/* 20 */
\
...
@@ -672,7 +675,7 @@ WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
...
@@ -672,7 +675,7 @@ WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
"packuswb %%mm5, %%mm5 \n\t"\
"packuswb %%mm5, %%mm5 \n\t"\
OP(%%mm5, out, %%mm7, d)
OP(%%mm5, out, %%mm7, d)
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP)\
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP
_MMX2, OP_3DNOW
)\
void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint64_t temp;\
uint64_t temp;\
\
\
...
@@ -738,7 +741,7 @@ void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstSt
...
@@ -738,7 +741,7 @@ void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstSt
"psraw $5, %%mm3 \n\t"\
"psraw $5, %%mm3 \n\t"\
"movq %7, %%mm1 \n\t"\
"movq %7, %%mm1 \n\t"\
"packuswb %%mm3, %%mm1 \n\t"\
"packuswb %%mm3, %%mm1 \n\t"\
OP(%%mm1, (%1),%%mm4, q)\
OP
_MMX2
(%%mm1, (%1),%%mm4, q)\
/* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */
\
/* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */
\
\
\
"movq 9(%0), %%mm1 \n\t"
/* JKLMNOPQ */
\
"movq 9(%0), %%mm1 \n\t"
/* JKLMNOPQ */
\
...
@@ -784,7 +787,7 @@ void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstSt
...
@@ -784,7 +787,7 @@ void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstSt
"paddw %%mm3, %%mm4 \n\t"
/* 20a - 6b + 3c - d */
\
"paddw %%mm3, %%mm4 \n\t"
/* 20a - 6b + 3c - d */
\
"psraw $5, %%mm4 \n\t"\
"psraw $5, %%mm4 \n\t"\
"packuswb %%mm4, %%mm0 \n\t"\
"packuswb %%mm4, %%mm0 \n\t"\
OP(%%mm0, 8(%1), %%mm4, q)\
OP
_MMX2
(%%mm0, 8(%1), %%mm4, q)\
\
\
"addl %3, %0 \n\t"\
"addl %3, %0 \n\t"\
"addl %4, %1 \n\t"\
"addl %4, %1 \n\t"\
...
@@ -828,7 +831,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
...
@@ -828,7 +831,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
OP(%%mm0, (%1), %%mm1, q)\
OP
_3DNOW
(%%mm0, (%1), %%mm1, q)\
"movq 16(%0), %%mm0 \n\t"\
"movq 16(%0), %%mm0 \n\t"\
"movq 24(%0), %%mm1 \n\t"\
"movq 24(%0), %%mm1 \n\t"\
"paddw %2, %%mm0 \n\t"\
"paddw %2, %%mm0 \n\t"\
...
@@ -836,7 +839,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
...
@@ -836,7 +839,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
OP(%%mm0, 8(%1), %%mm1, q)\
OP
_3DNOW
(%%mm0, 8(%1), %%mm1, q)\
:: "r"(temp), "r"(dst), "m"(ROUNDER)\
:: "r"(temp), "r"(dst), "m"(ROUNDER)\
);\
);\
dst+=dstStride;\
dst+=dstStride;\
...
@@ -844,88 +847,6 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
...
@@ -844,88 +847,6 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
}\
}\
}\
}\
\
\
void OPNAME ## mpeg4_qpel16_v_lowpass_mmx(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Vertical qpel lowpass over a 16-byte-wide block, done in two asm passes:
 *  1) read 17 source rows of 16 bytes and widen every byte to a 16-bit word
 *     in temp, laid out as four groups of 17 entries (four words per entry);
 *  2) for each group, run QPEL_V_LOW 16 times, each one handing its result
 *     to OP with a destination inside dst.
 * dst/dstStride: output pointer and row pitch; src/srcStride: input pointer and row pitch.
 * NOTE(review): addl/pushl are applied to pointer operands, so this presumably
 * targets 32-bit x86 only - confirm before reusing elsewhere. */\
uint64_t temp[17*4]; /* 4 groups x 17 rows, one 64-bit entry = four 16-bit words */\
uint64_t *temp_ptr= temp;\
int count= 17; /* number of source rows widened by the first loop */\
\
/*FIXME unroll */
\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t" /* mm7 = 0, interleaved as the high byte when unpacking */\
"1: \n\t"\
"movq (%0), %%mm0 \n\t" /* bytes 0-7 of the row, loaded twice: low and high half are unpacked separately */\
"movq (%0), %%mm1 \n\t"\
"movq 8(%0), %%mm2 \n\t" /* bytes 8-15 of the row, likewise loaded twice */\
"movq 8(%0), %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t" /* bytes 0-3 -> words */\
"punpckhbw %%mm7, %%mm1 \n\t" /* bytes 4-7 -> words */\
"punpcklbw %%mm7, %%mm2 \n\t" /* bytes 8-11 -> words */\
"punpckhbw %%mm7, %%mm3 \n\t" /* bytes 12-15 -> words */\
"movq %%mm0, (%1) \n\t" /* group 0 */\
"movq %%mm1, 17*8(%1) \n\t" /* group 1, 17 entries further on */\
"movq %%mm2, (%1, %4) \n\t" /* group 2, %4 = 2*8*17 bytes further on */\
"movq %%mm3, (%1, %5) \n\t" /* group 3, %5 = 3*8*17 bytes further on */\
"addl $8, %1 \n\t" /* next entry in temp */\
"addl %3, %0 \n\t" /* next source row */\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+r" (src), "+r" (temp_ptr), "+r"(count)\
: "r" (srcStride), "r"(2*8*17), "r"(3*8*17)\
);\
\
temp_ptr= temp; /* the first loop advanced temp_ptr; start again at group 0 */\
count=4; /* one pass of the second loop per group */\
\
/*FIXME reorder for speed */
\
/*FIXME remove push/pop gcc 2.95 bug workaround here and in the other 3 lowpass filters */
\
asm volatile(\
/*"pxor %%mm7, %%mm7 \n\t"*/
\
"pushl %0 \n\t" /* %0-%2 are declared as inputs only but are modified below, so save them */\
"pushl %1 \n\t"\
"pushl %2 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t" /* preload the first four rows of this group */\
"movq 8(%0), %%mm1 \n\t"\
"movq 16(%0), %%mm2 \n\t"\
"movq 24(%0), %%mm3 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 16(%0), 8(%0), (%0), 32(%0), (%1), OP) /* output row 0; %5=ff_pw_20, %6=ff_pw_3, %7=ROUNDER */\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP) /* output row 1, one dstStride below */\
"addl %4, %1 \n\t" /* dst += 2*dstStride */\
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, (%0), (%0), 8(%0), 48(%0), (%1), OP) /* NOTE(review): repeated low offsets in rows 0-3 look like top-edge mirroring - confirm against QPEL_V_LOW */\
\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP) /* 128(%0) is the 17th (last) entry of the group */\
\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP) /* NOTE(review): last offset stays at 128 then steps back (120, 112) - looks like bottom-edge mirroring, confirm */\
"addl %4, %1 \n\t" \
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP) /* output row 15 */\
\
"addl $136, %0 \n\t" /* next group in temp: 17 entries * 8 bytes */\
"addl %8, %1 \n\t" /* %8 = 4-14*dstStride: undo the seven 2*dstStride steps and move 4 bytes right */\
"decl %2 \n\t"\
" jnz 1b \n\t"\
"popl %2 \n\t" /* restore the saved input registers in reverse order */\
"popl %1 \n\t"\
"popl %0 \n\t"\
\
:: "r"(temp_ptr), "r"(dst), "r"(count),\
"r"(dstStride), "r"(2*dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-14*dstStride)\
);\
}\
void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint64_t temp;\
uint64_t temp;\
\
\
...
@@ -983,7 +904,7 @@ void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStr
...
@@ -983,7 +904,7 @@ void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStr
"paddw %%mm1, %%mm3 \n\t"
/* 20a - 6b + 3c - d */
\
"paddw %%mm1, %%mm3 \n\t"
/* 20a - 6b + 3c - d */
\
"psraw $5, %%mm3 \n\t"\
"psraw $5, %%mm3 \n\t"\
"packuswb %%mm3, %%mm0 \n\t"\
"packuswb %%mm3, %%mm0 \n\t"\
OP(%%mm0, (%1), %%mm4, q)\
OP
_MMX2
(%%mm0, (%1), %%mm4, q)\
\
\
"addl %3, %0 \n\t"\
"addl %3, %0 \n\t"\
"addl %4, %1 \n\t"\
"addl %4, %1 \n\t"\
...
@@ -1019,15 +940,100 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, in
...
@@ -1019,15 +940,100 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, in
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
OP(%%mm0, (%1), %%mm1, q)\
OP
_3DNOW
(%%mm0, (%1), %%mm1, q)\
:: "r"(temp), "r"(dst), "m"(ROUNDER)\
:: "r"(temp), "r"(dst), "m"(ROUNDER)\
);\
);\
dst+=dstStride;\
dst+=dstStride;\
src+=srcStride;\
src+=srcStride;\
}\
}\
}
#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/*
 * Vertical qpel lowpass over a 16-byte-wide block, done in two asm passes:
 *  1) read 17 source rows of 16 bytes and widen every byte to a 16-bit word
 *     in temp, laid out as four groups of 17 entries (four words per entry);
 *  2) for each group, run QPEL_V_LOW 16 times, each one handing its result
 *     to OP with a destination inside dst.
 * dst/dstStride: output pointer and row pitch; src/srcStride: input pointer and row pitch.
 * The function name is suffixed with the MMX macro parameter of the enclosing macro.
 * NOTE(review): addl/pushl are applied to pointer operands, so this presumably
 * targets 32-bit x86 only - confirm before reusing elsewhere. */\
uint64_t temp[17*4]; /* 4 groups x 17 rows, one 64-bit entry = four 16-bit words */\
uint64_t *temp_ptr= temp;\
int count= 17; /* number of source rows widened by the first loop */\
\
/*FIXME unroll */
\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t" /* mm7 = 0, interleaved as the high byte when unpacking */\
"1: \n\t"\
"movq (%0), %%mm0 \n\t" /* bytes 0-7 of the row, loaded twice: low and high half are unpacked separately */\
"movq (%0), %%mm1 \n\t"\
"movq 8(%0), %%mm2 \n\t" /* bytes 8-15 of the row, likewise loaded twice */\
"movq 8(%0), %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t" /* bytes 0-3 -> words */\
"punpckhbw %%mm7, %%mm1 \n\t" /* bytes 4-7 -> words */\
"punpcklbw %%mm7, %%mm2 \n\t" /* bytes 8-11 -> words */\
"punpckhbw %%mm7, %%mm3 \n\t" /* bytes 12-15 -> words */\
"movq %%mm0, (%1) \n\t" /* group 0 */\
"movq %%mm1, 17*8(%1) \n\t" /* group 1, 17 entries further on */\
"movq %%mm2, (%1, %4) \n\t" /* group 2, %4 = 2*8*17 bytes further on */\
"movq %%mm3, (%1, %5) \n\t" /* group 3, %5 = 3*8*17 bytes further on */\
"addl $8, %1 \n\t" /* next entry in temp */\
"addl %3, %0 \n\t" /* next source row */\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+r" (src), "+r" (temp_ptr), "+r"(count)\
: "r" (srcStride), "r"(2*8*17), "r"(3*8*17)\
);\
\
temp_ptr= temp; /* the first loop advanced temp_ptr; start again at group 0 */\
count=4; /* one pass of the second loop per group */\
\
/*FIXME reorder for speed */
\
/*FIXME remove push/pop gcc 2.95 bug workaround here and in the other 3 lowpass filters */
\
asm volatile(\
/*"pxor %%mm7, %%mm7 \n\t"*/
\
"pushl %0 \n\t" /* %0-%2 are declared as inputs only but are modified below, so save them */\
"pushl %1 \n\t"\
"pushl %2 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t" /* preload the first four rows of this group */\
"movq 8(%0), %%mm1 \n\t"\
"movq 16(%0), %%mm2 \n\t"\
"movq 24(%0), %%mm3 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 16(%0), 8(%0), (%0), 32(%0), (%1), OP) /* output row 0; %5=ff_pw_20, %6=ff_pw_3, %7=ROUNDER */\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP) /* output row 1, one dstStride below */\
"addl %4, %1 \n\t" /* dst += 2*dstStride */\
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, (%0), (%0), 8(%0), 48(%0), (%1), OP) /* NOTE(review): repeated low offsets in rows 0-3 look like top-edge mirroring - confirm against QPEL_V_LOW */\
\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
"addl %4, %1 \n\t"\
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %7, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP) /* 128(%0) is the 17th (last) entry of the group */\
\
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %7, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP) /* NOTE(review): last offset stays at 128 then steps back (120, 112) - looks like bottom-edge mirroring, confirm */\
"addl %4, %1 \n\t" \
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %7, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %7, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP) /* output row 15 */\
\
"addl $136, %0 \n\t" /* next group in temp: 17 entries * 8 bytes */\
"addl %8, %1 \n\t" /* %8 = 4-14*dstStride: undo the seven 2*dstStride steps and move 4 bytes right */\
"decl %2 \n\t"\
" jnz 1b \n\t"\
"popl %2 \n\t" /* restore the saved input registers in reverse order */\
"popl %1 \n\t"\
"popl %0 \n\t"\
\
:: "r"(temp_ptr), "r"(dst), "r"(count),\
"r"(dstStride), "r"(2*dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-14*dstStride)\
);\
}\
}\
\
\
void OPNAME ## mpeg4_qpel8_v_lowpass_
mmx
(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
void OPNAME ## mpeg4_qpel8_v_lowpass_
## MMX
(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
uint64_t temp[9*4];\
uint64_t temp[9*4];\
uint64_t *temp_ptr= temp;\
uint64_t *temp_ptr= temp;\
int count= 9;\
int count= 9;\
...
@@ -1089,12 +1095,10 @@ void OPNAME ## mpeg4_qpel8_v_lowpass_mmx(uint8_t *dst, uint8_t *src, int dstStri
...
@@ -1089,12 +1095,10 @@ void OPNAME ## mpeg4_qpel8_v_lowpass_mmx(uint8_t *dst, uint8_t *src, int dstStri
:: "r"(temp_ptr), "r"(dst), "r"(count),\
:: "r"(temp_ptr), "r"(dst), "r"(count),\
"r"(dstStride), "r"(2*dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-6*dstStride)\
"r"(dstStride), "r"(2*dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-6*dstStride)\
);\
);\
}
}\
#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\
\
\
static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
put_
pixels8_mmx(dst, src, stride, 8);\
OPNAME ##
pixels8_mmx(dst, src, stride, 8);\
}\
}\
\
\
static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1118,18 +1122,18 @@ static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1118,18 +1122,18 @@ static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[32];\
uint64_t temp[32];\
uint8_t * const half= (uint8_t*)temp;\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(half, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(half, src, 8, stride);\
OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
}\
}\
\
\
static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## mpeg4_qpel8_v_lowpass_
mmx
(dst, src, stride, stride);\
OPNAME ## mpeg4_qpel8_v_lowpass_
## MMX
(dst, src, stride, stride);\
}\
}\
\
\
static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[32];\
uint64_t temp[32];\
uint8_t * const half= (uint8_t*)temp;\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(half, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(half, src, 8, stride);\
OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\
OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\
}\
}\
static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1138,8 +1142,8 @@ static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1138,8 +1142,8 @@ static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfV, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfV, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4_mmx(dst, src, (uint8_t*)half, stride, 8);\
OPNAME ## pixels8_l4_mmx(dst, src, (uint8_t*)half, stride, 8);\
}\
}\
static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1148,8 +1152,8 @@ static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1148,8 +1152,8 @@ static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfV, src+1, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfV, src+1, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4_mmx(dst, src+1, (uint8_t*)half, stride, 8);\
OPNAME ## pixels8_l4_mmx(dst, src+1, (uint8_t*)half, stride, 8);\
}\
}\
static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1158,8 +1162,8 @@ static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1158,8 +1162,8 @@ static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfV, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfV, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 8);\
OPNAME ## pixels8_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 8);\
}\
}\
static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1168,8 +1172,8 @@ static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1168,8 +1172,8 @@ static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src , 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src , 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfV, src+1, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfV, src+1, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 8);\
OPNAME ## pixels8_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 8);\
}\
}\
static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1177,7 +1181,7 @@ static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1177,7 +1181,7 @@ static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
}\
}\
static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1185,7 +1189,7 @@ static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1185,7 +1189,7 @@ static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
}\
}\
static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1194,8 +1198,8 @@ static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1194,8 +1198,8 @@ static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfV, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfV, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
}\
}\
static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1204,18 +1208,18 @@ static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1204,18 +1208,18 @@ static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfV, src+1, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfV, src+1, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_
mmx
(halfHV, halfH, 8, 8);\
put ## RND ## mpeg4_qpel8_v_lowpass_
## MMX
(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
}\
}\
static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[9*2];\
uint64_t half[9*2];\
uint8_t * const halfH= ((uint8_t*)half);\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass_
mmx
(dst, halfH, stride, 8);\
OPNAME ## mpeg4_qpel8_v_lowpass_
## MMX
(dst, halfH, stride, 8);\
}\
}\
static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
put_
pixels16_mmx(dst, src, stride, 16);\
OPNAME ##
pixels16_mmx(dst, src, stride, 16);\
}\
}\
\
\
static void OPNAME ## qpel16_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1239,18 +1243,18 @@ static void OPNAME ## qpel16_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1239,18 +1243,18 @@ static void OPNAME ## qpel16_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[32];\
uint64_t temp[32];\
uint8_t * const half= (uint8_t*)temp;\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(half, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(half, src, 16, stride);\
OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\
OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\
}\
}\
\
\
static void OPNAME ## qpel16_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## mpeg4_qpel16_v_lowpass_
mmx
(dst, src, stride, stride);\
OPNAME ## mpeg4_qpel16_v_lowpass_
## MMX
(dst, src, stride, stride);\
}\
}\
\
\
static void OPNAME ## qpel16_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[32];\
uint64_t temp[32];\
uint8_t * const half= (uint8_t*)temp;\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(half, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(half, src, 16, stride);\
OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\
OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\
}\
}\
static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1259,8 +1263,8 @@ static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1259,8 +1263,8 @@ static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfV, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfV, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_mmx(dst, src, (uint8_t*)half, stride, 16);\
OPNAME ## pixels16_l4_mmx(dst, src, (uint8_t*)half, stride, 16);\
}\
}\
static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1269,8 +1273,8 @@ static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1269,8 +1273,8 @@ static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfV, src+1, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfV, src+1, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_mmx(dst, src+1, (uint8_t*)half, stride, 16);\
OPNAME ## pixels16_l4_mmx(dst, src+1, (uint8_t*)half, stride, 16);\
}\
}\
static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1279,8 +1283,8 @@ static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1279,8 +1283,8 @@ static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfV, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfV, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 16);\
OPNAME ## pixels16_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 16);\
}\
}\
static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1289,8 +1293,8 @@ static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1289,8 +1293,8 @@ static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src , 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src , 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfV, src+1, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfV, src+1, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 16);\
OPNAME ## pixels16_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 16);\
}\
}\
static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1298,7 +1302,7 @@ static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1298,7 +1302,7 @@ static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
}\
}\
static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1306,7 +1310,7 @@ static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1306,7 +1310,7 @@ static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
}\
}\
static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1315,8 +1319,8 @@ static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1315,8 +1319,8 @@ static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfV, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfV, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
}\
}\
static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1325,38 +1329,36 @@ static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
...
@@ -1325,38 +1329,36 @@ static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfV, src+1, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfV, src+1, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_
mmx
(halfHV, halfH, 16, 16);\
put ## RND ## mpeg4_qpel16_v_lowpass_
## MMX
(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
}\
}\
static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[17*2];\
uint64_t half[17*2];\
uint8_t * const halfH= ((uint8_t*)half);\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass_
mmx
(dst, halfH, stride, 16);\
OPNAME ## mpeg4_qpel16_v_lowpass_
## MMX
(dst, halfH, stride, 16);\
}
}
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
#define AVG_OP(a,b,temp, size) \
#define AVG_
3DNOW_
OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"mov" #size " " #b ", " #temp " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
"mov" #size " " #a ", " #b " \n\t"
#define AVG_MMX2_OP(a,b,temp, size) \
QPEL_BASE
(
put_
,
ff_pw_16
,
_
,
PUT_OP
)
QPEL_BASE
(
avg_
,
ff_pw_16
,
_
,
AVG_OP
)
QPEL_BASE
(
put_no_rnd_
,
ff_pw_15
,
_no_rnd_
,
PUT_OP
)
QPEL_OP
(
put_
,
ff_pw_16
,
_
,
PUT_OP
,
3
dnow
)
QPEL_OP
(
avg_
,
ff_pw_16
,
_
,
AVG_OP
,
3
dnow
)
QPEL_OP
(
put_no_rnd_
,
ff_pw_15
,
_no_rnd_
,
PUT_OP
,
3
dnow
)
#undef AVG_OP
#define AVG_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"mov" #size " " #b ", " #temp " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
"mov" #size " " #a ", " #b " \n\t"
QPEL_BASE
(
put_
,
ff_pw_16
,
_
,
PUT_OP
,
PUT_OP
)
QPEL_BASE
(
avg_
,
ff_pw_16
,
_
,
AVG_MMX2_OP
,
AVG_3DNOW_OP
)
QPEL_BASE
(
put_no_rnd_
,
ff_pw_15
,
_no_rnd_
,
PUT_OP
,
PUT_OP
)
QPEL_OP
(
put_
,
ff_pw_16
,
_
,
PUT_OP
,
3
dnow
)
QPEL_OP
(
avg_
,
ff_pw_16
,
_
,
AVG_3DNOW_OP
,
3
dnow
)
QPEL_OP
(
put_no_rnd_
,
ff_pw_15
,
_no_rnd_
,
PUT_OP
,
3
dnow
)
QPEL_OP
(
put_
,
ff_pw_16
,
_
,
PUT_OP
,
mmx2
)
QPEL_OP
(
put_
,
ff_pw_16
,
_
,
PUT_OP
,
mmx2
)
QPEL_OP
(
avg_
,
ff_pw_16
,
_
,
AVG_OP
,
mmx2
)
QPEL_OP
(
avg_
,
ff_pw_16
,
_
,
AVG_
MMX2_
OP
,
mmx2
)
QPEL_OP
(
put_no_rnd_
,
ff_pw_15
,
_no_rnd_
,
PUT_OP
,
mmx2
)
QPEL_OP
(
put_no_rnd_
,
ff_pw_15
,
_no_rnd_
,
PUT_OP
,
mmx2
)
#if 0
#if 0
...
@@ -1485,6 +1487,7 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
...
@@ -1485,6 +1487,7 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c
->
avg_pixels_tab
[
1
][
1
]
=
avg_pixels8_x2_mmx2
;
c
->
avg_pixels_tab
[
1
][
1
]
=
avg_pixels8_x2_mmx2
;
c
->
avg_pixels_tab
[
1
][
2
]
=
avg_pixels8_y2_mmx2
;
c
->
avg_pixels_tab
[
1
][
2
]
=
avg_pixels8_y2_mmx2
;
c
->
avg_pixels_tab
[
1
][
3
]
=
avg_pixels8_xy2_mmx2
;
c
->
avg_pixels_tab
[
1
][
3
]
=
avg_pixels8_xy2_mmx2
;
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
0
],
qpel16_mc00_mmx2
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
0
],
qpel16_mc00_mmx2
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
1
],
qpel16_mc10_mmx2
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
1
],
qpel16_mc10_mmx2
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
2
],
qpel16_mc20_mmx2
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
2
],
qpel16_mc20_mmx2
)
...
...
libavcodec/i386/dsputil_mmx_rnd.h
View file @
3178ee4c
...
@@ -657,7 +657,7 @@ static void DEF(avg, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
...
@@ -657,7 +657,7 @@ static void DEF(avg, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
"movq (%1), %%mm0
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 64(%2), %%mm2
\n\t
"
"movq 64(%2), %%mm2
\n\t
"
"movq 136(%
4
), %%mm4
\n\t
"
"movq 136(%
2
), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
...
@@ -670,7 +670,7 @@ static void DEF(avg, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
...
@@ -670,7 +670,7 @@ static void DEF(avg, pixels8_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
"packuswb %%mm4, %%mm3
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq (%0), %%mm4
\n\t
"
"movq (%0), %%mm4
\n\t
"
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
"movq %%mm
3
, (%0)
\n\t
"
"movq %%mm
0
, (%0)
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %1
\n\t
"
"addl %4, %1
\n\t
"
"addl $8, %2
\n\t
"
"addl $8, %2
\n\t
"
...
@@ -705,7 +705,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
...
@@ -705,7 +705,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
"movq (%1), %%mm0
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 256(%2), %%mm2
\n\t
"
"movq 256(%2), %%mm2
\n\t
"
"movq 528(%
4
), %%mm4
\n\t
"
"movq 528(%
2
), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
...
@@ -718,7 +718,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
...
@@ -718,7 +718,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
"packuswb %%mm4, %%mm3
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq (%0), %%mm4
\n\t
"
"movq (%0), %%mm4
\n\t
"
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
"movq %%mm
3
, (%0)
\n\t
"
"movq %%mm
0
, (%0)
\n\t
"
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
...
@@ -735,7 +735,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
...
@@ -735,7 +735,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
"movq 536(%
4
), %%mm4
\n\t
"
"movq 536(%
2
), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
...
@@ -748,7 +748,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
...
@@ -748,7 +748,7 @@ static void DEF(avg, pixels16_l4)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
"packuswb %%mm4, %%mm3
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq 8(%0), %%mm4
\n\t
"
"movq 8(%0), %%mm4
\n\t
"
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
"movq %%mm
3
, 8(%0)
\n\t
"
"movq %%mm
0
, 8(%0)
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %1
\n\t
"
"addl %4, %1
\n\t
"
"addl $16, %2
\n\t
"
"addl $16, %2
\n\t
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment