ffmpeg / Commits

Commit f8bbebec
authored Jan 17, 2014 by Diego Biurrun

x86: motion_est: K&R formatting cosmetics

parent a36947c1

Showing 1 changed file with 161 additions and 150 deletions:

libavcodec/x86/motion_est.c  (+161 / -150)
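The diff below is whitespace-only: operand spacing, constraint-list indentation, and line wrapping are normalized to K&R style, while the instructions, constraints, and constants are left untouched. As a rough illustration of the kind of change involved (the "before" spelling here is an assumption for illustration, not a quote from the old file):

/* Illustrative only: the style of change a K&R cosmetics pass makes.
 * The "before" spelling is assumed; the "after" matches the new file below. */

/* before: compact spacing */
static long neg_span_before(long stride, long h)
{
    long len= -(stride*h);
    return len;
}

/* after: K&R spacing around '=' and '*' */
static long neg_span_after(long stride, long h)
{
    long len = -(stride * h);
    return len;
}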
@@ -31,18 +31,18 @@
#if HAVE_INLINE_ASM

DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
    0x0000000000000000ULL,
    0x0001000100010001ULL,
    0x0002000200020002ULL,
};

DECLARE_ASM_CONST(8, uint64_t, bone) = 0x0101010101010101LL;

static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    x86_reg len = -(stride * h);
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
@@ -71,14 +71,13 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
        "add %3, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg) stride));
}

static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
                                 int stride, int h)
{
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1), %%mm0               \n\t"
@@ -92,14 +91,13 @@ static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
{
    int ret;
    __asm__ volatile (
        "pxor %%xmm2, %%xmm2            \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
@@ -116,16 +114,15 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
        "movhlps %%xmm2, %%xmm0         \n\t"
        "paddw %%xmm0, %%xmm2           \n\t"
        "movd %%xmm2, %3                \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2), "=r" (ret)
        : "r" ((x86_reg) stride));
    return ret;
}

static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
                                   int stride, int h)
{
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1), %%mm0               \n\t"
@@ -141,14 +138,13 @@ static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
                                   int stride, int h)
{
    __asm__ volatile (
        "movq (%1), %%mm0               \n\t"
        "add %3, %1                     \n\t"
        ".p2align 4                     \n\t"
@@ -167,14 +163,13 @@ static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
                                 int stride, int h)
{
    __asm__ volatile (
        "movq "MANGLE(bone)", %%mm5     \n\t"
        "movq (%1), %%mm0               \n\t"
        "pavgb 1(%1), %%mm0             \n\t"
@@ -198,14 +193,14 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
                              int stride, int h)
{
    x86_reg len = -(stride * h);
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
@@ -236,15 +231,15 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
        "add %4, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1a - len), "r" (blk1b - len),
          "r" (blk2 - len), "r" ((x86_reg) stride));
}

static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    x86_reg len = -(stride * h);
    __asm__ volatile (
        "movq  (%1, %%"REG_a"), %%mm0   \n\t"
        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
        "movq %%mm0, %%mm1              \n\t"
        "movq %%mm2, %%mm3              \n\t"
@@ -256,7 +251,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
        "paddw %%mm3, %%mm1             \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq  (%2, %%"REG_a"), %%mm2   \n\t"
        "movq 1(%2, %%"REG_a"), %%mm4   \n\t"
        "movq %%mm2, %%mm3              \n\t"
        "movq %%mm4, %%mm5              \n\t"
@@ -289,14 +284,14 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
        "add %4, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1 - len), "r" (blk1 - len + stride),
          "r" (blk2 - len), "r" ((x86_reg) stride));
}

static inline int sum_mmx(void)
{
    int ret;
    __asm__ volatile (
        "movq %%mm6, %%mm0              \n\t"
        "psrlq $32, %%mm6               \n\t"
        "paddw %%mm0, %%mm6             \n\t"
@@ -304,129 +299,145 @@ static inline int sum_mmx(void)
        "psrlq $16, %%mm6               \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "movd %%mm6, %0                 \n\t"
        : "=r" (ret));
    return ret & 0xFFFF;
}

static inline int sum_mmxext(void)
{
    int ret;
    __asm__ volatile (
        "movd %%mm6, %0                 \n\t"
        : "=r" (ret));
    return ret;
}

static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    sad8_2_mmx(blk1, blk1 + 1, blk2, stride, h);
}

static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    sad8_2_mmx(blk1, blk1 + stride, blk2, stride, h);
}

#define PIX_SAD(suf) \
static int sad8_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
:); \
\
sad8_1_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_x2a_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_y2a_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
::); \
\
sad8_4_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad16_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
:); \
\
sad8_1_ ## suf(blk1, blk2, stride, h); \
sad8_1_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
\
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_x2a_ ## suf(blk1, blk2, stride, h); \
sad8_x2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
\
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_y2a_ ## suf(blk1, blk2, stride, h); \
sad8_y2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
\
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
::); \
\
sad8_4_ ## suf(blk1, blk2, stride, h); \
sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
PIX_SAD(mmx)
PIX_SAD(mmxext)
@@ -448,17 +459,17 @@ av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
        c->pix_abs[1][2] = sad8_y2_mmx;
        c->pix_abs[1][3] = sad8_xy2_mmx;

        c->sad[0] = sad16_mmx;
        c->sad[1] = sad8_mmx;
    }
    if (INLINE_MMXEXT(cpu_flags)) {
        c->pix_abs[0][0] = sad16_mmxext;
        c->pix_abs[1][0] = sad8_mmxext;

        c->sad[0] = sad16_mmxext;
        c->sad[1] = sad8_mmxext;

        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->pix_abs[0][1] = sad16_x2_mmxext;
            c->pix_abs[0][2] = sad16_y2_mmxext;
            c->pix_abs[0][3] = sad16_xy2_mmxext;

@@ -468,7 +479,7 @@ av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
        }
    }
    if (INLINE_SSE2(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_3DNOW)) {
        c->sad[0] = sad16_sse2;
    }
#endif /* HAVE_INLINE_ASM */
}
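For context on what these tables hold: as the init code above shows, pix_abs[0][*] entries cover 16-pixel-wide blocks and pix_abs[1][*] cover 8-pixel-wide blocks, with the second index selecting full-pel, horizontal half-pel (x2), vertical half-pel (y2), or diagonal half-pel (xy2) comparison. All of them compute a sum of absolute differences. A minimal scalar sketch of the full-pel 8-wide case, assuming the same argument order as the wrappers above (the helper name is hypothetical, not part of the file):

#include <stdint.h>
#include <stdlib.h>

/* Scalar reference for the full-pel 8-wide SAD that the MMX routines
 * accelerate. Hypothetical helper for illustration only. */
static int sad8_scalar(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
{
    int sum = 0;
    (void) v;  /* unused context pointer, kept to mirror the wrapper signature */
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < 8; x++)
            sum += abs(blk1[x] - blk2[x]);
        blk1 += stride;
        blk2 += stride;
    }
    return sum;
}

The half-pel variants differ only in averaging neighboring source pixels (horizontally, vertically, or both) before taking the absolute differences.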