Commit a7c87e19 authored by Frank Barchard

Add Intel Architecture Code Analyzer (IACA) markers

Add macros that mark the start and end of blocks of code to be analyzed by IACA.

Normally these macros should not be used, but if performance
details are wanted for Intel code, place them around that code
and then run the iaca tool, available on the Intel website.

BUG=libyuv:670
TEST=~/iaca-lin64/bin/iaca.sh -64 out/Release/libyuv_unittest
R=wangcheng@google.com

Review-Url: https://codereview.chromium.org/2626193002 .
parent 73a6f100
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1638
Version: 1639
License: BSD
License File: LICENSE
......
......@@ -611,6 +611,57 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
#endif
#endif
// Intel Code Analyzer (IACA) markers. Insert IACA_START and IACA_END around
// the code to be measured and then run with iaca -64 libyuv_unittest.
// IACA_ASM_START and IACA_ASM_END are equivalents that can be used within
// inline assembly blocks.
// Example of an iaca invocation:
// ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest
//
// These markers are a profiling aid only; they are not meant to be left
// enabled in normal builds.
#if defined(__x86_64__) || defined(__i386__)
// String fragment to paste at the start of the measured region inside an
// existing inline assembly block. Emits the bytes 0x0F 0x0B, loads 111 into
// ebx, then emits the bytes 0x64 0x67 0x90.
// NOTE(review): ebx is overwritten, and the doubled "%%" presumably assumes
// the enclosing asm statement has operand lists - confirm at each use site.
#define IACA_ASM_START \
".byte 0x0F, 0x0B\n" \
" movl $111, %%ebx\n" \
".byte 0x64, 0x67, 0x90\n"
// String fragment to paste at the end of the measured region: the mirror image
// of IACA_ASM_START, using 222 as the marker value and emitting the
// 0x0F 0x0B bytes last.
#define IACA_ASM_END \
" movl $222, %%ebx\n" \
".byte 0x64, 0x67, 0x90\n" \
".byte 0x0F, 0x0B\n"
// Stand-alone statement that loads MARK_ID into ebx and emits the
// 0x64 0x67 0x90 byte sequence. MARK_ID is stringized into the instruction, so
// it must be a literal. The macro supplies its own trailing semicolon.
// NOTE(review): only "memory" is listed as clobbered although ebx is written.
#define IACA_SSC_MARK(MARK_ID) \
__asm__ __volatile__("\n\t movl $" #MARK_ID \
", %%ebx" \
"\n\t .byte 0x64, 0x67, 0x90" \
: \
: \
: "memory");
// Stand-alone statement that emits the two bytes 0x0F 0x0B (trailing semicolon
// included).
#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B");
#else /* Visual C */
// This branch is selected whenever neither __x86_64__ nor __i386__ is defined;
// the definitions below use Visual C style __asm / _emit syntax.
// Emits the two bytes 0x0F 0x0B.
#define IACA_UD_BYTES \
{ __asm _emit 0x0F __asm _emit 0x0B }
// Loads x into ebx and emits the bytes 0x64 0x67 0x90.
#define IACA_SSC_MARK(x) \
{ __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 }
// Alternative start/end markers built on the __writegsbyte intrinsic. They are
// not referenced by IACA_START / IACA_END below.
// NOTE(review): presumably intended for 64-bit Visual C builds - confirm.
#define IACA_VC64_START __writegsbyte(111, 111);
#define IACA_VC64_END __writegsbyte(222, 222);
#endif
// Marks the beginning of the region to analyze: the 0x0F 0x0B bytes followed
// by the marker with value 111. Expands to a braced block, so no trailing
// semicolon is required at the use site.
#define IACA_START \
{ \
IACA_UD_BYTES \
IACA_SSC_MARK(111) \
}
// Marks the end of the region to analyze: the marker with value 222 followed
// by the 0x0F 0x0B bytes.
#define IACA_END \
{ \
IACA_SSC_MARK(222) \
IACA_UD_BYTES \
}
void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1638
#define LIBYUV_VERSION 1639
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -257,9 +257,8 @@ int ConvertToARGB(const uint8* sample,
free(rotate_buffer);
} else if (rotation) {
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBRotate(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height, rotation);
r = ARGBRotate(src, src_width * 4, crop_argb, argb_stride, crop_width,
inv_crop_height, rotation);
}
return r;
......
......@@ -2112,6 +2112,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
......@@ -5458,7 +5459,7 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
}
#endif // HAS_HALFFLOATROW_AVX2
#ifdef HAS_HALFFLOATROW_F16C
//#ifdef HAS_HALFFLOATROW_F16C
void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
asm volatile (
"vbroadcastss %3, %%ymm4 \n"
......@@ -5490,7 +5491,7 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
"xmm2", "xmm3", "xmm4"
);
}
#endif // HAS_HALFFLOATROW_F16C
//#endif // HAS_HALFFLOATROW_F16C
#ifdef HAS_HALFFLOATROW_F16C
void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) {
......
......@@ -1925,18 +1925,17 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
dst[2] = 0x00000000;
dst[3] = 0x00000000;
int r = ConvertToARGB(
reinterpret_cast<uint8_t*>(src),
16, // input size
reinterpret_cast<uint8_t*>(dst),
8, // destination stride
0, // crop_x
0, // crop_y
2, // width
2, // height
2, // crop width
2, // crop height
kRotate90, FOURCC_ARGB);
int r = ConvertToARGB(reinterpret_cast<uint8_t*>(src),
16, // input size
reinterpret_cast<uint8_t*>(dst),
8, // destination stride
0, // crop_x
0, // crop_y
2, // width
2, // height
2, // crop width
2, // crop height
kRotate90, FOURCC_ARGB);
EXPECT_EQ(r, 0);
// 90 degrees rotation, no conversion
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment