Commit bf69adfd authored by lixia zhang, committed by Frank Barchard

libyuv:loongson Correct the MMI optimization on the Loongson-3A platform.

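When data is loaded or stored through an address that is not 8-byte aligned,
the aligned-access instructions (ldc1/sdc1) greatly degrade the performance of
the optimized code, so the unaligned-access instruction pairs (gsldrc1/gsldlc1
and gssdrc1/gssdlc1) are required on the Loongson platform.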

Also delete the optimized function ScaleARGBFilterCols_MMI,
because it degraded performance.

BUG=libyuv:804
R=fbarchard@chromium.org

Change-Id: If4c15886a21cdcbac7ae8b336292e4549acf1e47
Reviewed-on: https://chromium-review.googlesource.com/1164627
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
parent 4e666c43
......@@ -115,7 +115,6 @@ extern "C" {
#define HAS_FIXEDDIV_MIPS
#define HAS_SCALEARGBCOLS_MMI
#define HAS_SCALEARGBCOLSUP2_MMI
#define HAS_SCALEARGBFILTERCOLS_MMI
#define HAS_SCALEARGBROWDOWN2_MMI
#define HAS_SCALEARGBROWDOWNEVEN_MMI
#define HAS_SCALEROWDOWN2_MMI
......@@ -592,21 +591,11 @@ void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr,
int dst_width,
int x,
int dx);
// Loongson MMI column scaler for ARGB rows that blends two adjacent source
// pixels per output pixel. x is the starting source position (the
// implementation uses x >> 16 as the pixel index) and dx is added to x after
// every output pixel. dst_width is the number of output pixels.
void ScaleARGBFilterCols_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
// Loongson MMI column scaler for ARGB rows with the same parameter list as
// the filtering variant. NOTE(review): presumably the unfiltered
// (point-sampling) counterpart - the definition is not visible here; confirm.
void ScaleARGBCols_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
// "Any" variant of ScaleARGBFilterCols_MMI. NOTE(review): presumably a
// wrapper that accepts widths the MMI kernel cannot handle on its own - the
// definition is not visible here; confirm.
void ScaleARGBFilterCols_Any_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBCols_Any_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
......
......@@ -7,10 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <sys/time.h>
#include "libyuv/row.h"
#include <stdio.h>
#include <string.h> // For memcpy and memset.
#include "libyuv/basic_types.h"
......@@ -4492,7 +4490,8 @@ void SobelXRow_MMI(const uint8_t* src_y0,
"psubh %[y00], %[y10], %[y20] \n\t"
"packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
"sdc1 %[sobel], 0(%[dst_sobelx]) \n\t"
"gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
"gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"
"daddiu %[src_y0], %[src_y0], 8 \n\t"
"daddiu %[src_y1], %[src_y1], 8 \n\t"
......@@ -4587,7 +4586,8 @@ void SobelYRow_MMI(const uint8_t* src_y0,
"psubh %[y00], %[y02], %[y12] \n\t"
"packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
"sdc1 %[sobel], 0(%[dst_sobely]) \n\t"
"gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
"gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"
"daddiu %[src_y0], %[src_y0], 8 \n\t"
"daddiu %[src_y1], %[src_y1], 8 \n\t"
......@@ -4624,13 +4624,15 @@ void SobelRow_MMI(const uint8_t* src_sobelx,
"punpcklbh %[t1], %[t0], %[t0] \n\t"
"or %[t1], %[t1], %[c1] \n\t"
// 255 s1 s1 s1 255 s0 s0 s0
"sdc1 %[t1], 0x00(%[dst_argb]) \n\t"
"gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
"gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"
// s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
"punpckhbh %[t1], %[t0], %[t0] \n\t"
"or %[t1], %[t1], %[c1] \n\t"
// 255 s3 s3 s3 255 s2 s2 s2
"sdc1 %[t1], 0x08(%[dst_argb]) \n\t"
"gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
"gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"
// s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
"punpckhbh %[t0], %[t2], %[t2] \n\t"
......@@ -4638,12 +4640,14 @@ void SobelRow_MMI(const uint8_t* src_sobelx,
// s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
"punpcklbh %[t1], %[t0], %[t0] \n\t"
"or %[t1], %[t1], %[c1] \n\t"
"sdc1 %[t1], 0x10(%[dst_argb]) \n\t"
"gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
"gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"
// s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
"punpckhbh %[t1], %[t0], %[t0] \n\t"
"or %[t1], %[t1], %[c1] \n\t"
"sdc1 %[t1], 0x18(%[dst_argb]) \n\t"
"gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
"gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"
"daddiu %[dst_argb], %[dst_argb], 32 \n\t"
"daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
......@@ -4665,10 +4669,13 @@ void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
uint64_t tb = 0;
__asm__ volatile(
"1: \n\t"
"ldc1 %[tr], 0x0(%[src_sobelx]) \n\t" // r=src_sobelx[i]
"ldc1 %[tb], 0x0(%[src_sobely]) \n\t" // b=src_sobely[i]
"gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
"gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t" // r=src_sobelx[i]
"gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
"gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t" // b=src_sobely[i]
"paddusb %[tr], %[tr], %[tb] \n\t" // g
"sdc1 %[tr], 0x0(%[dst_y]) \n\t"
"gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
"gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"
"daddiu %[dst_y], %[dst_y], 8 \n\t"
"daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
......@@ -4705,10 +4712,12 @@ void SobelXYRow_MMI(const uint8_t* src_sobelx,
"punpcklbh %[cr], %[tr], %[c1] \n\t"
// c1 r1 g1 b1 c0 r0 g0 b0
"punpcklhw %[result], %[gb], %[cr] \n\t"
"sdc1 %[result], 0x00(%[dst_argb]) \n\t"
"gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
"gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
// c3 r3 g3 b3 c2 r2 g2 b2
"punpckhhw %[result], %[gb], %[cr] \n\t"
"sdc1 %[result], 0x08(%[dst_argb]) \n\t"
"gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
"gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"
// g7 b7 g6 b6 g5 b5 g4 b4
"punpckhbh %[gb], %[tb], %[tg] \n\t"
......@@ -4716,10 +4725,12 @@ void SobelXYRow_MMI(const uint8_t* src_sobelx,
"punpckhbh %[cr], %[tr], %[c1] \n\t"
// c5 r5 g5 b5 c4 r4 g4 b4
"punpcklhw %[result], %[gb], %[cr] \n\t"
"sdc1 %[result], 0x10(%[dst_argb]) \n\t"
"gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
"gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
// c7 r7 g7 b7 c6 r6 g6 b6
"punpckhhw %[result], %[gb], %[cr] \n\t"
"sdc1 %[result], 0x18(%[dst_argb]) \n\t"
"gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
"gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"
"daddiu %[dst_argb], %[dst_argb], 32 \n\t"
"daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
......@@ -4748,12 +4759,14 @@ void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
"punpcklhw %[dest], %[src], %[src] \n\t"
"and %[dest], %[dest], %[mask0] \n\t"
"or %[dest], %[dest], %[mask1] \n\t"
"sdc1 %[dest], 0x00(%[dst_ptr]) \n\t"
"gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
"gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
"punpckhhw %[dest], %[src], %[src] \n\t"
"and %[dest], %[dest], %[mask0] \n\t"
"or %[dest], %[dest], %[mask1] \n\t"
"sdc1 %[dest], 0x08(%[dst_ptr]) \n\t"
"gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
"gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
"daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
"daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
......@@ -4955,7 +4968,8 @@ void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
"gsldlc1 %[temp], 3(%[src]) \n\t"
"gsldrc1 %[temp], -4(%[src]) \n\t"
"pshufh %[temp], %[temp], %[shuff] \n\t"
"sdc1 %[temp], 0x0(%[dst]) \n\t"
"gssdrc1 %[temp], 0x0(%[dst]) \n\t"
"gssdlc1 %[temp], 0x7(%[dst]) \n\t"
"daddiu %[src], %[src], -0x08 \n\t"
"daddiu %[dst], %[dst], 0x08 \n\t"
......@@ -4975,18 +4989,22 @@ void SplitUVRow_MMI(const uint8_t* src_uv,
uint64_t shift = 0x08;
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x00(%[src_uv]) \n\t"
"ldc1 %[t1], 0x08(%[src_uv]) \n\t"
"gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
"gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
"gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
"gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"
"and %[t2], %[t0], %[c0] \n\t"
"and %[t3], %[t1], %[c0] \n\t"
"packushb %[t2], %[t2], %[t3] \n\t"
"sdc1 %[t2], 0x0(%[dst_u]) \n\t"
"gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
"gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"
"psrlh %[t2], %[t0], %[shift] \n\t"
"psrlh %[t3], %[t1], %[shift] \n\t"
"packushb %[t2], %[t2], %[t3] \n\t"
"sdc1 %[t2], 0x0(%[dst_v]) \n\t"
"gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
"gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"
"daddiu %[src_uv], %[src_uv], 16 \n\t"
"daddiu %[dst_u], %[dst_u], 8 \n\t"
......@@ -5008,12 +5026,16 @@ void MergeUVRow_MMI(const uint8_t* src_u,
uint64_t temp[3];
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x0(%[src_u]) \n\t"
"ldc1 %[t1], 0x0(%[src_v]) \n\t"
"gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
"gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
"gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
"gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
"punpcklbh %[t2], %[t0], %[t1] \n\t"
"sdc1 %[t2], 0x0(%[dst_uv]) \n\t"
"gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
"gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
"punpckhbh %[t2], %[t0], %[t1] \n\t"
"sdc1 %[t2], 0x8(%[dst_uv]) \n\t"
"gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
"gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"
"daddiu %[src_u], %[src_u], 8 \n\t"
"daddiu %[src_v], %[src_v], 8 \n\t"
......@@ -5149,13 +5171,17 @@ void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
uint64_t src_stride = 0x0;
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x00(%[src_yuy2]) \n\t"
"gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
"gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
"daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t"
"ldc1 %[t1], 0x00(%[src_stride]) \n\t"
"gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
"pavgb %[t0], %[t0], %[t1] \n\t"
"ldc1 %[t2], 0x08(%[src_yuy2]) \n\t"
"ldc1 %[t1], 0x08(%[src_stride]) \n\t"
"gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t"
"gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t"
"gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
"pavgb %[t1], %[t2], %[t1] \n\t"
"and %[t0], %[t0], %[c0] \n\t"
......@@ -5167,12 +5193,16 @@ void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
"and %[d0], %[t0], %[c1] \n\t"
"psrlh %[d1], %[t1], %[shift] \n\t"
"ldc1 %[t0], 0x10(%[src_yuy2]) \n\t"
"ldc1 %[t1], 0x10(%[src_stride]) \n\t"
"gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
"gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
"gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
"pavgb %[t0], %[t0], %[t1] \n\t"
"ldc1 %[t2], 0x18(%[src_yuy2]) \n\t"
"ldc1 %[t1], 0x18(%[src_stride]) \n\t"
"gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t"
"gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t"
"gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
"pavgb %[t1], %[t2], %[t1] \n\t"
"and %[t0], %[t0], %[c0] \n\t"
......@@ -5186,8 +5216,10 @@ void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
"packushb %[d0], %[d0], %[d2] \n\t"
"packushb %[d1], %[d1], %[d3] \n\t"
"sdc1 %[d0], 0x0(%[dst_u]) \n\t"
"sdc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
"gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
"gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
"daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
"daddiu %[dst_u], %[dst_u], 8 \n\t"
"daddiu %[dst_v], %[dst_v], 8 \n\t"
......@@ -5215,8 +5247,10 @@ void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
uint64_t shift = 0x08;
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x00(%[src_yuy2]) \n\t"
"ldc1 %[t1], 0x08(%[src_yuy2]) \n\t"
"gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
"gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
"gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
"gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
"and %[t0], %[t0], %[c0] \n\t"
"and %[t1], %[t1], %[c0] \n\t"
"psrlh %[t0], %[t0], %[shift] \n\t"
......@@ -5226,8 +5260,10 @@ void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
"and %[d0], %[t0], %[c1] \n\t"
"psrlh %[d1], %[t1], %[shift] \n\t"
"ldc1 %[t0], 0x10(%[src_yuy2]) \n\t"
"ldc1 %[t1], 0x18(%[src_yuy2]) \n\t"
"gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
"gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
"gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t"
"gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t"
"and %[t0], %[t0], %[c0] \n\t"
"and %[t1], %[t1], %[c0] \n\t"
"psrlh %[t0], %[t0], %[shift] \n\t"
......@@ -5239,8 +5275,10 @@ void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
"packushb %[d0], %[d0], %[d2] \n\t"
"packushb %[d1], %[d1], %[d3] \n\t"
"sdc1 %[d0], 0x0(%[dst_u]) \n\t"
"sdc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
"gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
"gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
"daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
"daddiu %[dst_u], %[dst_u], 8 \n\t"
"daddiu %[dst_v], %[dst_v], 8 \n\t"
......@@ -5256,17 +5294,19 @@ void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
// Copy row of YUY2 Y's (422) into Y (420/422).
void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
// c0 keeps the low byte of every 16-bit pair, i.e. the Y sample of each YUY2 pair.
uint64_t c0 = 0x00ff00ff00ff00ff;
uint64_t temp[2];
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x00(%[src_yuy2]) \n\t"
"ldc1 %[t1], 0x08(%[src_yuy2]) \n\t"
"gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
"gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
"gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
"gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
"and %[t0], %[t0], %[c0] \n\t"
"and %[t1], %[t1], %[c0] \n\t"
"packushb %[t0], %[t0], %[t1] \n\t"
"sdc1 %[t0], 0x0(%[dst_y]) \n\t"
"gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
"gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
"daddiu %[src_yuy2], %[src_yuy2], 16 \n\t"
"daddiu %[dst_y], %[dst_y], 8 \n\t"
"daddiu %[width], %[width], -8 \n\t"
......@@ -5292,13 +5332,17 @@ void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
uint64_t src_stride = 0x0;
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x00(%[src_uyvy]) \n\t"
"gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
"gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
"daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t"
"ldc1 %[t1], 0x00(%[src_stride]) \n\t"
"gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
"pavgb %[t0], %[t0], %[t1] \n\t"
"ldc1 %[t2], 0x08(%[src_uyvy]) \n\t"
"ldc1 %[t1], 0x08(%[src_stride]) \n\t"
"gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t"
"gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t"
"gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
"pavgb %[t1], %[t2], %[t1] \n\t"
"and %[t0], %[t0], %[c0] \n\t"
......@@ -5308,12 +5352,16 @@ void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
"and %[d0], %[t0], %[c0] \n\t"
"psrlh %[d1], %[t1], %[shift] \n\t"
"ldc1 %[t0], 0x10(%[src_uyvy]) \n\t"
"ldc1 %[t1], 0x10(%[src_stride]) \n\t"
"gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
"gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
"gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
"pavgb %[t0], %[t0], %[t1] \n\t"
"ldc1 %[t2], 0x18(%[src_uyvy]) \n\t"
"ldc1 %[t1], 0x18(%[src_stride]) \n\t"
"gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t"
"gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t"
"gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
"gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
"pavgb %[t1], %[t2], %[t1] \n\t"
"and %[t0], %[t0], %[c0] \n\t"
......@@ -5325,8 +5373,10 @@ void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
"packushb %[d0], %[d0], %[d2] \n\t"
"packushb %[d1], %[d1], %[d3] \n\t"
"sdc1 %[d0], 0x0(%[dst_u]) \n\t"
"sdc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
"gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
"gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
"daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
"daddiu %[dst_u], %[dst_u], 8 \n\t"
"daddiu %[dst_v], %[dst_v], 8 \n\t"
......@@ -5354,8 +5404,10 @@ void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
uint64_t shift = 0x08;
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x00(%[src_uyvy]) \n\t"
"ldc1 %[t1], 0x08(%[src_uyvy]) \n\t"
"gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
"gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
"gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
"gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
"and %[t0], %[t0], %[c0] \n\t"
"and %[t1], %[t1], %[c0] \n\t"
"packushb %[t0], %[t0], %[t1] \n\t"
......@@ -5363,8 +5415,10 @@ void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
"and %[d0], %[t0], %[c0] \n\t"
"psrlh %[d1], %[t1], %[shift] \n\t"
"ldc1 %[t0], 0x10(%[src_uyvy]) \n\t"
"ldc1 %[t1], 0x18(%[src_uyvy]) \n\t"
"gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
"gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
"gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t"
"gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t"
"and %[t0], %[t0], %[c0] \n\t"
"and %[t1], %[t1], %[c0] \n\t"
"packushb %[t0], %[t0], %[t1] \n\t"
......@@ -5374,8 +5428,10 @@ void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
"packushb %[d0], %[d0], %[d2] \n\t"
"packushb %[d1], %[d1], %[d3] \n\t"
"sdc1 %[d0], 0x0(%[dst_u]) \n\t"
"sdc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
"gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
"gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
"gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
"daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
"daddiu %[dst_u], %[dst_u], 8 \n\t"
"daddiu %[dst_v], %[dst_v], 8 \n\t"
......@@ -5397,15 +5453,18 @@ void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
uint64_t temp[2];
__asm__ volatile(
"1: \n\t"
"ldc1 %[t0], 0x00(%[src_uyvy]) \n\t"
"ldc1 %[t1], 0x08(%[src_uyvy]) \n\t"
"gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
"gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
"gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
"gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
"dsrl %[t0], %[t0], %[shift] \n\t"
"dsrl %[t1], %[t1], %[shift] \n\t"
"and %[t0], %[t0], %[c0] \n\t"
"and %[t1], %[t1], %[c0] \n\t"
"and %[t1], %[t1], %[c0] \n\t"
"packushb %[t0], %[t0], %[t1] \n\t"
"sdc1 %[t0], 0x0(%[dst_y]) \n\t"
"gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
"gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
"daddiu %[src_uyvy], %[src_uyvy], 16 \n\t"
"daddiu %[dst_y], %[dst_y], 8 \n\t"
"daddiu %[width], %[width], -8 \n\t"
......@@ -5671,12 +5730,15 @@ void InterpolateRow_MMI(uint8_t* dst_ptr,
uint64_t uv_stride = 0x0;
__asm__ volatile(
"1: \n\t"
"ldc1 %[uv], 0x0(%[src_ptr]) \n\t"
"gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t"
"gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t"
"daddu $t0, %[src_ptr], %[stride] \n\t"
"ldc1 %[uv_stride], 0x0($t0) \n\t"
"gsldrc1 %[uv_stride], 0x0($t0) \n\t"
"gsldlc1 %[uv_stride], 0x7($t0) \n\t"
"pavgb %[uv], %[uv], %[uv_stride] \n\t"
"sdc1 %[uv], 0x0(%[dst_ptr]) \n\t"
"gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t"
"gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t"
"daddiu %[src_ptr], %[src_ptr], 8 \n\t"
"daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
......@@ -5700,10 +5762,12 @@ void InterpolateRow_MMI(uint8_t* dst_ptr,
"pshufh %[fy1], %[fy1], %[zero] \n\t"
"psubh %[fy0], %[fy0], %[fy1] \n\t"
"1: \n\t"
"ldc1 %[t0], 0x0(%[src_ptr]) \n\t"
"gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t"
"gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t"
"punpcklbh %[d0], %[t0], %[zero] \n\t"
"punpckhbh %[d1], %[t0], %[zero] \n\t"
"ldc1 %[t0], 0x0(%[src_ptr1]) \n\t"
"gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t"
"gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t"
"punpcklbh %[d2], %[t0], %[zero] \n\t"
"punpckhbh %[d3], %[t0], %[zero] \n\t"
......@@ -5720,7 +5784,8 @@ void InterpolateRow_MMI(uint8_t* dst_ptr,
"psrlh %[d1], %[d1], %[shift] \n\t"
"packushb %[d0], %[d0], %[d1] \n\t"
"sdc1 %[d0], 0x0(%[dst_ptr]) \n\t"
"gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t"
"gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t"
"daddiu %[src_ptr], %[src_ptr], 8 \n\t"
"daddiu %[src_ptr1], %[src_ptr1], 8 \n\t"
"daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
......
......@@ -38,10 +38,12 @@ void ScaleRowDown2_MMI(const uint8_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
"psrlh %[src0], %[src0], %[shift] \n\t"
"ldc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
"psrlh %[src1], %[src1], %[shift] \n\t"
"packushb %[dest], %[src0], %[src1] \n\t"
......@@ -72,9 +74,11 @@ void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
"and %[dest0], %[src0], %[mask] \n\t"
"ldc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
"and %[dest1], %[src1], %[mask] \n\t"
"packushb %[dest0], %[dest0], %[dest1] \n\t"
......@@ -114,11 +118,13 @@ void ScaleRowDown2Box_MMI(const uint8_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[s0], 0x00(%[s]) \n\t"
"gsldrc1 %[s0], 0x00(%[s]) \n\t"
"gsldlc1 %[s0], 0x07(%[s]) \n\t"
"psrlh %[s1], %[s0], %[shift1] \n\t"
"and %[s0], %[s0], %[mask] \n\t"
"ldc1 %[t0], 0x00(%[t]) \n\t"
"gsldrc1 %[t0], 0x00(%[t]) \n\t"
"gsldlc1 %[t0], 0x07(%[t]) \n\t"
"psrlh %[t1], %[t0], %[shift1] \n\t"
"and %[t0], %[t0], %[mask] \n\t"
......@@ -128,11 +134,13 @@ void ScaleRowDown2Box_MMI(const uint8_t* src_ptr,
"paddh %[dest0], %[dest0], %[ph] \n\t"
"psrlh %[dest0], %[dest0], %[shift0] \n\t"
"ldc1 %[s0], 0x08(%[s]) \n\t"
"gsldrc1 %[s0], 0x08(%[s]) \n\t"
"gsldlc1 %[s0], 0x0f(%[s]) \n\t"
"psrlh %[s1], %[s0], %[shift1] \n\t"
"and %[s0], %[s0], %[mask] \n\t"
"ldc1 %[t0], 0x08(%[t]) \n\t"
"gsldrc1 %[t0], 0x08(%[t]) \n\t"
"gsldlc1 %[t0], 0x0f(%[t]) \n\t"
"psrlh %[t1], %[t0], %[shift1] \n\t"
"and %[t0], %[t0], %[mask] \n\t"
......@@ -172,8 +180,10 @@ void ScaleARGBRowDown2_MMI(const uint8_t* src_argb,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src_ptr]) \n\t"
"ldc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
"punpckhwd %[dest], %[src0], %[src1] \n\t"
"gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
......@@ -237,12 +247,14 @@ void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb,
__asm__ volatile(
"1: \n\t"
"ldc1 %[s0], 0x00(%[s]) \n\t"
"gsldrc1 %[s0], 0x00(%[s]) \n\t"
"gsldlc1 %[s0], 0x07(%[s]) \n\t"
"punpcklbh %[s_lo], %[s0], %[mask] \n\t"
"punpckhbh %[s_hi], %[s0], %[mask] \n\t"
"paddh %[dest_lo], %[s_lo], %[s_hi] \n\t"
"ldc1 %[t0], 0x00(%[t]) \n\t"
"gsldrc1 %[t0], 0x00(%[t]) \n\t"
"gsldlc1 %[t0], 0x07(%[t]) \n\t"
"punpcklbh %[t_lo], %[t0], %[mask] \n\t"
"punpckhbh %[t_hi], %[t0], %[mask] \n\t"
"paddh %[dest_lo], %[dest_lo], %[t_lo] \n\t"
......@@ -251,12 +263,14 @@ void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb,
"paddh %[dest_lo], %[dest_lo], %[ph] \n\t"
"psrlh %[dest_lo], %[dest_lo], %[shfit] \n\t"
"ldc1 %[s0], 0x08(%[s]) \n\t"
"gsldrc1 %[s0], 0x08(%[s]) \n\t"
"gsldlc1 %[s0], 0x0f(%[s]) \n\t"
"punpcklbh %[s_lo], %[s0], %[mask] \n\t"
"punpckhbh %[s_hi], %[s0], %[mask] \n\t"
"paddh %[dest_hi], %[s_lo], %[s_hi] \n\t"
"ldc1 %[t0], 0x08(%[t]) \n\t"
"gsldrc1 %[t0], 0x08(%[t]) \n\t"
"gsldlc1 %[t0], 0x0f(%[t]) \n\t"
"punpcklbh %[t_lo], %[t0], %[mask] \n\t"
"punpckhbh %[t_hi], %[t0], %[mask] \n\t"
"paddh %[dest_hi], %[dest_hi], %[t_lo] \n\t"
......@@ -293,10 +307,12 @@ void ScaleRowDown2_16_MMI(const uint16_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
"psrlw %[src0], %[src0], %[shift] \n\t"
"ldc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
"psrlw %[src1], %[src1], %[shift] \n\t"
"packsswh %[dest], %[src0], %[src1] \n\t"
......@@ -324,8 +340,10 @@ void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src_ptr]) \n\t"
"ldc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
"punpcklhw %[dest_lo], %[src0], %[src1] \n\t"
"punpckhhw %[dest_hi], %[src0], %[src1] \n\t"
......@@ -364,11 +382,13 @@ void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[s0], 0x00(%[s]) \n\t"
"gsldrc1 %[s0], 0x00(%[s]) \n\t"
"gsldlc1 %[s0], 0x07(%[s]) \n\t"
"psrlw %[s1], %[s0], %[shift0] \n\t"
"and %[s0], %[s0], %[mask] \n\t"
"ldc1 %[t0], 0x00(%[t]) \n\t"
"gsldrc1 %[t0], 0x00(%[t]) \n\t"
"gsldlc1 %[t0], 0x07(%[t]) \n\t"
"psrlw %[t1], %[t0], %[shift0] \n\t"
"and %[t0], %[t0], %[mask] \n\t"
......@@ -378,11 +398,13 @@ void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr,
"paddw %[dest0], %[dest0], %[ph] \n\t"
"psrlw %[dest0], %[dest0], %[shift1] \n\t"
"ldc1 %[s0], 0x08(%[s]) \n\t"
"gsldrc1 %[s0], 0x08(%[s]) \n\t"
"gsldlc1 %[s0], 0x0f(%[s]) \n\t"
"psrlw %[s1], %[s0], %[shift0] \n\t"
"and %[s0], %[s0], %[mask] \n\t"
"ldc1 %[t0], 0x08(%[t]) \n\t"
"gsldrc1 %[t0], 0x08(%[t]) \n\t"
"gsldlc1 %[t0], 0x0f(%[t]) \n\t"
"psrlw %[t1], %[t0], %[shift0] \n\t"
"and %[t0], %[t0], %[mask] \n\t"
......@@ -425,18 +447,22 @@ void ScaleRowDown4_MMI(const uint8_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
"psrlw %[src0], %[src0], %[shift] \n\t"
"and %[src0], %[src0], %[mask] \n\t"
"ldc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
"psrlw %[src1], %[src1], %[shift] \n\t"
"and %[src1], %[src1], %[mask] \n\t"
"packsswh %[dest_lo], %[src0], %[src1] \n\t"
"ldc1 %[src0], 0x10(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t"
"psrlw %[src0], %[src0], %[shift] \n\t"
"and %[src0], %[src0], %[mask] \n\t"
"ldc1 %[src1], 0x18(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t"
"psrlw %[src1], %[src1], %[shift] \n\t"
"and %[src1], %[src1], %[mask] \n\t"
"packsswh %[dest_hi], %[src0], %[src1] \n\t"
......@@ -469,13 +495,17 @@ void ScaleRowDown4_16_MMI(const uint16_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src_ptr]) \n\t"
"ldc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
"punpckhhw %[dest_lo], %[src0], %[src1] \n\t"
"punpcklhw %[dest_lo], %[dest_lo], %[mask] \n\t"
"ldc1 %[src0], 0x10(%[src_ptr]) \n\t"
"ldc1 %[src1], 0x18(%[src_ptr]) \n\t"
"gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t"
"gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t"
"punpckhhw %[dest_hi], %[src0], %[src1] \n\t"
"punpcklhw %[dest_hi], %[dest_hi], %[mask] \n\t"
......@@ -691,7 +721,8 @@ void ScaleColsUp2_16_MMI(uint16_t* dst_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src], 0x00(%[src_ptr]) \n\t"
"gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
"punpcklhw %[dest], %[src], %[src] \n\t"
"gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
......@@ -721,9 +752,11 @@ void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
"punpcklbh %[src_lo], %[src], %[mask] \n\t"
"punpckhbh %[src_hi], %[src], %[mask] \n\t"
"ldc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
"gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
"gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
"paddush %[dest0], %[dest0], %[src_lo] \n\t"
"ldc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
"gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
"gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
"paddush %[dest1], %[dest1], %[src_hi] \n\t"
"gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
......@@ -750,16 +783,19 @@ void ScaleAddRow_16_MMI(const uint16_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src], 0x00(%[src_ptr]) \n\t"
"gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
"punpcklhw %[src_lo], %[src], %[mask] \n\t"
"punpckhhw %[src_hi], %[src], %[mask] \n\t"
"ldc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
"gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
"gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
"paddw %[dest0], %[dest0], %[src_lo] \n\t"
"gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
"ldc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
"gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
"gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
"paddw %[dest1], %[dest1], %[src_hi] \n\t"
"gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
"gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
......@@ -922,7 +958,8 @@ void ScaleARGBColsUp2_MMI(uint8_t* dst_argb,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src], 0x00(%[src_ptr]) \n\t"
"gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
"punpcklwd %[dest0], %[src], %[src] \n\t"
"gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
......@@ -939,67 +976,6 @@ void ScaleARGBColsUp2_MMI(uint8_t* dst_argb,
: "memory");
}
// Scales one row of ARGB pixels horizontally, blending each pair of adjacent
// source pixels with a 7-bit weight (Loongson MMI inline assembly).
//
//   dst_argb  - destination row; one 4-byte pixel is stored per iteration.
//   src_argb  - source row; 8 bytes (two pixels) are read per iteration,
//               starting at pixel index x >> 16.
//   dst_width - number of pixels to produce. The loop body runs before the
//               counter is tested, so a value of 0 is not handled.
//   x         - starting source position; bits 16 and up select the pixel,
//               bits 9..15 supply the 7-bit blend weight.
//   dx        - step added to x after every output pixel.
//
// Per output pixel: dst = (p0 * (xf ^ 0x7f) + p1 * xf) >> 7 for each channel,
// where xf = (x >> 9) & 0x7f and p0/p1 are the two loaded pixels.
//
// NOTE(review): src_ptr, dst_ptr, width and x are declared as input operands
// but are written by the assembly; confirm this is acceptable for the
// compiler in use.
// NOTE(review): mask1 is declared and passed in but never referenced by the
// assembly template.
// NOTE(review): ldc1 is issued on src_argb + 4 * (x >> 16), which is only
// guaranteed to be a multiple of 4 relative to src_argb - confirm alignment
// requirements on the target.
void ScaleARGBFilterCols_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
uint64_t dest, src, src_hi, src_lo;
int xi, xf, nxf;
int64_t fxf, fnxf;
const uint8_t* src_ptr = src_argb;
const uint64_t mask0 = 0;  // zero vector; also the pshufh selector that broadcasts lane 0
const uint64_t mask1 = 0x7fULL;
const uint64_t shift2 = 2;  // pixel index -> byte offset (4 bytes per pixel)
const uint64_t shift9 = 9;  // brings the top 7 fraction bits of x down to bits 0..6
const uint64_t shift7 = 7;  // normalizes the 7-bit weighted sum
const uint64_t shift16 = 16;  // extracts the pixel index from x
__asm__ volatile(
"1: \n\t"
"dsrl %[xi], %[x], %[shift16] \n\t"  // xi = x >> 16
"dsll %[xi], %[xi], %[shift2] \n\t"  // xi *= 4 (byte offset)
"dadd %[src_ptr], %[src_argb], %[xi] \n\t"
"ldc1 %[src], 0x00(%[src_ptr]) \n\t"  // load two adjacent pixels
"punpcklbh %[src_lo], %[src], %[mask0] \n\t"  // first pixel widened to 4 x 16 bit
"punpckhbh %[src_hi], %[src], %[mask0] \n\t"  // second pixel widened to 4 x 16 bit
"dsrl %[xf], %[x], %[shift9] \n\t"
"andi %[xf], %[xf], 0x7f \n\t"  // xf = weight of the second pixel
"xori %[nxf], %[xf], 0x7f \n\t"  // nxf = 0x7f - xf, weight of the first pixel
"dmtc1 %[xf], %[fxf] \n\t"
"pshufh %[fxf], %[fxf], %[mask0] \n\t"  // broadcast xf to all four lanes
"dmtc1 %[nxf], %[fnxf] \n\t"
"pshufh %[fnxf], %[fnxf], %[mask0] \n\t"  // broadcast nxf to all four lanes
"pmullh %[src_lo], %[src_lo], %[fnxf] \n\t"
"pmullh %[src_hi], %[src_hi], %[fxf] \n\t"
"paddh %[dest], %[src_lo], %[src_hi] \n\t"
"psrlh %[dest], %[dest], %[shift7] \n\t"
"packushb %[dest], %[dest], %[mask0] \n\t"  // back to 8-bit channels in the low word
"dadd %[x], %[x], %[dx] \n\t"
"swc1 %[dest], 0x00(%[dst_ptr]) \n\t"  // store one 4-byte pixel
"daddiu %[dst_ptr], %[dst_ptr], 0x04 \n\t"
"daddi %[width], %[width], -0x01 \n\t"
"bnez %[width], 1b \n\t"
: [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
[src_lo] "=&f"(src_lo), [fxf] "=&f"(fxf), [fnxf] "=&f"(fnxf),
[xi] "=&r"(xi), [xf] "=&r"(xf), [nxf] "=&r"(nxf)
: [src_argb] "r"(src_argb), [src_ptr] "r"(src_ptr),
[dst_ptr] "r"(dst_argb), [width] "r"(dst_width), [x] "r"(x),
[dx] "r"(dx), [mask0] "f"(mask0), [mask1] "f"(mask1),
[shift2] "r"(shift2), [shift7] "f"(shift7), [shift9] "r"(shift9),
[shift16] "r"(shift16)
: "memory");
}
// Divide num by div and return as 16.16 fixed point result.
/* LibYUVBaseTest.TestFixedDiv */
int FixedDiv_MIPS(int num, int div) {
......@@ -1058,9 +1034,11 @@ void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
__asm__ volatile(
"1: \n\t"
"ldc1 %[src0], 0x00(%[src1_ptr]) \n\t"
"gsldrc1 %[src0], 0x00(%[src1_ptr]) \n\t"
"gsldlc1 %[src0], 0x07(%[src1_ptr]) \n\t"
"pmaddhw %[dest04], %[src0], %[mask0] \n\t"
"ldc1 %[src1], 0x00(%[src2_ptr]) \n\t"
"gsldrc1 %[src1], 0x00(%[src2_ptr]) \n\t"
"gsldlc1 %[src1], 0x07(%[src2_ptr]) \n\t"
"pmaddhw %[dest], %[src1], %[mask1] \n\t"
"paddw %[dest04], %[dest04], %[dest] \n\t"
"paddw %[dest04], %[dest04], %[ph] \n\t"
......@@ -1072,9 +1050,11 @@ void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
"paddw %[dest15], %[dest15], %[ph] \n\t"
"psrlw %[dest15], %[dest15], %[shift] \n\t"
"ldc1 %[src0], 0x02(%[src1_ptr]) \n\t"
"gsldrc1 %[src0], 0x02(%[src1_ptr]) \n\t"
"gsldlc1 %[src0], 0x09(%[src1_ptr]) \n\t"
"pmaddhw %[dest26], %[src0], %[mask0] \n\t"
"ldc1 %[src1], 0x02(%[src2_ptr]) \n\t"
"gsldrc1 %[src1], 0x02(%[src2_ptr]) \n\t"
"gsldlc1 %[src1], 0x09(%[src2_ptr]) \n\t"
"pmaddhw %[dest], %[src1], %[mask1] \n\t"
"paddw %[dest26], %[dest26], %[dest] \n\t"
"paddw %[dest26], %[dest26], %[ph] \n\t"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment