summaryrefslogtreecommitdiff
path: root/third_party/libyuv/source/row_mips.cc
diff options
context:
space:
mode:
authorJames Bankoski <jimbankoski@google.com>2016-07-01 19:08:04 +0000
committerJames Bankoski <jimbankoski@google.com>2016-07-01 19:14:28 +0000
commitc5372cf077394856eb1aa10e72bcc8e25bb9b3ec (patch)
tree7cfaf2e17b6c1374a3c3d10026dd74c1ed9992c0 /third_party/libyuv/source/row_mips.cc
parentaa81375d73ee33d382e7f717c519db6159e497ee (diff)
downloadlibvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.tar
libvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.tar.gz
libvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.tar.bz2
libvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.zip
Revert "libyuv: update to 2f101fdb"
Compile failures on linux platform. BUG=webm:1253 This reverts commit aa81375d73ee33d382e7f717c519db6159e497ee. Change-Id: Ibab2c4827bc21518dc03c6e9716b5015cff56fc7
Diffstat (limited to 'third_party/libyuv/source/row_mips.cc')
-rw-r--r--third_party/libyuv/source/row_mips.cc171
1 files changed, 150 insertions, 21 deletions
diff --git a/third_party/libyuv/source/row_mips.cc b/third_party/libyuv/source/row_mips.cc
index 285f0b5ad..cfc9ffe03 100644
--- a/third_party/libyuv/source/row_mips.cc
+++ b/third_party/libyuv/source/row_mips.cc
@@ -375,13 +375,13 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
}
#endif // HAS_COPYROW_MIPS
-// DSPR2 functions
+// MIPS DSPR2 functions
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
(__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
-void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
+void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@@ -389,6 +389,7 @@ void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual
+ ".p2align 2 \n"
"1: \n"
"addiu $t4, $t4, -1 \n"
"lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
@@ -446,7 +447,7 @@ void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
);
}
-void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@@ -456,6 +457,7 @@ void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
"blez $t4, 2f \n"
" addu %[src], %[src], %[width] \n" // src += width
+ ".p2align 2 \n"
"1: \n"
"lw $t0, -16(%[src]) \n" // |3|2|1|0|
"lw $t1, -12(%[src]) \n" // |7|6|5|4|
@@ -496,10 +498,10 @@ void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
);
}
-void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- int x;
- int y;
+void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) {
+ int x = 0;
+ int y = 0;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@@ -510,6 +512,7 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez %[x], 2f \n"
" addu %[src_uv], %[src_uv], $t4 \n"
+ ".p2align 2 \n"
"1: \n"
"lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
"lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
@@ -579,7 +582,7 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
[dst_u] "+r" (dst_u),
[dst_v] "+r" (dst_v),
[x] "=&r" (x),
- [y] "=&r" (y)
+ [y] "+r" (y)
: [width] "r" (width)
: "t0", "t1", "t2", "t3", "t4",
"t5", "t7", "t8", "t9"
@@ -593,7 +596,7 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
-#define YUVTORGB \
+#define I422ToTransientMipsRGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
@@ -652,13 +655,11 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
-// TODO(fbarchard): accept yuv conversion constants.
-void I422ToARGBRow_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
+void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@@ -672,8 +673,9 @@ void I422ToARGBRow_DSPR2(const uint8* y_buf,
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
+ ".p2align 2 \n"
"1: \n"
- YUVTORGB
+ I422ToTransientMipsRGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
@@ -715,10 +717,136 @@ void I422ToARGBRow_DSPR2(const uint8* y_buf,
);
}
+void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ __asm__ __volatile__ (
+ ".set push \n"
+ ".set noreorder \n"
+ "beqz %[width], 2f \n"
+ " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
+ "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
+ "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
+ "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
+ "repl.ph $s4, 16 \n" // |0|16|0|16|
+ "repl.ph $s5, 128 \n" // |128|128|
+ "lui $s6, 0xff00 \n"
+ "ori $s6, 0xff00 \n" // |ff|00|ff|00|
+
+ ".p2align 2 \n"
+ "1: \n"
+ I422ToTransientMipsRGB
+// Arranging into abgr format
+ "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
+ "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
+ "precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
+ "precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
+
+ "precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
+ "addiu %[width], -4 \n"
+ "addiu %[y_buf], 4 \n"
+ "preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
+ "preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
+ "or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
+ "or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
+ "precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
+ "precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
+ "sll $t9, $t9, 16 \n"
+ "sll $t8, $t8, 16 \n"
+ "packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
+ "packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
+// Store results.
+ "sw $t2, 0(%[rgb_buf]) \n"
+ "sw $t0, 4(%[rgb_buf]) \n"
+ "sw $t1, 8(%[rgb_buf]) \n"
+ "sw $t3, 12(%[rgb_buf]) \n"
+ "bnez %[width], 1b \n"
+ " addiu %[rgb_buf], 16 \n"
+ "2: \n"
+ ".set pop \n"
+ :[y_buf] "+r" (y_buf),
+ [u_buf] "+r" (u_buf),
+ [v_buf] "+r" (v_buf),
+ [width] "+r" (width),
+ [rgb_buf] "+r" (rgb_buf)
+ :
+ : "t0", "t1", "t2", "t3", "t4", "t5",
+ "t6", "t7", "t8", "t9",
+ "s0", "s1", "s2", "s3",
+ "s4", "s5", "s6"
+ );
+}
+
+void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ __asm__ __volatile__ (
+ ".set push \n"
+ ".set noreorder \n"
+ "beqz %[width], 2f \n"
+ " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
+ "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
+ "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
+ "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
+ "repl.ph $s4, 16 \n" // |0|16|0|16|
+ "repl.ph $s5, 128 \n" // |128|128|
+ "lui $s6, 0xff \n"
+ "ori $s6, 0xff \n" // |00|ff|00|ff|
+
+ ".p2align 2 \n"
+ "1: \n"
+ I422ToTransientMipsRGB
+ // Arranging into bgra format
+ "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
+ "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
+ "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
+ "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
+
+ "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
+ "addiu %[width], -4 \n"
+ "addiu %[y_buf], 4 \n"
+ "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
+ "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
+ "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
+ "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
+ "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
+ "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
+ "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
+ "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
+ "sll $t1, $t1, 16 \n"
+ "sll $t2, $t2, 16 \n"
+ "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
+ "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
+// Store results.
+ "sw $t2, 0(%[rgb_buf]) \n"
+ "sw $t0, 4(%[rgb_buf]) \n"
+ "sw $t1, 8(%[rgb_buf]) \n"
+ "sw $t3, 12(%[rgb_buf]) \n"
+ "bnez %[width], 1b \n"
+ " addiu %[rgb_buf], 16 \n"
+ "2: \n"
+ ".set pop \n"
+ :[y_buf] "+r" (y_buf),
+ [u_buf] "+r" (u_buf),
+ [v_buf] "+r" (v_buf),
+ [width] "+r" (width),
+ [rgb_buf] "+r" (rgb_buf)
+ :
+ : "t0", "t1", "t2", "t3", "t4", "t5",
+ "t6", "t7", "t8", "t9",
+ "s0", "s1", "s2", "s3",
+ "s4", "s5", "s6"
+ );
+}
+
// Bilinear filter 8x2 -> 8x1
-void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
+void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) {
int y0_fraction = 256 - source_y_fraction;
const uint8* src_ptr1 = src_ptr + src_stride;
@@ -729,6 +857,7 @@ void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
"replv.ph $t0, %[y0_fraction] \n"
"replv.ph $t1, %[source_y_fraction] \n"
+ ".p2align 2 \n"
"1: \n"
"lw $t2, 0(%[src_ptr]) \n"
"lw $t3, 0(%[src_ptr1]) \n"