summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
authorJohann <johannkoenig@google.com>2013-11-12 15:30:00 -0800
committerGerrit Code Review <gerrit@gerrit.golo.chromium.org>2013-11-12 15:30:00 -0800
commit8dd3905163c5301e6746d7a8583fd68a714e2992 (patch)
treed9b8337509e14e52f345f77053dfbea15f4fced0 /vp9
parentc88f1ec8ca1e51bf2f38462b255e72c3a4b748c0 (diff)
parentd5a52edc117fdedb087034635d601888de553691 (diff)
downloadlibvpx-8dd3905163c5301e6746d7a8583fd68a714e2992.tar
libvpx-8dd3905163c5301e6746d7a8583fd68a714e2992.tar.gz
libvpx-8dd3905163c5301e6746d7a8583fd68a714e2992.tar.bz2
libvpx-8dd3905163c5301e6746d7a8583fd68a714e2992.zip
Merge "Added optimized vp9_idct32x32_34_add_dspr2"
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/mips/dspr2/vp9_itrans32_dspr2.c69
-rw-r--r--vp9/common/vp9_rtcd_defs.sh2
2 files changed, 67 insertions, 4 deletions
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
index d3aee73cb..bc6759400 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
@@ -19,7 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output) {
+static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;
@@ -42,7 +43,7 @@ static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output) {
const int const_2_power_13 = 8192;
const int32_t *input_int;
- for (i = 32; i--; ) {
+ for (i = no_rows; i--; ) {
input_int = (const int32_t *)input;
if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] |
@@ -881,12 +882,74 @@ void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
- idct32_1d_rows_dspr2(input, outptr);
+ idct32_1d_rows_dspr2(input, outptr, 32);
// Columns
vp9_idct32_1d_cols_add_blk_dspr2(out, dest, dest_stride);
}
+void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
+ int stride) {
+ DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
+ int16_t *outptr = out;
+ uint32_t i;
+ uint32_t pos = 45;
+
+ /* bit positon for extract from acc */
+ __asm__ __volatile__ (
+ "wrdsp %[pos], 1 \n\t"
+ :
+ : [pos] "r" (pos)
+ );
+
+ // Rows
+ idct32_1d_rows_dspr2(input, outptr, 8);
+
+ outptr += 8;
+ __asm__ __volatile__ (
+ "sw $zero, 0(%[outptr]) \n\t"
+ "sw $zero, 4(%[outptr]) \n\t"
+ "sw $zero, 8(%[outptr]) \n\t"
+ "sw $zero, 12(%[outptr]) \n\t"
+ "sw $zero, 16(%[outptr]) \n\t"
+ "sw $zero, 20(%[outptr]) \n\t"
+ "sw $zero, 24(%[outptr]) \n\t"
+ "sw $zero, 28(%[outptr]) \n\t"
+ "sw $zero, 32(%[outptr]) \n\t"
+ "sw $zero, 36(%[outptr]) \n\t"
+ "sw $zero, 40(%[outptr]) \n\t"
+ "sw $zero, 44(%[outptr]) \n\t"
+
+ :
+ : [outptr] "r" (outptr)
+ );
+
+ for (i = 0; i < 31; ++i) {
+ outptr += 32;
+
+ __asm__ __volatile__ (
+ "sw $zero, 0(%[outptr]) \n\t"
+ "sw $zero, 4(%[outptr]) \n\t"
+ "sw $zero, 8(%[outptr]) \n\t"
+ "sw $zero, 12(%[outptr]) \n\t"
+ "sw $zero, 16(%[outptr]) \n\t"
+ "sw $zero, 20(%[outptr]) \n\t"
+ "sw $zero, 24(%[outptr]) \n\t"
+ "sw $zero, 28(%[outptr]) \n\t"
+ "sw $zero, 32(%[outptr]) \n\t"
+ "sw $zero, 36(%[outptr]) \n\t"
+ "sw $zero, 40(%[outptr]) \n\t"
+ "sw $zero, 44(%[outptr]) \n\t"
+
+ :
+ : [outptr] "r" (outptr)
+ );
+ }
+
+ // Columns
+ vp9_idct32_1d_cols_add_blk_dspr2(out, dest, stride);
+}
+
void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
int stride) {
int r, out;
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 6313f3337..8d00c6440 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -296,7 +296,7 @@ prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int
specialize vp9_idct32x32_1024_add sse2 neon dspr2
prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_34_add sse2
+specialize vp9_idct32x32_34_add sse2 dspr2
prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1_add sse2 neon dspr2