summaryrefslogtreecommitdiff
path: root/vp9/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/common')
-rw-r--r--vp9/common/arm/neon/vp9_reconintra_neon.asm16
-rw-r--r--vp9/common/mips/dspr2/vp9_common_dspr2.h4
-rw-r--r--vp9/common/mips/dspr2/vp9_itrans16_dspr2.c34
-rw-r--r--vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c4
-rw-r--r--vp9/common/mips/dspr2/vp9_itrans32_dspr2.c12
-rw-r--r--vp9/common/mips/dspr2/vp9_itrans4_dspr2.c26
-rw-r--r--vp9/common/mips/dspr2/vp9_itrans8_dspr2.c34
-rw-r--r--vp9/common/vp9_alloccommon.c12
-rw-r--r--vp9/common/vp9_blockd.h2
-rw-r--r--vp9/common/vp9_convolve.c2
-rw-r--r--vp9/common/vp9_entropy.h10
-rw-r--r--vp9/common/vp9_entropymode.c2
-rw-r--r--vp9/common/vp9_entropymv.c4
-rw-r--r--vp9/common/vp9_frame_buffers.c84
-rw-r--r--vp9/common/vp9_frame_buffers.h53
-rw-r--r--vp9/common/vp9_mv.h4
-rw-r--r--vp9/common/vp9_mvref_common.h2
-rw-r--r--vp9/common/vp9_onyx.h8
-rw-r--r--vp9/common/vp9_onyxc_int.h26
-rw-r--r--vp9/common/vp9_pred_common.c62
-rw-r--r--vp9/common/vp9_pred_common.h13
-rw-r--r--vp9/common/vp9_prob.c31
-rw-r--r--vp9/common/vp9_prob.h33
-rw-r--r--vp9/common/vp9_quant_common.c3
-rw-r--r--vp9/common/vp9_quant_common.h3
-rw-r--r--vp9/common/vp9_reconinter.c25
-rw-r--r--vp9/common/vp9_reconinter.h14
-rw-r--r--vp9/common/vp9_rtcd_defs.sh12
-rw-r--r--vp9/common/vp9_scale.h4
-rw-r--r--vp9/common/vp9_systemdependent.h21
-rw-r--r--vp9/common/x86/vp9_asm_stubs.c103
-rw-r--r--vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm422
32 files changed, 841 insertions, 244 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 279f678b1..4a49964d5 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -349,7 +349,7 @@ loop_h
vdup.u8 d0, r12
; preload 8 left
- vld1.8 d30, [r3]
+ vld1.8 {d30}, [r3]
; Load above 8 pixels
vld1.64 {d2}, [r2]
@@ -422,10 +422,10 @@ loop_h
vdup.u8 q0, r12
; Load above 8 pixels
- vld1.8 q1, [r2]
+ vld1.8 {q1}, [r2]
; preload 8 left into r12
- vld1.8 d18, [r3]!
+ vld1.8 {d18}, [r3]!
; Compute above - ytop_left
vsubl.u8 q2, d2, d0
@@ -492,7 +492,7 @@ loop_16x16_neon
vqshrun.s16 d23, q8, #0
vdup.16 q0, d20[2]
vdup.16 q8, d20[3]
- vld1.8 d18, [r3]! ; preload 8 left into r12
+ vld1.8 {d18}, [r3]! ; preload 8 left into r12
vmovl.u8 q10, d18
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
@@ -518,11 +518,11 @@ loop_16x16_neon
vdup.u8 q0, r12
; Load above 32 pixels
- vld1.8 q1, [r2]!
- vld1.8 q2, [r2]
+ vld1.8 {q1}, [r2]!
+ vld1.8 {q2}, [r2]
; preload 8 left pixels
- vld1.8 d26, [r3]!
+ vld1.8 {d26}, [r3]!
; Compute above - ytop_left
vsubl.u8 q8, d2, d0
@@ -621,7 +621,7 @@ loop_32x32_neon
vst1.64 {d0-d3}, [r0], r1
vqshrun.s16 d24, q12, #0
vqshrun.s16 d25, q13, #0
- vld1.8 d0, [r3]! ; preload 8 left pixels
+ vld1.8 {d0}, [r3]! ; preload 8 left pixels
vqshrun.s16 d26, q14, #0
vqshrun.s16 d27, q15, #0
vmovl.u8 q3, d0
diff --git a/vp9/common/mips/dspr2/vp9_common_dspr2.h b/vp9/common/mips/dspr2/vp9_common_dspr2.h
index 991d3c2b3..6ebea9f2f 100644
--- a/vp9/common/mips/dspr2/vp9_common_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_common_dspr2.h
@@ -85,8 +85,8 @@ static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) {
);
}
-void vp9_idct32_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride);
+void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride);
void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
index 1b2f5506a..19c582fd1 100644
--- a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct16_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct16_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int i;
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int step1_10, step1_11, step1_12, step1_13;
@@ -404,8 +404,8 @@ static void idct16_1d_rows_dspr2(const int16_t *input, int16_t *output,
}
}
-static void idct16_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+static void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int i;
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int step1_8, step1_9, step1_10, step1_11;
@@ -905,13 +905,13 @@ void vp9_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
);
// First transform rows
- idct16_1d_rows_dspr2(input, out, 16);
+ idct16_rows_dspr2(input, out, 16);
// Then transform columns and add to dest
- idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
-static void iadst16_1d(const int16_t *input, int16_t *output) {
+static void iadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -1099,16 +1099,16 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- idct16_1d_rows_dspr2(input, outptr, 16);
- idct16_1d_cols_add_blk_dspr2(out, dest, pitch);
+ idct16_rows_dspr2(input, outptr, 16);
+ idct16_cols_add_blk_dspr2(out, dest, pitch);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct16_1d_rows_dspr2(input, outptr, 16);
+ idct16_rows_dspr2(input, outptr, 16);
outptr = out;
for (i = 0; i < 16; ++i) {
- iadst16_1d(outptr, temp_out);
+ iadst16(outptr, temp_out);
for (j = 0; j < 16; ++j)
dest[j * pitch + i] =
@@ -1125,7 +1125,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
- iadst16_1d(input, outptr);
+ iadst16(input, outptr);
input += 16;
outptr += 16;
}
@@ -1134,7 +1134,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
for (j = 0; j < 16; ++j)
temp_in[j * 16 + i] = out[i * 16 + j];
- idct16_1d_cols_add_blk_dspr2(temp_in, dest, pitch);
+ idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
}
break;
case ADST_ADST: // ADST in both directions
@@ -1145,7 +1145,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
- iadst16_1d(input, outptr);
+ iadst16(input, outptr);
input += 16;
outptr += 16;
}
@@ -1153,7 +1153,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- iadst16_1d(temp_in, temp_out);
+ iadst16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * pitch + i] =
clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
@@ -1183,7 +1183,7 @@ void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
- idct16_1d_rows_dspr2(input, outptr, 4);
+ idct16_rows_dspr2(input, outptr, 4);
outptr += 4;
for (i = 0; i < 6; ++i) {
@@ -1213,7 +1213,7 @@ void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
}
// Then transform columns
- idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
index 5e92db3d2..132d88ce5 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
@@ -18,8 +18,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-void vp9_idct32_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19;
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
index bc6759400..74a90b02c 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct32_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;
@@ -882,10 +882,10 @@ void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
- idct32_1d_rows_dspr2(input, outptr, 32);
+ idct32_rows_dspr2(input, outptr, 32);
// Columns
- vp9_idct32_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ vp9_idct32_cols_add_blk_dspr2(out, dest, dest_stride);
}
void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
@@ -903,7 +903,7 @@ void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
- idct32_1d_rows_dspr2(input, outptr, 8);
+ idct32_rows_dspr2(input, outptr, 8);
outptr += 8;
__asm__ __volatile__ (
@@ -947,7 +947,7 @@ void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
}
// Columns
- vp9_idct32_1d_cols_add_blk_dspr2(out, dest, stride);
+ vp9_idct32_cols_add_blk_dspr2(out, dest, stride);
}
void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
index 5b7aa5e71..1990348b8 100644
--- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
@@ -19,7 +19,7 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void vp9_idct4_1d_rows_dspr2(const int16_t *input, int16_t *output) {
+static void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
const int const_2_power_13 = 8192;
@@ -104,7 +104,7 @@ static void vp9_idct4_1d_rows_dspr2(const int16_t *input, int16_t *output) {
}
}
-static void vp9_idct4_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
@@ -240,10 +240,10 @@ void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
- vp9_idct4_1d_rows_dspr2(input, outptr);
+ vp9_idct4_rows_dspr2(input, outptr);
// Columns
- vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,
@@ -319,7 +319,7 @@ void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,
}
}
-static void iadst4_1d_dspr2(const int16_t *input, int16_t *output) {
+static void iadst4_dspr2(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0, x1, x2, x3;
@@ -379,16 +379,16 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- vp9_idct4_1d_rows_dspr2(input, outptr);
- vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vp9_idct4_rows_dspr2(input, outptr);
+ vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- vp9_idct4_1d_rows_dspr2(input, outptr);
+ vp9_idct4_rows_dspr2(input, outptr);
outptr = out;
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(outptr, temp_out);
+ iadst4_dspr2(outptr, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] =
@@ -400,7 +400,7 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
break;
case DCT_ADST: // DCT in vertical, ADST in horizontal
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(input, outptr);
+ iadst4_dspr2(input, outptr);
input += 4;
outptr += 4;
}
@@ -410,11 +410,11 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
temp_in[i * 4 + j] = out[j * 4 + i];
}
}
- vp9_idct4_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
break;
case ADST_ADST: // ADST in both directions
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(input, outptr);
+ iadst4_dspr2(input, outptr);
input += 4;
outptr += 4;
}
@@ -422,7 +422,7 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- iadst4_1d_dspr2(temp_in, temp_out);
+ iadst4_dspr2(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] =
diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index 93a08401d..acccaea6d 100644
--- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct8_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct8_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
const int const_2_power_13 = 8192;
int Temp0, Temp1, Temp2, Temp3, Temp4;
@@ -200,8 +200,8 @@ static void idct8_1d_rows_dspr2(const int16_t *input, int16_t *output,
}
}
-static void idct8_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int Temp0, Temp1, Temp2, Temp3;
int i;
@@ -462,13 +462,13 @@ void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
);
// First transform rows
- idct8_1d_rows_dspr2(input, outptr, 8);
+ idct8_rows_dspr2(input, outptr, 8);
// Then transform columns and add to dest
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
-static void iadst8_1d_dspr2(const int16_t *input, int16_t *output) {
+static void iadst8_dspr2(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0, x1, x2, x3, x4, x5, x6, x7;
@@ -563,14 +563,14 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- idct8_1d_rows_dspr2(input, outptr, 8);
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_rows_dspr2(input, outptr, 8);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct8_1d_rows_dspr2(input, outptr, 8);
+ idct8_rows_dspr2(input, outptr, 8);
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(&out[i * 8], temp_out);
+ iadst8_dspr2(&out[i * 8], temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] =
@@ -580,7 +580,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
break;
case DCT_ADST: // DCT in vertical, ADST in horizontal
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(input, outptr);
+ iadst8_dspr2(input, outptr);
input += 8;
outptr += 8;
}
@@ -590,11 +590,11 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
temp_in[i * 8 + j] = out[j * 8 + i];
}
}
- idct8_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
break;
case ADST_ADST: // ADST in both directions
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(input, outptr);
+ iadst8_dspr2(input, outptr);
input += 8;
outptr += 8;
}
@@ -603,7 +603,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- iadst8_1d_dspr2(temp_in, temp_out);
+ iadst8_dspr2(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] =
@@ -631,7 +631,7 @@ void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest,
);
// First transform rows
- idct8_1d_rows_dspr2(input, outptr, 4);
+ idct8_rows_dspr2(input, outptr, 4);
outptr += 4;
@@ -659,7 +659,7 @@ void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest,
// Then transform columns and add to dest
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index e033fbb99..6f771992b 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -33,9 +33,16 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
void vp9_free_frame_buffers(VP9_COMMON *cm) {
int i;
- for (i = 0; i < FRAME_BUFFERS; i++)
+ for (i = 0; i < FRAME_BUFFERS; i++) {
vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
+ if (cm->frame_bufs[i].ref_count > 0 &&
+ cm->frame_bufs[i].raw_frame_buffer.data != NULL) {
+ cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer);
+ cm->frame_bufs[i].ref_count = 0;
+ }
+ }
+
vp9_free_frame_buffer(&cm->post_proc_buffer);
vpx_free(cm->mip);
@@ -85,7 +92,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
int mi_size;
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
- VP9_DEC_BORDER_IN_PIXELS) < 0)
+ VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
goto fail;
set_mb_mi(cm, aligned_width, aligned_height);
@@ -199,6 +206,7 @@ void vp9_create_common(VP9_COMMON *cm) {
void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_frame_buffers(cm);
+ vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
}
void vp9_initialize_common() {
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 70b8ffa4e..f10a3c8c7 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -182,7 +182,7 @@ struct macroblockd_plane {
int subsampling_y;
struct buf_2d dst;
struct buf_2d pre[2];
- int16_t *dequant;
+ const int16_t *dequant;
ENTROPY_CONTEXT *above_context;
ENTROPY_CONTEXT *left_context;
};
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 3807ccc87..d30e0b488 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -145,7 +145,7 @@ static const InterpKernel *get_filter_base(const int16_t *filter) {
}
static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
- return (const InterpKernel *)(intptr_t)f - base;
+ return (int)((const InterpKernel *)(intptr_t)f - base);
}
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index e030d92ec..d6b380fd5 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -119,7 +119,7 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]);
extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]);
-static const uint8_t *get_band_translate(TX_SIZE tx_size) {
+static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
return tx_size == TX_4X4 ? vp9_coefband_trans_4x4
: vp9_coefband_trans_8x8plus;
}
@@ -146,8 +146,8 @@ typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS]
void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full);
-static int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
- const ENTROPY_CONTEXT *l) {
+static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+ const ENTROPY_CONTEXT *l) {
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
switch (tx_size) {
@@ -174,8 +174,8 @@ static int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
return combine_entropy_contexts(above_ec, left_ec);
}
-static const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
- PLANE_TYPE type, int block_idx) {
+static const INLINE scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
+ PLANE_TYPE type, int block_idx) {
const MODE_INFO *const mi = xd->mi_8x8[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 6def3c869..25cba7fbe 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -345,7 +345,7 @@ static int adapt_prob(vp9_prob pre_prob, const unsigned int ct[2]) {
static void adapt_probs(const vp9_tree_index *tree,
const vp9_prob *pre_probs, const unsigned int *counts,
vp9_prob *probs) {
- tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
+ vp9_tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
probs);
}
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 60ae79fdc..e1f5ef7b4 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -192,8 +192,8 @@ static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
const unsigned int *counts, vp9_prob *probs) {
- tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR,
- probs);
+ vp9_tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT,
+ MV_MAX_UPDATE_FACTOR, probs);
}
void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
diff --git a/vp9/common/vp9_frame_buffers.c b/vp9/common/vp9_frame_buffers.c
new file mode 100644
index 000000000..d903ed695
--- /dev/null
+++ b/vp9/common/vp9_frame_buffers.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vp9/common/vp9_frame_buffers.h"
+#include "vpx_mem/vpx_mem.h"
+
+// Allocates the zero-initialized array of internal frame buffer descriptors
+// for |list|, first releasing whatever |list| currently owns. Returns 0 on
+// success and 1 if the descriptor array could not be allocated.
+// NOTE(review): the initial free pass reads |list|'s fields, so |list| is
+// presumably expected to be zeroed before its first use -- confirm callers.
+int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) {
+  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+
+  assert(list != NULL);
+  vp9_free_internal_frame_buffers(list);
+
+  list->int_fb = vpx_calloc(num_buffers, sizeof(*list->int_fb));
+
+  // Only advertise a non-zero count when the array actually exists. Leaving
+  // the count set after a failed allocation would make the next call to
+  // vp9_free_internal_frame_buffers() index through a NULL |int_fb|.
+  list->num_internal_frame_buffers = (list->int_fb != NULL) ? num_buffers : 0;
+  return (list->int_fb == NULL);
+}
+
+// Releases every buffer owned by |list| together with the descriptor array
+// itself, leaving |list| empty (NULL array, zero count). Calling it on an
+// already-empty list is a no-op.
+void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) {
+  int i;
+
+  assert(list != NULL);
+
+  // |int_fb| can be NULL while the count is still non-zero (after an earlier
+  // free, or after a failed allocation), so never index it unchecked.
+  if (list->int_fb != NULL) {
+    for (i = 0; i < list->num_internal_frame_buffers; ++i) {
+      vpx_free(list->int_fb[i].data);
+    }
+    vpx_free(list->int_fb);
+    list->int_fb = NULL;
+  }
+
+  // Keep the count consistent with the (now absent) array.
+  list->num_internal_frame_buffers = 0;
+}
+
+// Frame buffer "get" callback backed by the codec's internal buffer list.
+// |cb_priv| must point to an InternalFrameBufferList. Picks the first entry
+// that is not in use, grows its memory to at least |min_size| bytes if
+// needed, marks it in use and describes it in |fb| (data, size, priv).
+// Returns 0 on success, or -1 if an argument is NULL, every entry is in use,
+// or the buffer could not be grown.
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
+                         vpx_codec_frame_buffer_t *fb) {
+  int i;
+  InternalFrameBuffer *slot;
+  InternalFrameBufferList *const int_fb_list =
+      (InternalFrameBufferList *)cb_priv;
+  if (int_fb_list == NULL || fb == NULL)
+    return -1;
+
+  // Find a free frame buffer.
+  for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) {
+    if (!int_fb_list->int_fb[i].in_use)
+      break;
+  }
+
+  if (i == int_fb_list->num_internal_frame_buffers)
+    return -1;
+
+  slot = &int_fb_list->int_fb[i];
+
+  if (slot->size < min_size) {
+    // Grow through a temporary: writing the result straight into slot->data
+    // would, on failure, drop the only pointer to the old block and leave a
+    // NULL |data| paired with a stale non-zero |size|.
+    // NOTE(review): assumes vpx_realloc() keeps the original block valid on
+    // failure, as realloc() does -- confirm against vpx_mem.
+    uint8_t *const grown = (uint8_t *)vpx_realloc(slot->data, min_size);
+    if (grown == NULL)
+      return -1;
+
+    slot->data = grown;
+    slot->size = min_size;
+  }
+
+  fb->data = slot->data;
+  fb->size = slot->size;
+  slot->in_use = 1;
+
+  // Set the frame buffer's private data to point at the internal frame buffer.
+  fb->priv = slot;
+  return 0;
+}
+
+// Frame buffer "release" callback paired with vp9_get_frame_buffer(): marks
+// the internal buffer referenced by fb->priv as free again. |cb_priv| is
+// unused. Returns 0 on success and -1 if |fb| is NULL.
+int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
+  InternalFrameBuffer *int_fb;
+  (void)cb_priv;
+  if (fb == NULL)
+    return -1;
+
+  int_fb = (InternalFrameBuffer *)fb->priv;
+  // A descriptor that was never filled in by vp9_get_frame_buffer() carries
+  // no internal buffer; treat that as nothing to release instead of
+  // dereferencing a NULL pointer.
+  if (int_fb != NULL)
+    int_fb->in_use = 0;
+  return 0;
+}
diff --git a/vp9/common/vp9_frame_buffers.h b/vp9/common/vp9_frame_buffers.h
new file mode 100644
index 000000000..e2cfe61b6
--- /dev/null
+++ b/vp9/common/vp9_frame_buffers.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#define VP9_COMMON_VP9_FRAME_BUFFERS_H_
+
+#include "vpx/vpx_frame_buffer.h"
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Bookkeeping for one frame buffer whose memory is owned by the codec.
+typedef struct InternalFrameBuffer {
+ // Backing memory; (re)allocated on demand by vp9_get_frame_buffer().
+ uint8_t *data;
+ // Size in bytes recorded for |data|.
+ size_t size;
+ // Set to 1 by vp9_get_frame_buffer() when the buffer is handed out and
+ // cleared to 0 by vp9_release_frame_buffer().
+ int in_use;
+} InternalFrameBuffer;
+
+// The set of internal frame buffers available to one codec instance.
+typedef struct InternalFrameBufferList {
+ // Number of entries in |int_fb|.
+ int num_internal_frame_buffers;
+ // Array of buffer descriptors allocated by
+ // vp9_alloc_internal_frame_buffers().
+ InternalFrameBuffer *int_fb;
+} InternalFrameBufferList;
+
+// Initializes |list|: frees anything it currently holds, then allocates a
+// zeroed array of VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS entries.
+// Returns 0 on success and 1 if the array could not be allocated.
+int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Frees the memory of every frame buffer in |list| and the list's array.
+void vp9_free_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Callback used by libvpx to request a frame buffer.
+// |cb_priv| is the callback private data and must point to an
+// InternalFrameBufferList.
+// |min_size| is the minimum size in bytes needed to decode the next frame.
+// |fb| receives the buffer's data pointer, size and private handle.
+// Returns 0 on success, or -1 if |cb_priv| or |fb| is NULL, no buffer is
+// free, or the buffer memory could not be allocated.
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
+ vpx_codec_frame_buffer_t *fb);
+
+// Callback used by libvpx when there are no references to the frame buffer.
+// |cb_priv| is not used. |fb| is the frame buffer to release; the internal
+// buffer its private handle points to is marked as no longer in use.
+// Returns 0 on success and -1 if |fb| is NULL.
+int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
index 98fd1d82f..3eb7f9d61 100644
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -34,8 +34,8 @@ typedef struct mv32 {
int32_t col;
} MV32;
-static void clamp_mv(MV *mv, int min_col, int max_col,
- int min_row, int max_row) {
+static INLINE void clamp_mv(MV *mv, int min_col, int max_col,
+ int min_row, int max_row) {
mv->col = clamp(mv->col, min_col, max_col);
mv->row = clamp(mv->row, min_row, max_row);
}
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index 0936abfcd..f99952f3c 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -48,7 +48,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
int_mv *mvlist, int_mv *nearest, int_mv *near);
// TODO(jingning): this mv clamping function should be block size dependent.
-static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
+static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
xd->mb_to_top_edge - LEFT_TOP_MARGIN,
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 564e4195f..ac39a98fd 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -147,8 +147,12 @@ extern "C" {
// END DATARATE CONTROL OPTIONS
// ----------------------------------------------------------------
- // Spatial scalability
- int ss_number_layers;
+ // Spatial and temporal scalability.
+ int ss_number_layers; // Number of spatial layers.
+ int ts_number_layers; // Number of temporal layers.
+ // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
+ int ts_target_bitrate[VPX_TS_MAX_LAYERS];
+ int ts_rate_decimator[VPX_TS_MAX_LAYERS];
// these parameters aren't to be used in final build don't use!!!
int play_alternate;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index d92a25b12..97983c596 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -18,6 +18,7 @@
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_frame_buffers.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_tile_common.h"
@@ -94,6 +95,7 @@ typedef enum {
typedef struct {
int ref_count;
+ vpx_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf;
} RefCntBuffer;
@@ -223,13 +225,21 @@ typedef struct VP9Common {
int frame_parallel_decoding_mode;
int log2_tile_cols, log2_tile_rows;
+
+ // Private data associated with the frame buffer callbacks.
+ void *cb_priv;
+ vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+ vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+ // Handles memory for the codec.
+ InternalFrameBufferList int_frame_buffers;
} VP9_COMMON;
-static YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
+static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
return &cm->frame_bufs[cm->new_fb_idx].buf;
}
-static int get_free_fb(VP9_COMMON *cm) {
+static INLINE int get_free_fb(VP9_COMMON *cm) {
int i;
for (i = 0; i < FRAME_BUFFERS; i++)
if (cm->frame_bufs[i].ref_count == 0)
@@ -240,7 +250,7 @@ static int get_free_fb(VP9_COMMON *cm) {
return i;
}
-static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
+static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
const int ref_index = *idx;
if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
@@ -251,7 +261,7 @@ static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
bufs[new_idx].ref_count++;
}
-static int mi_cols_aligned_to_sb(int n_mis) {
+static INLINE int mi_cols_aligned_to_sb(int n_mis) {
return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2);
}
@@ -275,10 +285,10 @@ static INLINE void set_skip_context(
}
}
-static void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
- int mi_row, int bh,
- int mi_col, int bw,
- int mi_rows, int mi_cols) {
+static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
+ int mi_row, int bh,
+ int mi_col, int bw,
+ int mi_rows, int mi_cols) {
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 11b6d93c1..487f00cca 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -218,27 +218,25 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
} else { // inter/inter
const int above_has_second = has_second_ref(above_mbmi);
const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
if (above_has_second && left_has_second) {
- pred_context = 1 + (above_mbmi->ref_frame[0] == LAST_FRAME ||
- above_mbmi->ref_frame[1] == LAST_FRAME ||
- left_mbmi->ref_frame[0] == LAST_FRAME ||
- left_mbmi->ref_frame[1] == LAST_FRAME);
+ pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+ left0 == LAST_FRAME || left1 == LAST_FRAME);
} else if (above_has_second || left_has_second) {
- const MV_REFERENCE_FRAME rfs = !above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf1 = above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf2 = above_has_second ?
- above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
if (rfs == LAST_FRAME)
pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
else
pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
} else {
- pred_context = 2 * (above_mbmi->ref_frame[0] == LAST_FRAME) +
- 2 * (left_mbmi->ref_frame[0] == LAST_FRAME);
+ pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
}
}
} else if (has_above || has_left) { // one edge available
@@ -291,23 +289,23 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
} else { // inter/inter
const int above_has_second = has_second_ref(above_mbmi);
const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
if (above_has_second && left_has_second) {
- if (above_mbmi->ref_frame[0] == left_mbmi->ref_frame[0] &&
- above_mbmi->ref_frame[1] == left_mbmi->ref_frame[1])
- pred_context = 3 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- above_mbmi->ref_frame[1] == GOLDEN_FRAME ||
- left_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- left_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (above0 == GOLDEN_FRAME ||
+ above1 == GOLDEN_FRAME ||
+ left0 == GOLDEN_FRAME ||
+ left1 == GOLDEN_FRAME);
else
pred_context = 2;
} else if (above_has_second || left_has_second) {
- const MV_REFERENCE_FRAME rfs = !above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf1 = above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf2 = above_has_second ?
- above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
if (rfs == GOLDEN_FRAME)
pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
@@ -316,17 +314,15 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
else
pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
} else {
- if (above_mbmi->ref_frame[0] == LAST_FRAME &&
- left_mbmi->ref_frame[0] == LAST_FRAME) {
+ if (above0 == LAST_FRAME && left0 == LAST_FRAME) {
pred_context = 3;
- } else if (above_mbmi->ref_frame[0] == LAST_FRAME ||
- left_mbmi->ref_frame[0] == LAST_FRAME) {
- const MB_MODE_INFO *edge_mbmi =
- above_mbmi->ref_frame[0] == LAST_FRAME ? left_mbmi : above_mbmi;
- pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) {
+ const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0
+ : above0;
+ pred_context = 4 * (edge0 == GOLDEN_FRAME);
} else {
- pred_context = 2 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME) +
- 2 * (left_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ pred_context = 2 * (above0 == GOLDEN_FRAME) +
+ 2 * (left0 == GOLDEN_FRAME);
}
}
}
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 0acee32f8..33ae5a896 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -98,8 +98,8 @@ static INLINE vp9_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
int vp9_get_tx_size_context(const MACROBLOCKD *xd);
-static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
- const struct tx_probs *tx_probs) {
+static INLINE const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
+ const struct tx_probs *tx_probs) {
switch (max_tx_size) {
case TX_8X8:
return tx_probs->p8x8[ctx];
@@ -113,13 +113,14 @@ static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
}
}
-static const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size, const MACROBLOCKD *xd,
- const struct tx_probs *tx_probs) {
+static INLINE const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size,
+ const MACROBLOCKD *xd,
+ const struct tx_probs *tx_probs) {
return get_tx_probs(max_tx_size, vp9_get_tx_size_context(xd), tx_probs);
}
-static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
- struct tx_counts *tx_counts) {
+static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
+ struct tx_counts *tx_counts) {
switch (max_tx_size) {
case TX_8X8:
return tx_counts->p8x8[ctx];
diff --git a/vp9/common/vp9_prob.c b/vp9/common/vp9_prob.c
index 884884e0b..f9bc06ecf 100644
--- a/vp9/common/vp9_prob.c
+++ b/vp9/common/vp9_prob.c
@@ -28,3 +28,34 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
+
+
+static unsigned int tree_merge_probs_impl(unsigned int i,
+ const vp9_tree_index *tree,
+ const vp9_prob *pre_probs,
+ const unsigned int *counts,
+ unsigned int count_sat,
+ unsigned int max_update,
+ vp9_prob *probs) {
+ const int l = tree[i];
+ const unsigned int left_count = (l <= 0)
+ ? counts[-l]
+ : tree_merge_probs_impl(l, tree, pre_probs, counts,
+ count_sat, max_update, probs);
+ const int r = tree[i + 1];
+ const unsigned int right_count = (r <= 0)
+ ? counts[-r]
+ : tree_merge_probs_impl(r, tree, pre_probs, counts,
+ count_sat, max_update, probs);
+ const unsigned int ct[2] = { left_count, right_count };
+ probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
+ count_sat, max_update);
+ return left_count + right_count;
+}
+
+void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
+ const unsigned int *counts, unsigned int count_sat,
+ unsigned int max_update_factor, vp9_prob *probs) {
+ tree_merge_probs_impl(0, tree, pre_probs, counts, count_sat,
+ max_update_factor, probs);
+}
diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h
index cc8d8ab38..f36148035 100644
--- a/vp9/common/vp9_prob.h
+++ b/vp9/common/vp9_prob.h
@@ -79,37 +79,10 @@ static INLINE vp9_prob merge_probs(vp9_prob pre_prob,
return weighted_prob(pre_prob, prob, factor);
}
-static unsigned int tree_merge_probs_impl(unsigned int i,
- const vp9_tree_index *tree,
- const vp9_prob *pre_probs,
- const unsigned int *counts,
- unsigned int count_sat,
- unsigned int max_update_factor,
- vp9_prob *probs) {
- const int l = tree[i];
- const unsigned int left_count = (l <= 0)
- ? counts[-l]
- : tree_merge_probs_impl(l, tree, pre_probs, counts,
- count_sat, max_update_factor, probs);
- const int r = tree[i + 1];
- const unsigned int right_count = (r <= 0)
- ? counts[-r]
- : tree_merge_probs_impl(r, tree, pre_probs, counts,
- count_sat, max_update_factor, probs);
- const unsigned int ct[2] = { left_count, right_count };
- probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
- count_sat, max_update_factor);
- return left_count + right_count;
-}
+void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
+ const unsigned int *counts, unsigned int count_sat,
+ unsigned int max_update_factor, vp9_prob *probs);
-static void tree_merge_probs(const vp9_tree_index *tree,
- const vp9_prob *pre_probs,
- const unsigned int *counts,
- unsigned int count_sat,
- unsigned int max_update_factor, vp9_prob *probs) {
- tree_merge_probs_impl(0, tree, pre_probs, counts,
- count_sat, max_update_factor, probs);
-}
DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 6dbdb4216..9fef8b1ef 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -130,7 +130,8 @@ int16_t vp9_ac_quant(int qindex, int delta) {
}
-int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex) {
+int vp9_get_qindex(const struct segmentation *seg, int segment_id,
+ int base_qindex) {
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
return seg->abs_delta == SEGMENT_ABSDATA ?
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
index af50e23cd..581104006 100644
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -27,7 +27,8 @@ void vp9_init_quant_tables();
int16_t vp9_dc_quant(int qindex, int delta);
int16_t vp9_ac_quant(int qindex, int delta);
-int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex);
+int vp9_get_qindex(const struct segmentation *seg, int segment_id,
+ int base_qindex);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index cc70e4cc0..7576e7b6f 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -269,21 +269,15 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv
: mi_mv_pred_q4(mi, ref))
: mi->mbmi.mv[ref].as_mv;
-
- // TODO(jkoleszar): This clamping is done in the incorrect place for the
- // scaling case. It needs to be done on the scaled MV, not the pre-scaling
- // MV. Note however that it performs the subsampling aware scaling so
- // that the result is always q4.
- // mv_precision precision is MV_PRECISION_Q4.
- const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
- pd->subsampling_x,
- pd->subsampling_y);
-
MV32 scaled_mv;
- int xs, ys, x0, y0, x0_16, y0_16, x1, y1, frame_width,
- frame_height, subpel_x, subpel_y, buf_stride;
+ int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride,
+ subpel_x, subpel_y;
uint8_t *ref_frame, *buf_ptr;
const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf;
+ const MV mv_q4 = {
+ mv.row * (1 << (1 - pd->subsampling_y)),
+ mv.col * (1 << (1 - pd->subsampling_x))
+ };
// Get reference frame pointer, width and height.
if (plane == 0) {
@@ -327,10 +321,6 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
x0_16 += scaled_mv.col;
y0_16 += scaled_mv.row;
- // Get reference block bottom right coordinate.
- x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
- y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
-
// Get reference block pointer.
buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
buf_stride = pre_buf->stride;
@@ -339,6 +329,9 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
// width/height is not a multiple of 8 pixels.
if (scaled_mv.col || scaled_mv.row ||
(frame_width & 0x7) || (frame_height & 0x7)) {
+ // Get reference block bottom right coordinate.
+ int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
+ int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
int x_pad = 0, y_pad = 0;
if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) {
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index bf738c28b..dccd60938 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -39,18 +39,18 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
enum mv_precision precision,
int x, int y);
-static int scaled_buffer_offset(int x_offset, int y_offset, int stride,
- const struct scale_factors *sf) {
+static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
+ const struct scale_factors *sf) {
const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset;
return y * stride + x;
}
-static void setup_pred_plane(struct buf_2d *dst,
- uint8_t *src, int stride,
- int mi_row, int mi_col,
- const struct scale_factors *scale,
- int subsampling_x, int subsampling_y) {
+static INLINE void setup_pred_plane(struct buf_2d *dst,
+ uint8_t *src, int stride,
+ int mi_row, int mi_col,
+ const struct scale_factors *scale,
+ int subsampling_x, int subsampling_y) {
const int x = (MI_SIZE * mi_col) >> subsampling_x;
const int y = (MI_SIZE * mi_row) >> subsampling_y;
dst->buf = src + scaled_buffer_offset(x, y, stride, scale);
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 04a40bd58..7bdd11eb0 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -707,14 +707,14 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
fi
# fdct functions
-prototype void vp9_short_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht4x4 sse2 avx2
+prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht4x4 sse2 avx2
-prototype void vp9_short_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht8x8 sse2 avx2
+prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht8x8 sse2 avx2
-prototype void vp9_short_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht16x16 sse2 avx2
+prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht16x16 sse2 avx2
prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
specialize vp9_fwht4x4
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 90b0d0bf9..a9dda1889 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -40,12 +40,12 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h);
-static int vp9_is_valid_scale(const struct scale_factors *sf) {
+static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_INVALID_SCALE &&
sf->y_scale_fp != REF_INVALID_SCALE;
}
-static int vp9_is_scaled(const struct scale_factors *sf) {
+static INLINE int vp9_is_scaled(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_NO_SCALE ||
sf->y_scale_fp != REF_NO_SCALE;
}
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index ee9a4823b..7455abce3 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -11,13 +11,17 @@
#ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
-#ifdef __cplusplus
-extern "C" {
+#ifdef _MSC_VER
+# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
+# include <intrin.h>
+# define USE_MSC_INTRIN
+# endif
+# include <math.h>
+# define snprintf _snprintf
#endif
-#ifdef _MSC_VER
-#include <math.h>
-#define snprintf _snprintf
+#ifdef __cplusplus
+extern "C" {
#endif
#include "./vpx_config.h"
@@ -30,7 +34,7 @@ void vpx_reset_mmx_state(void);
#if defined(_MSC_VER) && _MSC_VER < 1800
// round is not defined in MSVC before VS2013.
-static int round(double x) {
+static INLINE int round(double x) {
if (x < 0)
return (int)ceil(x - 0.5);
else
@@ -44,9 +48,7 @@ static int round(double x) {
static INLINE int get_msb(unsigned int n) {
return 31 ^ __builtin_clz(n);
}
-#elif defined(_MSC_VER) && _MSC_VER > 1310 && \
- (defined(_M_X64) || defined(_M_IX86))
-#include <intrin.h>
+#elif defined(USE_MSC_INTRIN)
#pragma intrinsic(_BitScanReverse)
static INLINE int get_msb(unsigned int n) {
@@ -54,6 +56,7 @@ static INLINE int get_msb(unsigned int n) {
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}
+#undef USE_MSC_INTRIN
#else
// Returns (int)floor(log2(n)). n must be > 0.
static INLINE int get_msb(unsigned int n) {
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 8ab5fb1bc..60018ea86 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -16,15 +16,15 @@
typedef void filter8_1dfunction (
const unsigned char *src_ptr,
- const unsigned int src_pitch,
+ const ptrdiff_t src_pitch,
unsigned char *output_ptr,
- unsigned int out_pitch,
+ ptrdiff_t out_pitch,
unsigned int output_height,
const short *filter
);
-#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt1, opt2) \
-void vp9_convolve8_##name##_##opt1(const uint8_t *src, ptrdiff_t src_stride, \
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+ void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \
@@ -32,50 +32,68 @@ void vp9_convolve8_##name##_##opt1(const uint8_t *src, ptrdiff_t src_stride, \
if (step_q4 == 16 && filter[3] != 128) { \
if (filter[0] || filter[1] || filter[2]) { \
while (w >= 16) { \
- vp9_filter_block1d16_##dir##8_##avg##opt1(src_start, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 16; \
dst += 16; \
w -= 16; \
} \
while (w >= 8) { \
- vp9_filter_block1d8_##dir##8_##avg##opt1(src_start, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 8; \
dst += 8; \
w -= 8; \
} \
while (w >= 4) { \
- vp9_filter_block1d4_##dir##8_##avg##opt1(src_start, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 4; \
dst += 4; \
w -= 4; \
} \
} else { \
while (w >= 16) { \
- vp9_filter_block1d16_##dir##2_##avg##opt2(src, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d16_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 16; \
dst += 16; \
w -= 16; \
} \
while (w >= 8) { \
- vp9_filter_block1d8_##dir##2_##avg##opt2(src, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d8_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 8; \
dst += 8; \
w -= 8; \
} \
while (w >= 4) { \
- vp9_filter_block1d4_##dir##2_##avg##opt2(src, src_stride, \
- dst, dst_stride, \
- h, filter); \
+ vp9_filter_block1d4_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
src += 4; \
dst += 4; \
w -= 4; \
@@ -136,18 +154,18 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_v2_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_sse2;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
@@ -169,11 +187,11 @@ filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
// const int16_t *filter_x, int x_step_q4,
// const int16_t *filter_y, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3, sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3, sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3, sse2);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
- ssse3, sse2);
+ ssse3);
// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
@@ -236,11 +254,10 @@ filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
// const int16_t *filter_x, int x_step_q4,
// const int16_t *filter_y, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2, sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2, sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2, sse2);
-FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2,
- sse2);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm b/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
new file mode 100644
index 000000000..b5e18fe6d
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
@@ -0,0 +1,422 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%macro GET_PARAM_4 0
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040
+
+ movdqa xmm3, [rdx] ;load filters
+ psrldq xmm3, 6
+ packsswb xmm3, xmm3
+ pshuflw xmm3, xmm3, 0b ;k3_k4
+
+ movq xmm2, rcx ;rounding
+ pshufd xmm2, xmm2, 0
+
+ movsxd rax, DWORD PTR arg(1) ;pixels_per_line
+ movsxd rdx, DWORD PTR arg(3) ;out_pitch
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+%endm
+
+%macro APPLY_FILTER_4 1
+ punpcklbw xmm0, xmm1
+ pmaddubsw xmm0, xmm3
+
+ paddsw xmm0, xmm2 ;rounding
+ psraw xmm0, 7 ;shift
+ packuswb xmm0, xmm0 ;pack to byte
+
+%if %1
+ movd xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+ movd [rdi], xmm0
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+ dec rcx
+%endm
+
+%macro GET_PARAM 0
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040
+
+ movdqa xmm7, [rdx] ;load filters
+ psrldq xmm7, 6
+ packsswb xmm7, xmm7
+ pshuflw xmm7, xmm7, 0b ;k3_k4
+ punpcklwd xmm7, xmm7
+
+ movq xmm6, rcx ;rounding
+ pshufd xmm6, xmm6, 0
+
+ movsxd rax, DWORD PTR arg(1) ;pixels_per_line
+ movsxd rdx, DWORD PTR arg(3) ;out_pitch
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+%endm
+
+%macro APPLY_FILTER_8 1
+ punpcklbw xmm0, xmm1
+ pmaddubsw xmm0, xmm7
+
+ paddsw xmm0, xmm6 ;rounding
+ psraw xmm0, 7 ;shift
+ packuswb xmm0, xmm0 ;pack back to byte
+
+%if %1
+ movq xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+ movq [rdi], xmm0 ;store the result
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+ dec rcx
+%endm
+
+%macro APPLY_FILTER_16 1
+ punpcklbw xmm0, xmm1
+ punpckhbw xmm2, xmm1
+ pmaddubsw xmm0, xmm7
+ pmaddubsw xmm2, xmm7
+
+ paddsw xmm0, xmm6 ;rounding
+ paddsw xmm2, xmm6
+ psraw xmm0, 7 ;shift
+ psraw xmm2, 7
+ packuswb xmm0, xmm2 ;pack back to byte
+
+%if %1
+ movdqu xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+ movdqu [rdi], xmm0 ;store the result
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+ dec rcx
+%endm
+
+global sym(vp9_filter_block1d4_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movd xmm0, [rsi] ;load src
+ movd xmm1, [rsi + rax]
+
+ APPLY_FILTER_4 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movq xmm0, [rsi] ;0
+ movq xmm1, [rsi + rax] ;1
+
+ APPLY_FILTER_8 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;0
+ movdqu xmm1, [rsi + rax] ;1
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movd xmm0, [rsi] ;load src
+ movd xmm1, [rsi + rax]
+
+ APPLY_FILTER_4 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movq xmm0, [rsi] ;0
+ movq xmm1, [rsi + rax] ;1
+
+ APPLY_FILTER_8 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;0
+ movdqu xmm1, [rsi + rax] ;1
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_4 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_8 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h2_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqu xmm1, [rsi + 1]
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 0
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM_4
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_4 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+
+ APPLY_FILTER_8 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h2_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src
+ movdqu xmm1, [rsi + 1]
+ movdqa xmm2, xmm0
+
+ APPLY_FILTER_16 1
+ jnz .loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret