summaryrefslogtreecommitdiff
path: root/vp9/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/common')
-rw-r--r-- vp9/common/arm/neon/vp9_reconintra_neon.asm 73
-rw-r--r-- vp9/common/vp9_alloccommon.c 6
-rw-r--r-- vp9/common/vp9_entropy.h 4
-rw-r--r-- vp9/common/vp9_mvref_common.h 6
-rw-r--r-- vp9/common/vp9_rtcd_defs.sh 2
5 files changed, 82 insertions, 9 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 98619bb30..71bf24c9f 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -17,6 +17,7 @@
EXPORT |vp9_h_predictor_16x16_neon|
EXPORT |vp9_h_predictor_32x32_neon|
EXPORT |vp9_tm_predictor_4x4_neon|
+ EXPORT |vp9_tm_predictor_8x8_neon|
ARM
REQUIRE8
PRESERVE8
@@ -328,8 +329,78 @@ loop_h
vqshrun.s16 d1, q2, #0
vst1.32 {d0[0]}, [r0], r1
vst1.32 {d1[0]}, [r0], r1
-
bx lr
ENDP ; |vp9_tm_predictor_4x4_neon|
+;void vp9_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
+; const uint8_t *above,
+; const uint8_t *left)
+; r0 uint8_t *dst
+; r1 ptrdiff_t y_stride
+; r2 const uint8_t *above
+; r3 const uint8_t *left
+
+|vp9_tm_predictor_8x8_neon| PROC
+ ; Load ytop_left = above[-1] and broadcast it to all 8 byte lanes of d0
+ sub r12, r2, #1
+ ldrb r12, [r12]
+ vdup.u8 d0, r12
+
+ ; Load the 8 pixels of the above row into d2
+ vld1.64 {d2}, [r2]
+
+ ; Compute above - ytop_left, widened to 16-bit lanes in q3
+ vsubl.u8 q3, d2, d0
+
+ ; Per row: broadcast left[row], add (above - ytop_left), saturate to u8, store
+ ; 1st row and 2nd row (r2 is reused as scratch from here on)
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+
+ ; 3rd row and 4th row
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+
+ ; 5th row and 6th row
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+
+ ; 7th row and 8th row
+ ldrb r12, [r3], #1
+ ldrb r2, [r3], #1
+ vdup.u16 q1, r12
+ vdup.u16 q2, r2
+ vadd.s16 q1, q1, q3
+ vadd.s16 q2, q2, q3
+ vqshrun.s16 d0, q1, #0
+ vqshrun.s16 d1, q2, #0
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+ bx lr
+ ENDP ; |vp9_tm_predictor_8x8_neon|
+
END
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index ff20553d6..ca42090c1 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -85,7 +85,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
int mi_size;
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
- VP9BORDERINPIXELS, NULL, NULL, NULL) < 0)
+ VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
goto fail;
set_mb_mi(cm, aligned_width, aligned_height);
@@ -154,7 +154,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
for (i = 0; i < cm->fb_count; i++) {
cm->fb_idx_ref_cnt[i] = 0;
if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
- VP9BORDERINPIXELS) < 0)
+ VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
}
@@ -167,7 +167,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
}
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
- VP9BORDERINPIXELS) < 0)
+ VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
set_mb_mi(cm, aligned_width, aligned_height);
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index f43a85f14..ba162fd20 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -112,8 +112,8 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
// This macro is currently unused but may be used by certain implementations
#define MAXBAND_INDEX 21
-extern const uint8_t vp9_coefband_trans_8x8plus[1024];
-extern const uint8_t vp9_coefband_trans_4x4[16];
+extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]);
+extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]);
static const uint8_t *get_band_translate(TX_SIZE tx_size) {
return tx_size == TX_4X4 ? vp9_coefband_trans_4x4
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index 06adbabaa..cd89390d5 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -32,8 +32,10 @@ static INLINE void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
mv_ref_list, -1, mi_row, mi_col);
}
-#define LEFT_TOP_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
-#define RIGHT_BOTTOM_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
+#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS \
+ - VP9_INTERP_EXTEND) << 3)
+#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS \
+ - VP9_INTERP_EXTEND) << 3)
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index c2468c1c4..9105e9684 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -96,7 +96,7 @@ prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint
specialize vp9_v_predictor_8x8 $sse_x86inc neon
prototype void vp9_tm_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_8x8 $sse2_x86inc dspr2
+specialize vp9_tm_predictor_8x8 $sse2_x86inc neon dspr2
prototype void vp9_dc_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_dc_predictor_8x8 $sse_x86inc dspr2