Diffstat (limited to 'vp9')
 vp9/common/arm/neon/vp9_idct16x16_neon.c             |  25
 vp9/common/arm/neon/vp9_idct32x32_neon.c             |  47
 vp9/common/arm/neon/vp9_save_reg_neon.asm            |  36
 vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm |  28
 vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm | 570
 vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm   |   8
 vp9/common/vp9_alloccommon.c                         |   5
 vp9/common/vp9_blockd.h                              |   4
 vp9/common/vp9_convolve.c                            |   2
 vp9/common/vp9_entropy.c                             |  25
 vp9/common/vp9_entropy.h                             |  11
 vp9/common/vp9_entropymode.h                         |   1
 vp9/common/vp9_findnearmv.c                          |   2
 vp9/common/vp9_findnearmv.h                          |  20
 vp9/common/vp9_idct.c                                |  14
 vp9/common/vp9_idct.h                                |   3
 vp9/common/vp9_reconinter.c                          |   4
 vp9/common/vp9_rtcd_defs.sh                          |  16
 vp9/common/vp9_subpelvar.h                           | 145
 vp9/common/x86/vp9_idct_intrin_sse2.c                |   4
 vp9/common/x86/vp9_intrapred_ssse3.asm               | 154
 vp9/decoder/vp9_decodemv.c                           |  38
 vp9/decoder/vp9_decodframe.c                         |  12
 vp9/decoder/vp9_detokenize.c                         |   5
 vp9/decoder/vp9_dsubexp.c                            |   1
 vp9/decoder/vp9_idct_blk.c                           |   6
 vp9/decoder/vp9_idct_blk.h                           |  11
 vp9/decoder/vp9_onyxd.h                              |   6
 vp9/decoder/vp9_onyxd_if.c                           |  15
 vp9/decoder/vp9_onyxd_int.h                          |   2
 vp9/decoder/vp9_read_bit_buffer.h                    |   6
 vp9/decoder/vp9_thread.h                             |   4
 vp9/encoder/vp9_bitstream.c                          |  32
 vp9/encoder/vp9_block.h                              |   2
 vp9/encoder/vp9_dct.c                                |   8
 vp9/encoder/vp9_encodeframe.c                        |  49
 vp9/encoder/vp9_encodemb.c                           |  47
 vp9/encoder/vp9_encodemv.c                           |  52
 vp9/encoder/vp9_encodemv.h                           |   4
 vp9/encoder/vp9_firstpass.c                          |  11
 vp9/encoder/vp9_mbgraph.c                            |  35
 vp9/encoder/vp9_mcomp.c                              | 492
 vp9/encoder/vp9_mcomp.h                              |  31
 vp9/encoder/vp9_onyx_int.h                           |  26
 vp9/encoder/vp9_quantize.c                           |   4
 vp9/encoder/vp9_quantize.h                           |  10
 vp9/encoder/vp9_ratectrl.c                           |  36
 vp9/encoder/vp9_ratectrl.h                           |   4
 vp9/encoder/vp9_rdopt.c                              | 327
 vp9/encoder/vp9_rdopt.h                              |   5
 vp9/encoder/vp9_segmentation.c                       |   2
 vp9/encoder/vp9_temporal_filter.c                    |   8
 vp9/encoder/vp9_tokenize.c                           |   3
 vp9/encoder/vp9_variance_c.c                         | 147
 vp9/vp9_common.mk                                    |   3
 vp9/vp9_cx_iface.c                                   |   2
 56 files changed, 1443 insertions(+), 1127 deletions(-)
diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c
index 3e3e400a4..fb7b5cdc4 100644
--- a/vp9/common/arm/neon/vp9_idct16x16_neon.c
+++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c
@@ -20,26 +20,28 @@ extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
+extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,
int16_t *output,
int output_stride);
-extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
+extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-extern void save_neon_registers();
-extern void restore_neon_registers();
+/* For ARM NEON, d8-d15 are callee-saved registers and need to be saved. */
+extern void vp9_push_neon(int64_t *store);
+extern void vp9_pop_neon(int64_t *store);
void vp9_short_idct16x16_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
+ int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
- save_neon_registers();
+ vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and saves the
@@ -102,28 +104,29 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
dest_stride);
// restore d8-d15 register values.
- restore_neon_registers();
+ vp9_pop_neon(store_reg);
return;
}
-void vp9_short_idct10_16x16_add_neon(int16_t *input,
+void vp9_short_idct16x16_10_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
+ int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
- save_neon_registers();
+ vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and saves the
// stage 6 result in pass1_output.
- vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
+ vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_short_idct10_16x16_add_neon_pass2(input+1,
+ vp9_short_idct16x16_10_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
@@ -163,7 +166,7 @@ void vp9_short_idct10_16x16_add_neon(int16_t *input,
dest_stride);
// restore d8-d15 register values.
- restore_neon_registers();
+ vp9_pop_neon(store_reg);
return;
}
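
With the spill helpers renamed and given an explicit buffer argument, callers now own the storage for d8-d15. A minimal usage sketch of the convention, assuming only what the diff shows (eight 64-bit slots, one per callee-saved d-register):

#include <stdint.h>

/* Defined in vp9/common/arm/neon/vp9_save_reg_neon.asm. */
extern void vp9_push_neon(int64_t *store);
extern void vp9_pop_neon(int64_t *store);

static void neon_user_sketch(void) {
  int64_t store_reg[8];      /* one slot per callee-saved register d8-d15 */
  vp9_push_neon(store_reg);  /* spill d8-d15 before NEON code clobbers them */
  /* ... NEON code free to use q4-q7 (aliases of d8-d15) ... */
  vp9_pop_neon(store_reg);   /* restore d8-d15 before returning */
}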
diff --git a/vp9/common/arm/neon/vp9_idct32x32_neon.c b/vp9/common/arm/neon/vp9_idct32x32_neon.c
deleted file mode 100644
index ceecd6fbd..000000000
--- a/vp9/common/arm/neon/vp9_idct32x32_neon.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vp9/common/vp9_common.h"
-
-// defined in vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
-extern void idct32_transpose_and_transform(int16_t *transpose_buffer,
- int16_t *output, int16_t *input);
-extern void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
-
-
-// defined in vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
-extern void save_neon_registers();
-extern void restore_neon_registers();
-
-void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest,
- int dest_stride) {
- // TODO(cd): move the creation of these buffers within the ASM file
- // internal buffer used to transpose 8 lines into before transforming them
- int16_t transpose_buffer[32 * 8];
- // results of the first pass (transpose and transform rows)
- int16_t pass1[32 * 32];
- // results of the second pass (transpose and transform columns)
- int16_t pass2[32 * 32];
-
- // save register we need to preserve
- save_neon_registers();
- // process rows
- idct32_transpose_and_transform(transpose_buffer, pass1, input);
- // process columns
- // TODO(cd): do these two steps/passes within the ASM file
- idct32_transpose_and_transform(transpose_buffer, pass2, pass1);
- // combine and add to dest
- // TODO(cd): integrate this within the last storage step of the second pass
- idct32_combine_add(dest, pass2, dest_stride);
- // restore register we need to preserve
- restore_neon_registers();
-}
-
-// TODO(cd): Eliminate this file altogether when everything is in ASM file
diff --git a/vp9/common/arm/neon/vp9_save_reg_neon.asm b/vp9/common/arm/neon/vp9_save_reg_neon.asm
new file mode 100644
index 000000000..71c3e7077
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_save_reg_neon.asm
@@ -0,0 +1,36 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp9_push_neon|
+ EXPORT |vp9_pop_neon|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+|vp9_push_neon| PROC
+ vst1.i64 {d8, d9, d10, d11}, [r0]!
+ vst1.i64 {d12, d13, d14, d15}, [r0]!
+ bx lr
+
+ ENDP
+
+|vp9_pop_neon| PROC
+ vld1.i64 {d8, d9, d10, d11}, [r0]!
+ vld1.i64 {d12, d13, d14, d15}, [r0]!
+ bx lr
+
+ ENDP
+
+ END
+
diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
index 7464e800f..df2a0526c 100644
--- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
@@ -10,10 +10,8 @@
EXPORT |vp9_short_idct16x16_add_neon_pass1|
EXPORT |vp9_short_idct16x16_add_neon_pass2|
- EXPORT |vp9_short_idct10_16x16_add_neon_pass1|
- EXPORT |vp9_short_idct10_16x16_add_neon_pass2|
- EXPORT |save_neon_registers|
- EXPORT |restore_neon_registers|
+ EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
+ EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
ARM
REQUIRE8
PRESERVE8
@@ -788,7 +786,7 @@ end_idct16x16_pass2
bx lr
ENDP ; |vp9_short_idct16x16_add_neon_pass2|
-;void |vp9_short_idct10_16x16_add_neon_pass1|(int16_t *input,
+;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
@@ -798,7 +796,7 @@ end_idct16x16_pass2
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct10_16x16_add_neon_pass1| PROC
+|vp9_short_idct16x16_10_add_neon_pass1| PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
@@ -907,9 +905,9 @@ end_idct16x16_pass2
vst1.64 {d31}, [r1], r2
bx lr
- ENDP ; |vp9_short_idct10_16x16_add_neon_pass1|
+ ENDP ; |vp9_short_idct16x16_10_add_neon_pass1|
-;void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
+;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
@@ -926,7 +924,7 @@ end_idct16x16_pass2
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct10_16x16_add_neon_pass2| PROC
+|vp9_short_idct16x16_10_add_neon_pass2| PROC
push {r3-r9}
; TODO(hkuang): Find a better way to load the elements.
@@ -1177,15 +1175,5 @@ end_idct16x16_pass2
end_idct10_16x16_pass2
pop {r3-r9}
bx lr
- ENDP ; |vp9_short_idct10_16x16_add_neon_pass2|
-;void |save_neon_registers|()
-|save_neon_registers| PROC
- vpush {d8-d15}
- bx lr
- ENDP ; |save_registers|
-;void |restore_neon_registers|()
-|restore_neon_registers| PROC
- vpop {d8-d15}
- bx lr
- ENDP ; |restore_registers|
+ ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
END
diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
index 3a0ff608b..b5a284b5a 100644
--- a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
@@ -43,8 +43,7 @@ cospi_30_64 EQU 1606
cospi_31_64 EQU 804
- EXPORT |idct32_transpose_and_transform|
- EXPORT |idct32_combine_add|
+ EXPORT |vp9_short_idct32x32_add_neon|
ARM
REQUIRE8
PRESERVE8
@@ -100,6 +99,142 @@ cospi_31_64 EQU 804
vst1.16 {$reg2}, [r1]
MEND
; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q6-q9 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_CENTER_RESULTS
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d8}, [r10], r2
+ vld1.s16 {d11}, [r9], r11
+ vld1.s16 {d9}, [r10]
+ vld1.s16 {d10}, [r9]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q8, q8, #6
+ vrshr.s16 q9, q9, #6
+ vrshr.s16 q6, q6, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q7, q7, d9
+ vaddw.u8 q8, q8, d10
+ vaddw.u8 q9, q9, d11
+ vaddw.u8 q6, q6, d8
+ ; clip pixel
+ vqmovun.s16 d9, q7
+ vqmovun.s16 d10, q8
+ vqmovun.s16 d11, q9
+ vqmovun.s16 d8, q6
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d9}, [r10], r11
+ vst1.16 {d10}, [r9], r2
+ vst1.16 {d8}, [r10]
+ vst1.16 {d11}, [r9]
+ ; update pointers (by dest_stride * 2)
+ sub r9, r9, r2, lsl #1
+ add r10, r10, r2, lsl #1
+ MEND
+ ; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q6-q9 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_CENTER_RESULTS_LAST
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d8}, [r10], r2
+ vld1.s16 {d11}, [r9], r11
+ vld1.s16 {d9}, [r10]
+ vld1.s16 {d10}, [r9]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q8, q8, #6
+ vrshr.s16 q9, q9, #6
+ vrshr.s16 q6, q6, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q7, q7, d9
+ vaddw.u8 q8, q8, d10
+ vaddw.u8 q9, q9, d11
+ vaddw.u8 q6, q6, d8
+ ; clip pixel
+ vqmovun.s16 d9, q7
+ vqmovun.s16 d10, q8
+ vqmovun.s16 d11, q9
+ vqmovun.s16 d8, q6
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d9}, [r10], r11
+ vst1.16 {d10}, [r9], r2
+ vst1.16 {d8}, [r10]!
+ vst1.16 {d11}, [r9]!
+ ; update pointers (by dest_stride * 2)
+ sub r9, r9, r2, lsl #1
+ add r10, r10, r2, lsl #1
+ MEND
+ ; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q4-q7 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_EXTREME_RESULTS
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d4}, [r7], r2
+ vld1.s16 {d7}, [r6], r11
+ vld1.s16 {d5}, [r7]
+ vld1.s16 {d6}, [r6]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q5, q5, #6
+ vrshr.s16 q6, q6, #6
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q4, q4, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q5, q5, d5
+ vaddw.u8 q6, q6, d6
+ vaddw.u8 q7, q7, d7
+ vaddw.u8 q4, q4, d4
+ ; clip pixel
+ vqmovun.s16 d5, q5
+ vqmovun.s16 d6, q6
+ vqmovun.s16 d7, q7
+ vqmovun.s16 d4, q4
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d5}, [r7], r11
+ vst1.16 {d6}, [r6], r2
+ vst1.16 {d7}, [r6]
+ vst1.16 {d4}, [r7]
+ ; update pointers (by dest_stride * 2)
+ sub r6, r6, r2, lsl #1
+ add r7, r7, r2, lsl #1
+ MEND
+ ; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q4-q7 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_EXTREME_RESULTS_LAST
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d4}, [r7], r2
+ vld1.s16 {d7}, [r6], r11
+ vld1.s16 {d5}, [r7]
+ vld1.s16 {d6}, [r6]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q5, q5, #6
+ vrshr.s16 q6, q6, #6
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q4, q4, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q5, q5, d5
+ vaddw.u8 q6, q6, d6
+ vaddw.u8 q7, q7, d7
+ vaddw.u8 q4, q4, d4
+ ; clip pixel
+ vqmovun.s16 d5, q5
+ vqmovun.s16 d6, q6
+ vqmovun.s16 d7, q7
+ vqmovun.s16 d4, q4
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d5}, [r7], r11
+ vst1.16 {d6}, [r6], r2
+ vst1.16 {d7}, [r6]!
+ vst1.16 {d4}, [r7]!
+ ; update pointers (by dest_stride * 2)
+ sub r6, r6, r2, lsl #1
+ add r7, r7, r2, lsl #1
+ MEND
+ ; --------------------------------------------------------------------------
; Touches q8-q12, q15 (q13-q14 are preserved)
; valid output registers are anything but q8-q11
MACRO
@@ -110,12 +245,12 @@ cospi_31_64 EQU 804
; additions/subtractions before the multiplies.
; generate the constants
; generate scalar constants
- mov r3, #$first_constant & 0xFF00
- add r3, #$first_constant & 0x00FF
+ mov r8, #$first_constant & 0xFF00
mov r12, #$second_constant & 0xFF00
+ add r8, #$first_constant & 0x00FF
add r12, #$second_constant & 0x00FF
; generate vector constants
- vdup.16 d30, r3
+ vdup.16 d30, r8
vdup.16 d31, r12
; (used) two for inputs (regA-regD), one for constants (q15)
; do some multiplications (ordered for maximum latency hiding)
@@ -153,15 +288,22 @@ cospi_31_64 EQU 804
MEND
; --------------------------------------------------------------------------
-;void idct32_transpose_and_transform(int16_t *transpose_buffer, int16_t *output, int16_t *input);
+;void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
;
-; r0 int16_t *transpose_buffer
-; r1 int16_t *output
-; r2 int16_t *input)
-; TODO(cd): have more logical parameter ordering but this issue will disappear
-; when functions are combined.
+; r0 int16_t *input,
+; r1 uint8_t *dest,
+; r2 int dest_stride)
+; loop counters
+; r4 bands loop counter
+; r5 pass loop counter
+; r8 transpose loop counter
+; combine-add pointers
+; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...)
+; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...)
+; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...)
+; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...)
-|idct32_transpose_and_transform| PROC
+|vp9_short_idct32x32_add_neon| PROC
; This function does one pass of idct32x32 transform.
;
; This is done by transposing the input and then doing a 1d transform on
@@ -171,43 +313,73 @@ cospi_31_64 EQU 804
; The 1d transform is done by looping over bands of eight columns (the
; idct32_bands loop). For each band, the transform input transposition
; is done on demand, one band of four 8x8 matrices at a time. The four
- ; matrices are trsnposed by pairs (the idct32_transpose_pair loop).
- push {r4}
- mov r4, #0 ; initialize bands loop counter
+ ; matrices are transposed by pairs (the idct32_transpose_pair loop).
+ push {r4-r11}
+ vpush {d8-d15}
+ ; stack operation
+ ; internal buffer used to transpose 8 lines into before transforming them
+ ; int16_t transpose_buffer[32 * 8];
+ ; at sp + [4096, 4607]
+ ; results of the first pass (transpose and transform rows)
+ ; int16_t pass1[32 * 32];
+ ; at sp + [0, 2047]
+ ; results of the second pass (transpose and transform columns)
+ ; int16_t pass2[32 * 32];
+ ; at sp + [2048, 4095]
+ sub sp, sp, #512+2048+2048
+
+ ; r6 = dest + 31 * dest_stride
+ ; r7 = dest + 0 * dest_stride
+ ; r9 = dest + 15 * dest_stride
+ ; r10 = dest + 16 * dest_stride
+ rsb r6, r2, r2, lsl #5
+ rsb r9, r2, r2, lsl #4
+ add r10, r1, r2, lsl #4
+ mov r7, r1
+ add r6, r6, r1
+ add r9, r9, r1
+ ; r11 = -dest_stride
+ neg r11, r2
+ ; r3 = input
+ mov r3, r0
+ ; parameters for first pass
+ ; r0 = transpose_buffer[32 * 8]
+ add r0, sp, #4096
+ ; r1 = pass1[32 * 32]
+ mov r1, sp
+
+ mov r5, #0 ; initialize pass loop counter
+idct32_pass_loop
+ mov r4, #4 ; initialize bands loop counter
idct32_bands_loop
- ; TODO(cd) get rid of these push/pop by properly adjusting register
- ; content at end of loop
- push {r0}
- push {r1}
- push {r2}
- mov r3, #0 ; initialize transpose loop counter
+ mov r8, #2 ; initialize transpose loop counter
idct32_transpose_pair_loop
; Load two horizontally consecutive 8x8 16bit data matrices. The first one
; into q0-q7 and the second one into q8-q15. There is a stride of 64,
; adjusted to 32 because of the two post-increments.
- vld1.s16 {q8}, [r2]!
- vld1.s16 {q0}, [r2]!
- add r2, #32
- vld1.s16 {q9}, [r2]!
- vld1.s16 {q1}, [r2]!
- add r2, #32
- vld1.s16 {q10}, [r2]!
- vld1.s16 {q2}, [r2]!
- add r2, #32
- vld1.s16 {q11}, [r2]!
- vld1.s16 {q3}, [r2]!
- add r2, #32
- vld1.s16 {q12}, [r2]!
- vld1.s16 {q4}, [r2]!
- add r2, #32
- vld1.s16 {q13}, [r2]!
- vld1.s16 {q5}, [r2]!
- add r2, #32
- vld1.s16 {q14}, [r2]!
- vld1.s16 {q6}, [r2]!
- add r2, #32
- vld1.s16 {q15}, [r2]!
- vld1.s16 {q7}, [r2]!
+ vld1.s16 {q8}, [r3]!
+ vld1.s16 {q0}, [r3]!
+ add r3, #32
+ vld1.s16 {q9}, [r3]!
+ vld1.s16 {q1}, [r3]!
+ add r3, #32
+ vld1.s16 {q10}, [r3]!
+ vld1.s16 {q2}, [r3]!
+ add r3, #32
+ vld1.s16 {q11}, [r3]!
+ vld1.s16 {q3}, [r3]!
+ add r3, #32
+ vld1.s16 {q12}, [r3]!
+ vld1.s16 {q4}, [r3]!
+ add r3, #32
+ vld1.s16 {q13}, [r3]!
+ vld1.s16 {q5}, [r3]!
+ add r3, #32
+ vld1.s16 {q14}, [r3]!
+ vld1.s16 {q6}, [r3]!
+ add r3, #32
+ vld1.s16 {q15}, [r3]!
+ vld1.s16 {q7}, [r3]!
; Transpose the two 8x8 16bit data matrices.
vswp d17, d24
@@ -255,11 +427,13 @@ idct32_transpose_pair_loop
vst1.16 {q7}, [r0]!
; increment pointers by adjusted stride (not necessary for r0/out)
- sub r2, r2, #8*32*2-32-16*2
+ ; go back by 7*32 for the seven lines advanced fully by the reads and adds
+ ; go back by 32 for the eighth line, which was only read
+ ; advance by 16*2 to go to the next pair
+ sub r3, r3, #7*32*2 + 32 - 16*2
; transpose pair loop processing
- add r3, r3, #1
- cmp r3, #1
- ble idct32_transpose_pair_loop
+ subs r8, r8, #1
+ bne idct32_transpose_pair_loop
; restore r0/input to its original value
sub r0, r0, #32*8*2
@@ -815,21 +989,26 @@ idct32_transpose_pair_loop
vadd.s16 q9, q5, q0
vsub.s16 q6, q5, q0
vsub.s16 q7, q4, q1
- STORE_IN_OUTPUT 17, 17, 16, q7, q6
- STORE_IN_OUTPUT 16, 15, 14, q9, q8
+
+ cmp r5, #0
+ bgt idct32_bands_end_2nd_pass
+
+idct32_bands_end_1st_pass
+ STORE_IN_OUTPUT 17, 16, 17, q6, q7
+ STORE_IN_OUTPUT 17, 14, 15, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
;output[30 * 32] = step1b[1][i] - step1b[30][i];
;output[31 * 32] = step1b[0][i] - step1b[31][i];
- LOAD_FROM_OUTPUT 14, 30, 31, q0, q1
+ LOAD_FROM_OUTPUT 15, 30, 31, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 31, 31, 30, q7, q6
- STORE_IN_OUTPUT 30, 0, 1, q4, q5
+ STORE_IN_OUTPUT 31, 30, 31, q6, q7
+ STORE_IN_OUTPUT 31, 0, 1, q4, q5
; --------------------------------------------------------------------------
; part of stage 7
;step1[2] = step1b[2][i] + step1b[13][i];
@@ -848,25 +1027,25 @@ idct32_transpose_pair_loop
;output[18 * 32] = step1b[13][i] - step1b[18][i];
;output[19 * 32] = step1b[12][i] - step1b[19][i];
LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
- vadd.s16 q6, q4, q1
- vadd.s16 q7, q5, q0
- vsub.s16 q8, q5, q0
- vsub.s16 q9, q4, q1
- STORE_IN_OUTPUT 19, 19, 18, q9, q8
- STORE_IN_OUTPUT 18, 13, 12, q7, q6
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_IN_OUTPUT 19, 18, 19, q6, q7
+ STORE_IN_OUTPUT 19, 12, 13, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
;output[28 * 32] = step1b[3][i] - step1b[28][i];
;output[29 * 32] = step1b[2][i] - step1b[29][i];
- LOAD_FROM_OUTPUT 12, 28, 29, q0, q1
+ LOAD_FROM_OUTPUT 13, 28, 29, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 29, 29, 28, q7, q6
- STORE_IN_OUTPUT 28, 2, 3, q4, q5
+ STORE_IN_OUTPUT 29, 28, 29, q6, q7
+ STORE_IN_OUTPUT 29, 2, 3, q4, q5
; --------------------------------------------------------------------------
; part of stage 7
;step1[4] = step1b[4][i] + step1b[11][i];
@@ -885,25 +1064,25 @@ idct32_transpose_pair_loop
;output[20 * 32] = step1b[11][i] - step1b[20][i];
;output[21 * 32] = step1b[10][i] - step1b[21][i];
LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
- vadd.s16 q6, q4, q1
- vadd.s16 q7, q5, q0
- vsub.s16 q8, q5, q0
- vsub.s16 q9, q4, q1
- STORE_IN_OUTPUT 21, 21, 20, q9, q8
- STORE_IN_OUTPUT 20, 11, 10, q7, q6
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_IN_OUTPUT 21, 20, 21, q6, q7
+ STORE_IN_OUTPUT 21, 10, 11, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
;output[26 * 32] = step1b[5][i] - step1b[26][i];
;output[27 * 32] = step1b[4][i] - step1b[27][i];
- LOAD_FROM_OUTPUT 10, 26, 27, q0, q1
+ LOAD_FROM_OUTPUT 11, 26, 27, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 27, 27, 26, q7, q6
- STORE_IN_OUTPUT 26, 4, 5, q4, q5
+ STORE_IN_OUTPUT 27, 26, 27, q6, q7
+ STORE_IN_OUTPUT 27, 4, 5, q4, q5
; --------------------------------------------------------------------------
; part of stage 7
;step1[6] = step1b[6][i] + step1b[9][i];
@@ -922,92 +1101,199 @@ idct32_transpose_pair_loop
;output[22 * 32] = step1b[9][i] - step1b[22][i];
;output[23 * 32] = step1b[8][i] - step1b[23][i];
LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
- vadd.s16 q6, q4, q1
- vadd.s16 q7, q5, q0
- vsub.s16 q8, q5, q0
- vsub.s16 q9, q4, q1
- STORE_IN_OUTPUT 23, 23, 22, q9, q8
- STORE_IN_OUTPUT 22, 9, 8, q7, q6
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_IN_OUTPUT 23, 22, 23, q6, q7
+ STORE_IN_OUTPUT 23, 8, 9, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
;output[24 * 32] = step1b[7][i] - step1b[24][i];
;output[25 * 32] = step1b[6][i] - step1b[25][i];
- LOAD_FROM_OUTPUT 8, 24, 25, q0, q1
+ LOAD_FROM_OUTPUT 9, 24, 25, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 25, 25, 24, q7, q6
- STORE_IN_OUTPUT 24, 6, 7, q4, q5
- ; --------------------------------------------------------------------------
+ STORE_IN_OUTPUT 25, 24, 25, q6, q7
+ STORE_IN_OUTPUT 25, 6, 7, q4, q5
- ; TODO(cd) get rid of these push/pop by properly adjusting register
- ; content at end of loop
- pop {r2}
- pop {r1}
- pop {r0}
- add r1, r1, #8*2
- add r2, r2, #8*32*2
+ ; restore r0 by removing the last offset from the last
+ ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
+ sub r0, r0, #24*8*2
+ ; restore r1 by removing the last offset from the last
+ ; operation (STORE_IN_OUTPUT 24, 6, 7) => 7*32*2
+ ; advance by 8 columns => 8*2
+ sub r1, r1, #7*32*2 - 8*2
+ ; advance by 8 lines (8*32*2)
+ ; go back by the two pairs from the loop (32*2)
+ add r3, r3, #8*32*2 - 32*2
; bands loop processing
- add r4, r4, #1
- cmp r4, #3
- ble idct32_bands_loop
+ subs r4, r4, #1
+ bne idct32_bands_loop
- pop {r4}
- bx lr
- ENDP ; |idct32_transpose_and_transform|
+ ; parameters for second pass
+ ; the input of pass2 is the result of pass1. we have to remove the offset
+ ; of 32 columns induced by the above idct32_bands_loop
+ sub r3, r1, #32*2
+ ; r1 = pass2[32 * 32]
+ add r1, sp, #2048
-;void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
-;
-; r0 uint8_t *dest
-; r1 int16_t *out
-; r2 int dest_stride)
-
-|idct32_combine_add| PROC
-
- mov r12, r0 ; dest pointer used for stores
- sub r2, r2, #32 ; adjust the stride (remove the post-increments)
- mov r3, #0 ; initialize loop counter
-
-idct32_combine_add_loop
- ; load out[j * 32 + 0-31]
- vld1.s16 {q12}, [r1]!
- vld1.s16 {q13}, [r1]!
- vld1.s16 {q14}, [r1]!
- vld1.s16 {q15}, [r1]!
- ; load dest[j * dest_stride + 0-31]
- vld1.s16 {q6}, [r0]!
- vld1.s16 {q7}, [r0]!
- ; ROUND_POWER_OF_TWO
- vrshr.s16 q12, q12, #6
- vrshr.s16 q13, q13, #6
- vrshr.s16 q14, q14, #6
- vrshr.s16 q15, q15, #6
- ; add to dest[j * dest_stride + 0-31]
- vaddw.u8 q12, q12, d12
- vaddw.u8 q13, q13, d13
- vaddw.u8 q14, q14, d14
- vaddw.u8 q15, q15, d15
- ; clip pixel
- vqmovun.s16 d12, q12
- vqmovun.s16 d13, q13
- vqmovun.s16 d14, q14
- vqmovun.s16 d15, q15
- ; store back into dest[j * dest_stride + 0-31]
- vst1.16 {q6}, [r12]!
- vst1.16 {q7}, [r12]!
- ; increment pointers by adjusted stride (not necessary for r1/out)
- add r0, r0, r2
- add r12, r12, r2
- ; loop processing
- add r3, r3, #1
- cmp r3, #31
- ble idct32_combine_add_loop
+ ; pass loop processing
+ add r5, r5, #1
+ B idct32_pass_loop
- bx lr
- ENDP ; |idct32_transpose|
+idct32_bands_end_2nd_pass
+ STORE_COMBINE_CENTER_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
+ ;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
+ ;output[30 * 32] = step1b[1][i] - step1b[30][i];
+ ;output[31 * 32] = step1b[0][i] - step1b[31][i];
+ LOAD_FROM_OUTPUT 17, 30, 31, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of stage 7
+ ;step1[2] = step1b[2][i] + step1b[13][i];
+ ;step1[3] = step1b[3][i] + step1b[12][i];
+ ;step1[12] = step1b[3][i] - step1b[12][i];
+ ;step1[13] = step1b[2][i] - step1b[13][i];
+ LOAD_FROM_OUTPUT 31, 12, 13, q0, q1
+ vadd.s16 q2, q10, q1
+ vadd.s16 q3, q11, q0
+ vsub.s16 q4, q11, q0
+ vsub.s16 q5, q10, q1
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[12 * 32] = step1b[12][i] + step1b[19][i];
+ ;output[13 * 32] = step1b[13][i] + step1b[18][i];
+ ;output[18 * 32] = step1b[13][i] - step1b[18][i];
+ ;output[19 * 32] = step1b[12][i] - step1b[19][i];
+ LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_COMBINE_CENTER_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
+ ;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
+ ;output[28 * 32] = step1b[3][i] - step1b[28][i];
+ ;output[29 * 32] = step1b[2][i] - step1b[29][i];
+ LOAD_FROM_OUTPUT 19, 28, 29, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of stage 7
+ ;step1[4] = step1b[4][i] + step1b[11][i];
+ ;step1[5] = step1b[5][i] + step1b[10][i];
+ ;step1[10] = step1b[5][i] - step1b[10][i];
+ ;step1[11] = step1b[4][i] - step1b[11][i];
+ LOAD_FROM_OUTPUT 29, 10, 11, q0, q1
+ vadd.s16 q2, q12, q1
+ vadd.s16 q3, q13, q0
+ vsub.s16 q4, q13, q0
+ vsub.s16 q5, q12, q1
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[10 * 32] = step1b[10][i] + step1b[21][i];
+ ;output[11 * 32] = step1b[11][i] + step1b[20][i];
+ ;output[20 * 32] = step1b[11][i] - step1b[20][i];
+ ;output[21 * 32] = step1b[10][i] - step1b[21][i];
+ LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_COMBINE_CENTER_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
+ ;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
+ ;output[26 * 32] = step1b[5][i] - step1b[26][i];
+ ;output[27 * 32] = step1b[4][i] - step1b[27][i];
+ LOAD_FROM_OUTPUT 21, 26, 27, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of stage 7
+ ;step1[6] = step1b[6][i] + step1b[9][i];
+ ;step1[7] = step1b[7][i] + step1b[8][i];
+ ;step1[8] = step1b[7][i] - step1b[8][i];
+ ;step1[9] = step1b[6][i] - step1b[9][i];
+ LOAD_FROM_OUTPUT 27, 8, 9, q0, q1
+ vadd.s16 q2, q14, q1
+ vadd.s16 q3, q15, q0
+ vsub.s16 q4, q15, q0
+ vsub.s16 q5, q14, q1
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 8 * 32] = step1b[8][i] + step1b[23][i];
+ ;output[ 9 * 32] = step1b[9][i] + step1b[22][i];
+ ;output[22 * 32] = step1b[9][i] - step1b[22][i];
+ ;output[23 * 32] = step1b[8][i] - step1b[23][i];
+ LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_COMBINE_CENTER_RESULTS_LAST
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
+ ;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
+ ;output[24 * 32] = step1b[7][i] - step1b[24][i];
+ ;output[25 * 32] = step1b[6][i] - step1b[25][i];
+ LOAD_FROM_OUTPUT 23, 24, 25, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS_LAST
+ ; --------------------------------------------------------------------------
+ ; restore pointers to their initial indices for next band pass by
+ ; removing/adding dest_stride * 8. The actual increment by eight
+ ; is taken care of within the _LAST macros.
+ add r6, r6, r2, lsl #3
+ add r9, r9, r2, lsl #3
+ sub r7, r7, r2, lsl #3
+ sub r10, r10, r2, lsl #3
+
+ ; restore r0 by removing the last offset from the last
+ ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
+ sub r0, r0, #24*8*2
+ ; restore r1 by removing the last offset from the last
+ ; operation (LOAD_FROM_OUTPUT 23, 24, 25) => 25*32*2
+ ; advance by 8 columns => 8*2
+ sub r1, r1, #25*32*2 - 8*2
+ ; advance by 8 lines (8*32*2)
+ ; go back by the two pairs from the loop (32*2)
+ add r3, r3, #8*32*2 - 32*2
+ ; bands loop processing
+ subs r4, r4, #1
+ bne idct32_bands_loop
+
+ ; stack operation
+ add sp, sp, #512+2048+2048
+ vpop {d8-d15}
+ pop {r4-r11}
+ bx lr
+ ENDP ; |vp9_short_idct32x32_add_neon|
END
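
The STORE_COMBINE_* macros fold the old idct32_combine_add into the second pass: vrshr.s16 #6 rounds, vaddw.u8 adds the widened destination pixels, and vqmovun.s16 saturates back to 8 bits. A scalar C model of that per-pixel computation, a sketch assuming the usual ROUND_POWER_OF_TWO and clip_pixel helpers from vp9/common:

#include <stdint.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

static uint8_t clip_pixel(int val) {
  return (val > 255) ? 255 : (val < 0) ? 0 : (uint8_t)val;
}

/* Round the 32x32 inverse transform output by 2^6, add the current
 * destination pixel, then clip back to the 8-bit range. */
static void combine_add_row(uint8_t *dest, const int16_t *out, int width) {
  int i;
  for (i = 0; i < width; ++i)
    dest[i] = clip_pixel(ROUND_POWER_OF_TWO(out[i], 6) + dest[i]);
}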
diff --git a/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
index a744f59db..c02251a3d 100644
--- a/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
@@ -9,7 +9,7 @@
;
EXPORT |vp9_short_idct8x8_add_neon|
- EXPORT |vp9_short_idct10_8x8_add_neon|
+ EXPORT |vp9_short_idct8x8_10_add_neon|
ARM
REQUIRE8
PRESERVE8
@@ -310,13 +310,13 @@
bx lr
ENDP ; |vp9_short_idct8x8_add_neon|
-;void vp9_short_idct10_8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vp9_short_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_short_idct10_8x8_add_neon| PROC
+|vp9_short_idct8x8_10_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
@@ -514,6 +514,6 @@
vpop {d8-d15}
pop {r4-r9}
bx lr
- ENDP ; |vp9_short_idct10_8x8_add_neon|
+ ENDP ; |vp9_short_idct8x8_10_add_neon|
END
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 864e27e98..5e526a83c 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -170,13 +170,8 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
void vp9_create_common(VP9_COMMON *cm) {
vp9_machine_specific_config(cm);
- vp9_init_mbmode_probs(cm);
-
cm->tx_mode = ONLY_4X4;
cm->comp_pred_mode = HYBRID_PREDICTION;
-
- // Initialize reference frame sign bias structure to defaults
- vpx_memset(cm->ref_frame_sign_bias, 0, sizeof(cm->ref_frame_sign_bias));
}
void vp9_remove_common(VP9_COMMON *cm) {
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index c8d677fb9..9ab2cc31b 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -137,7 +137,7 @@ typedef struct {
TX_SIZE tx_size;
int_mv mv[2]; // for each reference frame used
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
- int_mv best_mv, best_second_mv;
+ int_mv best_mv[2];
uint8_t mode_context[MAX_REF_FRAMES];
@@ -247,7 +247,7 @@ typedef struct macroblockd {
} MACROBLOCKD;
-static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
+static INLINE uint8_t *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
switch (subsize) {
case BLOCK_64X64:
case BLOCK_64X32:
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index abedf6b27..1705402c2 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -282,7 +282,7 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
int r;
for (r = h; r > 0; --r) {
- memcpy(dst, src, w);
+ vpx_memcpy(dst, src, w);
src += src_stride;
dst += dst_stride;
}
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 32d9e0cf7..f171c317f 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -569,31 +569,6 @@ void vp9_init_neighbors() {
vp9_default_scan_32x32_neighbors);
}
-const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) {
- if (scan == vp9_default_scan_4x4) {
- return vp9_default_scan_4x4_neighbors;
- } else if (scan == vp9_row_scan_4x4) {
- return vp9_row_scan_4x4_neighbors;
- } else if (scan == vp9_col_scan_4x4) {
- return vp9_col_scan_4x4_neighbors;
- } else if (scan == vp9_default_scan_8x8) {
- return vp9_default_scan_8x8_neighbors;
- } else if (scan == vp9_row_scan_8x8) {
- return vp9_row_scan_8x8_neighbors;
- } else if (scan == vp9_col_scan_8x8) {
- return vp9_col_scan_8x8_neighbors;
- } else if (scan == vp9_default_scan_16x16) {
- return vp9_default_scan_16x16_neighbors;
- } else if (scan == vp9_row_scan_16x16) {
- return vp9_row_scan_16x16_neighbors;
- } else if (scan == vp9_col_scan_16x16) {
- return vp9_col_scan_16x16_neighbors;
- } else {
- assert(scan == vp9_default_scan_32x32);
- return vp9_default_scan_32x32_neighbors;
- }
-}
-
void vp9_coef_tree_initialize() {
vp9_init_neighbors();
init_bit_trees();
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index c1f2d782b..4ed94815b 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -190,9 +190,6 @@ static INLINE int get_coef_context(const int16_t *neighbors,
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
-const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
-
-
// 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255
// In between probabilities are interpolated linearly
@@ -367,22 +364,24 @@ static int get_entropy_context(TX_SIZE tx_size,
static void get_scan_and_band(const MACROBLOCKD *xd, TX_SIZE tx_size,
PLANE_TYPE type, int block_idx,
const int16_t **scan,
+ const int16_t **scan_nb,
const uint8_t **band_translate) {
switch (tx_size) {
case TX_4X4:
- *scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx));
+ get_scan_nb_4x4(get_tx_type_4x4(type, xd, block_idx), scan, scan_nb);
*band_translate = vp9_coefband_trans_4x4;
break;
case TX_8X8:
- *scan = get_scan_8x8(get_tx_type_8x8(type, xd));
+ get_scan_nb_8x8(get_tx_type_8x8(type, xd), scan, scan_nb);
*band_translate = vp9_coefband_trans_8x8plus;
break;
case TX_16X16:
- *scan = get_scan_16x16(get_tx_type_16x16(type, xd));
+ get_scan_nb_16x16(get_tx_type_16x16(type, xd), scan, scan_nb);
*band_translate = vp9_coefband_trans_8x8plus;
break;
case TX_32X32:
*scan = vp9_default_scan_32x32;
+ *scan_nb = vp9_default_scan_32x32_neighbors;
*band_translate = vp9_coefband_trans_8x8plus;
break;
default:
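
get_scan_and_band now hands back the neighbor table alongside the scan order, which is what made the deleted vp9_get_coef_neighbors_handle pointer-comparison lookup unnecessary. A sketch of the shape the new helpers plausibly take, with the tx_type-to-table mapping assumed from the scan/neighbor pairs named in vp9_entropy.c:

static INLINE void get_scan_nb_4x4(TX_TYPE tx_type,
                                   const int16_t **scan,
                                   const int16_t **scan_nb) {
  switch (tx_type) {
    case ADST_DCT:
      *scan = vp9_row_scan_4x4;
      *scan_nb = vp9_row_scan_4x4_neighbors;
      break;
    case DCT_ADST:
      *scan = vp9_col_scan_4x4;
      *scan_nb = vp9_col_scan_4x4_neighbors;
      break;
    default:
      *scan = vp9_default_scan_4x4;
      *scan_nb = vp9_default_scan_4x4_neighbors;
      break;
  }
}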
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index 4cf4c0392..31537c7f7 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -14,7 +14,6 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_treecoder.h"
-#define SUBMVREF_COUNT 5
#define TX_SIZE_CONTEXTS 2
#define MODE_UPDATE_PROB 252
#define SWITCHABLE_FILTERS 3 // number of switchable filters
diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c
index 49a731fdb..73f6b4c19 100644
--- a/vp9/common/vp9_findnearmv.c
+++ b/vp9/common/vp9_findnearmv.c
@@ -54,7 +54,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
dst_list[1].as_int = 0;
if (block_idx == 0) {
- memcpy(dst_list, mv_list, MAX_MV_REF_CANDIDATES * sizeof(int_mv));
+ vpx_memcpy(dst_list, mv_list, MAX_MV_REF_CANDIDATES * sizeof(int_mv));
} else if (block_idx == 1 || block_idx == 2) {
int dst = 0, n;
union b_mode_info *bmi = mi->bmi;
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index ad0d882b9..50dfdc7fb 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -55,13 +55,11 @@ static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb,
if (!mi)
return DC_PRED;
- if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
+ if (is_inter_block(&mi->mbmi))
return DC_PRED;
- } else if (mi->mbmi.sb_type < BLOCK_8X8) {
- return ((mi->bmi + 1 + b)->as_mode);
- } else {
- return mi->mbmi.mode;
- }
+ else
+ return mi->mbmi.sb_type < BLOCK_8X8 ? (mi->bmi + 1 + b)->as_mode
+ : mi->mbmi.mode;
}
assert(b == 1 || b == 3);
return (mi->bmi + b - 1)->as_mode;
@@ -77,13 +75,11 @@ static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
if (!mi)
return DC_PRED;
- if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
+ if (is_inter_block(&mi->mbmi))
return DC_PRED;
- } else if (mi->mbmi.sb_type < BLOCK_8X8) {
- return ((mi->bmi + 2 + b)->as_mode);
- } else {
- return mi->mbmi.mode;
- }
+ else
+ return mi->mbmi.sb_type < BLOCK_8X8 ? (mi->bmi + 2 + b)->as_mode
+ : mi->mbmi.mode;
}
return (mi->bmi + b - 2)->as_mode;
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index a2245259e..bc30d2a95 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -28,10 +28,10 @@ void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
int16_t *op = output;
for (i = 0; i < 4; i++) {
- a1 = ip[0] >> WHT_UPSCALE_FACTOR;
- c1 = ip[1] >> WHT_UPSCALE_FACTOR;
- d1 = ip[2] >> WHT_UPSCALE_FACTOR;
- b1 = ip[3] >> WHT_UPSCALE_FACTOR;
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;
+ c1 = ip[1] >> UNIT_QUANT_SHIFT;
+ d1 = ip[2] >> UNIT_QUANT_SHIFT;
+ b1 = ip[3] >> UNIT_QUANT_SHIFT;
a1 += c1;
d1 -= b1;
e1 = (a1 - d1) >> 1;
@@ -77,7 +77,7 @@ void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
int16_t *ip = in;
int16_t *op = tmp;
- a1 = ip[0] >> WHT_UPSCALE_FACTOR;
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;
e1 = a1 >> 1;
a1 -= e1;
op[0] = a1;
@@ -420,7 +420,7 @@ void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride,
+ dest[j * dest_stride + i]); }
}
-void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
+void vp9_short_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[8 * 8] = { 0 };
int16_t *outptr = out;
@@ -838,7 +838,7 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
+ dest[j * dest_stride + i]); }
}
-void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
+void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[16 * 16] = { 0 };
int16_t *outptr = out;
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 5f2f0a569..59892cd03 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -22,7 +22,8 @@
#define DCT_CONST_BITS 14
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
-#define WHT_UPSCALE_FACTOR 2
+#define UNIT_QUANT_SHIFT 2
+#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
#define pair_set_epi16(a, b) \
_mm_set_epi16(b, a, b, a, b, a, b, a)
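
The WHT_UPSCALE_FACTOR rename keeps the same value (2); UNIT_QUANT_SHIFT now also comes with the derived UNIT_QUANT_FACTOR. A small model of the round trip the lossless Walsh-Hadamard path presumably relies on (the forward-side scale-up is an assumption here; only the inverse shift appears in this diff):

#include <stdint.h>

#define UNIT_QUANT_SHIFT 2
#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)

/* The inverse WHT shifts each input right by UNIT_QUANT_SHIFT, undoing a
 * forward-side scale-up by UNIT_QUANT_FACTOR, matching the
 * ip[n] >> UNIT_QUANT_SHIFT lines in vp9_short_iwalsh4x4_add_c above. */
static int16_t unit_quant_round_trip(int16_t coeff) {
  int16_t stored = (int16_t)(coeff * UNIT_QUANT_FACTOR);
  return stored >> UNIT_QUANT_SHIFT;  /* == coeff */
}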
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 0f2e4e999..18407dd73 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -132,7 +132,7 @@ static void build_inter_predictors(int plane, int block, BLOCK_SIZE bsize,
const int x = 4 * (block & ((1 << bwl) - 1));
const int y = 4 * (block >> bwl);
const MODE_INFO *mi = xd->this_mi;
- const int use_second_ref = mi->mbmi.ref_frame[1] > 0;
+ const int is_compound = has_second_ref(&mi->mbmi);
int ref;
assert(x < bw);
@@ -140,7 +140,7 @@ static void build_inter_predictors(int plane, int block, BLOCK_SIZE bsize,
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw);
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh);
- for (ref = 0; ref < 1 + use_second_ref; ++ref) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
struct scale_factors *const scale = &xd->scale_factor[ref];
struct buf_2d *const pre_buf = &pd->pre[ref];
struct buf_2d *const dst_buf = &pd->dst;
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 042afbbef..42923b3c8 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -53,7 +53,7 @@ prototype void vp9_d45_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const ui
specialize vp9_d45_predictor_4x4 $ssse3_x86inc
prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_4x4
+specialize vp9_d63_predictor_4x4 $ssse3_x86inc
prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_4x4 $ssse3_x86inc
@@ -92,7 +92,7 @@ prototype void vp9_d45_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const ui
specialize vp9_d45_predictor_8x8 $ssse3_x86inc
prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_8x8
+specialize vp9_d63_predictor_8x8 $ssse3_x86inc
prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_8x8 $ssse3_x86inc
@@ -131,7 +131,7 @@ prototype void vp9_d45_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const
specialize vp9_d45_predictor_16x16 $ssse3_x86inc
prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_16x16
+specialize vp9_d63_predictor_16x16 $ssse3_x86inc
prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_16x16 $ssse3_x86inc
@@ -170,7 +170,7 @@ prototype void vp9_d45_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const
specialize vp9_d45_predictor_32x32 $ssse3_x86inc
prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_32x32
+specialize vp9_d63_predictor_32x32 $ssse3_x86inc
prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_32x32 $ssse3_x86inc
@@ -306,8 +306,8 @@ specialize vp9_short_idct8x8_1_add sse2 neon
prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct8x8_add sse2 neon
-prototype void vp9_short_idct10_8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct10_8x8_add sse2 neon
+prototype void vp9_short_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct8x8_10_add sse2 neon
prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_1_add sse2 neon
@@ -315,8 +315,8 @@ specialize vp9_short_idct16x16_1_add sse2 neon
prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_add sse2 neon
-prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct10_16x16_add sse2 neon
+prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct16x16_10_add sse2 neon
prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct32x32_add sse2 neon
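
Each prototype/specialize pair in vp9_rtcd_defs.sh feeds the generated run-time CPU dispatch, so adding $ssse3_x86inc to the d63 predictors routes them to the new assembly when SSSE3 is available. An illustrative sketch of the kind of dispatcher the generator emits (function-pointer shape and HAS_SSSE3 flag modeled on libvpx's generated vp9_rtcd.h, not taken from this diff):

#include <stddef.h>
#include <stdint.h>

#define HAS_SSSE3 0x08  /* illustrative flag value */

void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left);
void vp9_d63_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride,
                                 const uint8_t *above, const uint8_t *left);

/* Function pointer selected once at startup from detected CPU features. */
void (*vp9_d63_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left);

static void setup_rtcd_sketch(int flags) {
  vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c;
  if (flags & HAS_SSSE3)
    vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3;
}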
diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h
deleted file mode 100644
index fe75481f6..000000000
--- a/vp9/common/vp9_subpelvar.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_SUBPELVAR_H_
-#define VP9_COMMON_VP9_SUBPELVAR_H_
-
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_convolve.h"
-
-static void variance(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- int w,
- int h,
- unsigned int *sse,
- int *sum) {
- int i, j;
- int diff;
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- diff = src_ptr[j] - ref_ptr[j];
- *sum += diff;
- *sse += diff * diff;
- }
-
- src_ptr += source_stride;
- ref_ptr += recon_stride;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_first_pass
- *
- * INPUTS : uint8_t *src_ptr : Pointer to source block.
- * uint32_t src_pixels_per_line : Stride of input block.
- * uint32_t pixel_step : Offset between filter input samples (see notes).
- * uint32_t output_height : Input block height.
- * uint32_t output_width : Input block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter taps.
- *
- * OUTPUTS : int32_t *output_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement first-pass
- * of 2-D separable filter.
- *
- * SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=stride).
- * It defines the offset required to move from one input
- * to the next.
- *
- ****************************************************************************/
-static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
- uint16_t *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
-
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
- (int)src_ptr[pixel_step] * vp9_filter[1],
- FILTER_BITS);
-
- src_ptr++;
- }
-
- // Next row...
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_width;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_second_pass
- *
- * INPUTS : int32_t *src_ptr : Pointer to source block.
- * uint32_t src_pixels_per_line : Stride of input block.
- * uint32_t pixel_step : Offset between filter input samples (see notes).
- * uint32_t output_height : Input block height.
- * uint32_t output_width : Input block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter taps.
- *
- * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement second-pass
- * of 2-D separable filter.
- *
- * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=stride).
- * It defines the offset required to move from one input
- * to the next.
- *
- ****************************************************************************/
-static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
-
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
- (int)src_ptr[pixel_step] * vp9_filter[1],
- FILTER_BITS);
- src_ptr++;
- }
-
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_width;
- }
-}
-
-#endif // VP9_COMMON_VP9_SUBPELVAR_H_
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 8f740f412..d44c7e2a0 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -985,7 +985,7 @@ void vp9_short_iht8x8_add_sse2(int16_t *input, uint8_t *dest, int stride,
RECON_AND_STORE(dest, in[7]);
}
-void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_short_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
@@ -2456,7 +2456,7 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride,
write_buffer_8x16(dest, in1, stride);
}
-void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest,
+void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
diff --git a/vp9/common/x86/vp9_intrapred_ssse3.asm b/vp9/common/x86/vp9_intrapred_ssse3.asm
index 67c8ab03a..c51d01151 100644
--- a/vp9/common/x86/vp9_intrapred_ssse3.asm
+++ b/vp9/common/x86/vp9_intrapred_ssse3.asm
@@ -17,8 +17,8 @@ pw_2: times 8 dw 2
pb_7m1: times 8 db 7, -1
pb_15: times 16 db 15
-sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7
-sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7
+sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
+sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
@@ -305,3 +305,153 @@ cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset
RESTORE_GOT
RET
+
+; ------------------------------------------
+; input: x, y, z, result
+;
+; trick from pascal
+; (x+2y+z+2)>>2 can be calculated as:
+; result = avg(x,z)
+; result -= xor(x,z) & 1
+; result = avg(result,y)
+; ------------------------------------------
+%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4
+ pavgb %4, %1, %3
+ pxor %3, %1
+ pand %3, [GLOBAL(pb_1)]
+ psubb %4, %3
+ pavgb %4, %2
+%endmacro
+
+INIT_XMM ssse3
+cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset
+ GET_GOT goffsetq
+
+ movq m3, [aboveq]
+ pshufb m1, m3, [GLOBAL(sh_b23456777)]
+ pshufb m2, m3, [GLOBAL(sh_b12345677)]
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4
+ pavgb m3, m2
+
+ ; store 4 lines
+ movd [dstq ], m3
+ movd [dstq+strideq], m4
+ lea dstq, [dstq+strideq*2]
+ psrldq m3, 1
+ psrldq m4, 1
+ movd [dstq ], m3
+ movd [dstq+strideq], m4
+ RESTORE_GOT
+ RET
+
+INIT_XMM ssse3
+cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset
+ GET_GOT goffsetq
+
+ movq m3, [aboveq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pshufb m1, m3, [GLOBAL(sh_b2345677777777777)]
+ pshufb m0, m3, [GLOBAL(sh_b0123456777777777)]
+ pshufb m2, m3, [GLOBAL(sh_b1234567777777777)]
+ pshufb m3, [GLOBAL(sh_b0123456777777777)]
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m4
+ pavgb m3, m2
+
+ ; store 4 lines
+ movq [dstq ], m3
+ movq [dstq+strideq], m4
+ psrldq m3, 1
+ psrldq m4, 1
+ movq [dstq+strideq*2], m3
+ movq [dstq+stride3q ], m4
+ lea dstq, [dstq+strideq*4]
+ psrldq m3, 1
+ psrldq m4, 1
+
+ ; store 4 lines
+ movq [dstq ], m3
+ movq [dstq+strideq], m4
+ psrldq m3, 1
+ psrldq m4, 1
+ movq [dstq+strideq*2], m3
+ movq [dstq+stride3q ], m4
+ RESTORE_GOT
+ RET
+
+INIT_XMM ssse3
+cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, line, goffset
+ GET_GOT goffsetq
+
+ mova m0, [aboveq]
+ DEFINE_ARGS dst, stride, stride3, line
+ lea stride3q, [strideq*3]
+ mova m1, [GLOBAL(sh_b123456789abcdeff)]
+ pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)]
+ pshufb m3, m0, m1
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4
+ pavgb m0, m3
+
+ mov lined, 4
+.loop:
+ mova [dstq ], m0
+ mova [dstq+strideq ], m4
+ pshufb m0, m1
+ pshufb m4, m1
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m4
+ pshufb m0, m1
+ pshufb m4, m1
+ lea dstq, [dstq+strideq*4]
+ dec lined
+ jnz .loop
+ RESTORE_GOT
+ REP_RET
+
+INIT_XMM ssse3
+cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset
+ GET_GOT goffsetq
+
+ mova m0, [aboveq]
+ mova m7, [aboveq+16]
+ DEFINE_ARGS dst, stride, stride3, line
+ mova m1, [GLOBAL(sh_b123456789abcdeff)]
+ lea stride3q, [strideq*3]
+ pshufb m2, m7, [GLOBAL(sh_b23456789abcdefff)]
+ pshufb m3, m7, m1
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m2, m4
+ palignr m6, m7, m0, 1
+ palignr m5, m7, m0, 2
+ pavgb m7, m3
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2
+ pavgb m0, m6
+
+ mov lined, 8
+.loop:
+ mova [dstq ], m0
+ mova [dstq +16], m7
+ mova [dstq+strideq ], m2
+ mova [dstq+strideq +16], m4
+ palignr m3, m7, m0, 1
+ palignr m5, m4, m2, 1
+ pshufb m7, m1
+ pshufb m4, m1
+
+ mova [dstq+strideq*2 ], m3
+ mova [dstq+strideq*2+16], m7
+ mova [dstq+stride3q ], m5
+ mova [dstq+stride3q +16], m4
+ palignr m0, m7, m3, 1
+ palignr m2, m4, m5, 1
+ pshufb m7, m1
+ pshufb m4, m1
+ lea dstq, [dstq+strideq*4]
+ dec lined
+ jnz .loop
+ RESTORE_GOT
+ REP_RET
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 84a29b17a..ba9fad25e 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -75,28 +75,9 @@ static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode,
return TX_4X4;
}
-static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize,
- int mi_row, int mi_col, int segment_id) {
- const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = 1 << mi_width_log2(bsize);
- const int bh = 1 << mi_height_log2(bsize);
- const int xmis = MIN(cm->mi_cols - mi_col, bw);
- const int ymis = MIN(cm->mi_rows - mi_row, bh);
- int x, y;
-
- assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
-
- for (y = 0; y < ymis; y++)
- for (x = 0; x < xmis; x++)
- cm->last_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
-}
-
static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
vp9_reader *r) {
- MACROBLOCKD *const xd = &pbi->mb;
struct segmentation *const seg = &pbi->common.seg;
- const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type;
- int segment_id;
if (!seg->enabled)
return 0; // Default for disabled segmentation
@@ -104,9 +85,7 @@ static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
if (!seg->update_map)
return 0;
- segment_id = read_segment_id(r, seg);
- set_segment_id(&pbi->common, bsize, mi_row, mi_col, segment_id);
- return segment_id;
+ return read_segment_id(r, seg);
}
static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
@@ -115,7 +94,7 @@ static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
MACROBLOCKD *const xd = &pbi->mb;
struct segmentation *const seg = &cm->seg;
const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type;
- int pred_segment_id, segment_id;
+  int pred_segment_id;
if (!seg->enabled)
return 0; // Default for disabled segmentation
@@ -129,13 +108,10 @@ static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
const vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
const int pred_flag = vp9_read(r, pred_prob);
vp9_set_pred_flag_seg_id(xd, pred_flag);
- segment_id = pred_flag ? pred_segment_id
- : read_segment_id(r, seg);
+ return pred_flag ? pred_segment_id : read_segment_id(r, seg);
} else {
- segment_id = read_segment_id(r, seg);
+ return read_segment_id(r, seg);
}
- set_segment_id(cm, bsize, mi_row, mi_col, segment_id);
- return segment_id;
}
static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
@@ -200,7 +176,6 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
static int read_mv_component(vp9_reader *r,
const nmv_component *mvcomp, int usehp) {
-
int mag, d, fr, hp;
const int sign = vp9_read(r, mvcomp->sign);
const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes);
@@ -493,11 +468,12 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
int idx, idy;
+ int b_mode;
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv blockmv, secondmv;
const int j = idy * 2 + idx;
- const int b_mode = read_inter_mode(cm, r, inter_mode_ctx);
+ b_mode = read_inter_mode(cm, r, inter_mode_ctx);
if (b_mode == NEARESTMV || b_mode == NEARMV) {
vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0,
@@ -544,10 +520,10 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
mi->bmi[j + 2] = mi->bmi[j];
if (num_4x4_w == 2)
mi->bmi[j + 1] = mi->bmi[j];
- mi->mbmi.mode = b_mode;
}
}
+ mi->mbmi.mode = b_mode;
mv0->as_int = mi->bmi[3].as_mv[0].as_int;
mv1->as_int = mi->bmi[3].as_mv[1].as_int;
} else {
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 34ed0c759..77fec5061 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -436,7 +436,6 @@ static void setup_segmentation(struct segmentation *seg,
static void setup_loopfilter(struct loopfilter *lf,
struct vp9_read_bit_buffer *rb) {
-
lf->filter_level = vp9_rb_read_literal(rb, 6);
lf->sharpness_level = vp9_rb_read_literal(rb, 3);
@@ -935,6 +934,15 @@ void vp9_init_dequantizer(VP9_COMMON *cm) {
}
}
+static void update_segmentation_map(VP9_COMMON *cm) {
+ int i, j;
+
+ for (i = 0; i < cm->mi_rows; ++i)
+ for (j = 0; j < cm->mi_cols; ++j)
+ cm->last_frame_seg_map[i * cm->mi_cols + j] =
+ cm->mi_grid_visible[i * cm->mode_info_stride + j]->mbmi.segment_id;
+}
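
This replaces the per-block set_segment_id() writes removed from vp9_decodemv.c above: instead of updating last_frame_seg_map for every mi cell of every block as it is parsed, the map is rebuilt in one pass once the frame has fully decoded, reading each cell's final segment_id from the visible mi grid. Note the two strides differ: the map is packed at mi_cols entries per row while grid rows are mode_info_stride apart. A hypothetical post-decode check of the invariant this relies on (every visible mi cell carries a valid segment_id):

    /* Sketch only: verify the rebuilt map against the mi grid. */
    static int seg_map_consistent(const VP9_COMMON *cm) {
      int r, c;
      for (r = 0; r < cm->mi_rows; ++r)
        for (c = 0; c < cm->mi_cols; ++c)
          if (cm->last_frame_seg_map[r * cm->mi_cols + c] !=
              cm->mi_grid_visible[r * cm->mode_info_stride + c]
                  ->mbmi.segment_id)
            return 0;
      return 1;
    }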
+
int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
int i;
VP9_COMMON *const cm = &pbi->common;
@@ -1014,5 +1022,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = cm->fc;
+ update_segmentation_map(cm);
+
return 0;
}
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 3792b9c78..8fcf83ee3 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -106,8 +106,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
const uint8_t *band_translate;
uint8_t token_cache[1024];
int pt = get_entropy_context(tx_size, A, L);
- get_scan_and_band(xd, tx_size, type, block_idx, &scan, &band_translate);
- nb = vp9_get_coef_neighbors_handle(scan);
+ get_scan_and_band(xd, tx_size, type, block_idx, &scan, &nb, &band_translate);
while (1) {
int val;
@@ -122,7 +121,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
-SKIP_START:
+ SKIP_START:
if (c >= seg_eob)
break;
if (c)
diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c
index 8cc64f73e..9a970d42b 100644
--- a/vp9/decoder/vp9_dsubexp.c
+++ b/vp9/decoder/vp9_dsubexp.c
@@ -67,7 +67,6 @@ static int inv_remap_prob(int v, int m) {
206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221,
222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
-
};
// v = merge_index(v, MAX_PROBS - 1, MODULUS_PARAM);
v = inv_map_table[v];
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index 395e636b8..cac5f1a76 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/decoder/vp9_idct_blk.h"
@@ -96,7 +96,7 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
vp9_short_idct8x8_1_add(input, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
- vp9_short_idct10_8x8_add(input, dest, stride);
+ vp9_short_idct8x8_10_add(input, dest, stride);
vpx_memset(input, 0, 128);
} else {
vp9_short_idct8x8_add(input, dest, stride);
@@ -126,7 +126,7 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
vp9_short_idct16x16_1_add(input, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
- vp9_short_idct10_16x16_add(input, dest, stride);
+ vp9_short_idct16x16_10_add(input, dest, stride);
vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
diff --git a/vp9/decoder/vp9_idct_blk.h b/vp9/decoder/vp9_idct_blk.h
index 1810bd02f..00f1bc6a6 100644
--- a/vp9/decoder/vp9_idct_blk.h
+++ b/vp9/decoder/vp9_idct_blk.h
@@ -14,17 +14,16 @@
#include "vp9/common/vp9_blockd.h"
+void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest,
+ int stride, int eob);
-void vp9_idct_add_lossless_c(int16_t *input, unsigned char *dest, int stride,
- int eob);
-
-void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
+void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
int stride, int eob);
-void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
+void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
int stride, int eob);
-void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
+void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
int stride, int eob);
#endif // VP9_DECODER_VP9_IDCT_BLK_H_
diff --git a/vp9/decoder/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h
index cd5b7508f..4f662e9ac 100644
--- a/vp9/decoder/vp9_onyxd.h
+++ b/vp9/decoder/vp9_onyxd.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ONYXD_H_
-#define VP9_COMMON_VP9_ONYXD_H_
+#ifndef VP9_DECODER_VP9_ONYXD_H_
+#define VP9_DECODER_VP9_ONYXD_H_
#ifdef __cplusplus
extern "C" {
@@ -66,4 +66,4 @@ void vp9_remove_decompressor(VP9D_PTR comp);
}
#endif
-#endif // VP9_COMMON_VP9_ONYXD_H_
+#endif // VP9_DECODER_VP9_ONYXD_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 17d5def33..1c804d974 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -65,13 +65,12 @@ static void recon_write_yuv_frame(const char *name,
#endif
#if WRITE_RECON_BUFFER == 2
void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
-
// write the frame
FILE *yframe;
int i;
char filename[255];
- sprintf(filename, "dx\\y%04d.raw", this_frame);
+ snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame);
yframe = fopen(filename, "wb");
for (i = 0; i < frame->y_height; i++)
@@ -79,7 +78,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
frame->y_width, 1, yframe);
fclose(yframe);
- sprintf(filename, "dx\\u%04d.raw", this_frame);
+ snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame);
yframe = fopen(filename, "wb");
for (i = 0; i < frame->uv_height; i++)
@@ -87,7 +86,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
frame->uv_width, 1, yframe);
fclose(yframe);
- sprintf(filename, "dx\\v%04d.raw", this_frame);
+ snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame);
yframe = fopen(filename, "wb");
for (i = 0; i < frame->uv_height; i++)
@@ -214,13 +213,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag,
* vpxenc --test-decode functionality working, and will be replaced in a
* later commit that adds VP9-specific controls for this functionality.
*/
- if (ref_frame_flag == VP9_LAST_FLAG)
+ if (ref_frame_flag == VP9_LAST_FLAG) {
ref_fb_ptr = &pbi->common.active_ref_idx[0];
- else if (ref_frame_flag == VP9_GOLD_FLAG)
+ } else if (ref_frame_flag == VP9_GOLD_FLAG) {
ref_fb_ptr = &pbi->common.active_ref_idx[1];
- else if (ref_frame_flag == VP9_ALT_FLAG)
+ } else if (ref_frame_flag == VP9_ALT_FLAG) {
ref_fb_ptr = &pbi->common.active_ref_idx[2];
- else {
+ } else {
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
"Invalid reference frame");
return pbi->common.error.error_code;
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index a051971a1..8fee5e975 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -41,4 +41,4 @@ typedef struct VP9Decompressor {
VP9Worker lf_worker;
} VP9D_COMP;
-#endif // VP9_DECODER_VP9_TREEREADER_H_
+#endif // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/decoder/vp9_read_bit_buffer.h b/vp9/decoder/vp9_read_bit_buffer.h
index c7fa3aa27..41a686837 100644
--- a/vp9/decoder/vp9_read_bit_buffer.h
+++ b/vp9/decoder/vp9_read_bit_buffer.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_READ_BIT_BUFFER_
-#define VP9_READ_BIT_BUFFER_
+#ifndef VP9_DECODER_VP9_READ_BIT_BUFFER_H_
+#define VP9_DECODER_VP9_READ_BIT_BUFFER_H_
#include <limits.h>
@@ -57,4 +57,4 @@ static int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb,
return vp9_rb_read_bit(rb) ? -value : value;
}
-#endif // VP9_READ_BIT_BUFFER_
+#endif // VP9_DECODER_VP9_READ_BIT_BUFFER_H_
diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h
index a8f7e046a..0b5eca073 100644
--- a/vp9/decoder/vp9_thread.h
+++ b/vp9/decoder/vp9_thread.h
@@ -17,7 +17,7 @@
#ifndef VP9_DECODER_VP9_THREAD_H_
#define VP9_DECODER_VP9_THREAD_H_
-#include "vpx_config.h"
+#include "./vpx_config.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -90,4 +90,4 @@ void vp9_worker_end(VP9Worker* const worker);
} // extern "C"
#endif
-#endif /* VP9_DECODER_VP9_THREAD_H_ */
+#endif // VP9_DECODER_VP9_THREAD_H_
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 622f75fe6..20dd8e175 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -484,17 +484,13 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
}
if (bsize < BLOCK_8X8) {
- int j;
- MB_PREDICTION_MODE blockmode;
- int_mv blockmv;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
- j = idy * 2 + idx;
- blockmode = x->partition_info->bmi[j].mode;
- blockmv = m->bmi[j].as_mv[0];
+ const int j = idy * 2 + idx;
+ const MB_PREDICTION_MODE blockmode = x->partition_info->bmi[j].mode;
write_sb_mv_ref(bc, blockmode, mv_ref_p);
++cm->counts.inter_mode[mi->mode_context[rf]]
[inter_mode_offset(blockmode)];
@@ -503,14 +499,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
#ifdef ENTROPY_STATS
active_section = 11;
#endif
- vp9_encode_mv(cpi, bc, &blockmv.as_mv, &mi->best_mv.as_mv,
- nmvc, allow_hp);
-
- if (mi->ref_frame[1] > INTRA_FRAME)
- vp9_encode_mv(cpi, bc,
- &m->bmi[j].as_mv[1].as_mv,
- &mi->best_second_mv.as_mv,
- nmvc, allow_hp);
+ vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv,
+ &mi->best_mv[0].as_mv, nmvc, allow_hp);
+
+ if (has_second_ref(mi))
+ vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv,
+ &mi->best_mv[1].as_mv, nmvc, allow_hp);
}
}
}
@@ -518,12 +512,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
#ifdef ENTROPY_STATS
active_section = 5;
#endif
- vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv.as_mv,
- nmvc, allow_hp);
+ vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv,
+ &mi->best_mv[0].as_mv, nmvc, allow_hp);
- if (mi->ref_frame[1] > INTRA_FRAME)
- vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv,
- nmvc, allow_hp);
+ if (has_second_ref(mi))
+ vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv,
+ &mi->best_mv[1].as_mv, nmvc, allow_hp);
}
}
}
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 013047e35..5a0d746c8 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -34,6 +34,7 @@ typedef struct {
typedef struct {
MODE_INFO mic;
PARTITION_INFO partition_info;
+ unsigned char zcoeff_blk[256];
int skip;
int_mv best_ref_mv;
int_mv second_best_ref_mv;
@@ -136,6 +137,7 @@ struct macroblock {
int mv_row_min;
int mv_row_max;
+ unsigned char zcoeff_blk[TX_SIZES][256];
int skip;
int encode_breakout;
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index ca863931e..b9c300033 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -637,10 +637,10 @@ void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
c1 = e1 - c1;
a1 -= c1;
d1 += b1;
- op[0] = a1 << WHT_UPSCALE_FACTOR;
- op[1] = c1 << WHT_UPSCALE_FACTOR;
- op[2] = d1 << WHT_UPSCALE_FACTOR;
- op[3] = b1 << WHT_UPSCALE_FACTOR;
+ op[0] = a1 * UNIT_QUANT_FACTOR;
+ op[1] = c1 * UNIT_QUANT_FACTOR;
+ op[2] = d1 * UNIT_QUANT_FACTOR;
+ op[3] = b1 * UNIT_QUANT_FACTOR;
ip += 4;
op += 4;
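
The switch from a shift to a multiply here is not cosmetic: a1, b1, c1 and d1 can be negative at this point, and left-shifting a negative value is undefined behavior in C, while the multiplication is well-defined. For the change to be value-preserving the new constant must be the same power of two the old shift applied; presumably (verify against the headers at this revision) it is defined along these lines:

    /* Assumed definitions; UNIT_QUANT_FACTOR must equal
       1 << WHT_UPSCALE_FACTOR for identical non-negative results. */
    #define UNIT_QUANT_SHIFT  2
    #define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)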
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ee938bda9..f6045e80b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -390,6 +390,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
}
x->skip = ctx->skip;
+ vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
+ sizeof(ctx->zcoeff_blk));
+
if (!output_enabled)
return;
@@ -428,19 +431,19 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
cpi->mode_chosen_counts[mb_mode_index]++;
if (is_inter_block(mbmi)
&& (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv, best_second_mv;
+ int_mv best_mv[2];
const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
- best_mv.as_int = ctx->best_ref_mv.as_int;
- best_second_mv.as_int = ctx->second_best_ref_mv.as_int;
+ best_mv[0].as_int = ctx->best_ref_mv.as_int;
+ best_mv[1].as_int = ctx->second_best_ref_mv.as_int;
if (mbmi->mode == NEWMV) {
- best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int;
+ best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int;
if (rf2 > 0)
- best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int;
+ best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int;
}
- mbmi->best_mv.as_int = best_mv.as_int;
- mbmi->best_second_mv.as_int = best_second_mv.as_int;
- vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
+ mbmi->best_mv[0].as_int = best_mv[0].as_int;
+ mbmi->best_mv[1].as_int = best_mv[1].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
}
if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
@@ -2209,7 +2212,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
cpi->inter_zz_count = 0;
vp9_zero(cm->counts.switchable_interp);
- vp9_zero(cpi->txfm_stepdown_count);
+ vp9_zero(cpi->tx_stepdown_count);
xd->mi_8x8 = cm->mi_grid_visible;
// required for vp9_frame_init_quantizer
@@ -2348,18 +2351,19 @@ static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO **mi_8x8,
int mis, TX_SIZE max_tx_size, int bw, int bh,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
VP9_COMMON * const cm = &cpi->common;
- MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
return;
-
- if (mbmi->tx_size > max_tx_size) {
- const int ymbs = MIN(bh, cm->mi_rows - mi_row);
- const int xmbs = MIN(bw, cm->mi_cols - mi_col);
-
- assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
- get_skip_flag(mi_8x8, mis, ymbs, xmbs));
- set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
+ } else {
+ MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
+ if (mbmi->tx_size > max_tx_size) {
+ const int ymbs = MIN(bh, cm->mi_rows - mi_row);
+ const int xmbs = MIN(bw, cm->mi_cols - mi_col);
+
+ assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
+ get_skip_flag(mi_8x8, mis, ymbs, xmbs));
+ set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
+ }
}
}
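
Besides the reshuffled control flow, this hunk moves the mbmi initializer inside the in-bounds branch: the old code loaded mi_8x8[0] before checking mi_row/mi_col against the frame, so blocks hanging off the right or bottom edge could read a grid entry that was never set up. The pattern in miniature (illustrative):

    /* Check bounds first, then touch the grid entry. */
    if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
      return;  /* off the frame edge: nothing to reset */
    {
      MB_MODE_INFO *const mbmi = &mi_8x8[0]->mbmi;  /* safe to load now */
      /* ... */
    }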
@@ -2454,9 +2458,9 @@ static void select_tx_mode(VP9_COMP *cpi) {
unsigned int total = 0;
int i;
for (i = 0; i < TX_SIZES; ++i)
- total += cpi->txfm_stepdown_count[i];
+ total += cpi->tx_stepdown_count[i];
if (total) {
- double fraction = (double)cpi->txfm_stepdown_count[0] / total;
+ double fraction = (double)cpi->tx_stepdown_count[0] / total;
cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
// printf("fraction = %f\n", fraction);
} // else keep unchanged
@@ -2732,7 +2736,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])];
YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
YV12_BUFFER_CONFIG *second_ref_fb = NULL;
- if (mbmi->ref_frame[1] > 0) {
+ if (has_second_ref(mbmi)) {
idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])];
second_ref_fb = &cm->yv12_fb[idx];
}
@@ -2744,7 +2748,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
&xd->scale_factor[1]);
-
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
}
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 2c12477a7..76a5d33e7 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -53,7 +53,7 @@ static void inverse_transform_b_8x8_add(int eob,
if (eob <= 1)
vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
else if (eob <= 10)
- vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
+ vp9_short_idct8x8_10_add(dqcoeff, dest, stride);
else
vp9_short_idct8x8_add(dqcoeff, dest, stride);
}
@@ -64,7 +64,7 @@ static void inverse_transform_b_16x16_add(int eob,
if (eob <= 1)
vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
else if (eob <= 10)
- vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
+ vp9_short_idct16x16_10_add(dqcoeff, dest, stride);
else
vp9_short_idct16x16_add(dqcoeff, dest, stride);
}
@@ -172,7 +172,7 @@ static void optimize_b(MACROBLOCK *mb,
assert((!type && !plane) || (type && plane));
dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
- get_scan_and_band(xd, tx_size, type, ib, &scan, &band_translate);
+ get_scan_and_band(xd, tx_size, type, ib, &scan, &nb, &band_translate);
assert(eob <= default_eob);
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
@@ -191,7 +191,6 @@ static void optimize_b(MACROBLOCK *mb,
for (i = 0; i < eob; i++)
token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
qcoeff_ptr[scan[i]]].token];
- nb = vp9_get_coef_neighbors_handle(scan);
for (i = eob; i-- > i0;) {
int base_bits, d2, dx;
@@ -365,36 +364,10 @@ static void optimize_init_b(int plane, BLOCK_SIZE bsize,
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
- int i;
- switch (tx_size) {
- case TX_4X4:
- vpx_memcpy(args->ctx->ta[plane], pd->above_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_w);
- vpx_memcpy(args->ctx->tl[plane], pd->left_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_h);
- break;
- case TX_8X8:
- for (i = 0; i < num_4x4_w; i += 2)
- args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_h; i += 2)
- args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i];
- break;
- case TX_16X16:
- for (i = 0; i < num_4x4_w; i += 4)
- args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_h; i += 4)
- args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i];
- break;
- case TX_32X32:
- for (i = 0; i < num_4x4_w; i += 8)
- args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_h; i += 8)
- args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i];
- break;
- default:
- assert(0);
- }
+ vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane],
+ pd->above_context, pd->left_context,
+ num_4x4_w, num_4x4_h);
}
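
The removed switch is hoisted into the shared vp9_get_entropy_contexts() helper. Reconstructing its likely body from the code deleted above (a sketch under that assumption; the actual declaration lives in vp9_entropy.h at this revision):

    static void get_entropy_contexts_sketch(TX_SIZE tx_size,
                                            ENTROPY_CONTEXT t_above[16],
                                            ENTROPY_CONTEXT t_left[16],
                                            const ENTROPY_CONTEXT *above,
                                            const ENTROPY_CONTEXT *left,
                                            int num_4x4_w, int num_4x4_h) {
      int i;
      switch (tx_size) {
        case TX_4X4:
          vpx_memcpy(t_above, above, sizeof(*above) * num_4x4_w);
          vpx_memcpy(t_left, left, sizeof(*left) * num_4x4_h);
          break;
        case TX_8X8:
          for (i = 0; i < num_4x4_w; i += 2)
            t_above[i] = !!*(const uint16_t *)&above[i];
          for (i = 0; i < num_4x4_h; i += 2)
            t_left[i] = !!*(const uint16_t *)&left[i];
          break;
        /* TX_16X16 and TX_32X32 repeat the pattern with uint32_t and
           uint64_t loads every 4 and 8 entries, as in the removed switch. */
        default:
          assert(0);
      }
    }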
void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -482,6 +455,14 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
pd->dst.buf, pd->dst.stride);
+
+ // TODO(jingning): per transformed block zero forcing only enabled for
+ // luma component. will integrate chroma components as well.
+ if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+ pd->eobs[block] = 0;
+ return;
+ }
+
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
if (x->optimize)
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index ed3a2bb64..db08ee856 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -314,44 +314,34 @@ void vp9_build_nmv_cost_table(int *mvjoint,
build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
}
-void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
+static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound,
+ nmv_context_counts *counts) {
+ int i;
+ for (i = 0; i < 1 + is_compound; ++i) {
+ const MV diff = { mv[i].as_mv.row - ref[i].as_mv.row,
+ mv[i].as_mv.col - ref[i].as_mv.col };
+ vp9_inc_mv(&diff, counts);
+ }
+}
+
+void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]) {
MODE_INFO *mi = x->e_mbd.mi_8x8[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
- MV diff;
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
- int idx, idy;
+ const int is_compound = has_second_ref(mbmi);
if (mbmi->sb_type < BLOCK_8X8) {
- PARTITION_INFO *pi = x->partition_info;
- for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
- for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int idx, idy;
+
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
const int i = idy * 2 + idx;
- if (pi->bmi[i].mode == NEWMV) {
- diff.row = mi->bmi[i].as_mv[0].as_mv.row - best_ref_mv->as_mv.row;
- diff.col = mi->bmi[i].as_mv[0].as_mv.col - best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
-
- if (mi->mbmi.ref_frame[1] > INTRA_FRAME) {
- diff.row = mi->bmi[i].as_mv[1].as_mv.row -
- second_best_ref_mv->as_mv.row;
- diff.col = mi->bmi[i].as_mv[1].as_mv.col -
- second_best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
- }
- }
+ if (x->partition_info->bmi[i].mode == NEWMV)
+ inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, &cpi->NMVcount);
}
}
} else if (mbmi->mode == NEWMV) {
- diff.row = mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row;
- diff.col = mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
-
- if (mbmi->ref_frame[1] > INTRA_FRAME) {
- diff.row = mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row;
- diff.col = mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
- }
+ inc_mvs(mbmi->mv, best_ref_mv, is_compound, &cpi->NMVcount);
}
}
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index 2789ce114..633177885 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -25,7 +25,7 @@ void vp9_build_nmv_cost_table(int *mvjoint,
int usehp,
int mvc_flag_v,
int mvc_flag_h);
-void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv, int_mv *second_best_ref_mv);
+
+void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]);
#endif // VP9_ENCODER_VP9_ENCODEMV_H_
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 6e44e604c..eaa3bd183 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -534,10 +534,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8);
- // Set up limit values for motion vectors to prevent them extending outside the UMV borders
- x->mv_row_min = -((mb_row * 16) + (VP9BORDERINPIXELS - 8));
+ // Set up limit values for motion vectors to prevent them extending
+ // outside the UMV borders
+ x->mv_row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
- + (VP9BORDERINPIXELS - 8);
+ + BORDER_MV_PIXELS_B16;
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
@@ -583,9 +584,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
intra_error += (int64_t)this_error;
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
- x->mv_col_min = -((mb_col * 16) + (VP9BORDERINPIXELS - 8));
+ x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
- + (VP9BORDERINPIXELS - 8);
+ + BORDER_MV_PIXELS_B16;
// Other than for the first frame do a motion search
if (cm->current_video_frame > 0) {
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 5a671f201..0a6576eb5 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -10,14 +10,17 @@
#include <limits.h>
-#include <vpx_mem/vpx_mem.h>
-#include <vp9/encoder/vp9_encodeintra.h>
-#include <vp9/encoder/vp9_rdopt.h>
-#include <vp9/common/vp9_blockd.h>
-#include <vp9/common/vp9_reconinter.h>
-#include <vp9/common/vp9_reconintra.h>
-#include <vp9/common/vp9_systemdependent.h>
-#include <vp9/encoder/vp9_segmentation.h>
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/encoder/vp9_encodeintra.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_segmentation.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
int_mv *ref_mv,
@@ -46,9 +49,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
ref_full.as_mv.row = ref_mv->as_mv.row >> 3;
/*cpi->sf.search_method == HEX*/
- best_err = vp9_hex_search(x, &ref_full, step_param, x->errorperbit,
+ best_err = vp9_hex_search(x, &ref_full.as_mv, step_param, x->errorperbit,
0, &v_fn_ptr,
- 0, ref_mv, dst_mv);
+ 0, &ref_mv->as_mv, &dst_mv->as_mv);
// Try sub-pixel MC
// if (bestsme > error_thresh && bestsme < INT_MAX)
@@ -57,7 +60,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
unsigned int sse;
best_err = cpi->find_fractional_mv_step(
x,
- dst_mv, ref_mv,
+ &dst_mv->as_mv, &ref_mv->as_mv,
x->errorperbit, &v_fn_ptr,
0, cpi->sf.subpel_iters_per_step, NULL, NULL,
& distortion, &sse);
@@ -246,9 +249,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
gld_top_mv.as_int = 0;
- x->mv_row_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND);
- x->mv_row_max = (cm->mb_rows - 1) * 8 + VP9BORDERINPIXELS
- - 8 - VP9_INTERP_EXTEND;
+ x->mv_row_min = -BORDER_MV_PIXELS_B16;
+ x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->up_available = 0;
xd->plane[0].dst.stride = buf->y_stride;
xd->plane[0].pre[0].stride = buf->y_stride;
@@ -267,9 +269,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_left_mv.as_int = arf_top_mv.as_int;
gld_left_mv.as_int = gld_top_mv.as_int;
- x->mv_col_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND);
- x->mv_col_max = (cm->mb_cols - 1) * 8 + VP9BORDERINPIXELS
- - 8 - VP9_INTERP_EXTEND;
+ x->mv_col_min = -BORDER_MV_PIXELS_B16;
+ x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->left_available = 0;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 7dd786904..44eaa657c 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -59,38 +59,39 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) {
return sr;
}
-int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
- int weight) {
- MV v;
- v.row = mv->as_mv.row - ref->as_mv.row;
- v.col = mv->as_mv.col - ref->as_mv.col;
- return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
- mvcost[0][v.row] +
- mvcost[1][v.col]) * weight, 7);
+static INLINE int mv_cost(const MV *mv,
+ const int *joint_cost, int *comp_cost[2]) {
+ return joint_cost[vp9_get_mv_joint(mv)] +
+ comp_cost[0][mv->row] + comp_cost[1][mv->col];
}
-static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
+int vp9_mv_bit_cost(const MV *mv, const MV *ref,
+ const int *mvjcost, int *mvcost[2], int weight) {
+ const MV diff = { mv->row - ref->row,
+ mv->col - ref->col };
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
+}
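
The refactor separates the raw table lookup (mv_cost) from the fixed-point scalings layered on top of it: vp9_mv_bit_cost scales by weight with a rounding shift of 7, and the error/SAD variants below scale by error_per_bit with shifts of 13 and 8. ROUND_POWER_OF_TWO is the usual round-to-nearest right shift from vp9_common.h:

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

so vp9_mv_bit_cost() returns round(mv_cost(diff) * weight / 128).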
+
+static int mv_err_cost(const MV *mv, const MV *ref,
+ const int *mvjcost, int *mvcost[2],
int error_per_bit) {
if (mvcost) {
- MV v;
- v.row = mv->as_mv.row - ref->as_mv.row;
- v.col = mv->as_mv.col - ref->as_mv.col;
- return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
- mvcost[0][v.row] +
- mvcost[1][v.col]) * error_per_bit, 13);
+ const MV diff = { mv->row - ref->row,
+ mv->col - ref->col };
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
+ error_per_bit, 13);
}
return 0;
}
-static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
- int *mvsadcost[2], int error_per_bit) {
+static int mvsad_err_cost(const MV *mv, const MV *ref,
+ const int *mvjsadcost, int *mvsadcost[2],
+ int error_per_bit) {
if (mvsadcost) {
- MV v;
- v.row = mv->as_mv.row - ref->as_mv.row;
- v.col = mv->as_mv.col - ref->as_mv.col;
- return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] +
- mvsadcost[0][v.row] +
- mvsadcost[1][v.col]) * error_per_bit, 8);
+ const MV diff = { mv->row - ref->row,
+ mv->col - ref->col };
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
+ error_per_bit, 8);
}
return 0;
}
@@ -273,7 +274,7 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
}
int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -294,25 +295,25 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
int thismse;
const int y_stride = xd->plane[0].pre[0].stride;
- const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+ const int offset = bestmv->row * y_stride + bestmv->col;
uint8_t *y = xd->plane[0].pre[0].buf + offset;
- int rr = ref_mv->as_mv.row;
- int rc = ref_mv->as_mv.col;
- int br = bestmv->as_mv.row * 8;
- int bc = bestmv->as_mv.col * 8;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
int hstep = 4;
- const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
- const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
// central mv
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->row <<= 3;
+ bestmv->col <<= 3;
// calculate central point error
besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
@@ -347,7 +348,7 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
}
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
@@ -360,18 +361,18 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
}
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -391,25 +392,25 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
unsigned int eighthiters = iters_per_step;
const int y_stride = xd->plane[0].pre[0].stride;
- const int offset = bestmv->as_mv.row * y_stride + bestmv->as_mv.col;
+ const int offset = bestmv->row * y_stride + bestmv->col;
uint8_t *y = xd->plane[0].pre[0].buf + offset;
- int rr = ref_mv->as_mv.row;
- int rc = ref_mv->as_mv.col;
- int br = bestmv->as_mv.row * 8;
- int bc = bestmv->as_mv.col * 8;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
int hstep = 4;
- const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
- const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
// central mv
- bestmv->as_mv.row *= 8;
- bestmv->as_mv.col *= 8;
+ bestmv->row *= 8;
+ bestmv->col *= 8;
// calculate central point error
besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
@@ -435,7 +436,7 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
tc = bc;
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
@@ -446,11 +447,11 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
tc = bc;
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
@@ -463,7 +464,7 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
z, src_stride, &sse, second_pred)
int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -487,25 +488,25 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
const int y_stride = xd->plane[0].pre[0].stride;
- const int offset = bestmv->as_mv.row * y_stride + bestmv->as_mv.col;
+ const int offset = bestmv->row * y_stride + bestmv->col;
uint8_t *const y = xd->plane[0].pre[0].buf + offset;
- int rr = ref_mv->as_mv.row;
- int rc = ref_mv->as_mv.col;
- int br = bestmv->as_mv.row * 8;
- int bc = bestmv->as_mv.col * 8;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
int hstep = 4;
- const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
- const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
// central mv
- bestmv->as_mv.row *= 8;
- bestmv->as_mv.col *= 8;
+ bestmv->row *= 8;
+ bestmv->col *= 8;
// calculate central point error
// TODO(yunqingwang): central pointer error was already calculated in full-
@@ -543,7 +544,7 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
}
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
@@ -555,18 +556,18 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
tc = bc;
}
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -589,25 +590,25 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
const int y_stride = xd->plane[0].pre[0].stride;
- const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+ const int offset = bestmv->row * y_stride + bestmv->col;
uint8_t *y = xd->plane[0].pre[0].buf + offset;
- int rr = ref_mv->as_mv.row;
- int rc = ref_mv->as_mv.col;
- int br = bestmv->as_mv.row * 8;
- int bc = bestmv->as_mv.col * 8;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
int hstep = 4;
- const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
- const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
// central mv
- bestmv->as_mv.row *= 8;
- bestmv->as_mv.col *= 8;
+ bestmv->row *= 8;
+ bestmv->col *= 8;
// calculate central point error
// TODO(yunqingwang): central pointer error was already calculated in full-
@@ -641,7 +642,7 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
tc = bc;
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
@@ -651,11 +652,11 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
tr = br;
tc = bc;
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
@@ -679,10 +680,10 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
#define CHECK_POINT \
{\
- if (this_mv.as_mv.col < x->mv_col_min) continue;\
- if (this_mv.as_mv.col > x->mv_col_max) continue;\
- if (this_mv.as_mv.row < x->mv_row_min) continue;\
- if (this_mv.as_mv.row > x->mv_row_max) continue;\
+ if (this_mv.col < x->mv_col_min) continue;\
+ if (this_mv.col > x->mv_col_max) continue;\
+ if (this_mv.row < x->mv_row_min) continue;\
+ if (this_mv.row > x->mv_row_max) continue;\
}
#define CHECK_BETTER \
@@ -690,7 +691,7 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
if (thissad < bestsad)\
{\
if (use_mvcost) \
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \
mvjsadcost, mvsadcost, \
sad_per_bit);\
if (thissad < bestsad)\
@@ -715,14 +716,14 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
static int vp9_pattern_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
int do_refine,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv, int_mv *best_mv,
+ const MV *center_mv, MV *best_mv,
const int num_candidates[MAX_PATTERN_SCALES],
const MV candidates[MAX_PATTERN_SCALES]
[MAX_PATTERN_CANDIDATES]) {
@@ -735,7 +736,7 @@ static int vp9_pattern_search(MACROBLOCK *x,
int what_stride = x->plane[0].src.stride;
int in_what_stride = xd->plane[0].pre[0].stride;
int br, bc;
- int_mv this_mv;
+ MV this_mv;
int bestsad = INT_MAX;
int thissad;
uint8_t *base_offset;
@@ -748,24 +749,22 @@ static int vp9_pattern_search(MACROBLOCK *x,
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
- fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
- fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+ fcenter_mv.as_mv.row = center_mv->row >> 3;
+ fcenter_mv.as_mv.col = center_mv->col >> 3;
// adjust ref_mv to make sure it is within MV range
- clamp_mv(&ref_mv->as_mv,
- x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
- br = ref_mv->as_mv.row;
- bc = ref_mv->as_mv.col;
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ br = ref_mv->row;
+ bc = ref_mv->col;
// Work out the start point for the search
base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
this_offset = base_offset + (br * in_what_stride) + bc;
- this_mv.as_mv.row = br;
- this_mv.as_mv.col = bc;
- bestsad = vfp->sdf(what, what_stride, this_offset,
- in_what_stride, 0x7fffffff)
- + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ this_mv.row = br;
+ this_mv.col = bc;
+ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Search all possible scales up to the search param around the center point
// pick the scale of the point that is best as the starting scale of
@@ -778,21 +777,21 @@ static int vp9_pattern_search(MACROBLOCK *x,
CHECK_BOUNDS((1 << t))
if (all_in) {
for (i = 0; i < num_candidates[t]; i++) {
- this_mv.as_mv.row = br + candidates[t][i].row;
- this_mv.as_mv.col = bc + candidates[t][i].col;
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_mv.row = br + candidates[t][i].row;
+ this_mv.col = bc + candidates[t][i].col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < num_candidates[t]; i++) {
- this_mv.as_mv.row = br + candidates[t][i].row;
- this_mv.as_mv.col = bc + candidates[t][i].col;
+ this_mv.row = br + candidates[t][i].row;
+ this_mv.col = bc + candidates[t][i].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -822,21 +821,21 @@ static int vp9_pattern_search(MACROBLOCK *x,
CHECK_BOUNDS((1 << s))
if (all_in) {
for (i = 0; i < num_candidates[s]; i++) {
- this_mv.as_mv.row = br + candidates[s][i].row;
- this_mv.as_mv.col = bc + candidates[s][i].col;
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_mv.row = br + candidates[s][i].row;
+ this_mv.col = bc + candidates[s][i].col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < num_candidates[s]; i++) {
- this_mv.as_mv.row = br + candidates[s][i].row;
- this_mv.as_mv.col = bc + candidates[s][i].col;
+ this_mv.row = br + candidates[s][i].row;
+ this_mv.col = bc + candidates[s][i].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -860,25 +859,21 @@ static int vp9_pattern_search(MACROBLOCK *x,
get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
if (all_in) {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- this_mv.as_mv.row = br +
- candidates[s][next_chkpts_indices[i]].row;
- this_mv.as_mv.col = bc +
- candidates[s][next_chkpts_indices[i]].col;
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
+ this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- this_mv.as_mv.row = br +
- candidates[s][next_chkpts_indices[i]].row;
- this_mv.as_mv.col = bc +
- candidates[s][next_chkpts_indices[i]].col;
+ this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
+ this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -905,21 +900,21 @@ static int vp9_pattern_search(MACROBLOCK *x,
CHECK_BOUNDS(1)
if (all_in) {
for (i = 0; i < 4; i++) {
- this_mv.as_mv.row = br + neighbors[i].row;
- this_mv.as_mv.col = bc + neighbors[i].col;
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_mv.row = br + neighbors[i].row;
+ this_mv.col = bc + neighbors[i].col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < 4; i++) {
- this_mv.as_mv.row = br + neighbors[i].row;
- this_mv.as_mv.col = bc + neighbors[i].col;
+ this_mv.row = br + neighbors[i].row;
+ this_mv.col = bc + neighbors[i].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -935,31 +930,32 @@ static int vp9_pattern_search(MACROBLOCK *x,
}
}
- best_mv->as_mv.row = br;
- best_mv->as_mv.col = bc;
+ best_mv->row = br;
+ best_mv->col = bc;
- this_offset = base_offset + (best_mv->as_mv.row * (in_what_stride)) +
- best_mv->as_mv.col;
- this_mv.as_mv.row = best_mv->as_mv.row * 8;
- this_mv.as_mv.col = best_mv->as_mv.col * 8;
+ this_offset = base_offset + (best_mv->row * in_what_stride) +
+ best_mv->col;
+ this_mv.row = best_mv->row * 8;
+ this_mv.col = best_mv->col * 8;
if (bestsad == INT_MAX)
return INT_MAX;
- return
- vfp->vf(what, what_stride, this_offset, in_what_stride,
- (unsigned int *)(&bestsad)) +
- use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit) : 0;
+
+  return vfp->vf(what, what_stride, this_offset, in_what_stride,
+                 (unsigned int *)&bestsad) +
+         (use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost,
+                                   x->mvcost, x->errorperbit)
+                     : 0);
}
int vp9_hex_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv, int_mv *best_mv) {
+ const MV *center_mv, MV *best_mv) {
// First scale has 8-closest points, the rest have 6 points in hex shape
// at increasing scales
static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
@@ -988,14 +984,14 @@ int vp9_hex_search(MACROBLOCK *x,
}
int vp9_bigdia_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv) {
+ const MV *center_mv,
+ MV *best_mv) {
// First scale has 4-closest points, the rest have 8 points in diamond
// shape at increasing scales
static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
@@ -1022,22 +1018,21 @@ int vp9_bigdia_search(MACROBLOCK *x,
{{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
{-512, 512}, {-1024, 0}},
};
- return
- vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
- do_init_search, 0, vfp, use_mvcost,
- center_mv, best_mv,
- bigdia_num_candidates, bigdia_candidates);
+ return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
+ do_init_search, 0, vfp, use_mvcost,
+ center_mv, best_mv,
+ bigdia_num_candidates, bigdia_candidates);
}
int vp9_square_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv) {
+ const MV *center_mv,
+ MV *best_mv) {
// All scales have 8 closest points in square shape
static const int square_num_candidates[MAX_PATTERN_SCALES] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
@@ -1064,11 +1059,10 @@ int vp9_square_search(MACROBLOCK *x,
{{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
{0, 1024}, {-1024, 1024}, {-1024, 0}},
};
- return
- vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
- do_init_search, 0, vfp, use_mvcost,
- center_mv, best_mv,
- square_num_candidates, square_candidates);
+ return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
+ do_init_search, 0, vfp, use_mvcost,
+ center_mv, best_mv,
+ square_num_candidates, square_candidates);
};
#undef CHECK_BOUNDS
@@ -1124,10 +1118,9 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
best_address = in_what;
// Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what,
- in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// search_param determines the length of the initial step and hence the number of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
@@ -1153,7 +1146,7 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1185,7 +1178,7 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1210,8 +1203,9 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
return INT_MAX;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost,
- mvcost, x->errorperbit);
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
}
int vp9_diamond_search_sadx4(MACROBLOCK *x,
@@ -1265,10 +1259,9 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
best_address = in_what;
// Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride,
- in_what, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// search_param determines the length of the initial step and hence the number of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
@@ -1303,7 +1296,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
if (sad_array[t] < bestsad) {
this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
- sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
+ sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (sad_array[t] < bestsad) {
@@ -1327,7 +1320,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1358,7 +1351,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1383,8 +1376,9 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
return INT_MAX;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) + mv_err_cost(&this_mv,
- center_mv, mvjcost, mvcost, x->errorperbit);
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
}
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
@@ -1495,8 +1489,8 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Apply further limits to prevent us looking using vectors that stretch
// beyond the UMV border
@@ -1513,8 +1507,8 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1531,10 +1525,10 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1585,8 +1579,8 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Apply further limits to prevent us looking using vectors that stretch
// beyond the UMV border
@@ -1610,8 +1604,8 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1631,7 +1625,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1652,10 +1646,10 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1708,8 +1702,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Apply further limits to prevent us looking using vectors that stretch
// beyond the UMV border
@@ -1733,8 +1727,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1759,7 +1753,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1780,8 +1774,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1800,10 +1794,10 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1834,8 +1828,10 @@ int vp9_refining_search_sad_c(MACROBLOCK *x,
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address,
+ in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -1852,8 +1848,8 @@ int vp9_refining_search_sad_c(MACROBLOCK *x,
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1876,10 +1872,10 @@ int vp9_refining_search_sad_c(MACROBLOCK *x,
this_mv.as_mv.col = ref_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1911,8 +1907,10 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address,
+ in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -1935,8 +1933,8 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
if (sad_array[j] < bestsad) {
this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
- sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
+ sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (sad_array[j] < bestsad) {
bestsad = sad_array[j];
@@ -1957,8 +1955,8 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1982,10 +1980,10 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
this_mv.as_mv.col = ref_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -2025,7 +2023,8 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
/* Get compound pred by averaging two pred blocks. */
bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
second_pred, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -2048,9 +2047,8 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
-
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_site = j;
@@ -2075,10 +2073,10 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
if (bestsad < INT_MAX) {
// FIXME(rbultje, yunqing): add full-pixel averaging variance functions
// so we don't have to use the subpixel with xoff=0,yoff=0 here.
- return fn_ptr->svaf(best_address, in_what_stride, 0, 0,
- what, what_stride, (unsigned int *)(&thissad),
- second_pred) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride,
+ (unsigned int *)(&thissad), second_pred) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
} else {
return INT_MAX;
}
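The recurring &this_mv to &this_mv.as_mv edits above all stem from one refactor: the cost helpers (mv_err_cost, mvsad_err_cost, vp9_mv_bit_cost) now take a plain MV rather than the int_mv union wrapping it. The two views relate as in this sketch, which matches the standard libvpx layout:

#include <stdint.h>

typedef struct mv {
  int16_t row;
  int16_t col;
} MV;

typedef union int_mv {
  uint32_t as_int;  /* packed view: one-shot compare/assign of both parts */
  MV as_mv;         /* structured view: what the cost functions consume */
} int_mv;

/* Callers that only hold an MV (no union) can now share the same cost
 * helpers, which is what the header changes below make official. */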
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 3598fa09a..77c157c5b 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -22,10 +22,14 @@
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
// Maximum size of the first step in full pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
+// Allowed motion vector pixel distance outside image border
+// for Block_16x16
+#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
+
void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv);
-int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
- int *mvcost[2], int weight);
+int vp9_mv_bit_cost(const MV *mv, const MV *ref,
+ const int *mvjcost, int *mvcost[2], int weight);
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
@@ -40,37 +44,36 @@ int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x,
int_mv *ref_mv, int_mv *dst_mv);
int vp9_hex_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int error_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vf,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv);
+ const MV *center_mv,
+ MV *best_mv);
int vp9_bigdia_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int error_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vf,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv);
+ const MV *center_mv,
+ MV *best_mv);
int vp9_square_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int error_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vf,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv);
+ const MV *center_mv,
+ MV *best_mv);
typedef int (fractional_mv_step_fp) (
MACROBLOCK *x,
- int_mv *bestmv,
- int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
@@ -84,7 +87,7 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
typedef int (fractional_mv_step_comp_fp) (
MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
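BORDER_MV_PIXELS_B16, added above, bounds how far a 16x16 block's motion search may reach past the frame edge: the block's 16 pixels plus VP9_INTERP_EXTEND pixels of filter support. A hypothetical illustration of the kind of range clamp it feeds (the real logic lives in vp9_clamp_mv_min_max(), whose body is not part of this change):

/* Illustration only; constants and limits are assumptions. */
#define VP9_INTERP_EXTEND_SKETCH 4
#define BORDER_MV_PIXELS_B16_SKETCH (16 + VP9_INTERP_EXTEND_SKETCH)

static void clamp_fullpel_col_range(int mb_col, int mb_cols,
                                    int *col_min, int *col_max) {
  /* Permit vectors up to BORDER_MV_PIXELS_B16 pixels beyond either
   * horizontal frame edge for this 16x16 block. */
  *col_min = -(mb_col * 16 + BORDER_MV_PIXELS_B16_SKETCH);
  *col_max = (mb_cols - 1 - mb_col) * 16 + BORDER_MV_PIXELS_B16_SKETCH;
}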
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 9b20dafde..a106014f8 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -61,16 +61,11 @@
#define INTRA_ZBIN_BOOST 0
typedef struct {
- nmv_context nmvc;
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
vp9_prob segment_pred_probs[PREDICTION_PROBS];
- vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
- vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS];
- vp9_prob single_ref_prob[REF_CONTEXTS][2];
- vp9_prob comp_ref_prob[REF_CONTEXTS];
unsigned char *last_frame_seg_map_copy;
@@ -79,20 +74,8 @@ typedef struct {
// 0 = ZERO_MV, MV
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
- vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
-
- vp9_prob y_mode_prob[4][INTRA_MODES - 1];
- vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
- vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
-
- vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
- [SWITCHABLE_FILTERS - 1];
-
int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
- vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
-
- struct tx_probs tx_probs;
- vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
+ FRAME_CONTEXT fc;
} CODING_CONTEXT;
typedef struct {
@@ -649,7 +632,7 @@ typedef struct VP9_COMP {
unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1]
[SWITCHABLE_FILTERS];
- unsigned int txfm_stepdown_count[TX_SIZES];
+ unsigned int tx_stepdown_count[TX_SIZES];
int initial_width;
int initial_height;
@@ -712,9 +695,8 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
void vp9_set_speed_features(VP9_COMP *cpi);
-extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest);
+int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
-extern void vp9_alloc_compressor_data(VP9_COMP *cpi);
+void vp9_alloc_compressor_data(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_ONYX_INT_H_
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 6c8b2a04b..05e893ee9 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -337,10 +337,10 @@ void vp9_frame_init_quantizer(VP9_COMP *cpi) {
vp9_mb_init_quantizer(cpi, &cpi->mb);
}
-void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) {
+void vp9_set_quantizer(struct VP9_COMP *cpi, int q) {
VP9_COMMON *cm = &cpi->common;
- cm->base_qindex = Q;
+ cm->base_qindex = q;
// if any of the delta_q values are changing update flag will
// have to be set.
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 3229eaad2..3191c49ae 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -30,14 +30,14 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
int y_blocks);
struct VP9_COMP;
-extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q);
+void vp9_set_quantizer(struct VP9_COMP *cpi, int q);
-extern void vp9_frame_init_quantizer(struct VP9_COMP *cpi);
+void vp9_frame_init_quantizer(struct VP9_COMP *cpi);
-extern void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x);
+void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x);
-extern void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x);
+void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x);
-extern void vp9_init_quantizer(struct VP9_COMP *cpi);
+void vp9_init_quantizer(struct VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_QUANTIZE_H_
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 2d12ba94f..bbcad172d 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -76,35 +76,19 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
// restored with a call to vp9_restore_coding_context. These functions are
// intended for use in a re-code loop in vp9_compress_frame where the
// quantizer value is adjusted between loop iterations.
-
- cc->nmvc = cm->fc.nmvc;
vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost);
vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts);
vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp);
- vp9_copy(cc->inter_mode_probs, cm->fc.inter_mode_probs);
-
- vp9_copy(cc->y_mode_prob, cm->fc.y_mode_prob);
- vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob);
- vp9_copy(cc->partition_prob, cm->fc.partition_prob);
-
vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
- vp9_copy(cc->intra_inter_prob, cm->fc.intra_inter_prob);
- vp9_copy(cc->comp_inter_prob, cm->fc.comp_inter_prob);
- vp9_copy(cc->single_ref_prob, cm->fc.single_ref_prob);
- vp9_copy(cc->comp_ref_prob, cm->fc.comp_ref_prob);
-
vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy,
cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols));
vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
- vp9_copy(cc->coef_probs, cm->fc.coef_probs);
- vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
- cc->tx_probs = cm->fc.tx_probs;
- vp9_copy(cc->mbskip_probs, cm->fc.mbskip_probs);
+ cc->fc = cm->fc;
}
void vp9_restore_coding_context(VP9_COMP *cpi) {
@@ -113,25 +97,12 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
// Restore key state variables to the snapshot state stored in the
// previous call to vp9_save_coding_context.
-
- cm->fc.nmvc = cc->nmvc;
vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost);
vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts);
vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp);
- vp9_copy(cm->fc.inter_mode_probs, cc->inter_mode_probs);
-
- vp9_copy(cm->fc.y_mode_prob, cc->y_mode_prob);
- vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob);
- vp9_copy(cm->fc.partition_prob, cc->partition_prob);
-
vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
- vp9_copy(cm->fc.intra_inter_prob, cc->intra_inter_prob);
- vp9_copy(cm->fc.comp_inter_prob, cc->comp_inter_prob);
- vp9_copy(cm->fc.single_ref_prob, cc->single_ref_prob);
- vp9_copy(cm->fc.comp_ref_prob, cc->comp_ref_prob);
-
vpx_memcpy(cm->last_frame_seg_map,
cpi->coding_context.last_frame_seg_map_copy,
(cm->mi_rows * cm->mi_cols));
@@ -139,10 +110,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
- vp9_copy(cm->fc.coef_probs, cc->coef_probs);
- vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
- cm->fc.tx_probs = cc->tx_probs;
- vp9_copy(cm->fc.mbskip_probs, cc->mbskip_probs);
+ cm->fc = cc->fc;
}
void vp9_setup_key_frame(VP9_COMP *cpi) {
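The save/restore pair above collapses a dozen vp9_copy() calls into cc->fc = cm->fc because CODING_CONTEXT now embeds a complete FRAME_CONTEXT. Struct assignment copies every member, nested arrays included, so the one-liner is equivalent to the field-by-field version as long as FRAME_CONTEXT holds no pointers; a minimal sketch of the pattern with stand-in members:

typedef struct {
  int probs[4][16];   /* stand-ins for the real probability tables */
  int counts[8];
} FRAME_CONTEXT_SKETCH;

typedef struct {
  FRAME_CONTEXT_SKETCH fc;  /* snapshot taken between recode iterations */
} CODING_CONTEXT_SKETCH;

static void save_ctx(CODING_CONTEXT_SKETCH *cc,
                     const FRAME_CONTEXT_SKETCH *fc) {
  cc->fc = *fc;  /* replaces many vp9_copy(cc->member, fc->member) calls */
}

static void restore_ctx(FRAME_CONTEXT_SKETCH *fc,
                        const CODING_CONTEXT_SKETCH *cc) {
  *fc = cc->fc;
}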
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 473317605..ddda7130c 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -32,8 +32,8 @@ int vp9_pick_frame_size(VP9_COMP *cpi);
double vp9_convert_qindex_to_q(int qindex);
int vp9_gfboost_qadjust(int qindex);
-extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
- double correction_factor);
+int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+ double correction_factor);
void vp9_setup_inter_frame(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_RATECTRL_H_
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 3ef3eeeeb..83cd61226 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -186,6 +186,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
// cpi->common.refresh_alt_ref_frame)
qindex = clamp(qindex, 0, MAXQ);
+ cpi->RDDIV = 100;
cpi->RDMULT = compute_rd_mult(qindex);
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
if (cpi->twopass.next_iiratio > 31)
@@ -204,42 +205,18 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
if (q < 8)
q = 8;
- if (cpi->RDMULT > 1000) {
- cpi->RDDIV = 1;
- cpi->RDMULT /= 100;
+ for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
+ for (i = 0; i < MAX_MODES; i++) {
+ // Thresholds here seem unnecessarily harsh but are fine given the actual
+ // range of values used for cpi->sf.thresh_mult[]
+ int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
- for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
- for (i = 0; i < MAX_MODES; ++i) {
- // Threshold here seem unecessarily harsh but fine given actual
- // range of values used for cpi->sf.thresh_mult[]
- int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
-
- // *4 relates to the scaling of rd_thresh_block_size_factor[]
- if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
- cpi->rd_threshes[bsize][i] =
- cpi->sf.thresh_mult[i] * q *
- rd_thresh_block_size_factor[bsize] / (4 * 100);
- } else {
- cpi->rd_threshes[bsize][i] = INT_MAX;
- }
- }
- }
- } else {
- cpi->RDDIV = 100;
-
- for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
- for (i = 0; i < MAX_MODES; i++) {
- // Threshold here seem unecessarily harsh but fine given actual
- // range of values used for cpi->sf.thresh_mult[]
- int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
-
- if (cpi->sf.thresh_mult[i] < thresh_max) {
- cpi->rd_threshes[bsize][i] =
+ if (cpi->sf.thresh_mult[i] < thresh_max) {
+ cpi->rd_threshes[bsize][i] =
cpi->sf.thresh_mult[i] * q *
rd_thresh_block_size_factor[bsize] / 4;
- } else {
- cpi->rd_threshes[bsize][i] = INT_MAX;
- }
+ } else {
+ cpi->rd_threshes[bsize][i] = INT_MAX;
}
}
}
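With RDDIV pinned to 100 unconditionally, the two near-identical threshold loops above merge into one. The thresh_max guard exists because thresh_mult * q * rd_thresh_block_size_factor[bsize] can overflow int; comparing thresh_mult against INT_MAX divided by the other factors first keeps the multiply safe. The pattern in isolation:

#include <limits.h>

/* q and factor are positive in the encoder; the /4 matches the
 * rd_thresh_block_size_factor[] scaling noted in the diff. */
static int rd_thresh(int thresh_mult, int q, int factor) {
  const int thresh_max = INT_MAX / (q * factor);
  if (thresh_mult < thresh_max)
    return thresh_mult * q * factor / 4;
  else
    return INT_MAX;  /* would overflow: mode is effectively disabled */
}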
@@ -554,9 +531,13 @@ struct rdcost_block_args {
TX_SIZE tx_size;
int bw;
int bh;
- int rate;
- int64_t dist;
- int64_t sse;
+ int rate[256];
+ int64_t dist[256];
+ int64_t sse[256];
+ int this_rate;
+ int64_t this_dist;
+ int64_t this_sse;
+ int64_t this_rd;
int64_t best_rd;
int skip;
const int16_t *scan, *nb;
@@ -573,17 +554,17 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
int shift = args->tx_size == TX_32X32 ? 0 : 2;
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
+ args->dist[block] = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse) >> shift;
- args->sse += this_sse >> shift;
+ args->sse[block] = this_sse >> shift;
if (x->skip_encode &&
xd->this_mi->mbmi.ref_frame[0] == INTRA_FRAME) {
// TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >> shift;
- args->dist += p;
- args->sse += p;
+ args->dist[block] = p;
+ args->sse[block] = p;
}
}
@@ -594,10 +575,10 @@ static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
int x_idx, y_idx;
txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
- args->rate += cost_coeffs(args->x, plane, block,
- args->t_above + x_idx,
- args->t_left + y_idx, args->tx_size,
- args->scan, args->nb);
+ args->rate[block] = cost_coeffs(args->x, plane, block,
+ args->t_above + x_idx,
+ args->t_left + y_idx, args->tx_size,
+ args->scan, args->nb);
}
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -610,16 +591,6 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
if (args->skip)
return;
- rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
- rd = MIN(rd1, rd2);
- if (rd > args->best_rd) {
- args->skip = 1;
- args->rate = INT_MAX;
- args->dist = INT64_MAX;
- args->sse = INT64_MAX;
- return;
- }
if (!is_inter_block(&xd->this_mi->mbmi))
vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
@@ -628,6 +599,56 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
dist_block(plane, block, tx_size, args);
rate_block(plane, block, plane_bsize, tx_size, args);
+ rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);
+ rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);
+
+ // TODO(jingning): temporarily enabled only for luma component
+ rd = MIN(rd1, rd2);
+ if (plane == 0)
+ x->zcoeff_blk[tx_size][block] = rd1 > rd2;
+
+ args->this_rate += args->rate[block];
+ args->this_dist += args->dist[block];
+ args->this_sse += args->sse[block];
+ args->this_rd += rd;
+
+ if (args->this_rd > args->best_rd) {
+ args->skip = 1;
+ return;
+ }
+}
+
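The new zcoeff_blk flag records, per luma transform block, whether zeroing the coefficients outright (rate 0, full sse as distortion) costs less in RD terms than coding them; later stages can use that to skip work. The exact RDCOST scaling is not shown in this diff, so the comparison below uses a generic weighted cost purely for illustration:

#include <stdint.h>

/* Illustrative RD cost; the real RDCOST macro's scaling differs. */
static int64_t rdcost_sketch(int rdmult, int rddiv, int rate, int64_t dist) {
  return ((int64_t)rate * rdmult) / rddiv + dist;
}

/* Returns 1 when zeroing the block beats coding it: rd1 > rd2 above. */
static int zero_coeff_is_better(int rdmult, int rddiv,
                                int rate, int64_t dist, int64_t sse) {
  const int64_t rd_coded  = rdcost_sketch(rdmult, rddiv, rate, dist);
  const int64_t rd_zeroed = rdcost_sketch(rdmult, rddiv, 0, sse);
  return rd_coded > rd_zeroed;
}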
+void vp9_get_entropy_contexts(TX_SIZE tx_size,
+ ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
+ const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
+ int num_4x4_w, int num_4x4_h) {
+ int i;
+ switch (tx_size) {
+ case TX_4X4:
+ vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
+ vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
+ break;
+ case TX_8X8:
+ for (i = 0; i < num_4x4_w; i += 2)
+ t_above[i] = !!*(const uint16_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 2)
+ t_left[i] = !!*(const uint16_t *)&left[i];
+ break;
+ case TX_16X16:
+ for (i = 0; i < num_4x4_w; i += 4)
+ t_above[i] = !!*(const uint32_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 4)
+ t_left[i] = !!*(const uint32_t *)&left[i];
+ break;
+ case TX_32X32:
+ for (i = 0; i < num_4x4_w; i += 8)
+ t_above[i] = !!*(const uint64_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 8)
+ t_left[i] = !!*(const uint64_t *)&left[i];
+ break;
+ default:
+ assert(!"Invalid transform size.");
+ }
}
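vp9_get_entropy_contexts, factored out above, condenses runs of per-4x4 context bytes into one flag per transform block: a single wide load plus !! asks 'is any of these 2/4/8 bytes nonzero?'. Assuming ENTROPY_CONTEXT is one byte, as the casts require, the TX_8X8 case is equivalent to this scalar form:

#include <stdint.h>

typedef uint8_t ENTROPY_CONTEXT_SKETCH;  /* one byte, as the casts imply */

static ENTROPY_CONTEXT_SKETCH pair_scalar(const ENTROPY_CONTEXT_SKETCH *a) {
  return (a[0] | a[1]) ? 1 : 0;          /* any nonzero byte? */
}

static ENTROPY_CONTEXT_SKETCH pair_wide(const ENTROPY_CONTEXT_SKETCH *a) {
  return !!*(const uint16_t *)a;         /* same test, one 16-bit load */
}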
static void txfm_rd_in_plane(MACROBLOCK *x,
@@ -638,45 +659,33 @@ static void txfm_rd_in_plane(MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
- int i;
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
+
struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size,
- num_4x4_blocks_wide, num_4x4_blocks_high,
- 0, 0, 0, ref_best_rd, 0 };
+ num_4x4_w, num_4x4_h,
+ { 0 }, { 0 }, { 0 },
+ 0, 0, 0, 0, ref_best_rd, 0 };
if (plane == 0)
xd->this_mi->mbmi.tx_size = tx_size;
+ vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left,
+ pd->above_context, pd->left_context,
+ num_4x4_w, num_4x4_h);
switch (tx_size) {
case TX_4X4:
- vpx_memcpy(&args.t_above, pd->above_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
- vpx_memcpy(&args.t_left, pd->left_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0),
&args.scan, &args.nb);
break;
case TX_8X8:
- for (i = 0; i < num_4x4_blocks_wide; i += 2)
- args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_blocks_high; i += 2)
- args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd),
&args.scan, &args.nb);
break;
case TX_16X16:
- for (i = 0; i < num_4x4_blocks_wide; i += 4)
- args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_blocks_high; i += 4)
- args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd),
&args.scan, &args.nb);
break;
case TX_32X32:
- for (i = 0; i < num_4x4_blocks_wide; i += 8)
- args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_blocks_high; i += 8)
- args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
args.scan = vp9_default_scan_32x32;
args.nb = vp9_default_scan_32x32_neighbors;
break;
@@ -685,10 +694,17 @@ static void txfm_rd_in_plane(MACROBLOCK *x,
}
foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args);
- *distortion = args.dist;
- *rate = args.rate;
- *sse = args.sse;
- *skippable = vp9_is_skippable_in_plane(xd, bsize, plane) && (!args.skip);
+ if (args.skip) {
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ } else {
+ *distortion = args.this_dist;
+ *rate = args.this_rate;
+ *sse = args.this_sse;
+ *skippable = vp9_is_skippable_in_plane(xd, bsize, plane);
+ }
}
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
@@ -696,15 +712,15 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
- const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
(cm->tx_mode == ALLOW_32X32 ||
cm->tx_mode == TX_MODE_SELECT)) {
mbmi->tx_size = TX_32X32;
- } else if (max_txfm_size >= TX_16X16 &&
+ } else if (max_tx_size >= TX_16X16 &&
(cm->tx_mode == ALLOW_16X16 ||
cm->tx_mode == ALLOW_32X32 ||
cm->tx_mode == TX_MODE_SELECT)) {
@@ -717,7 +733,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
txfm_rd_in_plane(x, rate, distortion, skip,
&sse[mbmi->tx_size], ref_best_rd, 0, bs,
mbmi->tx_size);
- cpi->txfm_stepdown_count[0]++;
+ cpi->tx_stepdown_count[0]++;
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
@@ -811,15 +827,15 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
rd[TX_32X32][1] < rd[TX_16X16][1] &&
rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[0]++;
+ cpi->tx_stepdown_count[0]++;
} else if (max_tx_size >= TX_16X16 &&
rd[TX_16X16][1] < rd[TX_8X8][1] &&
rd[TX_16X16][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
} else {
- cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
}
}
@@ -829,7 +845,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
int *s, int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
- const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
@@ -845,9 +861,9 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
// for (n = TX_4X4; n <= max_txfm_size; n++)
// r[n][0] = (r[n][0] * scale_r[n]);
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
r[n][1] = r[n][0];
- for (m = 0; m <= n - (n == max_txfm_size); m++) {
+ for (m = 0; m <= n - (n == max_tx_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
else
@@ -859,7 +875,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
if (s[n]) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
} else {
@@ -867,19 +883,19 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
}
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
}
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
(cm->tx_mode == ALLOW_32X32 ||
(cm->tx_mode == TX_MODE_SELECT &&
rd[TX_32X32][1] <= rd[TX_16X16][1] &&
rd[TX_32X32][1] <= rd[TX_8X8][1] &&
rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
mbmi->tx_size = TX_32X32;
- } else if (max_txfm_size >= TX_16X16 &&
+ } else if (max_tx_size >= TX_16X16 &&
(cm->tx_mode == ALLOW_16X16 ||
cm->tx_mode == ALLOW_32X32 ||
(cm->tx_mode == TX_MODE_SELECT &&
@@ -901,19 +917,19 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size],
ref_best_rd, 0, bs, mbmi->tx_size);
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
rd[TX_32X32][1] <= rd[TX_16X16][1] &&
rd[TX_32X32][1] <= rd[TX_8X8][1] &&
rd[TX_32X32][1] <= rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[0]++;
- } else if (max_txfm_size >= TX_16X16 &&
+ cpi->tx_stepdown_count[0]++;
+ } else if (max_tx_size >= TX_16X16 &&
rd[TX_16X16][1] <= rd[TX_8X8][1] &&
rd[TX_16X16][1] <= rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
} else {
- cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
}
}
@@ -1058,6 +1074,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
int64_t ssz;
const int16_t *scan;
+ const int16_t *nb;
uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
@@ -1083,10 +1100,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
x->quantize_b_4x4(x, block, tx_type, 16);
}
- scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
+ get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block),
+ &scan, &nb);
ratey += cost_coeffs(x, 0, block,
- tempa + idx, templ + idy, TX_4X4, scan,
- vp9_get_coef_neighbors_handle(scan));
+ tempa + idx, templ + idy, TX_4X4, scan, nb);
distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &ssz) >> 2;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -1458,11 +1475,12 @@ static int labels2mode(MACROBLOCK *x, int i,
switch (m = this_mode) {
case NEWMV:
this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
- thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
- 102);
+ thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
+ mvjcost, mvcost, 102);
if (has_second_rf) {
this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
- thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
+ thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
+ &second_best_ref_mv->as_mv,
mvjcost, mvcost, 102);
}
break;
@@ -1796,20 +1814,23 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
// adjust src pointer for this block
mi_buf_shift(x, i);
if (cpi->sf.search_method == HEX) {
- bestsme = vp9_hex_search(x, &mvp_full,
+ bestsme = vp9_hex_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1, v_fn_ptr, 1,
- bsi->ref_mv, &mode_mv[NEWMV]);
+ &bsi->ref_mv->as_mv,
+ &mode_mv[NEWMV].as_mv);
} else if (cpi->sf.search_method == SQUARE) {
- bestsme = vp9_square_search(x, &mvp_full,
+ bestsme = vp9_square_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1, v_fn_ptr, 1,
- bsi->ref_mv, &mode_mv[NEWMV]);
+ &bsi->ref_mv->as_mv,
+ &mode_mv[NEWMV].as_mv);
} else if (cpi->sf.search_method == BIGDIA) {
- bestsme = vp9_bigdia_search(x, &mvp_full,
+ bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1, v_fn_ptr, 1,
- bsi->ref_mv, &mode_mv[NEWMV]);
+ &bsi->ref_mv->as_mv,
+ &mode_mv[NEWMV].as_mv);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
@@ -1840,8 +1861,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int distortion;
unsigned int sse;
- cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
- bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ cpi->find_fractional_mv_step(x,
+ &mode_mv[NEWMV].as_mv,
+ &bsi->ref_mv->as_mv,
+ x->errorperbit, v_fn_ptr,
0, cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&distortion, &sse);
@@ -2220,11 +2243,12 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
- // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
- // doesn't actually work this way
- memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
- memcpy(ctx->best_filter_diff, best_filter_diff,
- sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
+ vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size],
+ sizeof(ctx->zcoeff_blk));
+
+ vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
+ vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
+ sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
}
static void setup_pred_block(const MACROBLOCKD *xd,
@@ -2403,23 +2427,23 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
if (cpi->sf.search_method == HEX) {
- bestsme = vp9_hex_search(x, &mvp_full,
+ bestsme = vp9_hex_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1,
&cpi->fn_ptr[block_size], 1,
- &ref_mv, tmp_mv);
+ &ref_mv.as_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == SQUARE) {
- bestsme = vp9_square_search(x, &mvp_full,
+ bestsme = vp9_square_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1,
&cpi->fn_ptr[block_size], 1,
- &ref_mv, tmp_mv);
+ &ref_mv.as_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == BIGDIA) {
- bestsme = vp9_bigdia_search(x, &mvp_full,
+ bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1,
&cpi->fn_ptr[block_size], 1,
- &ref_mv, tmp_mv);
+ &ref_mv.as_mv, &tmp_mv->as_mv);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 1,
@@ -2435,16 +2459,15 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
+ cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
x->errorperbit,
&cpi->fn_ptr[block_size],
0, cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&dis, &sse);
}
- *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
- x->nmvjointcost, x->mvcost,
- 96);
+ *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
+ x->nmvjointcost, x->mvcost, 96);
if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
x->pred_mv[ref].as_int = tmp_mv->as_int;
@@ -2570,8 +2593,8 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int sse;
bestsme = cpi->find_fractional_mv_step_comp(
- x, &tmp_mv,
- &ref_mv[id],
+ x, &tmp_mv.as_mv,
+ &ref_mv[id].as_mv,
x->errorperbit,
&cpi->fn_ptr[block_size],
0, cpi->sf.subpel_iters_per_step,
@@ -2603,11 +2626,11 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[1] = backup_second_yv12[i];
}
- *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
- &mbmi->ref_mvs[refs[0]][0],
+ *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &mbmi->ref_mvs[refs[0]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
- *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
- &mbmi->ref_mvs[refs[1]][0],
+ *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &mbmi->ref_mvs[refs[1]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
vpx_free(second_pred);
@@ -2630,7 +2653,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
- const int is_comp_pred = (mbmi->ref_frame[1] > 0);
+ const int is_comp_pred = has_second_ref(mbmi);
const int num_refs = is_comp_pred ? 2 : 1;
const int this_mode = mbmi->mode;
int_mv *frame_mv = mode_mv[this_mode];
@@ -2659,11 +2682,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
joint_motion_search(cpi, x, bsize, frame_mv,
mi_row, mi_col, single_newmv, &rate_mv);
} else {
- rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
- &mbmi->ref_mvs[refs[0]][0],
+ rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &mbmi->ref_mvs[refs[0]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
- rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
- &mbmi->ref_mvs[refs[1]][0],
+ rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &mbmi->ref_mvs[refs[1]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
}
if (frame_mv[refs[0]].as_int == INVALID_MV ||
@@ -3071,8 +3094,12 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
*returndist = dist_y + dist_uv;
if (cpi->sf.tx_size_search_method == USE_FULL_RD)
- for (i = 0; i < TX_MODES; i++)
- ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
+ for (i = 0; i < TX_MODES; i++) {
+ if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
+ ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
+ else
+ ctx->tx_rd_diff[i] = 0;
+ }
}
ctx->mic = *xd->this_mi;
@@ -3139,8 +3166,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
int best_skip2 = 0;
+ unsigned char best_zcoeff_blk[256] = { 0 };
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
+ vpx_memset(x->zcoeff_blk, 0, sizeof(x->zcoeff_blk));
+ vpx_memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk));
for (i = 0; i < 4; i++) {
int j;
@@ -3812,6 +3842,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_mbmode = *mbmi;
best_skip2 = this_skip2;
best_partition = *x->partition_info;
+ vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
+ sizeof(best_zcoeff_blk));
if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)
for (i = 0; i < 4; i++)
@@ -3993,13 +4025,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
best_mbmode.sb_type < BLOCK_8X8) {
for (i = 0; i < 4; i++)
- xd->this_mi->bmi[i].as_mv[0].as_int =
- best_bmodes[i].as_mv[0].as_int;
+ xd->this_mi->bmi[i].as_mv[0].as_int = best_bmodes[i].as_mv[0].as_int;
- if (mbmi->ref_frame[1] > 0)
+ if (has_second_ref(mbmi))
for (i = 0; i < 4; i++)
- xd->this_mi->bmi[i].as_mv[1].as_int =
- best_bmodes[i].as_mv[1].as_int;
+ xd->this_mi->bmi[i].as_mv[1].as_int = best_bmodes[i].as_mv[1].as_int;
*x->partition_info = best_partition;
@@ -4007,6 +4037,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
}
+ vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,
+ sizeof(best_zcoeff_blk));
+
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
best_pred_diff[i] = INT_MIN;
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index eba7df907..9796c0d7c 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -33,4 +33,9 @@ void vp9_init_me_luts();
void vp9_set_mbmode_and_mvs(MACROBLOCK *x,
MB_PREDICTION_MODE mb, int_mv *mv);
+void vp9_get_entropy_contexts(TX_SIZE tx_size,
+ ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
+ const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
+ int num_4x4_w, int num_4x4_h);
+
#endif // VP9_ENCODER_VP9_RDOPT_H_
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 10655e8a7..874b71ab1 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -130,6 +130,8 @@ static void count_segs(VP9_COMP *cpi, MODE_INFO **mi_8x8,
return;
segment_id = mi_8x8[0]->mbmi.segment_id;
+ xd->mi_8x8 = mi_8x8;
+ xd->this_mi = mi_8x8[0];
set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 63826eea5..1768b5bed 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -154,10 +154,10 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
// TODO Check that the 16x16 vf & sdf are selected here
// Ignore mv costing by sending NULL pointer instead of cost arrays
ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0];
- bestsme = vp9_hex_search(x, &best_ref_mv1_full,
+ bestsme = vp9_hex_search(x, &best_ref_mv1_full.as_mv,
step_param, sadpb, 1,
&cpi->fn_ptr[BLOCK_16X16],
- 0, &best_ref_mv1, ref_mv);
+ 0, &best_ref_mv1.as_mv, &ref_mv->as_mv);
#if ALT_REF_SUBPEL_ENABLED
// Try sub-pixel MC?
@@ -166,8 +166,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
int distortion;
unsigned int sse;
// Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(x, ref_mv,
- &best_ref_mv1,
+ bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv,
+ &best_ref_mv1.as_mv,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
0, cpi->sf.subpel_iters_per_step,
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index a59f6db88..7c14c18aa 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -137,8 +137,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
pt = get_entropy_context(tx_size, pd->above_context + aoff,
pd->left_context + loff);
- get_scan_and_band(xd, tx_size, type, block, &scan, &band_translate);
- nb = vp9_get_coef_neighbors_handle(scan);
+ get_scan_and_band(xd, tx_size, type, block, &scan, &nb, &band_translate);
c = 0;
do {
const int band = get_coef_band(band_translate, c);
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index 155ba8a3e..991ef4d29 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -8,13 +8,150 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp9_rtcd.h"
-#include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_filter.h"
-#include "vp9/common/vp9_subpelvar.h"
-#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
-#include "./vp9_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_convolve.h"
+#include "vp9/common/vp9_filter.h"
+#include "vp9/encoder/vp9_variance.h"
+
+static void variance(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ int w,
+ int h,
+ unsigned int *sse,
+ int *sum) {
+ int i, j;
+ int diff;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ diff = src_ptr[j] - ref_ptr[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ src_ptr += source_stride;
+ ref_ptr += recon_stride;
+ }
+}
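variance() only accumulates the sum and sum of squares; callers turn those into an actual variance with the identity var = E[x^2] - E[x]^2, i.e. sse - sum^2/N for an N-pixel block. A sketch of the typical wrapper for a 16x16 block, assuming the static variance() helper above (the wrapper name and signature are illustrative, not part of this hunk):

static unsigned int variance16x16_sketch(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         unsigned int *sse) {
  int sum;
  variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);
  /* 16 * 16 = 256 pixels, so sum^2 / N is a shift by 8. */
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 8);
}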
+
+/****************************************************************************
+ *
+ * ROUTINE : var_filter_block2d_bil_first_pass
+ *
+ * INPUTS : uint8_t *src_ptr : Pointer to source block.
+ * uint32_t src_pixels_per_line : Stride of input block.
+ * uint32_t pixel_step : Offset between filter input
+ * samples (see notes).
+ * uint32_t output_height : Input block height.
+ * uint32_t output_width : Input block width.
+ * int16_t *vp9_filter : Array of 2 bi-linear filter
+ * taps.
+ *
+ * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement first-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Produces uint16_t output to retain precision for the next pass.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ * pixel_step defines whether the filter is applied
+ * horizontally (pixel_step=1) or vertically (pixel_step=
+ * stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ *
+ ****************************************************************************/
+static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
+ uint16_t *output_ptr,
+ unsigned int src_pixels_per_line,
+ int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const int16_t *vp9_filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
+ (int)src_ptr[pixel_step] * vp9_filter[1],
+ FILTER_BITS);
+
+ src_ptr++;
+ }
+
+ // Next row...
+ src_ptr += src_pixels_per_line - output_width;
+ output_ptr += output_width;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : var_filter_block2d_bil_second_pass
+ *
+ * INPUTS : uint16_t *src_ptr : Pointer to source block.
+ * uint32_t src_pixels_per_line : Stride of input block.
+ * uint32_t pixel_step : Offset between filter input
+ * samples (see notes).
+ * uint32_t output_height : Input block height.
+ * uint32_t output_width : Input block width.
+ * int16_t *vp9_filter : Array of 2 bi-linear filter
+ * taps.
+ *
+ * OUTPUTS : uint8_t *output_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement second-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Requires 16-bit input as produced by
+ * var_filter_block2d_bil_first_pass.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ * pixel_step defines whether the filter is applied
+ * horizontally (pixel_step=1) or vertically (pixel_step=
+ * stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ *
+ ****************************************************************************/
+static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
+ uint8_t *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const int16_t *vp9_filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
+ (int)src_ptr[pixel_step] * vp9_filter[1],
+ FILTER_BITS);
+ src_ptr++;
+ }
+
+ src_ptr += src_pixels_per_line - output_width;
+ output_ptr += output_width;
+ }
+}
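The two passes compose into a 2-D separable bilinear filter for sub-pel variance: the first pass runs horizontally (pixel_step = 1) over one extra row into a 16-bit scratch buffer, the second runs vertically (pixel_step = output width) back down to 8-bit pixels, and the result is compared against the reference. A sketch for a 16x16 block, with the 2-tap filter tables passed in as assumptions:

static unsigned int subpel_variance16x16_sketch(
    const uint8_t *src, int src_stride,
    const int16_t *hfilter, const int16_t *vfilter,  /* 2-tap each */
    const uint8_t *ref, int ref_stride, unsigned int *sse) {
  uint16_t fdata3[17 * 16];  /* 17 rows: one extra for the vertical taps */
  uint8_t temp2[16 * 16];
  int sum;

  var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1,
                                    17, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16,
                                     16, 16, vfilter);

  variance(temp2, 16, ref, ref_stride, 16, 16, sse, &sum);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 8);
}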
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
unsigned int i, sum = 0;
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 687fb487c..7d040f7db 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -48,7 +48,6 @@ VP9_COMMON_SRCS-yes += common/vp9_reconintra.h
VP9_COMMON_SRCS-yes += common/vp9_rtcd.c
VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh
VP9_COMMON_SRCS-yes += common/vp9_sadmxn.h
-VP9_COMMON_SRCS-yes += common/vp9_subpelvar.h
VP9_COMMON_SRCS-yes += common/vp9_scale.h
VP9_COMMON_SRCS-yes += common/vp9_scale.c
VP9_COMMON_SRCS-yes += common/vp9_seg_common.h
@@ -92,7 +91,6 @@ VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
@@ -109,5 +107,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 08a1a8458..157752a86 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -89,7 +89,7 @@ struct vpx_codec_alg_priv {
unsigned int fixed_kf_cntr;
};
-static const VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
+static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
switch (frame) {
case VP8_LAST_FRAME:
return VP9_LAST_FLAG;