Diffstat (limited to 'vp9')
 vp9/common/arm/neon/vp9_idct16x16_neon.c          |  44
 vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm |  14
 vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm |  14
 vp9/common/vp9_blockd.h                           |   2
 vp9/common/vp9_entropy.h                          |   5
 vp9/common/vp9_entropymode.c                      |  15
 vp9/common/vp9_entropymode.h                      |  13
 vp9/common/vp9_entropymv.c                        |   8
 vp9/common/vp9_entropymv.h                        |  16
 vp9/common/vp9_idct.c                             | 139
 vp9/common/vp9_idct.h                             |  32
 vp9/common/vp9_rtcd_defs.sh                       |  42
 vp9/common/vp9_systemdependent.h                  |   4
 vp9/common/vp9_treecoder.h                        |   2
 vp9/common/x86/vp9_idct_intrin_sse2.c             | 178
 vp9/decoder/vp9_decodemv.c                        |  28
 vp9/decoder/vp9_decodframe.c                      |  27
 vp9/decoder/vp9_dsubexp.c                         |  12
 vp9/decoder/vp9_dsubexp.h                         |   2
 vp9/decoder/vp9_onyxd_if.c                        |  41
 vp9/encoder/vp9_bitstream.c                       |  34
 vp9/encoder/vp9_block.h                           |   1
 vp9/encoder/vp9_dct.c                             |  25
 vp9/encoder/vp9_encodeframe.c                     |   2
 vp9/encoder/vp9_encodemb.c                        |   6
 vp9/encoder/vp9_onyx_if.c                         |   2
 vp9/encoder/vp9_rdopt.c                           |  23
 vp9/encoder/vp9_rdopt.h                           |   4
 vp9/encoder/vp9_subexp.c                          |   3
 vp9/encoder/vp9_subexp.h                          |   2
 vp9/encoder/x86/vp9_dct_sse2.c                    |   5
 vp9/vp9_dx_iface.c                                |   6
 32 files changed, 365 insertions(+), 386 deletions(-)
diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c
index 33aa4e001..0b9fc09ab 100644
--- a/vp9/common/arm/neon/vp9_idct16x16_neon.c
+++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c
@@ -11,31 +11,31 @@
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
-extern void vp9_idct16x16_256_add_neon_pass1(int16_t *input,
- int16_t *output,
- int output_stride);
-extern void vp9_idct16x16_256_add_neon_pass2(int16_t *src,
- int16_t *output,
- int16_t *pass1Output,
- int16_t skip_adding,
- uint8_t *dest,
- int dest_stride);
-extern void vp9_idct16x16_10_add_neon_pass1(int16_t *input,
- int16_t *output,
- int output_stride);
-extern void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
- int16_t *output,
- int16_t *pass1Output,
- int16_t skip_adding,
- uint8_t *dest,
- int dest_stride);
+void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,
+ int16_t *output,
+ int output_stride);
+void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,
+ int16_t *output,
+ int16_t *pass1Output,
+ int16_t skip_adding,
+ uint8_t *dest,
+ int dest_stride);
+void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,
+ int16_t *output,
+ int output_stride);
+void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,
+ int16_t *output,
+ int16_t *pass1Output,
+ int16_t skip_adding,
+ uint8_t *dest,
+ int dest_stride);
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
extern void vp9_push_neon(int64_t *store);
extern void vp9_pop_neon(int64_t *store);
-void vp9_idct16x16_256_add_neon(int16_t *input,
- uint8_t *dest, int dest_stride) {
+void vp9_idct16x16_256_add_neon(const int16_t *input,
+ uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
@@ -109,8 +109,8 @@ void vp9_idct16x16_256_add_neon(int16_t *input,
return;
}
-void vp9_idct16x16_10_add_neon(int16_t *input,
- uint8_t *dest, int dest_stride) {
+void vp9_idct16x16_10_add_neon(const int16_t *input,
+ uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
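As a reference for the wrapper pattern in this hunk, here is a hedged sketch assuming vp9_push_neon()/vp9_pop_neon() spill and reload the callee-saved d8-d15 registers, with the extern declarations in scope as shown above:

  void idct16x16_wrapper_sketch(const int16_t *input, uint8_t *dest,
                                int dest_stride) {
    int64_t store_reg[8];        /* one 64-bit slot per d8..d15 register */
    vp9_push_neon(store_reg);    /* save callee-saved NEON registers */
    /* ... run the assembly row pass (pass1) and column pass (pass2),
     * exactly as vp9_idct16x16_256_add_neon does above ... */
    vp9_pop_neon(store_reg);     /* restore them before returning */
  }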
diff --git a/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm b/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
index 963ef35da..2f326e24c 100644
--- a/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
@@ -8,7 +8,7 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_short_iht4x4_add_neon|
+ EXPORT |vp9_iht4x4_16_add_neon|
ARM
REQUIRE8
PRESERVE8
@@ -139,7 +139,7 @@
MEND
AREA Block, CODE, READONLY ; name this block of code
-;void vp9_short_iht4x4_add_neon(int16_t *input, uint8_t *dest,
+;void vp9_iht4x4_16_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride, int tx_type)
;
; r0 int16_t input
@@ -147,7 +147,7 @@
; r2 int dest_stride
; r3 int tx_type)
; This function will only handle tx_type of 1,2,3.
-|vp9_short_iht4x4_add_neon| PROC
+|vp9_iht4x4_16_add_neon| PROC
; load the inputs into d16-d19
vld1.s16 {q8,q9}, [r0]!
@@ -175,7 +175,7 @@ iadst_idct
; then transform columns
IADST4x4_1D
- b end_vp9_short_iht4x4_add_neon
+ b end_vp9_iht4x4_16_add_neon
idct_iadst
; generate constants
@@ -191,7 +191,7 @@ idct_iadst
; then transform columns
IDCT4x4_1D
- b end_vp9_short_iht4x4_add_neon
+ b end_vp9_iht4x4_16_add_neon
iadst_iadst
; generate constants
@@ -206,7 +206,7 @@ iadst_iadst
; then transform columns
IADST4x4_1D
-end_vp9_short_iht4x4_add_neon
+end_vp9_iht4x4_16_add_neon
; ROUND_POWER_OF_TWO(temp_out[j], 4)
vrshr.s16 q8, q8, #4
vrshr.s16 q9, q9, #4
@@ -232,6 +232,6 @@ end_vp9_short_iht4x4_add_neon
vst1.32 {d26[1]}, [r1], r2
vst1.32 {d26[0]}, [r1] ; no post-increment
bx lr
- ENDP ; |vp9_short_iht4x4_add_neon|
+ ENDP ; |vp9_iht4x4_16_add_neon|
END
diff --git a/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm b/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
index bab9cb4a4..93d3af301 100644
--- a/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
@@ -8,7 +8,7 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_short_iht8x8_add_neon|
+ EXPORT |vp9_iht8x8_64_add_neon|
ARM
REQUIRE8
PRESERVE8
@@ -559,7 +559,7 @@
AREA Block, CODE, READONLY ; name this block of code
-;void vp9_short_iht8x8_add_neon(int16_t *input, uint8_t *dest,
+;void vp9_iht8x8_64_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride, int tx_type)
;
; r0 int16_t input
@@ -567,7 +567,7 @@
; r2 int dest_stride
; r3 int tx_type)
; This function will only handle tx_type of 1,2,3.
-|vp9_short_iht8x8_add_neon| PROC
+|vp9_iht8x8_64_add_neon| PROC
; load the inputs into d16-d19
vld1.s16 {q8,q9}, [r0]!
@@ -602,7 +602,7 @@ iadst_idct
; then transform columns
IADST8X8_1D
- b end_vp9_short_iht8x8_add_neon
+ b end_vp9_iht8x8_64_add_neon
idct_iadst
; generate IADST constants
@@ -620,7 +620,7 @@ idct_iadst
; then transform columns
IDCT8x8_1D
- b end_vp9_short_iht8x8_add_neon
+ b end_vp9_iht8x8_64_add_neon
iadst_iadst
; generate IADST constants
@@ -635,7 +635,7 @@ iadst_iadst
; then transform columns
IADST8X8_1D
-end_vp9_short_iht8x8_add_neon
+end_vp9_iht8x8_64_add_neon
pop {r0-r10}
; ROUND_POWER_OF_TWO(temp_out[j], 5)
@@ -691,6 +691,6 @@ end_vp9_short_iht8x8_add_neon
vst1.64 {d6}, [r0], r2
vst1.64 {d7}, [r0], r2
bx lr
- ENDP ; |vp9_short_iht8x8_add_neon|
+ ENDP ; |vp9_iht8x8_64_add_neon|
END
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index b1af13891..0538b37ac 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -221,7 +221,7 @@ typedef struct macroblockd {
int lossless;
/* Inverse transform function pointers. */
- void (*itxm_add)(int16_t *input, uint8_t *dest, int stride, int eob);
+ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
struct subpix_fn_table subpix;
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 3cf508e05..02178b579 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -18,6 +18,8 @@
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_treecoder.h"
+#define DIFF_UPDATE_PROB 252
+
/* Coefficient token alphabet */
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
@@ -208,7 +210,4 @@ static void get_scan_and_band(const MACROBLOCKD *xd, TX_SIZE tx_size,
}
}
-
-enum { VP9_COEF_UPDATE_PROB = 252 };
-
#endif // VP9_COMMON_VP9_ENTROPY_H_
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index e17679616..56e644460 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -226,7 +226,7 @@ static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
};
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
-const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = {
+const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
-DC_PRED, 2, /* 0 = DC_NODE */
-TM_PRED, 4, /* 1 = TM_NODE */
-V_PRED, 6, /* 2 = V_NODE */
@@ -237,22 +237,20 @@ const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = {
-D63_PRED, 16, /* 7 = D63_NODE */
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
};
+struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
-const vp9_tree_index vp9_inter_mode_tree[6] = {
+const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
-ZEROMV, 2,
-NEARESTMV, 4,
-NEARMV, -NEWMV
};
+struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
-const vp9_tree_index vp9_partition_tree[6] = {
+const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
-PARTITION_NONE, 2,
-PARTITION_HORZ, 4,
-PARTITION_VERT, -PARTITION_SPLIT
};
-
-struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
-struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
-
struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
@@ -338,7 +336,8 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
}
-const vp9_tree_index vp9_switchable_interp_tree[SWITCHABLE_FILTERS*2-2] = {
+const vp9_tree_index vp9_switchable_interp_tree
+ [TREE_SIZE(SWITCHABLE_FILTERS)] = {
-EIGHTTAP, 2,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
};
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index ccade2752..ab37b75c6 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -15,7 +15,6 @@
#include "vp9/common/vp9_treecoder.h"
#define TX_SIZE_CONTEXTS 2
-#define MODE_UPDATE_PROB 252
#define SWITCHABLE_FILTERS 3 // number of switchable filters
// #define MODE_STATS
@@ -38,19 +37,17 @@ extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
[INTRA_MODES - 1];
-extern const vp9_tree_index vp9_intra_mode_tree[];
-extern const vp9_tree_index vp9_inter_mode_tree[];
-
+extern const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
extern struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
+
+extern const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)];
extern struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
-// probability models for partition information
-extern const vp9_tree_index vp9_partition_tree[];
+extern const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)];
extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
extern const vp9_tree_index vp9_switchable_interp_tree
- [2 * (SWITCHABLE_FILTERS - 1)];
-
+ [TREE_SIZE(SWITCHABLE_FILTERS)];
extern struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
void vp9_entropy_mode_init();
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index baff637e8..e85118118 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -18,14 +18,14 @@
/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
#define COMPANDED_MVREF_THRESH 8
-const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = {
+const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
-MV_JOINT_ZERO, 2,
-MV_JOINT_HNZVZ, 4,
-MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
};
struct vp9_token vp9_mv_joint_encodings[MV_JOINTS];
-const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
+const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
-MV_CLASS_0, 2,
-MV_CLASS_1, 4,
6, 8,
@@ -39,12 +39,12 @@ const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
};
struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
-const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2] = {
+const vp9_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
-0, -1,
};
struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
-const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2] = {
+const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(4)] = {
-0, 2,
-1, 4,
-2, -3
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 3b782ab0a..c42653d42 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -43,9 +43,6 @@ static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {
return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ;
}
-extern const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2];
-extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS];
-
/* Symbols for coding magnitude class of nonzero components */
#define MV_CLASSES 11
typedef enum {
@@ -62,9 +59,6 @@ typedef enum {
MV_CLASS_10 = 10, /* (1024,2048] integer pel */
} MV_CLASS_TYPE;
-extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2];
-extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
-
#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
#define CLASS0_SIZE (1 << CLASS0_BITS)
#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
@@ -77,10 +71,16 @@ extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
#define MV_UPP ((1 << MV_IN_USE_BITS) - 1)
#define MV_LOW (-(1 << MV_IN_USE_BITS))
-extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2];
+extern const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)];
+extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS];
+
+extern const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)];
+extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
+
+extern const vp9_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)];
extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
-extern const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2];
+extern const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(4)];
extern struct vp9_token vp9_mv_fp_encodings[4];
typedef struct {
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 78d10877a..52b039d99 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -18,13 +18,13 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_idct.h"
-void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
0.5 shifts per pixel. */
int i;
int16_t output[16];
int a1, b1, c1, d1, e1;
- int16_t *ip = input;
+ const int16_t *ip = input;
int16_t *op = output;
for (i = 0; i < 4; i++) {
@@ -60,21 +60,21 @@ void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
c1 = e1 - c1;
a1 -= b1;
d1 += c1;
- dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);
- dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1);
- dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1);
- dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1);
+ dest[stride * 0] = clip_pixel(dest[stride * 0] + a1);
+ dest[stride * 1] = clip_pixel(dest[stride * 1] + b1);
+ dest[stride * 2] = clip_pixel(dest[stride * 2] + c1);
+ dest[stride * 3] = clip_pixel(dest[stride * 3] + d1);
ip++;
dest++;
}
}
-void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
+void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
int i;
int a1, e1;
int16_t tmp[4];
- int16_t *ip = in;
+ const int16_t *ip = in;
int16_t *op = tmp;
a1 = ip[0] >> UNIT_QUANT_SHIFT;
@@ -96,7 +96,7 @@ void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
}
}
-static void idct4_1d(int16_t *input, int16_t *output) {
+static void idct4_1d(const int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;
// stage 1
@@ -116,7 +116,7 @@ static void idct4_1d(int16_t *input, int16_t *output) {
output[3] = step[0] - step[3];
}
-void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[4 * 4];
int16_t *outptr = out;
int i, j;
@@ -135,12 +135,12 @@ void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 4 + i];
idct4_1d(temp_in, temp_out);
for (j = 0; j < 4; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
- + dest[j * dest_stride + i]);
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ + dest[j * stride + i]);
}
}
-void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {
int i;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
@@ -156,7 +156,7 @@ void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
}
}
-static void idct8_1d(int16_t *input, int16_t *output) {
+static void idct8_1d(const int16_t *input, int16_t *output) {
int16_t step1[8], step2[8];
int temp1, temp2;
// stage 1
@@ -201,7 +201,7 @@ static void idct8_1d(int16_t *input, int16_t *output) {
output[7] = step1[0] - step1[7];
}
-void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[8 * 8];
int16_t *outptr = out;
int i, j;
@@ -220,12 +220,12 @@ void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 8 + i];
idct8_1d(temp_in, temp_out);
for (j = 0; j < 8; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
- + dest[j * dest_stride + i]);
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ + dest[j * stride + i]);
}
}
-void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
int i, j;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
@@ -234,11 +234,11 @@ void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i)
dest[i] = clip_pixel(dest[i] + a1);
- dest += dest_stride;
+ dest += stride;
}
}
-static void iadst4_1d(int16_t *input, int16_t *output) {
+static void iadst4_1d(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[0];
@@ -280,8 +280,8 @@ static void iadst4_1d(int16_t *input, int16_t *output) {
output[3] = dct_const_round_shift(s3);
}
-void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride,
- int tx_type) {
+void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
+ int tx_type) {
const transform_2d IHT_4[] = {
{ idct4_1d, idct4_1d }, // DCT_DCT = 0
{ iadst4_1d, idct4_1d }, // ADST_DCT = 1
@@ -307,11 +307,11 @@ void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride,
temp_in[j] = out[j * 4 + i];
IHT_4[tx_type].cols(temp_in, temp_out);
for (j = 0; j < 4; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
- + dest[j * dest_stride + i]);
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ + dest[j * stride + i]);
}
}
-static void iadst8_1d(int16_t *input, int16_t *output) {
+static void iadst8_1d(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[7];
@@ -395,8 +395,8 @@ static const transform_2d IHT_8[] = {
{ iadst8_1d, iadst8_1d } // ADST_ADST = 3
};
-void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride,
- int tx_type) {
+void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
+ int tx_type) {
int i, j;
int16_t out[8 * 8];
int16_t *outptr = out;
@@ -416,12 +416,12 @@ void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride,
temp_in[j] = out[j * 8 + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 8; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
- + dest[j * dest_stride + i]); }
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ + dest[j * stride + i]);
+ }
}
-void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
- int dest_stride) {
+void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[8 * 8] = { 0 };
int16_t *outptr = out;
int i, j;
@@ -441,12 +441,12 @@ void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
temp_in[j] = out[j * 8 + i];
idct8_1d(temp_in, temp_out);
for (j = 0; j < 8; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
- + dest[j * dest_stride + i]);
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ + dest[j * stride + i]);
}
}
-static void idct16_1d(int16_t *input, int16_t *output) {
+static void idct16_1d(const int16_t *input, int16_t *output) {
int16_t step1[16], step2[16];
int temp1, temp2;
@@ -611,7 +611,7 @@ static void idct16_1d(int16_t *input, int16_t *output) {
output[15] = step2[0] - step2[15];
}
-void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[16 * 16];
int16_t *outptr = out;
int i, j;
@@ -630,12 +630,12 @@ void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
- + dest[j * dest_stride + i]);
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ + dest[j * stride + i]);
}
}
-void iadst16_1d(int16_t *input, int16_t *output) {
+static void iadst16_1d(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -813,8 +813,8 @@ static const transform_2d IHT_16[] = {
{ iadst16_1d, iadst16_1d } // ADST_ADST = 3
};
-void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
- int tx_type) {
+void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,
+ int tx_type) {
int i, j;
int16_t out[16 * 16];
int16_t *outptr = out;
@@ -834,12 +834,11 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
temp_in[j] = out[j * 16 + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
- + dest[j * dest_stride + i]); }
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ + dest[j * stride + i]); }
}
-void vp9_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
- int dest_stride) {
+void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[16 * 16] = { 0 };
int16_t *outptr = out;
int i, j;
@@ -859,13 +858,12 @@ void vp9_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
temp_in[j] = out[j*16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
- + dest[j * dest_stride + i]);
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ + dest[j * stride + i]);
}
}
-void vp9_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
- int dest_stride) {
+void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
int i, j;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
@@ -874,11 +872,11 @@ void vp9_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i)
dest[i] = clip_pixel(dest[i] + a1);
- dest += dest_stride;
+ dest += stride;
}
}
-static void idct32_1d(int16_t *input, int16_t *output) {
+static void idct32_1d(const int16_t *input, int16_t *output) {
int16_t step1[32], step2[32];
int temp1, temp2;
@@ -1245,7 +1243,7 @@ static void idct32_1d(int16_t *input, int16_t *output) {
output[31] = step1[0] - step1[31];
}
-void vp9_idct32x32_1024_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[32 * 32];
int16_t *outptr = out;
int i, j;
@@ -1277,13 +1275,12 @@ void vp9_idct32x32_1024_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 32 + i];
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
- + dest[j * dest_stride + i]);
+ dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ + dest[j * stride + i]);
}
}
-void vp9_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
- int dest_stride) {
+void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
int i, j;
int a1;
@@ -1294,12 +1291,12 @@ void vp9_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
for (j = 0; j < 32; ++j) {
for (i = 0; i < 32; ++i)
dest[i] = clip_pixel(dest[i] + a1);
- dest += dest_stride;
+ dest += stride;
}
}
// idct
-void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
+void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
if (eob > 1)
vp9_idct4x4_16_add(input, dest, stride);
else
@@ -1307,14 +1304,14 @@ void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
-void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
+void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
if (eob > 1)
vp9_iwht4x4_16_add(input, dest, stride);
else
vp9_iwht4x4_1_add(input, dest, stride);
}
-void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {
+void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
// If dc is 1, then input[0] is the reconstructed value, do not need
// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
@@ -1333,7 +1330,8 @@ void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
}
-void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob) {
+void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride,
+ int eob) {
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
if (eob) {
@@ -1347,7 +1345,8 @@ void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
}
-void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob) {
+void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
+ int eob) {
if (eob) {
if (eob == 1)
vp9_idct32x32_1_add(input, dest, stride);
@@ -1357,32 +1356,32 @@ void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
// iht
-void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
- int eob) {
+void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+ int stride, int eob) {
if (tx_type == DCT_DCT)
vp9_idct4x4_add(input, dest, stride, eob);
else
- vp9_short_iht4x4_add(input, dest, stride, tx_type);
+ vp9_iht4x4_16_add(input, dest, stride, tx_type);
}
-void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
- int stride, int eob) {
+void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+ int stride, int eob) {
if (tx_type == DCT_DCT) {
vp9_idct8x8_add(input, dest, stride, eob);
} else {
if (eob > 0) {
- vp9_short_iht8x8_add(input, dest, stride, tx_type);
+ vp9_iht8x8_64_add(input, dest, stride, tx_type);
}
}
}
-void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
- int stride, int eob) {
+void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+ int stride, int eob) {
if (tx_type == DCT_DCT) {
vp9_idct16x16_add(input, dest, stride, eob);
} else {
if (eob > 0) {
- vp9_short_iht16x16_add(input, dest, stride, tx_type);
+ vp9_iht16x16_256_add(input, dest, stride, tx_type);
}
}
}
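Every *_add_c function above finishes with the same add-and-clip step. A standalone sketch of that step for the 4x4 case, assuming ROUND_POWER_OF_TWO and clip_pixel keep their usual definitions from vp9/common/vp9_common.h:

  #include <stdint.h>

  #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

  static uint8_t clip_pixel(int val) {  /* clamp to the 8-bit pixel range */
    return (uint8_t)(val > 255 ? 255 : (val < 0 ? 0 : val));
  }

  /* Add a rounded 4x4 residual block to the prediction already in dest;
   * the shift by 4 removes the scaling accumulated by the inverse
   * transform, matching ROUND_POWER_OF_TWO(temp_out[j], 4) above. */
  static void add_residual_4x4(const int16_t *residual, uint8_t *dest,
                               int stride) {
    int i, j;
    for (i = 0; i < 4; ++i)
      for (j = 0; j < 4; ++j)
        dest[j * stride + i] = clip_pixel(
            ROUND_POWER_OF_TWO(residual[j * 4 + i], 4) + dest[j * stride + i]);
  }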
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index e85404e7a..2b3f35f0a 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -81,27 +81,27 @@ static INLINE int dct_const_round_shift(int input) {
return rv;
}
-typedef void (*transform_1d)(int16_t*, int16_t*);
+typedef void (*transform_1d)(const int16_t*, int16_t*);
typedef struct {
transform_1d cols, rows; // vertical and horizontal
} transform_2d;
-
-void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
-void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
-void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob);
-void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob);
-void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob);
-
-void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
- int stride, int eob);
-
-void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
- int stride, int eob);
-
-void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
- int stride, int eob);
+void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
+
+void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
+void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob);
+void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, int
+ eob);
+void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
+ int eob);
+
+void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+ int stride, int eob);
+void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+ int stride, int eob);
+void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+ int stride, int eob);
#endif // VP9_COMMON_VP9_IDCT_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 31227ad54..526be87df 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -267,51 +267,51 @@ specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2
#
# dct
#
-prototype void vp9_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct4x4_1_add sse2 neon
-prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct4x4_16_add sse2 neon
-prototype void vp9_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct8x8_1_add sse2 neon
-prototype void vp9_idct8x8_64_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct8x8_64_add sse2 neon
-prototype void vp9_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct8x8_10_add sse2 neon
-prototype void vp9_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_1_add sse2 neon
-prototype void vp9_idct16x16_256_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_256_add sse2 neon
-prototype void vp9_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_10_add sse2 neon
-prototype void vp9_idct32x32_1024_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1024_add sse2 neon
-prototype void vp9_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1_add sse2
-prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_short_iht4x4_add sse2 neon
+prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
+specialize vp9_iht4x4_16_add sse2 neon
-prototype void vp9_short_iht8x8_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_short_iht8x8_add sse2 neon
+prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
+specialize vp9_iht8x8_64_add sse2 neon
-prototype void vp9_short_iht16x16_add "int16_t *input, uint8_t *output, int pitch, int tx_type"
-specialize vp9_short_iht16x16_add sse2
+prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type"
+specialize vp9_iht16x16_256_add sse2
# dct and add
-prototype void vp9_iwht4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_iwht4x4_1_add
-prototype void vp9_iwht4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
+prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_iwht4x4_16_add
#
@@ -701,9 +701,6 @@ specialize vp9_short_fdct8x8 sse2
prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct4x4 sse2
-prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_fdct8x4 sse2
-
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct32x32 sse2
@@ -716,9 +713,6 @@ specialize vp9_short_fdct16x16 sse2
prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_walsh4x4
-prototype void vp9_short_walsh8x4 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_walsh8x4
-
#
# Motion search
#
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index b8d161d19..254a431a3 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -24,8 +24,8 @@ void vpx_reset_mmx_state(void);
#define vp9_clear_system_state()
#endif
-#ifdef _MSC_VER
-// round is not defined in MSVC
+#if defined(_MSC_VER) && _MSC_VER < 1800
+// round is not defined in MSVC before VS2013.
static int round(double x) {
if (x < 0)
return (int)ceil(x - 0.5);
diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h
index 24e6fa295..4ba171f46 100644
--- a/vp9/common/vp9_treecoder.h
+++ b/vp9/common/vp9_treecoder.h
@@ -21,6 +21,8 @@ typedef uint8_t vp9_prob;
typedef int8_t vp9_tree_index;
+#define TREE_SIZE(leaf_count) (2 * (leaf_count) - 2)
+
#define vp9_complement(x) (255 - x)
/* We build coding trees compactly in arrays.
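The size 2 * leaf_count - 2 follows because a full binary tree with N leaves has N - 1 internal nodes, and each internal node stores two vp9_tree_index entries; leaves are encoded as negated symbol values, interior links as positive array offsets. A sketch for N = 4, the same shape as vp9_mv_fp_tree above:

  /* TREE_SIZE(4) == 6: two child slots for each of the 3 internal nodes. */
  const vp9_tree_index four_leaf_tree[TREE_SIZE(4)] = {
    -0, 2,    /* node 0: emit leaf 0, or continue at index 2 */
    -1, 4,    /* node 1: emit leaf 1, or continue at index 4 */
    -2, -3    /* node 2: leaves 2 and 3 */
  };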
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index a2b0e8c73..cfec36b42 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -15,7 +15,7 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_idct.h"
-void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
@@ -26,10 +26,10 @@ void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
__m128i input0, input1, input2, input3;
// Rows
- input0 = _mm_loadl_epi64((__m128i *)input);
- input1 = _mm_loadl_epi64((__m128i *)(input + 4));
- input2 = _mm_loadl_epi64((__m128i *)(input + 8));
- input3 = _mm_loadl_epi64((__m128i *)(input + 12));
+ input0 = _mm_loadl_epi64((const __m128i *)input);
+ input1 = _mm_loadl_epi64((const __m128i *)(input + 4));
+ input2 = _mm_loadl_epi64((const __m128i *)(input + 8));
+ input3 = _mm_loadl_epi64((const __m128i *)(input + 12));
// Construct i3, i1, i3, i1, i2, i0, i2, i0
input0 = _mm_shufflelo_epi16(input0, 0xd8);
@@ -148,7 +148,7 @@ void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE4X4(dest, input3);
}
-void vp9_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
@@ -264,16 +264,16 @@ static void iadst4_1d_sse2(__m128i *in) {
in[3] = _mm_unpackhi_epi64(in[1], in[1]);
}
-void vp9_short_iht4x4_add_sse2(int16_t *input, uint8_t *dest, int stride,
- int tx_type) {
+void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
+ int tx_type) {
__m128i in[4];
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
- in[0] = _mm_loadl_epi64((__m128i *)input);
- in[1] = _mm_loadl_epi64((__m128i *)(input + 4));
- in[2] = _mm_loadl_epi64((__m128i *)(input + 8));
- in[3] = _mm_loadl_epi64((__m128i *)(input + 12));
+ in[0] = _mm_loadl_epi64((const __m128i *)input);
+ in[1] = _mm_loadl_epi64((const __m128i *)(input + 4));
+ in[2] = _mm_loadl_epi64((const __m128i *)(input + 8));
+ in[3] = _mm_loadl_epi64((const __m128i *)(input + 12));
switch (tx_type) {
case 0: // DCT_DCT
@@ -494,7 +494,7 @@ void vp9_short_iht4x4_add_sse2(int16_t *input, uint8_t *dest, int stride,
dest += stride; \
}
-void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
@@ -514,14 +514,14 @@ void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) {
int i;
// Load input data.
- in0 = _mm_load_si128((__m128i *)input);
- in1 = _mm_load_si128((__m128i *)(input + 8 * 1));
- in2 = _mm_load_si128((__m128i *)(input + 8 * 2));
- in3 = _mm_load_si128((__m128i *)(input + 8 * 3));
- in4 = _mm_load_si128((__m128i *)(input + 8 * 4));
- in5 = _mm_load_si128((__m128i *)(input + 8 * 5));
- in6 = _mm_load_si128((__m128i *)(input + 8 * 6));
- in7 = _mm_load_si128((__m128i *)(input + 8 * 7));
+ in0 = _mm_load_si128((const __m128i *)input);
+ in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+ in4 = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in5 = _mm_load_si128((const __m128i *)(input + 8 * 5));
+ in6 = _mm_load_si128((const __m128i *)(input + 8 * 6));
+ in7 = _mm_load_si128((const __m128i *)(input + 8 * 7));
// 2-D
for (i = 0; i < 2; i++) {
@@ -562,7 +562,7 @@ void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE(dest, in7);
}
-void vp9_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
@@ -883,21 +883,21 @@ static void iadst8_1d_sse2(__m128i *in) {
}
-void vp9_short_iht8x8_add_sse2(int16_t *input, uint8_t *dest, int stride,
- int tx_type) {
+void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
+ int tx_type) {
__m128i in[8];
const __m128i zero = _mm_setzero_si128();
const __m128i final_rounding = _mm_set1_epi16(1<<4);
// load input data
- in[0] = _mm_load_si128((__m128i *)input);
- in[1] = _mm_load_si128((__m128i *)(input + 8 * 1));
- in[2] = _mm_load_si128((__m128i *)(input + 8 * 2));
- in[3] = _mm_load_si128((__m128i *)(input + 8 * 3));
- in[4] = _mm_load_si128((__m128i *)(input + 8 * 4));
- in[5] = _mm_load_si128((__m128i *)(input + 8 * 5));
- in[6] = _mm_load_si128((__m128i *)(input + 8 * 6));
- in[7] = _mm_load_si128((__m128i *)(input + 8 * 7));
+ in[0] = _mm_load_si128((const __m128i *)input);
+ in[1] = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in[2] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in[3] = _mm_load_si128((const __m128i *)(input + 8 * 3));
+ in[4] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in[5] = _mm_load_si128((const __m128i *)(input + 8 * 5));
+ in[6] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+ in[7] = _mm_load_si128((const __m128i *)(input + 8 * 7));
switch (tx_type) {
case 0: // DCT_DCT
@@ -950,7 +950,7 @@ void vp9_short_iht8x8_add_sse2(int16_t *input, uint8_t *dest, int stride,
RECON_AND_STORE(dest, in[7]);
}
-void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
@@ -970,10 +970,10 @@ void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
// Rows. Load 4-row input data.
- in0 = _mm_load_si128((__m128i *)input);
- in1 = _mm_load_si128((__m128i *)(input + 8 * 1));
- in2 = _mm_load_si128((__m128i *)(input + 8 * 2));
- in3 = _mm_load_si128((__m128i *)(input + 8 * 3));
+ in0 = _mm_load_si128((const __m128i *)input);
+ in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
// 8x4 Transpose
TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3)
@@ -1228,7 +1228,8 @@ void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
stp2_10, stp2_13, stp2_11, stp2_12) \
}
-void vp9_idct16x16_256_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
const __m128i zero = _mm_setzero_si128();
@@ -1283,22 +1284,22 @@ void vp9_idct16x16_256_add_sse2(int16_t *input, uint8_t *dest, int stride) {
if (i == 1) input += 128;
// Load input data.
- in0 = _mm_load_si128((__m128i *)input);
- in8 = _mm_load_si128((__m128i *)(input + 8 * 1));
- in1 = _mm_load_si128((__m128i *)(input + 8 * 2));
- in9 = _mm_load_si128((__m128i *)(input + 8 * 3));
- in2 = _mm_load_si128((__m128i *)(input + 8 * 4));
- in10 = _mm_load_si128((__m128i *)(input + 8 * 5));
- in3 = _mm_load_si128((__m128i *)(input + 8 * 6));
- in11 = _mm_load_si128((__m128i *)(input + 8 * 7));
- in4 = _mm_load_si128((__m128i *)(input + 8 * 8));
- in12 = _mm_load_si128((__m128i *)(input + 8 * 9));
- in5 = _mm_load_si128((__m128i *)(input + 8 * 10));
- in13 = _mm_load_si128((__m128i *)(input + 8 * 11));
- in6 = _mm_load_si128((__m128i *)(input + 8 * 12));
- in14 = _mm_load_si128((__m128i *)(input + 8 * 13));
- in7 = _mm_load_si128((__m128i *)(input + 8 * 14));
- in15 = _mm_load_si128((__m128i *)(input + 8 * 15));
+ in0 = _mm_load_si128((const __m128i *)input);
+ in8 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in1 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in9 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+ in2 = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in10 = _mm_load_si128((const __m128i *)(input + 8 * 5));
+ in3 = _mm_load_si128((const __m128i *)(input + 8 * 6));
+ in11 = _mm_load_si128((const __m128i *)(input + 8 * 7));
+ in4 = _mm_load_si128((const __m128i *)(input + 8 * 8));
+ in12 = _mm_load_si128((const __m128i *)(input + 8 * 9));
+ in5 = _mm_load_si128((const __m128i *)(input + 8 * 10));
+ in13 = _mm_load_si128((const __m128i *)(input + 8 * 11));
+ in6 = _mm_load_si128((const __m128i *)(input + 8 * 12));
+ in14 = _mm_load_si128((const __m128i *)(input + 8 * 13));
+ in7 = _mm_load_si128((const __m128i *)(input + 8 * 14));
+ in15 = _mm_load_si128((const __m128i *)(input + 8 * 15));
TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
in4, in5, in6, in7);
@@ -1435,7 +1436,7 @@ void vp9_idct16x16_256_add_sse2(int16_t *input, uint8_t *dest, int stride) {
}
}
-void vp9_idct16x16_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a, i;
@@ -2310,24 +2311,24 @@ static void iadst16_1d_sse2(__m128i *in0, __m128i *in1) {
iadst16_1d_8col(in1);
}
-static INLINE void load_buffer_8x16(int16_t *input, __m128i *in) {
- in[0] = _mm_load_si128((__m128i *)(input + 0 * 16));
- in[1] = _mm_load_si128((__m128i *)(input + 1 * 16));
- in[2] = _mm_load_si128((__m128i *)(input + 2 * 16));
- in[3] = _mm_load_si128((__m128i *)(input + 3 * 16));
- in[4] = _mm_load_si128((__m128i *)(input + 4 * 16));
- in[5] = _mm_load_si128((__m128i *)(input + 5 * 16));
- in[6] = _mm_load_si128((__m128i *)(input + 6 * 16));
- in[7] = _mm_load_si128((__m128i *)(input + 7 * 16));
-
- in[8] = _mm_load_si128((__m128i *)(input + 8 * 16));
- in[9] = _mm_load_si128((__m128i *)(input + 9 * 16));
- in[10] = _mm_load_si128((__m128i *)(input + 10 * 16));
- in[11] = _mm_load_si128((__m128i *)(input + 11 * 16));
- in[12] = _mm_load_si128((__m128i *)(input + 12 * 16));
- in[13] = _mm_load_si128((__m128i *)(input + 13 * 16));
- in[14] = _mm_load_si128((__m128i *)(input + 14 * 16));
- in[15] = _mm_load_si128((__m128i *)(input + 15 * 16));
+static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
+ in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));
+ in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));
+ in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
+ in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
+ in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
+ in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
+ in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
+ in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
+
+ in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
+ in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
+ in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
+ in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
+ in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
+ in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
+ in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
+ in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
}
static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
@@ -2386,8 +2387,8 @@ static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
RECON_AND_STORE(dest, in[15]);
}
-void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride,
- int tx_type) {
+void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
+ int tx_type) {
__m128i in0[16], in1[16];
load_buffer_8x16(input, in0);
@@ -2421,8 +2422,8 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride,
write_buffer_8x16(dest, in1, stride);
}
-void vp9_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
- int stride) {
+void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
const __m128i zero = _mm_setzero_si128();
@@ -2468,14 +2469,14 @@ void vp9_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int i;
// 1-D idct. Load input data.
- in0 = _mm_load_si128((__m128i *)input);
- in8 = _mm_load_si128((__m128i *)(input + 8 * 1));
- in1 = _mm_load_si128((__m128i *)(input + 8 * 2));
- in9 = _mm_load_si128((__m128i *)(input + 8 * 3));
- in2 = _mm_load_si128((__m128i *)(input + 8 * 4));
- in10 = _mm_load_si128((__m128i *)(input + 8 * 5));
- in3 = _mm_load_si128((__m128i *)(input + 8 * 6));
- in11 = _mm_load_si128((__m128i *)(input + 8 * 7));
+ in0 = _mm_load_si128((const __m128i *)input);
+ in8 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in1 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in9 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+ in2 = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in10 = _mm_load_si128((const __m128i *)(input + 8 * 5));
+ in3 = _mm_load_si128((const __m128i *)(input + 8 * 6));
+ in11 = _mm_load_si128((const __m128i *)(input + 8 * 7));
TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3);
TRANSPOSE_8X4(in8, in9, in10, in11, in8, in9, in10, in11);
@@ -2780,11 +2781,12 @@ void vp9_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
#define LOAD_DQCOEFF(reg, input) \
{ \
- reg = _mm_load_si128((__m128i *) input); \
+ reg = _mm_load_si128((const __m128i *) input); \
input += 8; \
} \
-void vp9_idct32x32_1024_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
@@ -3515,7 +3517,7 @@ void vp9_idct32x32_1024_add_sse2(int16_t *input, uint8_t *dest, int stride) {
}
} //NOLINT
-void vp9_idct32x32_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a, i;
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 27e5f2cda..8c1399d79 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -363,15 +363,14 @@ static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
int i, j;
for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j)
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB,
- &fc->switchable_interp_prob[j][i]);
+ vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
}
static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
int i, j;
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
for (j = 0; j < INTER_MODES - 1; ++j)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &fc->inter_mode_probs[i][j]);
+ vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
}
static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) {
@@ -505,7 +504,11 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
- assert(bsize >= BLOCK_8X8);
+ if (bsize < BLOCK_8X8) {
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Invalid usage of segement feature on small blocks");
+ return;
+ }
} else {
if (bsize >= BLOCK_8X8)
mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx);
@@ -606,17 +609,17 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
if (cm->comp_pred_mode == HYBRID_PREDICTION)
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.comp_inter_prob[i]);
+ vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
for (i = 0; i < REF_CONTEXTS; i++) {
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.single_ref_prob[i][0]);
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.single_ref_prob[i][1]);
+ vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
+ vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
}
if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
for (i = 0; i < REF_CONTEXTS; i++)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.comp_ref_prob[i]);
+ vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
}
void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
@@ -626,7 +629,7 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
// TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove.
// vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs));
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.mbskip_probs[k]);
+ vp9_diff_update_prob(r, &cm->fc.mbskip_probs[k]);
if (cm->frame_type != KEY_FRAME && !cm->intra_only) {
nmv_context *const nmvc = &pbi->common.fc.nmvc;
@@ -639,18 +642,17 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
read_switchable_interp_probs(&cm->fc, r);
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.intra_inter_prob[i]);
+ vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]);
read_comp_pred(cm, r);
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.y_mode_prob[j][i]);
+ vp9_diff_update_prob(r, &cm->fc.y_mode_prob[j][i]);
for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j)
for (i = 0; i < PARTITION_TYPES - 1; ++i)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB,
- &cm->fc.partition_prob[INTER_FRAME][j][i]);
+ vp9_diff_update_prob(r, &cm->fc.partition_prob[INTER_FRAME][j][i]);
read_mv_probs(r, nmvc, xd->allow_high_precision_mv);
}
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 061508b08..acde390f2 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -63,15 +63,15 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) {
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 3; ++j)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p8x8[i][j]);
+ vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 2; ++j)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p16x16[i][j]);
+ vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 1; ++j)
- vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p32x32[i][j]);
+ vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
}
static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
@@ -101,15 +101,15 @@ static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize,
if (tx_type == DCT_DCT)
xd->itxm_add(qcoeff, dst, stride, eob);
else
- vp9_iht_add(tx_type, qcoeff, dst, stride, eob);
+ vp9_iht4x4_add(tx_type, qcoeff, dst, stride, eob);
break;
case TX_8X8:
tx_type = get_tx_type_8x8(pd->plane_type, xd);
- vp9_iht_add_8x8(tx_type, qcoeff, dst, stride, eob);
+ vp9_iht8x8_add(tx_type, qcoeff, dst, stride, eob);
break;
case TX_16X16:
tx_type = get_tx_type_16x16(pd->plane_type, xd);
- vp9_iht_add_16x16(tx_type, qcoeff, dst, stride, eob);
+ vp9_iht16x16_add(tx_type, qcoeff, dst, stride, eob);
break;
case TX_32X32:
tx_type = DCT_DCT;
@@ -371,8 +371,7 @@ static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs,
for (l = 0; l < PREV_COEF_CONTEXTS; l++)
if (k > 0 || l < 3)
for (m = 0; m < UNCONSTRAINED_NODES; m++)
- vp9_diff_update_prob(r, VP9_COEF_UPDATE_PROB,
- &coef_probs[i][j][k][l][m]);
+ vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]);
}
static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode,
@@ -956,9 +955,15 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
YV12_BUFFER_CONFIG *new_fb = &cm->yv12_fb[cm->new_fb_idx];
if (!first_partition_size) {
- // showing a frame directly
- *p_data_end = data + 1;
- return 0;
+ if (!keyframe) {
+ // showing a frame directly
+ *p_data_end = data + 1;
+ return 0;
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid key frame");
+ return -1;
+ }
}
data += vp9_rb_bytes_read(&rb);
xd->corrupted = 0;
diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c
index 6f01cead6..fcca01729 100644
--- a/vp9/decoder/vp9_dsubexp.c
+++ b/vp9/decoder/vp9_dsubexp.c
@@ -48,8 +48,6 @@ static int merge_index(int v, int n, int modulus) {
static int inv_remap_prob(int v, int m) {
static int inv_map_table[MAX_PROB - 1] = {
- // generated by:
- // inv_map_table[j] = merge_index(j, MAX_PROB - 1, MODULUS_PARAM);
6, 19, 32, 45, 58, 71, 84, 97, 110, 123, 136, 149, 162, 175, 188,
201, 214, 227, 240, 253, 0, 1, 2, 3, 4, 5, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26,
@@ -66,9 +64,11 @@ static int inv_remap_prob(int v, int m) {
190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 202, 203, 204, 205,
206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221,
222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
- 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
+ 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252
};
- // v = merge_index(v, MAX_PROBS - 1, MODULUS_PARAM);
+ // The clamp is not necessary for a conforming VP9 stream; it is added to
+ // prevent out-of-bounds access on bad input data.
+ v = clamp(v, 0, 253);
v = inv_map_table[v];
m--;
if ((m << 1) <= MAX_PROB) {
@@ -99,8 +99,8 @@ static int decode_term_subexp(vp9_reader *r, int k, int num_syms) {
return word;
}
-void vp9_diff_update_prob(vp9_reader *r, int update_prob, vp9_prob* p) {
- if (vp9_read(r, update_prob)) {
+void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p) {
+ if (vp9_read(r, DIFF_UPDATE_PROB)) {
const int delp = decode_term_subexp(r, SUBEXP_PARAM, 255);
*p = (vp9_prob)inv_remap_prob(delp, *p);
}
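
[Editor's note: the clamp bound follows from the table size. Assuming MAX_PROB == 255, inv_map_table[] holds MAX_PROB - 1 == 254 entries, so 253 is the last valid index; a malformed stream can decode a larger delta, which would otherwise index past the end. A minimal sketch of the invariant, not part of the patch:]

  assert(sizeof(inv_map_table) / sizeof(inv_map_table[0]) == MAX_PROB - 1);
  v = clamp(v, 0, MAX_PROB - 2);  /* MAX_PROB - 2 == 253 */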
diff --git a/vp9/decoder/vp9_dsubexp.h b/vp9/decoder/vp9_dsubexp.h
index 21ac31393..aeb9399d0 100644
--- a/vp9/decoder/vp9_dsubexp.h
+++ b/vp9/decoder/vp9_dsubexp.h
@@ -14,6 +14,6 @@
#include "vp9/decoder/vp9_dboolhuff.h"
-void vp9_diff_update_prob(vp9_reader *r, int update_prob, vp9_prob* p);
+void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p);
#endif // VP9_DECODER_VP9_DSUBEXP_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index a42c2cf30..d3030746d 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -342,36 +342,33 @@ int vp9_receive_compressed_data(VP9D_PTR ptr,
return retcode;
}
- {
- swap_frame_buffers(pbi);
+ swap_frame_buffers(pbi);
#if WRITE_RECON_BUFFER == 2
- if (cm->show_frame)
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame);
- else
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 1000);
+ if (cm->show_frame)
+ write_dx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame);
+ else
+ write_dx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame + 1000);
#endif
- if (!pbi->do_loopfilter_inline) {
- /* Apply the loop filter if appropriate. */
- vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0);
- }
+ if (!pbi->do_loopfilter_inline) {
+ vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0);
+ }
#if WRITE_RECON_BUFFER == 2
- if (cm->show_frame)
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 2000);
- else
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 3000);
+ if (cm->show_frame)
+ write_dx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame + 2000);
+ else
+ write_dx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame + 3000);
#endif
- vp9_extend_frame_inner_borders(cm->frame_to_show,
- cm->subsampling_x,
- cm->subsampling_y);
- }
+ vp9_extend_frame_inner_borders(cm->frame_to_show,
+ cm->subsampling_x,
+ cm->subsampling_y);
#if WRITE_RECON_BUFFER == 1
if (cm->show_frame)
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 2f59d333a..428ca7e2b 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -179,9 +179,8 @@ static void update_mode(
vp9_tree_probs_from_distribution(tree, Pnew, bct, num_events, 0);
n--;
- for (i = 0; i < n; ++i) {
- vp9_cond_prob_diff_update(w, &Pcur[i], MODE_UPDATE_PROB, bct[i]);
- }
+ for (i = 0; i < n; ++i)
+ vp9_cond_prob_diff_update(w, &Pcur[i], bct[i]);
}
static void update_mbintra_mode_probs(VP9_COMP* const cpi,
@@ -227,8 +226,7 @@ void vp9_update_skip_probs(VP9_COMP *cpi, vp9_writer *w) {
int k;
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
- vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k],
- MODE_UPDATE_PROB, cm->counts.mbskip[k]);
+ vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k], cm->counts.mbskip[k]);
}
static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) {
@@ -251,7 +249,7 @@ static void update_switchable_interp_probs(VP9_COMP *const cpi,
for (j = 0; j <= SWITCHABLE_FILTERS; ++j) {
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) {
vp9_cond_prob_diff_update(bc, &cm->fc.switchable_interp_prob[j][i],
- MODE_UPDATE_PROB, branch_ct[j][i]);
+ branch_ct[j][i]);
}
}
#ifdef MODE_STATS
@@ -273,7 +271,7 @@ static void update_inter_mode_probs(VP9_COMMON *cm, vp9_writer* const bc) {
for (j = 0; j < INTER_MODES - 1; ++j)
vp9_cond_prob_diff_update(bc, &cm->fc.inter_mode_probs[i][j],
- MODE_UPDATE_PROB, branch_ct[j]);
+ branch_ct[j]);
}
}
@@ -781,7 +779,7 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
vp9_coeff_probs_model *old_frame_coef_probs =
cpi->common.fc.coef_probs[tx_size];
vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size];
- const vp9_prob upd = VP9_COEF_UPDATE_PROB;
+ const vp9_prob upd = DIFF_UPDATE_PROB;
const int entropy_nodes_update = UNCONSTRAINED_NODES;
int i, j, k, l, t;
switch (cpi->sf.use_fast_coef_updates) {
@@ -836,7 +834,7 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
for (t = 0; t < entropy_nodes_update; ++t) {
vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
- const vp9_prob upd = VP9_COEF_UPDATE_PROB;
+ const vp9_prob upd = DIFF_UPDATE_PROB;
int s;
int u = 0;
if (l >= 3 && k == 0)
@@ -1119,26 +1117,23 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
- tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i],
- ct_8x8p);
+ tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], ct_8x8p);
for (j = 0; j < TX_SIZES - 3; j++)
- vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j],
- MODE_UPDATE_PROB, ct_8x8p[j]);
+ vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], ct_8x8p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
- tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i],
- ct_16x16p);
+ tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], ct_16x16p);
for (j = 0; j < TX_SIZES - 2; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j],
- MODE_UPDATE_PROB, ct_16x16p[j]);
+ ct_16x16p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p);
for (j = 0; j < TX_SIZES - 1; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
- MODE_UPDATE_PROB, ct_32x32p[j]);
+ ct_32x32p[j]);
}
#ifdef MODE_STATS
if (!cpi->dummy_packing)
@@ -1468,7 +1463,6 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i],
- MODE_UPDATE_PROB,
cpi->intra_inter_count[i]);
if (cm->allow_comp_inter_inter) {
@@ -1482,7 +1476,6 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
if (use_hybrid_pred)
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
- MODE_UPDATE_PROB,
cpi->comp_inter_count[i]);
}
}
@@ -1490,10 +1483,8 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
for (i = 0; i < REF_CONTEXTS; i++) {
vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0],
- MODE_UPDATE_PROB,
cpi->single_ref_count[i][0]);
vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1],
- MODE_UPDATE_PROB,
cpi->single_ref_count[i][1]);
}
}
@@ -1501,7 +1492,6 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
for (i = 0; i < REF_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i],
- MODE_UPDATE_PROB,
cpi->comp_ref_count[i]);
update_mbintra_mode_probs(cpi, &header_bc);
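
[Editor's note: across this file the per-call-site MODE_UPDATE_PROB and VP9_COEF_UPDATE_PROB arguments collapse into the single DIFF_UPDATE_PROB constant baked into vp9_cond_prob_diff_update(). The boolean coder desynchronizes unless encoder and decoder use the same flag probability; a sketch of that symmetry (the decoder half is the vp9_diff_update_prob() body shown earlier, the encoder half is paraphrased, not quoted):]

  /* encoder, inside vp9_cond_prob_diff_update(): */
  vp9_write(w, do_update, DIFF_UPDATE_PROB);
  if (do_update)
    vp9_write_prob_diff_update(w, newp, *oldp);

  /* decoder, inside vp9_diff_update_prob(): */
  if (vp9_read(r, DIFF_UPDATE_PROB)) {
    const int delp = decode_term_subexp(r, SUBEXP_PARAM, 255);
    *p = (vp9_prob)inv_remap_prob(delp, *p);
  }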
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 3a2be56a1..b26ae329f 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -172,7 +172,6 @@ struct macroblock {
BLOCK_SIZE sb64_partitioning;
void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
- void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 3008e46dd..b6555bc05 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -17,7 +17,7 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
-static void fdct4(int16_t *input, int16_t *output) {
+static void fdct4(const int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;
@@ -102,7 +102,7 @@ void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
}
}
-static void fadst4(int16_t *input, int16_t *output) {
+static void fadst4(const int16_t *input, int16_t *output) {
int x0, x1, x2, x3;
int s0, s1, s2, s3, s4, s5, s6, s7;
@@ -178,12 +178,7 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output,
}
}
-void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) {
- vp9_short_fdct4x4_c(input, output, pitch);
- vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
-}
-
-static void fdct8(int16_t *input, int16_t *output) {
+static void fdct8(const int16_t *input, int16_t *output) {
/*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
/*needs32*/ int t0, t1, t2, t3;
/*canbe16*/ int x0, x1, x2, x3;
@@ -486,7 +481,7 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {
}
}
-static void fadst8(int16_t *input, int16_t *output) {
+static void fadst8(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[7];
@@ -647,14 +642,8 @@ void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) {
}
}
-void vp9_short_walsh8x4_c(int16_t *input, int16_t *output, int pitch) {
- vp9_short_walsh4x4_c(input, output, pitch);
- vp9_short_walsh4x4_c(input + 4, output + 16, pitch);
-}
-
-
// Rewrote to use same algorithm as others.
-static void fdct16(int16_t in[16], int16_t out[16]) {
+static void fdct16(const int16_t in[16], int16_t out[16]) {
/*canbe16*/ int step1[8];
/*canbe16*/ int step2[8];
/*canbe16*/ int step3[8];
@@ -795,7 +784,7 @@ static void fdct16(int16_t in[16], int16_t out[16]) {
out[15] = dct_const_round_shift(temp2);
}
-void fadst16(int16_t *input, int16_t *output) {
+static void fadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -1003,7 +992,7 @@ static INLINE int half_round_shift(int input) {
return rv;
}
-static void dct32_1d(int *input, int *output, int round) {
+static void dct32_1d(const int *input, int *output, int round) {
int step[32];
// Stage 1
step[0] = input[0] + input[(32 - 1)];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index b74609bc2..ac1fd6215 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1853,7 +1853,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
if (lossless) {
// printf("Switching to lossless\n");
- cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
cpi->mb.optimize = 0;
@@ -1862,7 +1861,6 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
cpi->common.tx_mode = ONLY_4X4;
} else {
// printf("Not lossless\n");
- cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
}
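
[Editor's note: with the composite 8x4 transform pointers removed, only the 4x4 pair changes between lossless and lossy operation. A hypothetical condensation of the two branches above:]

  cpi->mb.fwd_txm4x4      = lossless ? vp9_short_walsh4x4  /* WHT, exactly invertible */
                                     : vp9_short_fdct4x4;
  cpi->mb.e_mbd.itxm_add  = lossless ? vp9_iwht4x4_add
                                     : vp9_idct4x4_add;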
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 0fc36d98f..a0a7bab27 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -564,7 +564,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob)
- vp9_iht_add_16x16(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
+ vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
case TX_8X8:
tx_type = get_tx_type_8x8(pd->plane_type, xd);
@@ -589,7 +589,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob)
- vp9_iht_add_8x8(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
+ vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
case TX_4X4:
tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
@@ -623,7 +623,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
// case.
xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob);
else
- vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type);
+ vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type);
}
break;
default:
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 0833b4ac8..0afb35f54 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -955,10 +955,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16;
cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8;
- cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
- cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
}
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 54e60d6e1..eb7ca6b72 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -110,6 +110,7 @@ static int rd_thresh_block_size_factor[BLOCK_SIZES] =
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define RD_THRESH_POW 1.25
+#define RD_MULT_EPB_RATIO 64
#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
@@ -162,7 +163,17 @@ void vp9_init_me_luts() {
static int compute_rd_mult(int qindex) {
const int q = vp9_dc_quant(qindex, 0);
- return (11 * q * q) >> 2;
+ // TODO(debargha): Adjust the function below
+ return (88 * q * q / 25);
+}
+
+static int compute_rd_thresh_factor(int qindex) {
+ int q;
+ // TODO(debargha): Adjust the function below
+ q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
+ if (q < 8)
+ q = 8;
+ return q;
}
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
@@ -172,9 +183,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
static void set_block_thresholds(VP9_COMP *cpi, int qindex) {
int q, i, bsize;
- q = ((int)pow(vp9_dc_quant(qindex, 0) >> 2, RD_THRESH_POW)) << 2;
- if (q < 8)
- q = 8;
+ q = compute_rd_thresh_factor(qindex);
for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
for (i = 0; i < MAX_MODES; ++i) {
@@ -216,7 +225,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
// cpi->common.refresh_alt_ref_frame)
qindex = clamp(qindex, 0, MAXQ);
- cpi->RDDIV = 100;
+ cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
cpi->RDMULT = compute_rd_mult(qindex);
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
if (cpi->twopass.next_iiratio > 31)
@@ -225,7 +234,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
cpi->RDMULT +=
(cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
}
- cpi->mb.errorperbit = cpi->RDMULT >> 6;
+ cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
vp9_set_speed_features(cpi);
@@ -1100,7 +1109,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
goto next;
if (tx_type != DCT_DCT)
- vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
+ vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
dst, pd->dst.stride, tx_type);
else
xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
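
[Editor's note: a worked check of the retuned constants in the compute_rd_mult() / vp9_initialize_rd_consts() hunks above, using a hypothetical qindex giving q = 32:]

  /* old RDMULT: (11 * q * q) >> 2  = 11 * 1024 / 4  = 2816  (2.75 q^2)
   * new RDMULT:  88 * q * q / 25   = 88 * 1024 / 25 = 3604  (3.52 q^2)
   * errorperbit: RDMULT / RD_MULT_EPB_RATIO == RDMULT / 64, the same
   * value the old RDMULT >> 6 produced for non-negative RDMULT. */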
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index c86ea2723..aa4068d76 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -12,8 +12,10 @@
#ifndef VP9_ENCODER_VP9_RDOPT_H_
#define VP9_ENCODER_VP9_RDOPT_H_
+#define RDDIV_BITS 7
+
#define RDCOST(RM, DM, R, D) \
- (((128 + ((int64_t)R) * (RM)) >> 8) + ((int64_t)DM) * (D))
+ (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
#define QIDX_SKIP_THRESH 115
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex);
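
[Editor's note: the RDCOST change swaps the distortion multiplier for a shift. DM is now a bit count (RDDIV_BITS == 7), so D is scaled by 128 instead of being multiplied by the old run-time RDDIV value of 100. A worked example with hypothetical inputs R = 100, D = 500, RM = 3604, DM = RDDIV_BITS:]

  /* rate term: (128 + 100 * 3604) >> 8 = 360528 >> 8 = 1408
   * dist term:  500 << 7                             = 64000
   * RDCOST   :  1408 + 64000                         = 65408 */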
diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c
index 667b8012c..eb864d96c 100644
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c
@@ -221,7 +221,8 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
}
void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp,
- vp9_prob upd, unsigned int *ct) {
+ unsigned int *ct) {
+ const vp9_prob upd = DIFF_UPDATE_PROB;
vp9_prob newp = get_binary_prob(ct[0], ct[1]);
const int savings = vp9_prob_diff_update_savings_search(ct, *oldp, &newp,
upd);
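
[Editor's note: the remainder of this function, outside the hunk, decides whether signalling the delta is worthwhile. A sketch of that logic under the new signature, paraphrased rather than quoted:]

  if (savings > 0) {
    vp9_write(w, 1, upd);                     /* update flag set */
    vp9_write_prob_diff_update(w, newp, *oldp);
    *oldp = newp;
  } else {
    vp9_write(w, 0, upd);                     /* keep the old probability */
  }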
diff --git a/vp9/encoder/vp9_subexp.h b/vp9/encoder/vp9_subexp.h
index 7acdaf6f1..521c7778d 100644
--- a/vp9/encoder/vp9_subexp.h
+++ b/vp9/encoder/vp9_subexp.h
@@ -19,7 +19,7 @@ void vp9_write_prob_diff_update(vp9_writer *w,
vp9_prob newp, vp9_prob oldp);
void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp,
- vp9_prob upd, unsigned int *ct);
+ unsigned int *ct);
int vp9_prob_diff_update_savings_search(const unsigned int *ct,
vp9_prob oldp, vp9_prob *bestp,
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index ad3d01da9..5e1e5ed4a 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -112,11 +112,6 @@ void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
}
}
-void vp9_short_fdct8x4_sse2(int16_t *input, int16_t *output, int pitch) {
- vp9_short_fdct4x4_sse2(input, output, pitch);
- vp9_short_fdct4x4_sse2(input + 4, output + 16, pitch);
-}
-
static INLINE void load_buffer_4x4(int16_t *input, __m128i *in, int stride) {
const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 7a5b78634..6b923162f 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -658,8 +658,10 @@ static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
if (corrupted) {
VP9D_COMP *pbi = (VP9D_COMP *)ctx->pbi;
- *corrupted = pbi->common.frame_to_show->corrupted;
-
+ if (pbi)
+ *corrupted = pbi->common.frame_to_show->corrupted;
+ else
+ return VPX_CODEC_ERROR;
return VPX_CODEC_OK;
} else {
return VPX_CODEC_INVALID_PARAM;
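
[Editor's note: a caller-side sketch of what the new NULL guard buys, assuming the usual vpx_codec_control() path that routes VP8D_GET_FRAME_CORRUPTED to get_frame_corrupted(). Querying corruption before the decoder instance exists now yields VPX_CODEC_ERROR instead of dereferencing a NULL pbi.]

  #include <stdio.h>
  #include "vpx/vpx_decoder.h"
  #include "vpx/vp8dx.h"

  static void report_corruption(vpx_codec_ctx_t *decoder) {
    int corrupted = 0;
    if (vpx_codec_control(decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted))
      fprintf(stderr, "corruption status unavailable\n");  /* e.g. nothing decoded yet */
    else
      printf("frame corrupted: %d\n", corrupted);
  }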