diff options
Diffstat (limited to 'vp9')
32 files changed, 365 insertions, 386 deletions
diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c index 33aa4e001..0b9fc09ab 100644 --- a/vp9/common/arm/neon/vp9_idct16x16_neon.c +++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c @@ -11,31 +11,31 @@ #include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" -extern void vp9_idct16x16_256_add_neon_pass1(int16_t *input, - int16_t *output, - int output_stride); -extern void vp9_idct16x16_256_add_neon_pass2(int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); -extern void vp9_idct16x16_10_add_neon_pass1(int16_t *input, - int16_t *output, - int output_stride); -extern void vp9_idct16x16_10_add_neon_pass2(int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); +void vp9_idct16x16_256_add_neon_pass1(const int16_t *input, + int16_t *output, + int output_stride); +void vp9_idct16x16_256_add_neon_pass2(const int16_t *src, + int16_t *output, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride); +void vp9_idct16x16_10_add_neon_pass1(const int16_t *input, + int16_t *output, + int output_stride); +void vp9_idct16x16_10_add_neon_pass2(const int16_t *src, + int16_t *output, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride); /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ extern void vp9_push_neon(int64_t *store); extern void vp9_pop_neon(int64_t *store); -void vp9_idct16x16_256_add_neon(int16_t *input, - uint8_t *dest, int dest_stride) { +void vp9_idct16x16_256_add_neon(const int16_t *input, + uint8_t *dest, int dest_stride) { int64_t store_reg[8]; int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; @@ -109,8 +109,8 @@ void vp9_idct16x16_256_add_neon(int16_t *input, return; } -void vp9_idct16x16_10_add_neon(int16_t *input, - uint8_t *dest, int dest_stride) { +void vp9_idct16x16_10_add_neon(const int16_t *input, + uint8_t *dest, int dest_stride) { int64_t store_reg[8]; int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; diff --git a/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm b/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm index 963ef35da..2f326e24c 100644 --- a/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm @@ -8,7 +8,7 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_short_iht4x4_add_neon| + EXPORT |vp9_iht4x4_16_add_neon| ARM REQUIRE8 PRESERVE8 @@ -139,7 +139,7 @@ MEND AREA Block, CODE, READONLY ; name this block of code -;void vp9_short_iht4x4_add_neon(int16_t *input, uint8_t *dest, +;void vp9_iht4x4_16_add_neon(int16_t *input, uint8_t *dest, ; int dest_stride, int tx_type) ; ; r0 int16_t input @@ -147,7 +147,7 @@ ; r2 int dest_stride ; r3 int tx_type) ; This function will only handle tx_type of 1,2,3. -|vp9_short_iht4x4_add_neon| PROC +|vp9_iht4x4_16_add_neon| PROC ; load the inputs into d16-d19 vld1.s16 {q8,q9}, [r0]! @@ -175,7 +175,7 @@ iadst_idct ; then transform columns IADST4x4_1D - b end_vp9_short_iht4x4_add_neon + b end_vp9_iht4x4_16_add_neon idct_iadst ; generate constants @@ -191,7 +191,7 @@ idct_iadst ; then transform columns IDCT4x4_1D - b end_vp9_short_iht4x4_add_neon + b end_vp9_iht4x4_16_add_neon iadst_iadst ; generate constants @@ -206,7 +206,7 @@ iadst_iadst ; then transform columns IADST4x4_1D -end_vp9_short_iht4x4_add_neon +end_vp9_iht4x4_16_add_neon ; ROUND_POWER_OF_TWO(temp_out[j], 4) vrshr.s16 q8, q8, #4 vrshr.s16 q9, q9, #4 @@ -232,6 +232,6 @@ end_vp9_short_iht4x4_add_neon vst1.32 {d26[1]}, [r1], r2 vst1.32 {d26[0]}, [r1] ; no post-increment bx lr - ENDP ; |vp9_short_iht4x4_add_neon| + ENDP ; |vp9_iht4x4_16_add_neon| END diff --git a/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm b/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm index bab9cb4a4..93d3af301 100644 --- a/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm @@ -8,7 +8,7 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_short_iht8x8_add_neon| + EXPORT |vp9_iht8x8_64_add_neon| ARM REQUIRE8 PRESERVE8 @@ -559,7 +559,7 @@ AREA Block, CODE, READONLY ; name this block of code -;void vp9_short_iht8x8_add_neon(int16_t *input, uint8_t *dest, +;void vp9_iht8x8_64_add_neon(int16_t *input, uint8_t *dest, ; int dest_stride, int tx_type) ; ; r0 int16_t input @@ -567,7 +567,7 @@ ; r2 int dest_stride ; r3 int tx_type) ; This function will only handle tx_type of 1,2,3. -|vp9_short_iht8x8_add_neon| PROC +|vp9_iht8x8_64_add_neon| PROC ; load the inputs into d16-d19 vld1.s16 {q8,q9}, [r0]! @@ -602,7 +602,7 @@ iadst_idct ; then transform columns IADST8X8_1D - b end_vp9_short_iht8x8_add_neon + b end_vp9_iht8x8_64_add_neon idct_iadst ; generate IADST constants @@ -620,7 +620,7 @@ idct_iadst ; then transform columns IDCT8x8_1D - b end_vp9_short_iht8x8_add_neon + b end_vp9_iht8x8_64_add_neon iadst_iadst ; generate IADST constants @@ -635,7 +635,7 @@ iadst_iadst ; then transform columns IADST8X8_1D -end_vp9_short_iht8x8_add_neon +end_vp9_iht8x8_64_add_neon pop {r0-r10} ; ROUND_POWER_OF_TWO(temp_out[j], 5) @@ -691,6 +691,6 @@ end_vp9_short_iht8x8_add_neon vst1.64 {d6}, [r0], r2 vst1.64 {d7}, [r0], r2 bx lr - ENDP ; |vp9_short_iht8x8_add_neon| + ENDP ; |vp9_iht8x8_64_add_neon| END diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b1af13891..0538b37ac 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -221,7 +221,7 @@ typedef struct macroblockd { int lossless; /* Inverse transform function pointers. */ - void (*itxm_add)(int16_t *input, uint8_t *dest, int stride, int eob); + void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); struct subpix_fn_table subpix; diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 3cf508e05..02178b579 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -18,6 +18,8 @@ #include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_treecoder.h" +#define DIFF_UPDATE_PROB 252 + /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ @@ -208,7 +210,4 @@ static void get_scan_and_band(const MACROBLOCKD *xd, TX_SIZE tx_size, } } - -enum { VP9_COEF_UPDATE_PROB = 252 }; - #endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index e17679616..56e644460 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -226,7 +226,7 @@ static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] }; /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ -const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = { +const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { -DC_PRED, 2, /* 0 = DC_NODE */ -TM_PRED, 4, /* 1 = TM_NODE */ -V_PRED, 6, /* 2 = V_NODE */ @@ -237,22 +237,20 @@ const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = { -D63_PRED, 16, /* 7 = D63_NODE */ -D153_PRED, -D207_PRED /* 8 = D153_NODE */ }; +struct vp9_token vp9_intra_mode_encodings[INTRA_MODES]; -const vp9_tree_index vp9_inter_mode_tree[6] = { +const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { -ZEROMV, 2, -NEARESTMV, 4, -NEARMV, -NEWMV }; +struct vp9_token vp9_inter_mode_encodings[INTER_MODES]; -const vp9_tree_index vp9_partition_tree[6] = { +const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT }; - -struct vp9_token vp9_intra_mode_encodings[INTRA_MODES]; -struct vp9_token vp9_inter_mode_encodings[INTER_MODES]; - struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { @@ -338,7 +336,8 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) { vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs); } -const vp9_tree_index vp9_switchable_interp_tree[SWITCHABLE_FILTERS*2-2] = { +const vp9_tree_index vp9_switchable_interp_tree + [TREE_SIZE(SWITCHABLE_FILTERS)] = { -EIGHTTAP, 2, -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index ccade2752..ab37b75c6 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -15,7 +15,6 @@ #include "vp9/common/vp9_treecoder.h" #define TX_SIZE_CONTEXTS 2 -#define MODE_UPDATE_PROB 252 #define SWITCHABLE_FILTERS 3 // number of switchable filters // #define MODE_STATS @@ -38,19 +37,17 @@ extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] [INTRA_MODES - 1]; -extern const vp9_tree_index vp9_intra_mode_tree[]; -extern const vp9_tree_index vp9_inter_mode_tree[]; - +extern const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; extern struct vp9_token vp9_intra_mode_encodings[INTRA_MODES]; + +extern const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)]; extern struct vp9_token vp9_inter_mode_encodings[INTER_MODES]; -// probability models for partition information -extern const vp9_tree_index vp9_partition_tree[]; +extern const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)]; extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; extern const vp9_tree_index vp9_switchable_interp_tree - [2 * (SWITCHABLE_FILTERS - 1)]; - + [TREE_SIZE(SWITCHABLE_FILTERS)]; extern struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS]; void vp9_entropy_mode_init(); diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index baff637e8..e85118118 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c @@ -18,14 +18,14 @@ /* Integer pel reference mv threshold for use of high-precision 1/8 mv */ #define COMPANDED_MVREF_THRESH 8 -const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = { +const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ }; struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; -const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { +const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { -MV_CLASS_0, 2, -MV_CLASS_1, 4, 6, 8, @@ -39,12 +39,12 @@ const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { }; struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; -const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2] = { +const vp9_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1, }; struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; -const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2] = { +const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(4)] = { -0, 2, -1, 4, -2, -3 diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index 3b782ab0a..c42653d42 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -43,9 +43,6 @@ static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; } -extern const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2]; -extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; - /* Symbols for coding magnitude class of nonzero components */ #define MV_CLASSES 11 typedef enum { @@ -62,9 +59,6 @@ typedef enum { MV_CLASS_10 = 10, /* (1024,2048] integer pel */ } MV_CLASS_TYPE; -extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2]; -extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; - #define CLASS0_BITS 1 /* bits at integer precision for class 0 */ #define CLASS0_SIZE (1 << CLASS0_BITS) #define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) @@ -77,10 +71,16 @@ extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; #define MV_UPP ((1 << MV_IN_USE_BITS) - 1) #define MV_LOW (-(1 << MV_IN_USE_BITS)) -extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2]; +extern const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)]; +extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; + +extern const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)]; +extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; + +extern const vp9_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)]; extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; -extern const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2]; +extern const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(4)]; extern struct vp9_token vp9_mv_fp_encodings[4]; typedef struct { diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 78d10877a..52b039d99 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -18,13 +18,13 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ int i; int16_t output[16]; int a1, b1, c1, d1, e1; - int16_t *ip = input; + const int16_t *ip = input; int16_t *op = output; for (i = 0; i < 4; i++) { @@ -60,21 +60,21 @@ void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { c1 = e1 - c1; a1 -= b1; d1 += c1; - dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); - dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1); - dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1); - dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1); + dest[stride * 0] = clip_pixel(dest[stride * 0] + a1); + dest[stride * 1] = clip_pixel(dest[stride * 1] + b1); + dest[stride * 2] = clip_pixel(dest[stride * 2] + c1); + dest[stride * 3] = clip_pixel(dest[stride * 3] + d1); ip++; dest++; } } -void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) { +void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) { int i; int a1, e1; int16_t tmp[4]; - int16_t *ip = in; + const int16_t *ip = in; int16_t *op = tmp; a1 = ip[0] >> UNIT_QUANT_SHIFT; @@ -96,7 +96,7 @@ void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) { } } -static void idct4_1d(int16_t *input, int16_t *output) { +static void idct4_1d(const int16_t *input, int16_t *output) { int16_t step[4]; int temp1, temp2; // stage 1 @@ -116,7 +116,7 @@ static void idct4_1d(int16_t *input, int16_t *output) { output[3] = step[0] - step[3]; } -void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { int16_t out[4 * 4]; int16_t *outptr = out; int i, j; @@ -135,12 +135,12 @@ void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { temp_in[j] = out[j * 4 + i]; idct4_1d(temp_in, temp_out); for (j = 0; j < 4; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) - + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + + dest[j * stride + i]); } } -void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) { int i; int a1; int16_t out = dct_const_round_shift(input[0] * cospi_16_64); @@ -156,7 +156,7 @@ void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { } } -static void idct8_1d(int16_t *input, int16_t *output) { +static void idct8_1d(const int16_t *input, int16_t *output) { int16_t step1[8], step2[8]; int temp1, temp2; // stage 1 @@ -201,7 +201,7 @@ static void idct8_1d(int16_t *input, int16_t *output) { output[7] = step1[0] - step1[7]; } -void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) { int16_t out[8 * 8]; int16_t *outptr = out; int i, j; @@ -220,12 +220,12 @@ void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) { temp_in[j] = out[j * 8 + i]; idct8_1d(temp_in, temp_out); for (j = 0; j < 8; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) - + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * stride + i]); } } -void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) { int i, j; int a1; int16_t out = dct_const_round_shift(input[0] * cospi_16_64); @@ -234,11 +234,11 @@ void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { for (j = 0; j < 8; ++j) { for (i = 0; i < 8; ++i) dest[i] = clip_pixel(dest[i] + a1); - dest += dest_stride; + dest += stride; } } -static void iadst4_1d(int16_t *input, int16_t *output) { +static void iadst4_1d(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; int x0 = input[0]; @@ -280,8 +280,8 @@ static void iadst4_1d(int16_t *input, int16_t *output) { output[3] = dct_const_round_shift(s3); } -void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, - int tx_type) { +void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { const transform_2d IHT_4[] = { { idct4_1d, idct4_1d }, // DCT_DCT = 0 { iadst4_1d, idct4_1d }, // ADST_DCT = 1 @@ -307,11 +307,11 @@ void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, temp_in[j] = out[j * 4 + i]; IHT_4[tx_type].cols(temp_in, temp_out); for (j = 0; j < 4; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) - + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + + dest[j * stride + i]); } } -static void iadst8_1d(int16_t *input, int16_t *output) { +static void iadst8_1d(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; int x0 = input[7]; @@ -395,8 +395,8 @@ static const transform_2d IHT_8[] = { { iadst8_1d, iadst8_1d } // ADST_ADST = 3 }; -void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride, - int tx_type) { +void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { int i, j; int16_t out[8 * 8]; int16_t *outptr = out; @@ -416,12 +416,12 @@ void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride, temp_in[j] = out[j * 8 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 8; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) - + dest[j * dest_stride + i]); } + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * stride + i]); + } } -void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest, - int dest_stride) { +void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) { int16_t out[8 * 8] = { 0 }; int16_t *outptr = out; int i, j; @@ -441,12 +441,12 @@ void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest, temp_in[j] = out[j * 8 + i]; idct8_1d(temp_in, temp_out); for (j = 0; j < 8; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) - + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * stride + i]); } } -static void idct16_1d(int16_t *input, int16_t *output) { +static void idct16_1d(const int16_t *input, int16_t *output) { int16_t step1[16], step2[16]; int temp1, temp2; @@ -611,7 +611,7 @@ static void idct16_1d(int16_t *input, int16_t *output) { output[15] = step2[0] - step2[15]; } -void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) { int16_t out[16 * 16]; int16_t *outptr = out; int i, j; @@ -630,12 +630,12 @@ void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride) { temp_in[j] = out[j * 16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * stride + i]); } } -void iadst16_1d(int16_t *input, int16_t *output) { +static void iadst16_1d(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; int x0 = input[15]; @@ -813,8 +813,8 @@ static const transform_2d IHT_16[] = { { iadst16_1d, iadst16_1d } // ADST_ADST = 3 }; -void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, - int tx_type) { +void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { int i, j; int16_t out[16 * 16]; int16_t *outptr = out; @@ -834,12 +834,11 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * dest_stride + i]); } + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * stride + i]); } } -void vp9_idct16x16_10_add_c(int16_t *input, uint8_t *dest, - int dest_stride) { +void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) { int16_t out[16 * 16] = { 0 }; int16_t *outptr = out; int i, j; @@ -859,13 +858,12 @@ void vp9_idct16x16_10_add_c(int16_t *input, uint8_t *dest, temp_in[j] = out[j*16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * stride + i]); } } -void vp9_idct16x16_1_add_c(int16_t *input, uint8_t *dest, - int dest_stride) { +void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) { int i, j; int a1; int16_t out = dct_const_round_shift(input[0] * cospi_16_64); @@ -874,11 +872,11 @@ void vp9_idct16x16_1_add_c(int16_t *input, uint8_t *dest, for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) dest[i] = clip_pixel(dest[i] + a1); - dest += dest_stride; + dest += stride; } } -static void idct32_1d(int16_t *input, int16_t *output) { +static void idct32_1d(const int16_t *input, int16_t *output) { int16_t step1[32], step2[32]; int temp1, temp2; @@ -1245,7 +1243,7 @@ static void idct32_1d(int16_t *input, int16_t *output) { output[31] = step1[0] - step1[31]; } -void vp9_idct32x32_1024_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) { int16_t out[32 * 32]; int16_t *outptr = out; int i, j; @@ -1277,13 +1275,12 @@ void vp9_idct32x32_1024_add_c(int16_t *input, uint8_t *dest, int dest_stride) { temp_in[j] = out[j * 32 + i]; idct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * dest_stride + i]); + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * stride + i]); } } -void vp9_idct32x32_1_add_c(int16_t *input, uint8_t *dest, - int dest_stride) { +void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) { int i, j; int a1; @@ -1294,12 +1291,12 @@ void vp9_idct32x32_1_add_c(int16_t *input, uint8_t *dest, for (j = 0; j < 32; ++j) { for (i = 0; i < 32; ++i) dest[i] = clip_pixel(dest[i] + a1); - dest += dest_stride; + dest += stride; } } // idct -void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) { +void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) vp9_idct4x4_16_add(input, dest, stride); else @@ -1307,14 +1304,14 @@ void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) { } -void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) { +void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) vp9_iwht4x4_16_add(input, dest, stride); else vp9_iwht4x4_1_add(input, dest, stride); } -void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) { +void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) { // If dc is 1, then input[0] is the reconstructed value, do not need // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. @@ -1333,7 +1330,8 @@ void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) { } } -void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob) { +void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, + int eob) { /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to separate different cases. */ if (eob) { @@ -1347,7 +1345,8 @@ void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob) { } } -void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob) { +void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride, + int eob) { if (eob) { if (eob == 1) vp9_idct32x32_1_add(input, dest, stride); @@ -1357,32 +1356,32 @@ void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob) { } // iht -void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride, - int eob) { +void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, + int stride, int eob) { if (tx_type == DCT_DCT) vp9_idct4x4_add(input, dest, stride, eob); else - vp9_short_iht4x4_add(input, dest, stride, tx_type); + vp9_iht4x4_16_add(input, dest, stride, tx_type); } -void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { +void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, + int stride, int eob) { if (tx_type == DCT_DCT) { vp9_idct8x8_add(input, dest, stride, eob); } else { if (eob > 0) { - vp9_short_iht8x8_add(input, dest, stride, tx_type); + vp9_iht8x8_64_add(input, dest, stride, tx_type); } } } -void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { +void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, + int stride, int eob) { if (tx_type == DCT_DCT) { vp9_idct16x16_add(input, dest, stride, eob); } else { if (eob > 0) { - vp9_short_iht16x16_add(input, dest, stride, tx_type); + vp9_iht16x16_256_add(input, dest, stride, tx_type); } } } diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index e85404e7a..2b3f35f0a 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -81,27 +81,27 @@ static INLINE int dct_const_round_shift(int input) { return rv; } -typedef void (*transform_1d)(int16_t*, int16_t*); +typedef void (*transform_1d)(const int16_t*, int16_t*); typedef struct { transform_1d cols, rows; // vertical and horizontal } transform_2d; - -void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob); -void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob); -void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob); -void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob); -void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob); - -void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob); - -void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob); - -void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob); +void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob); + +void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob); +void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob); +void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, int + eob); +void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride, + int eob); + +void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, + int stride, int eob); #endif // VP9_COMMON_VP9_IDCT_H_ diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 31227ad54..526be87df 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -267,51 +267,51 @@ specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 # # dct # -prototype void vp9_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct4x4_1_add sse2 neon -prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct4x4_16_add sse2 neon -prototype void vp9_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct8x8_1_add sse2 neon -prototype void vp9_idct8x8_64_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct8x8_64_add sse2 neon -prototype void vp9_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct8x8_10_add sse2 neon -prototype void vp9_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct16x16_1_add sse2 neon -prototype void vp9_idct16x16_256_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct16x16_256_add sse2 neon -prototype void vp9_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct16x16_10_add sse2 neon -prototype void vp9_idct32x32_1024_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct32x32_1024_add sse2 neon -prototype void vp9_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_idct32x32_1_add sse2 -prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_short_iht4x4_add sse2 neon +prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" +specialize vp9_iht4x4_16_add sse2 neon -prototype void vp9_short_iht8x8_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_short_iht8x8_add sse2 neon +prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" +specialize vp9_iht8x8_64_add sse2 neon -prototype void vp9_short_iht16x16_add "int16_t *input, uint8_t *output, int pitch, int tx_type" -specialize vp9_short_iht16x16_add sse2 +prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type" +specialize vp9_iht16x16_256_add sse2 # dct and add -prototype void vp9_iwht4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_iwht4x4_1_add -prototype void vp9_iwht4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride" +prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_iwht4x4_16_add # @@ -701,9 +701,6 @@ specialize vp9_short_fdct8x8 sse2 prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct4x4 sse2 -prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_fdct8x4 sse2 - prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct32x32 sse2 @@ -716,9 +713,6 @@ specialize vp9_short_fdct16x16 sse2 prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_walsh4x4 -prototype void vp9_short_walsh8x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_walsh8x4 - # # Motion search # diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h index b8d161d19..254a431a3 100644 --- a/vp9/common/vp9_systemdependent.h +++ b/vp9/common/vp9_systemdependent.h @@ -24,8 +24,8 @@ void vpx_reset_mmx_state(void); #define vp9_clear_system_state() #endif -#ifdef _MSC_VER -// round is not defined in MSVC +#if defined(_MSC_VER) && _MSC_VER < 1800 +// round is not defined in MSVC before VS2013. static int round(double x) { if (x < 0) return (int)ceil(x - 0.5); diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h index 24e6fa295..4ba171f46 100644 --- a/vp9/common/vp9_treecoder.h +++ b/vp9/common/vp9_treecoder.h @@ -21,6 +21,8 @@ typedef uint8_t vp9_prob; typedef int8_t vp9_tree_index; +#define TREE_SIZE(leaf_count) (2 * (leaf_count) - 2) + #define vp9_complement(x) (255 - x) /* We build coding trees compactly in arrays. diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index a2b0e8c73..cfec36b42 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -15,7 +15,7 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64, @@ -26,10 +26,10 @@ void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) { __m128i input0, input1, input2, input3; // Rows - input0 = _mm_loadl_epi64((__m128i *)input); - input1 = _mm_loadl_epi64((__m128i *)(input + 4)); - input2 = _mm_loadl_epi64((__m128i *)(input + 8)); - input3 = _mm_loadl_epi64((__m128i *)(input + 12)); + input0 = _mm_loadl_epi64((const __m128i *)input); + input1 = _mm_loadl_epi64((const __m128i *)(input + 4)); + input2 = _mm_loadl_epi64((const __m128i *)(input + 8)); + input3 = _mm_loadl_epi64((const __m128i *)(input + 12)); // Construct i3, i1, i3, i1, i2, i0, i2, i0 input0 = _mm_shufflelo_epi16(input0, 0xd8); @@ -148,7 +148,7 @@ void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) { RECON_AND_STORE4X4(dest, input3); } -void vp9_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a; @@ -264,16 +264,16 @@ static void iadst4_1d_sse2(__m128i *in) { in[3] = _mm_unpackhi_epi64(in[1], in[1]); } -void vp9_short_iht4x4_add_sse2(int16_t *input, uint8_t *dest, int stride, - int tx_type) { +void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { __m128i in[4]; const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); - in[0] = _mm_loadl_epi64((__m128i *)input); - in[1] = _mm_loadl_epi64((__m128i *)(input + 4)); - in[2] = _mm_loadl_epi64((__m128i *)(input + 8)); - in[3] = _mm_loadl_epi64((__m128i *)(input + 12)); + in[0] = _mm_loadl_epi64((const __m128i *)input); + in[1] = _mm_loadl_epi64((const __m128i *)(input + 4)); + in[2] = _mm_loadl_epi64((const __m128i *)(input + 8)); + in[3] = _mm_loadl_epi64((const __m128i *)(input + 12)); switch (tx_type) { case 0: // DCT_DCT @@ -494,7 +494,7 @@ void vp9_short_iht4x4_add_sse2(int16_t *input, uint8_t *dest, int stride, dest += stride; \ } -void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<4); @@ -514,14 +514,14 @@ void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) { int i; // Load input data. - in0 = _mm_load_si128((__m128i *)input); - in1 = _mm_load_si128((__m128i *)(input + 8 * 1)); - in2 = _mm_load_si128((__m128i *)(input + 8 * 2)); - in3 = _mm_load_si128((__m128i *)(input + 8 * 3)); - in4 = _mm_load_si128((__m128i *)(input + 8 * 4)); - in5 = _mm_load_si128((__m128i *)(input + 8 * 5)); - in6 = _mm_load_si128((__m128i *)(input + 8 * 6)); - in7 = _mm_load_si128((__m128i *)(input + 8 * 7)); + in0 = _mm_load_si128((const __m128i *)input); + in1 = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in2 = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in3 = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in4 = _mm_load_si128((const __m128i *)(input + 8 * 4)); + in5 = _mm_load_si128((const __m128i *)(input + 8 * 5)); + in6 = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in7 = _mm_load_si128((const __m128i *)(input + 8 * 7)); // 2-D for (i = 0; i < 2; i++) { @@ -562,7 +562,7 @@ void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) { RECON_AND_STORE(dest, in7); } -void vp9_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a; @@ -883,21 +883,21 @@ static void iadst8_1d_sse2(__m128i *in) { } -void vp9_short_iht8x8_add_sse2(int16_t *input, uint8_t *dest, int stride, - int tx_type) { +void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { __m128i in[8]; const __m128i zero = _mm_setzero_si128(); const __m128i final_rounding = _mm_set1_epi16(1<<4); // load input data - in[0] = _mm_load_si128((__m128i *)input); - in[1] = _mm_load_si128((__m128i *)(input + 8 * 1)); - in[2] = _mm_load_si128((__m128i *)(input + 8 * 2)); - in[3] = _mm_load_si128((__m128i *)(input + 8 * 3)); - in[4] = _mm_load_si128((__m128i *)(input + 8 * 4)); - in[5] = _mm_load_si128((__m128i *)(input + 8 * 5)); - in[6] = _mm_load_si128((__m128i *)(input + 8 * 6)); - in[7] = _mm_load_si128((__m128i *)(input + 8 * 7)); + in[0] = _mm_load_si128((const __m128i *)input); + in[1] = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in[2] = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in[3] = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in[4] = _mm_load_si128((const __m128i *)(input + 8 * 4)); + in[5] = _mm_load_si128((const __m128i *)(input + 8 * 5)); + in[6] = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in[7] = _mm_load_si128((const __m128i *)(input + 8 * 7)); switch (tx_type) { case 0: // DCT_DCT @@ -950,7 +950,7 @@ void vp9_short_iht8x8_add_sse2(int16_t *input, uint8_t *dest, int stride, RECON_AND_STORE(dest, in[7]); } -void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<4); @@ -970,10 +970,10 @@ void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) { __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // Rows. Load 4-row input data. - in0 = _mm_load_si128((__m128i *)input); - in1 = _mm_load_si128((__m128i *)(input + 8 * 1)); - in2 = _mm_load_si128((__m128i *)(input + 8 * 2)); - in3 = _mm_load_si128((__m128i *)(input + 8 * 3)); + in0 = _mm_load_si128((const __m128i *)input); + in1 = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in2 = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in3 = _mm_load_si128((const __m128i *)(input + 8 * 3)); // 8x4 Transpose TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3) @@ -1228,7 +1228,8 @@ void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) { stp2_10, stp2_13, stp2_11, stp2_12) \ } -void vp9_idct16x16_256_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); const __m128i zero = _mm_setzero_si128(); @@ -1283,22 +1284,22 @@ void vp9_idct16x16_256_add_sse2(int16_t *input, uint8_t *dest, int stride) { if (i == 1) input += 128; // Load input data. - in0 = _mm_load_si128((__m128i *)input); - in8 = _mm_load_si128((__m128i *)(input + 8 * 1)); - in1 = _mm_load_si128((__m128i *)(input + 8 * 2)); - in9 = _mm_load_si128((__m128i *)(input + 8 * 3)); - in2 = _mm_load_si128((__m128i *)(input + 8 * 4)); - in10 = _mm_load_si128((__m128i *)(input + 8 * 5)); - in3 = _mm_load_si128((__m128i *)(input + 8 * 6)); - in11 = _mm_load_si128((__m128i *)(input + 8 * 7)); - in4 = _mm_load_si128((__m128i *)(input + 8 * 8)); - in12 = _mm_load_si128((__m128i *)(input + 8 * 9)); - in5 = _mm_load_si128((__m128i *)(input + 8 * 10)); - in13 = _mm_load_si128((__m128i *)(input + 8 * 11)); - in6 = _mm_load_si128((__m128i *)(input + 8 * 12)); - in14 = _mm_load_si128((__m128i *)(input + 8 * 13)); - in7 = _mm_load_si128((__m128i *)(input + 8 * 14)); - in15 = _mm_load_si128((__m128i *)(input + 8 * 15)); + in0 = _mm_load_si128((const __m128i *)input); + in8 = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in1 = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in9 = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in2 = _mm_load_si128((const __m128i *)(input + 8 * 4)); + in10 = _mm_load_si128((const __m128i *)(input + 8 * 5)); + in3 = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in11 = _mm_load_si128((const __m128i *)(input + 8 * 7)); + in4 = _mm_load_si128((const __m128i *)(input + 8 * 8)); + in12 = _mm_load_si128((const __m128i *)(input + 8 * 9)); + in5 = _mm_load_si128((const __m128i *)(input + 8 * 10)); + in13 = _mm_load_si128((const __m128i *)(input + 8 * 11)); + in6 = _mm_load_si128((const __m128i *)(input + 8 * 12)); + in14 = _mm_load_si128((const __m128i *)(input + 8 * 13)); + in7 = _mm_load_si128((const __m128i *)(input + 8 * 14)); + in15 = _mm_load_si128((const __m128i *)(input + 8 * 15)); TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); @@ -1435,7 +1436,7 @@ void vp9_idct16x16_256_add_sse2(int16_t *input, uint8_t *dest, int stride) { } } -void vp9_idct16x16_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a, i; @@ -2310,24 +2311,24 @@ static void iadst16_1d_sse2(__m128i *in0, __m128i *in1) { iadst16_1d_8col(in1); } -static INLINE void load_buffer_8x16(int16_t *input, __m128i *in) { - in[0] = _mm_load_si128((__m128i *)(input + 0 * 16)); - in[1] = _mm_load_si128((__m128i *)(input + 1 * 16)); - in[2] = _mm_load_si128((__m128i *)(input + 2 * 16)); - in[3] = _mm_load_si128((__m128i *)(input + 3 * 16)); - in[4] = _mm_load_si128((__m128i *)(input + 4 * 16)); - in[5] = _mm_load_si128((__m128i *)(input + 5 * 16)); - in[6] = _mm_load_si128((__m128i *)(input + 6 * 16)); - in[7] = _mm_load_si128((__m128i *)(input + 7 * 16)); - - in[8] = _mm_load_si128((__m128i *)(input + 8 * 16)); - in[9] = _mm_load_si128((__m128i *)(input + 9 * 16)); - in[10] = _mm_load_si128((__m128i *)(input + 10 * 16)); - in[11] = _mm_load_si128((__m128i *)(input + 11 * 16)); - in[12] = _mm_load_si128((__m128i *)(input + 12 * 16)); - in[13] = _mm_load_si128((__m128i *)(input + 13 * 16)); - in[14] = _mm_load_si128((__m128i *)(input + 14 * 16)); - in[15] = _mm_load_si128((__m128i *)(input + 15 * 16)); +static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) { + in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16)); + in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16)); + in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16)); + in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16)); + in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16)); + in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16)); + in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16)); + in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16)); + + in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16)); + in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16)); + in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16)); + in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16)); + in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16)); + in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16)); + in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16)); + in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16)); } static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { @@ -2386,8 +2387,8 @@ static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { RECON_AND_STORE(dest, in[15]); } -void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride, - int tx_type) { +void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride, + int tx_type) { __m128i in0[16], in1[16]; load_buffer_8x16(input, in0); @@ -2421,8 +2422,8 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride, write_buffer_8x16(dest, in1, stride); } -void vp9_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest, - int stride) { +void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); const __m128i zero = _mm_setzero_si128(); @@ -2468,14 +2469,14 @@ void vp9_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest, __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; // 1-D idct. Load input data. - in0 = _mm_load_si128((__m128i *)input); - in8 = _mm_load_si128((__m128i *)(input + 8 * 1)); - in1 = _mm_load_si128((__m128i *)(input + 8 * 2)); - in9 = _mm_load_si128((__m128i *)(input + 8 * 3)); - in2 = _mm_load_si128((__m128i *)(input + 8 * 4)); - in10 = _mm_load_si128((__m128i *)(input + 8 * 5)); - in3 = _mm_load_si128((__m128i *)(input + 8 * 6)); - in11 = _mm_load_si128((__m128i *)(input + 8 * 7)); + in0 = _mm_load_si128((const __m128i *)input); + in8 = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in1 = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in9 = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in2 = _mm_load_si128((const __m128i *)(input + 8 * 4)); + in10 = _mm_load_si128((const __m128i *)(input + 8 * 5)); + in3 = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in11 = _mm_load_si128((const __m128i *)(input + 8 * 7)); TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3); TRANSPOSE_8X4(in8, in9, in10, in11, in8, in9, in10, in11); @@ -2780,11 +2781,12 @@ void vp9_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest, #define LOAD_DQCOEFF(reg, input) \ { \ - reg = _mm_load_si128((__m128i *) input); \ + reg = _mm_load_si128((const __m128i *) input); \ input += 8; \ } \ -void vp9_idct32x32_1024_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); @@ -3515,7 +3517,7 @@ void vp9_idct32x32_1024_add_sse2(int16_t *input, uint8_t *dest, int stride) { } } //NOLINT -void vp9_idct32x32_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a, i; diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 27e5f2cda..8c1399d79 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -363,15 +363,14 @@ static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) { int i, j; for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j) for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, - &fc->switchable_interp_prob[j][i]); + vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); } static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { int i, j; for (i = 0; i < INTER_MODE_CONTEXTS; ++i) for (j = 0; j < INTER_MODES - 1; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &fc->inter_mode_probs[i][j]); + vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); } static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { @@ -505,7 +504,11 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { mbmi->mode = ZEROMV; - assert(bsize >= BLOCK_8X8); + if (bsize < BLOCK_8X8) { + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid usage of segement feature on small blocks"); + return; + } } else { if (bsize >= BLOCK_8X8) mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx); @@ -606,17 +609,17 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { if (cm->comp_pred_mode == HYBRID_PREDICTION) for (i = 0; i < COMP_INTER_CONTEXTS; i++) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.comp_inter_prob[i]); + vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]); if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) for (i = 0; i < REF_CONTEXTS; i++) { - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.single_ref_prob[i][0]); - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.single_ref_prob[i][1]); + vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]); + vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]); } if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) for (i = 0; i < REF_CONTEXTS; i++) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.comp_ref_prob[i]); + vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]); } void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) { @@ -626,7 +629,7 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) { // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove. // vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs)); for (k = 0; k < MBSKIP_CONTEXTS; ++k) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.mbskip_probs[k]); + vp9_diff_update_prob(r, &cm->fc.mbskip_probs[k]); if (cm->frame_type != KEY_FRAME && !cm->intra_only) { nmv_context *const nmvc = &pbi->common.fc.nmvc; @@ -639,18 +642,17 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) { read_switchable_interp_probs(&cm->fc, r); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.intra_inter_prob[i]); + vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]); read_comp_pred(cm, r); for (j = 0; j < BLOCK_SIZE_GROUPS; j++) for (i = 0; i < INTRA_MODES - 1; ++i) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.y_mode_prob[j][i]); + vp9_diff_update_prob(r, &cm->fc.y_mode_prob[j][i]); for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j) for (i = 0; i < PARTITION_TYPES - 1; ++i) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, - &cm->fc.partition_prob[INTER_FRAME][j][i]); + vp9_diff_update_prob(r, &cm->fc.partition_prob[INTER_FRAME][j][i]); read_mv_probs(r, nmvc, xd->allow_high_precision_mv); } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 061508b08..acde390f2 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -63,15 +63,15 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) { for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 3; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p8x8[i][j]); + vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 2; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p16x16[i][j]); + vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 1; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p32x32[i][j]); + vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); } static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { @@ -101,15 +101,15 @@ static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, if (tx_type == DCT_DCT) xd->itxm_add(qcoeff, dst, stride, eob); else - vp9_iht_add(tx_type, qcoeff, dst, stride, eob); + vp9_iht4x4_add(tx_type, qcoeff, dst, stride, eob); break; case TX_8X8: tx_type = get_tx_type_8x8(pd->plane_type, xd); - vp9_iht_add_8x8(tx_type, qcoeff, dst, stride, eob); + vp9_iht8x8_add(tx_type, qcoeff, dst, stride, eob); break; case TX_16X16: tx_type = get_tx_type_16x16(pd->plane_type, xd); - vp9_iht_add_16x16(tx_type, qcoeff, dst, stride, eob); + vp9_iht16x16_add(tx_type, qcoeff, dst, stride, eob); break; case TX_32X32: tx_type = DCT_DCT; @@ -371,8 +371,7 @@ static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, for (l = 0; l < PREV_COEF_CONTEXTS; l++) if (k > 0 || l < 3) for (m = 0; m < UNCONSTRAINED_NODES; m++) - vp9_diff_update_prob(r, VP9_COEF_UPDATE_PROB, - &coef_probs[i][j][k][l][m]); + vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); } static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, @@ -956,9 +955,15 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { YV12_BUFFER_CONFIG *new_fb = &cm->yv12_fb[cm->new_fb_idx]; if (!first_partition_size) { - // showing a frame directly - *p_data_end = data + 1; - return 0; + if (!keyframe) { + // showing a frame directly + *p_data_end = data + 1; + return 0; + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid key frame"); + return -1; + } } data += vp9_rb_bytes_read(&rb); xd->corrupted = 0; diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c index 6f01cead6..fcca01729 100644 --- a/vp9/decoder/vp9_dsubexp.c +++ b/vp9/decoder/vp9_dsubexp.c @@ -48,8 +48,6 @@ static int merge_index(int v, int n, int modulus) { static int inv_remap_prob(int v, int m) { static int inv_map_table[MAX_PROB - 1] = { - // generated by: - // inv_map_table[j] = merge_index(j, MAX_PROB - 1, MODULUS_PARAM); 6, 19, 32, 45, 58, 71, 84, 97, 110, 123, 136, 149, 162, 175, 188, 201, 214, 227, 240, 253, 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, @@ -66,9 +64,11 @@ static int inv_remap_prob(int v, int m) { 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, - 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, + 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252 }; - // v = merge_index(v, MAX_PROBS - 1, MODULUS_PARAM); + // The clamp is not necessary for conforming VP9 stream, it is added to + // prevent out of bound access for bad input data + v = clamp(v, 0, 253); v = inv_map_table[v]; m--; if ((m << 1) <= MAX_PROB) { @@ -99,8 +99,8 @@ static int decode_term_subexp(vp9_reader *r, int k, int num_syms) { return word; } -void vp9_diff_update_prob(vp9_reader *r, int update_prob, vp9_prob* p) { - if (vp9_read(r, update_prob)) { +void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p) { + if (vp9_read(r, DIFF_UPDATE_PROB)) { const int delp = decode_term_subexp(r, SUBEXP_PARAM, 255); *p = (vp9_prob)inv_remap_prob(delp, *p); } diff --git a/vp9/decoder/vp9_dsubexp.h b/vp9/decoder/vp9_dsubexp.h index 21ac31393..aeb9399d0 100644 --- a/vp9/decoder/vp9_dsubexp.h +++ b/vp9/decoder/vp9_dsubexp.h @@ -14,6 +14,6 @@ #include "vp9/decoder/vp9_dboolhuff.h" -void vp9_diff_update_prob(vp9_reader *r, int update_prob, vp9_prob* p); +void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p); #endif // VP9_DECODER_VP9_DSUBEXP_H_ diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index a42c2cf30..d3030746d 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -342,36 +342,33 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, return retcode; } - { - swap_frame_buffers(pbi); + swap_frame_buffers(pbi); #if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 1000); + if (cm->show_frame) + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame); + else + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 1000); #endif - if (!pbi->do_loopfilter_inline) { - /* Apply the loop filter if appropriate. */ - vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0); - } + if (!pbi->do_loopfilter_inline) { + vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0); + } #if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 2000); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 3000); + if (cm->show_frame) + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 2000); + else + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 3000); #endif - vp9_extend_frame_inner_borders(cm->frame_to_show, - cm->subsampling_x, - cm->subsampling_y); - } + vp9_extend_frame_inner_borders(cm->frame_to_show, + cm->subsampling_x, + cm->subsampling_y); #if WRITE_RECON_BUFFER == 1 if (cm->show_frame) diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 2f59d333a..428ca7e2b 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -179,9 +179,8 @@ static void update_mode( vp9_tree_probs_from_distribution(tree, Pnew, bct, num_events, 0); n--; - for (i = 0; i < n; ++i) { - vp9_cond_prob_diff_update(w, &Pcur[i], MODE_UPDATE_PROB, bct[i]); - } + for (i = 0; i < n; ++i) + vp9_cond_prob_diff_update(w, &Pcur[i], bct[i]); } static void update_mbintra_mode_probs(VP9_COMP* const cpi, @@ -227,8 +226,7 @@ void vp9_update_skip_probs(VP9_COMP *cpi, vp9_writer *w) { int k; for (k = 0; k < MBSKIP_CONTEXTS; ++k) - vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k], - MODE_UPDATE_PROB, cm->counts.mbskip[k]); + vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k], cm->counts.mbskip[k]); } static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) { @@ -251,7 +249,7 @@ static void update_switchable_interp_probs(VP9_COMP *const cpi, for (j = 0; j <= SWITCHABLE_FILTERS; ++j) { for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) { vp9_cond_prob_diff_update(bc, &cm->fc.switchable_interp_prob[j][i], - MODE_UPDATE_PROB, branch_ct[j][i]); + branch_ct[j][i]); } } #ifdef MODE_STATS @@ -273,7 +271,7 @@ static void update_inter_mode_probs(VP9_COMMON *cm, vp9_writer* const bc) { for (j = 0; j < INTER_MODES - 1; ++j) vp9_cond_prob_diff_update(bc, &cm->fc.inter_mode_probs[i][j], - MODE_UPDATE_PROB, branch_ct[j]); + branch_ct[j]); } } @@ -781,7 +779,7 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, vp9_coeff_probs_model *old_frame_coef_probs = cpi->common.fc.coef_probs[tx_size]; vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size]; - const vp9_prob upd = VP9_COEF_UPDATE_PROB; + const vp9_prob upd = DIFF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; int i, j, k, l, t; switch (cpi->sf.use_fast_coef_updates) { @@ -836,7 +834,7 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, for (t = 0; t < entropy_nodes_update; ++t) { vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t; - const vp9_prob upd = VP9_COEF_UPDATE_PROB; + const vp9_prob upd = DIFF_UPDATE_PROB; int s; int u = 0; if (l >= 3 && k == 0) @@ -1119,26 +1117,23 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], - ct_8x8p); + tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], ct_8x8p); for (j = 0; j < TX_SIZES - 3; j++) - vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], - MODE_UPDATE_PROB, ct_8x8p[j]); + vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], ct_8x8p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], - ct_16x16p); + tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], ct_16x16p); for (j = 0; j < TX_SIZES - 2; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j], - MODE_UPDATE_PROB, ct_16x16p[j]); + ct_16x16p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p); for (j = 0; j < TX_SIZES - 1; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j], - MODE_UPDATE_PROB, ct_32x32p[j]); + ct_32x32p[j]); } #ifdef MODE_STATS if (!cpi->dummy_packing) @@ -1468,7 +1463,6 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i], - MODE_UPDATE_PROB, cpi->intra_inter_count[i]); if (cm->allow_comp_inter_inter) { @@ -1482,7 +1476,6 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (use_hybrid_pred) for (i = 0; i < COMP_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], - MODE_UPDATE_PROB, cpi->comp_inter_count[i]); } } @@ -1490,10 +1483,8 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { for (i = 0; i < REF_CONTEXTS; i++) { vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0], - MODE_UPDATE_PROB, cpi->single_ref_count[i][0]); vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1], - MODE_UPDATE_PROB, cpi->single_ref_count[i][1]); } } @@ -1501,7 +1492,6 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) for (i = 0; i < REF_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i], - MODE_UPDATE_PROB, cpi->comp_ref_count[i]); update_mbintra_mode_probs(cpi, &header_bc); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 3a2be56a1..b26ae329f 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -172,7 +172,6 @@ struct macroblock { BLOCK_SIZE sb64_partitioning; void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch); - void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 3008e46dd..b6555bc05 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -17,7 +17,7 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" -static void fdct4(int16_t *input, int16_t *output) { +static void fdct4(const int16_t *input, int16_t *output) { int16_t step[4]; int temp1, temp2; @@ -102,7 +102,7 @@ void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) { } } -static void fadst4(int16_t *input, int16_t *output) { +static void fadst4(const int16_t *input, int16_t *output) { int x0, x1, x2, x3; int s0, s1, s2, s3, s4, s5, s6, s7; @@ -178,12 +178,7 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output, } } -void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) { - vp9_short_fdct4x4_c(input, output, pitch); - vp9_short_fdct4x4_c(input + 4, output + 16, pitch); -} - -static void fdct8(int16_t *input, int16_t *output) { +static void fdct8(const int16_t *input, int16_t *output) { /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; /*needs32*/ int t0, t1, t2, t3; /*canbe16*/ int x0, x1, x2, x3; @@ -486,7 +481,7 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) { } } -static void fadst8(int16_t *input, int16_t *output) { +static void fadst8(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; int x0 = input[7]; @@ -647,14 +642,8 @@ void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) { } } -void vp9_short_walsh8x4_c(int16_t *input, int16_t *output, int pitch) { - vp9_short_walsh4x4_c(input, output, pitch); - vp9_short_walsh4x4_c(input + 4, output + 16, pitch); -} - - // Rewrote to use same algorithm as others. -static void fdct16(int16_t in[16], int16_t out[16]) { +static void fdct16(const int16_t in[16], int16_t out[16]) { /*canbe16*/ int step1[8]; /*canbe16*/ int step2[8]; /*canbe16*/ int step3[8]; @@ -795,7 +784,7 @@ static void fdct16(int16_t in[16], int16_t out[16]) { out[15] = dct_const_round_shift(temp2); } -void fadst16(int16_t *input, int16_t *output) { +static void fadst16(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; int x0 = input[15]; @@ -1003,7 +992,7 @@ static INLINE int half_round_shift(int input) { return rv; } -static void dct32_1d(int *input, int *output, int round) { +static void dct32_1d(const int *input, int *output, int round) { int step[32]; // Stage 1 step[0] = input[0] + input[(32 - 1)]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index b74609bc2..ac1fd6215 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1853,7 +1853,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { if (lossless) { // printf("Switching to lossless\n"); - cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4; cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; cpi->mb.optimize = 0; @@ -1862,7 +1861,6 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { cpi->common.tx_mode = ONLY_4X4; } else { // printf("Not lossless\n"); - cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 0fc36d98f..a0a7bab27 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -564,7 +564,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan, iscan); if (!x->skip_encode && *eob) - vp9_iht_add_16x16(tx_type, dqcoeff, dst, pd->dst.stride, *eob); + vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); break; case TX_8X8: tx_type = get_tx_type_8x8(pd->plane_type, xd); @@ -589,7 +589,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan, iscan); if (!x->skip_encode && *eob) - vp9_iht_add_8x8(tx_type, dqcoeff, dst, pd->dst.stride, *eob); + vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); break; case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); @@ -623,7 +623,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, // case. xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); else - vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type); + vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); } break; default: diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 0833b4ac8..0afb35f54 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -955,10 +955,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16; cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8; - cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { - cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4; cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 54e60d6e1..eb7ca6b72 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -110,6 +110,7 @@ static int rd_thresh_block_size_factor[BLOCK_SIZES] = #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 #define RD_THRESH_POW 1.25 +#define RD_MULT_EPB_RATIO 64 #define MV_COST_WEIGHT 108 #define MV_COST_WEIGHT_SUB 120 @@ -162,7 +163,17 @@ void vp9_init_me_luts() { static int compute_rd_mult(int qindex) { const int q = vp9_dc_quant(qindex, 0); - return (11 * q * q) >> 2; + // TODO(debargha): Adjust the function below + return (88 * q * q / 25); +} + +static int compute_rd_thresh_factor(int qindex) { + int q; + // TODO(debargha): Adjust the function below + q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); + if (q < 8) + q = 8; + return q; } void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { @@ -172,9 +183,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { static void set_block_thresholds(VP9_COMP *cpi, int qindex) { int q, i, bsize; - q = ((int)pow(vp9_dc_quant(qindex, 0) >> 2, RD_THRESH_POW)) << 2; - if (q < 8) - q = 8; + q = compute_rd_thresh_factor(qindex); for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { for (i = 0; i < MAX_MODES; ++i) { @@ -216,7 +225,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { // cpi->common.refresh_alt_ref_frame) qindex = clamp(qindex, 0, MAXQ); - cpi->RDDIV = 100; + cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) cpi->RDMULT = compute_rd_mult(qindex); if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { if (cpi->twopass.next_iiratio > 31) @@ -225,7 +234,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; } - cpi->mb.errorperbit = cpi->RDMULT >> 6; + cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); vp9_set_speed_features(cpi); @@ -1100,7 +1109,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, goto next; if (tx_type != DCT_DCT) - vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), + vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride, tx_type); else xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride, diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index c86ea2723..aa4068d76 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -12,8 +12,10 @@ #ifndef VP9_ENCODER_VP9_RDOPT_H_ #define VP9_ENCODER_VP9_RDOPT_H_ +#define RDDIV_BITS 7 + #define RDCOST(RM, DM, R, D) \ - (((128 + ((int64_t)R) * (RM)) >> 8) + ((int64_t)DM) * (D)) + (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM)) #define QIDX_SKIP_THRESH 115 void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex); diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c index 667b8012c..eb864d96c 100644 --- a/vp9/encoder/vp9_subexp.c +++ b/vp9/encoder/vp9_subexp.c @@ -221,7 +221,8 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, } void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp, - vp9_prob upd, unsigned int *ct) { + unsigned int *ct) { + const vp9_prob upd = DIFF_UPDATE_PROB; vp9_prob newp = get_binary_prob(ct[0], ct[1]); const int savings = vp9_prob_diff_update_savings_search(ct, *oldp, &newp, upd); diff --git a/vp9/encoder/vp9_subexp.h b/vp9/encoder/vp9_subexp.h index 7acdaf6f1..521c7778d 100644 --- a/vp9/encoder/vp9_subexp.h +++ b/vp9/encoder/vp9_subexp.h @@ -19,7 +19,7 @@ void vp9_write_prob_diff_update(vp9_writer *w, vp9_prob newp, vp9_prob oldp); void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp, - vp9_prob upd, unsigned int *ct); + unsigned int *ct); int vp9_prob_diff_update_savings_search(const unsigned int *ct, vp9_prob oldp, vp9_prob *bestp, diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index ad3d01da9..5e1e5ed4a 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -112,11 +112,6 @@ void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) { } } -void vp9_short_fdct8x4_sse2(int16_t *input, int16_t *output, int pitch) { - vp9_short_fdct4x4_sse2(input, output, pitch); - vp9_short_fdct4x4_sse2(input + 4, output + 16, pitch); -} - static INLINE void load_buffer_4x4(int16_t *input, __m128i *in, int stride) { const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 7a5b78634..6b923162f 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -658,8 +658,10 @@ static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, if (corrupted) { VP9D_COMP *pbi = (VP9D_COMP *)ctx->pbi; - *corrupted = pbi->common.frame_to_show->corrupted; - + if (pbi) + *corrupted = pbi->common.frame_to_show->corrupted; + else + return VPX_CODEC_ERROR; return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; |