summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/vp9_alloccommon.c4
-rw-r--r--vp9/common/vp9_blockd.h46
-rw-r--r--vp9/common/vp9_convolve.c21
-rw-r--r--vp9/common/vp9_debugmodes.c4
-rw-r--r--vp9/common/vp9_entropy.c776
-rw-r--r--vp9/common/vp9_entropy.h26
-rw-r--r--vp9/common/vp9_findnearmv.h12
-rw-r--r--vp9/common/vp9_idctllm.c799
-rw-r--r--vp9/common/vp9_invtrans.c6
-rw-r--r--vp9/common/vp9_onyx.h1
-rw-r--r--vp9/common/vp9_onyxc_int.h6
-rw-r--r--vp9/common/vp9_reconinter.c160
-rw-r--r--vp9/common/vp9_rtcd_defs.sh5
-rw-r--r--vp9/common/vp9_tile_common.c55
-rw-r--r--vp9/common/vp9_tile_common.h26
-rw-r--r--vp9/common/x86/vp9_asm_stubs.c39
-rw-r--r--vp9/common/x86/vp9_subpixel_8t_ssse3.asm239
-rw-r--r--vp9/decoder/vp9_decodemv.c8
-rw-r--r--vp9/decoder/vp9_decodframe.c249
-rw-r--r--vp9/decoder/vp9_dequantize.c4
-rw-r--r--vp9/decoder/vp9_dequantize.h14
-rw-r--r--vp9/decoder/vp9_detokenize.c32
-rw-r--r--vp9/decoder/vp9_idct_blk.c12
-rw-r--r--vp9/decoder/vp9_onyxd_int.h6
-rw-r--r--vp9/encoder/vp9_bitstream.c103
-rw-r--r--vp9/encoder/vp9_block.h12
-rw-r--r--vp9/encoder/vp9_dct.c781
-rw-r--r--vp9/encoder/vp9_encodeframe.c24
-rw-r--r--vp9/encoder/vp9_encodeintra.c10
-rw-r--r--vp9/encoder/vp9_encodemb.c68
-rw-r--r--vp9/encoder/vp9_mcomp.c6
-rw-r--r--vp9/encoder/vp9_onyx_if.c70
-rw-r--r--vp9/encoder/vp9_onyx_int.h2
-rw-r--r--vp9/encoder/vp9_quantize.c84
-rw-r--r--vp9/encoder/vp9_rdopt.c110
-rw-r--r--vp9/encoder/vp9_segmentation.c23
-rw-r--r--vp9/encoder/vp9_temporal_filter.c12
-rw-r--r--vp9/encoder/vp9_tokenize.c29
-rw-r--r--vp9/vp9_common.mk2
-rw-r--r--vp9/vp9_cx_iface.c11
40 files changed, 1015 insertions, 2882 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 01fa63fdb..c3d6dae93 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -219,8 +219,4 @@ void vp9_initialize_common() {
vp9_entropy_mode_init();
vp9_entropy_mv_init();
-
-#if CONFIG_NEWCOEFCONTEXT
- vp9_init_neighbors();
-#endif
}
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 3351e6928..054d58dba 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -47,18 +47,6 @@ void vpx_log(const char *format, ...);
#define MAX_MV_REFS 9
#define MAX_MV_REF_CANDIDATES 4
-#if CONFIG_DWTDCTHYBRID
-#define DWT_MAX_LENGTH 64
-#define DWT_TYPE 26 // 26/53/97
-#define DWT_PRECISION_BITS 2
-#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2)
-
-#define DWTDCT16X16 0
-#define DWTDCT16X16_LEAN 1
-#define DWTDCT8X8 2
-#define DWTDCT_TYPE DWTDCT16X16_LEAN
-#endif
-
typedef struct {
int r, c;
} POS;
@@ -218,10 +206,7 @@ union b_mode_info {
B_PREDICTION_MODE context;
#endif
} as_mode;
- struct {
- int_mv first;
- int_mv second;
- } as_mv;
+ int_mv as_mv[2]; // first, second inter predictor motion vectors
};
typedef enum {
@@ -386,11 +371,28 @@ typedef struct macroblockd {
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
+#if CONFIG_LOSSLESS
+ int lossless;
+#endif
/* Inverse transform function pointers. */
- void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch);
- void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch);
- void (*inv_walsh4x4_1)(int16_t *in, int16_t *out);
- void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out);
+ void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);
+ void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*inv_2ndtxm4x4_1)(int16_t *in, int16_t *out);
+ void (*inv_2ndtxm4x4)(int16_t *in, int16_t *out);
+ void (*itxm_add)(int16_t *input, const int16_t *dq,
+ uint8_t *pred, uint8_t *output, int pitch, int stride);
+ void (*dc_itxm_add)(int16_t *input, const int16_t *dq,
+ uint8_t *pred, uint8_t *output, int pitch, int stride, int dc);
+ void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr,
+ uint8_t *dst_ptr, int pitch, int stride);
+ void (*dc_itxm_add_y_block)(int16_t *q, const int16_t *dq,
+ uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs,
+ const int16_t *dc);
+ void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
+ uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs);
+ void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
+ uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
+ uint16_t *eobs);
struct subpix_fn_table subpix;
@@ -501,6 +503,10 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
int ib = (int)(b - xd->block);
if (ib >= 16)
return tx_type;
+#if CONFIG_LOSSLESS
+ if (xd->lossless)
+ return DCT_DCT;
+#endif
// TODO(rbultje, debargha): Explore ADST usage for superblocks
if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index f21f1d84e..b87c410df 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -7,12 +7,15 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "vp9/common/vp9_convolve.h"
+
#include <assert.h>
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
@@ -293,9 +296,21 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- convolve_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h, 8);
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_avg(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
}
void vp9_convolve_copy(const uint8_t *src, int src_stride,
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index 5ea7736b7..1953d60c6 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -129,8 +129,8 @@ void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols,
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
fprintf(mvs, "%3d:%-3d ",
- mi[mb_index].bmi[bindex].as_mv.first.as_mv.row,
- mi[mb_index].bmi[bindex].as_mv.first.as_mv.col);
+ mi[mb_index].bmi[bindex].as_mv[0].as_mv.row,
+ mi[mb_index].bmi[bindex].as_mv[0].as_mv.col);
}
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 352e17c0c..e21eaba83 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -143,624 +143,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = {
237, 252, 253, 238, 223, 239, 254, 255,
};
-#if CONFIG_DWTDCTHYBRID
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4, 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 256, 225, 194, 163,
- 132, 101, 70, 39, 8, 9, 40, 71,
- 102, 133, 164, 195, 226, 257, 288, 320,
- 289, 258, 227, 196, 165, 134, 103, 72,
- 41, 10, 11, 42, 73, 104, 135, 166,
- 197, 228, 259, 290, 321, 352, 384, 353,
- 322, 291, 260, 229, 198, 167, 136, 105,
- 74, 43, 12, 13, 44, 75, 106, 137,
- 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 448, 417, 386, 355, 324, 293, 262,
- 231, 200, 169, 138, 107, 76, 45, 14,
- 15, 46, 77, 108, 139, 170, 201, 232,
- 263, 294, 325, 356, 387, 418, 449, 480,
- 481, 450, 419, 388, 357, 326, 295, 264,
- 233, 202, 171, 140, 109, 78, 47, 79,
- 110, 141, 172, 203, 234, 265, 296, 327,
- 358, 389, 420, 451, 482, 483, 452, 421,
- 390, 359, 328, 297, 266, 235, 204, 173,
- 142, 111, 143, 174, 205, 236, 267, 298,
- 329, 360, 391, 422, 453, 484, 485, 454,
- 423, 392, 361, 330, 299, 268, 237, 206,
- 175, 207, 238, 269, 300, 331, 362, 393,
- 424, 455, 486, 487, 456, 425, 394, 363,
- 332, 301, 270, 239, 271, 302, 333, 364,
- 395, 426, 457, 488, 489, 458, 427, 396,
- 365, 334, 303, 335, 366, 397, 428, 459,
- 490, 491, 460, 429, 398, 367, 399, 430,
- 461, 492, 493, 462, 431, 463, 494, 495,
-
- 16, 512, 528, 17, 513, 529, 48, 544,
- 560, 80, 576, 592, 49, 545, 561, 18,
- 514, 530, 19, 515, 531, 50, 546, 562,
- 81, 577, 593, 112, 608, 624, 144, 640,
- 656, 113, 609, 625, 82, 578, 594, 51,
- 547, 563, 20, 516, 532, 21, 517, 533,
- 52, 548, 564, 83, 579, 595, 114, 610,
- 626, 145, 641, 657, 176, 672, 688, 208,
- 704, 720, 177, 673, 689, 146, 642, 658,
- 115, 611, 627, 84, 580, 596, 53, 549,
- 565, 22, 518, 534, 23, 519, 535, 54,
- 550, 566, 85, 581, 597, 116, 612, 628,
- 147, 643, 659, 178, 674, 690, 209, 705,
- 721, 240, 736, 752, 272, 768, 784, 241,
- 737, 753, 210, 706, 722, 179, 675, 691,
- 148, 644, 660, 117, 613, 629, 86, 582,
- 598, 55, 551, 567, 24, 520, 536, 25,
- 521, 537, 56, 552, 568, 87, 583, 599,
- 118, 614, 630, 149, 645, 661, 180, 676,
- 692, 211, 707, 723, 242, 738, 754, 273,
- 769, 785, 304, 800, 816, 336, 832, 848,
- 305, 801, 817, 274, 770, 786, 243, 739,
- 755, 212, 708, 724, 181, 677, 693, 150,
- 646, 662, 119, 615, 631, 88, 584, 600,
- 57, 553, 569, 26, 522, 538, 27, 523,
- 539, 58, 554, 570, 89, 585, 601, 120,
- 616, 632, 151, 647, 663, 182, 678, 694,
- 213, 709, 725, 244, 740, 756, 275, 771,
- 787, 306, 802, 818, 337, 833, 849, 368,
- 864, 880, 400, 896, 912, 369, 865, 881,
- 338, 834, 850, 307, 803, 819, 276, 772,
- 788, 245, 741, 757, 214, 710, 726, 183,
-
- 679, 695, 152, 648, 664, 121, 617, 633,
- 90, 586, 602, 59, 555, 571, 28, 524,
- 540, 29, 525, 541, 60, 556, 572, 91,
- 587, 603, 122, 618, 634, 153, 649, 665,
- 184, 680, 696, 215, 711, 727, 246, 742,
- 758, 277, 773, 789, 308, 804, 820, 339,
- 835, 851, 370, 866, 882, 401, 897, 913,
- 432, 928, 944, 464, 960, 976, 433, 929,
- 945, 402, 898, 914, 371, 867, 883, 340,
- 836, 852, 309, 805, 821, 278, 774, 790,
- 247, 743, 759, 216, 712, 728, 185, 681,
- 697, 154, 650, 666, 123, 619, 635, 92,
- 588, 604, 61, 557, 573, 30, 526, 542,
- 31, 527, 543, 62, 558, 574, 93, 589,
- 605, 124, 620, 636, 155, 651, 667, 186,
- 682, 698, 217, 713, 729, 248, 744, 760,
- 279, 775, 791, 310, 806, 822, 341, 837,
- 853, 372, 868, 884, 403, 899, 915, 434,
- 930, 946, 465, 961, 977, 496, 992, 1008,
- 497, 993, 1009, 466, 962, 978, 435, 931,
- 947, 404, 900, 916, 373, 869, 885, 342,
- 838, 854, 311, 807, 823, 280, 776, 792,
- 249, 745, 761, 218, 714, 730, 187, 683,
- 699, 156, 652, 668, 125, 621, 637, 94,
- 590, 606, 63, 559, 575, 95, 591, 607,
- 126, 622, 638, 157, 653, 669, 188, 684,
- 700, 219, 715, 731, 250, 746, 762, 281,
- 777, 793, 312, 808, 824, 343, 839, 855,
- 374, 870, 886, 405, 901, 917, 436, 932,
- 948, 467, 963, 979, 498, 994, 1010, 499,
- 995, 1011, 468, 964, 980, 437, 933, 949,
- 406, 902, 918, 375, 871, 887, 344, 840,
-
- 856, 313, 809, 825, 282, 778, 794, 251,
- 747, 763, 220, 716, 732, 189, 685, 701,
- 158, 654, 670, 127, 623, 639, 159, 655,
- 671, 190, 686, 702, 221, 717, 733, 252,
- 748, 764, 283, 779, 795, 314, 810, 826,
- 345, 841, 857, 376, 872, 888, 407, 903,
- 919, 438, 934, 950, 469, 965, 981, 500,
- 996, 1012, 501, 997, 1013, 470, 966, 982,
- 439, 935, 951, 408, 904, 920, 377, 873,
- 889, 346, 842, 858, 315, 811, 827, 284,
- 780, 796, 253, 749, 765, 222, 718, 734,
- 191, 687, 703, 223, 719, 735, 254, 750,
- 766, 285, 781, 797, 316, 812, 828, 347,
- 843, 859, 378, 874, 890, 409, 905, 921,
- 440, 936, 952, 471, 967, 983, 502, 998,
- 1014, 503, 999, 1015, 472, 968, 984, 441,
- 937, 953, 410, 906, 922, 379, 875, 891,
- 348, 844, 860, 317, 813, 829, 286, 782,
- 798, 255, 751, 767, 287, 783, 799, 318,
- 814, 830, 349, 845, 861, 380, 876, 892,
- 411, 907, 923, 442, 938, 954, 473, 969,
- 985, 504, 1000, 1016, 505, 1001, 1017, 474,
- 970, 986, 443, 939, 955, 412, 908, 924,
- 381, 877, 893, 350, 846, 862, 319, 815,
- 831, 351, 847, 863, 382, 878, 894, 413,
- 909, 925, 444, 940, 956, 475, 971, 987,
- 506, 1002, 1018, 507, 1003, 1019, 476, 972,
- 988, 445, 941, 957, 414, 910, 926, 383,
- 879, 895, 415, 911, 927, 446, 942, 958,
- 477, 973, 989, 508, 1004, 1020, 509, 1005,
- 1021, 478, 974, 990, 447, 943, 959, 479,
- 975, 991, 510, 1006, 1022, 511, 1007, 1023,
-};
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6,
- 6, 6, 6,
- 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4,
- 16, 512, 528,
- 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 256, 225, 194, 163,
- 132, 101, 70, 39, 8, 9, 40, 71,
- 102, 133, 164, 195, 226, 257, 288, 320,
- 289, 258, 227, 196, 165, 134, 103, 72,
- 41, 10, 11, 42, 73, 104, 135, 166,
- 197, 228, 259, 290, 321, 352, 384, 353,
- 322, 291, 260, 229, 198, 167, 136, 105,
- 74, 43, 12, 13, 44, 75, 106, 137,
- 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 448, 417, 386, 355, 324, 293, 262,
- 231, 200, 169, 138, 107, 76, 45, 14,
- 15, 46, 77, 108, 139, 170, 201, 232,
- 263, 294, 325, 356, 387, 418, 449, 480,
- 481, 450, 419, 388, 357, 326, 295, 264,
- 233, 202, 171, 140, 109, 78, 47, 79,
- 110, 141, 172, 203, 234, 265, 296, 327,
- 358, 389, 420, 451, 482, 483, 452, 421,
- 390, 359, 328, 297, 266, 235, 204, 173,
- 142, 111, 143, 174, 205, 236, 267, 298,
- 329, 360, 391, 422, 453, 484, 485, 454,
- 423, 392, 361, 330, 299, 268, 237, 206,
- 175, 207, 238, 269, 300, 331, 362, 393,
- 424, 455, 486, 487, 456, 425, 394, 363,
- 332, 301, 270, 239, 271, 302, 333, 364,
- 395, 426, 457, 488, 489, 458, 427, 396,
- 365, 334, 303, 335, 366, 397, 428, 459,
- 490, 491, 460, 429, 398, 367, 399, 430,
- 461, 492, 493, 462, 431, 463, 494, 495,
-
- 17, 513, 529, 48, 544,
- 560, 80, 576, 592, 49, 545, 561, 18,
- 514, 530, 19, 515, 531, 50, 546, 562,
- 81, 577, 593, 112, 608, 624, 144, 640,
- 656, 113, 609, 625, 82, 578, 594, 51,
- 547, 563, 20, 516, 532, 21, 517, 533,
- 52, 548, 564, 83, 579, 595, 114, 610,
- 626, 145, 641, 657, 176, 672, 688, 208,
- 704, 720, 177, 673, 689, 146, 642, 658,
- 115, 611, 627, 84, 580, 596, 53, 549,
- 565, 22, 518, 534, 23, 519, 535, 54,
- 550, 566, 85, 581, 597, 116, 612, 628,
- 147, 643, 659, 178, 674, 690, 209, 705,
- 721, 240, 736, 752, 272, 768, 784, 241,
- 737, 753, 210, 706, 722, 179, 675, 691,
- 148, 644, 660, 117, 613, 629, 86, 582,
- 598, 55, 551, 567, 24, 520, 536, 25,
- 521, 537, 56, 552, 568, 87, 583, 599,
- 118, 614, 630, 149, 645, 661, 180, 676,
- 692, 211, 707, 723, 242, 738, 754, 273,
- 769, 785, 304, 800, 816, 336, 832, 848,
- 305, 801, 817, 274, 770, 786, 243, 739,
- 755, 212, 708, 724, 181, 677, 693, 150,
- 646, 662, 119, 615, 631, 88, 584, 600,
- 57, 553, 569, 26, 522, 538, 27, 523,
- 539, 58, 554, 570, 89, 585, 601, 120,
- 616, 632, 151, 647, 663, 182, 678, 694,
- 213, 709, 725, 244, 740, 756, 275, 771,
- 787, 306, 802, 818, 337, 833, 849, 368,
- 864, 880, 400, 896, 912, 369, 865, 881,
- 338, 834, 850, 307, 803, 819, 276, 772,
- 788, 245, 741, 757, 214, 710, 726, 183,
-
- 679, 695, 152, 648, 664, 121, 617, 633,
- 90, 586, 602, 59, 555, 571, 28, 524,
- 540, 29, 525, 541, 60, 556, 572, 91,
- 587, 603, 122, 618, 634, 153, 649, 665,
- 184, 680, 696, 215, 711, 727, 246, 742,
- 758, 277, 773, 789, 308, 804, 820, 339,
- 835, 851, 370, 866, 882, 401, 897, 913,
- 432, 928, 944, 464, 960, 976, 433, 929,
- 945, 402, 898, 914, 371, 867, 883, 340,
- 836, 852, 309, 805, 821, 278, 774, 790,
- 247, 743, 759, 216, 712, 728, 185, 681,
- 697, 154, 650, 666, 123, 619, 635, 92,
- 588, 604, 61, 557, 573, 30, 526, 542,
- 31, 527, 543, 62, 558, 574, 93, 589,
- 605, 124, 620, 636, 155, 651, 667, 186,
- 682, 698, 217, 713, 729, 248, 744, 760,
- 279, 775, 791, 310, 806, 822, 341, 837,
- 853, 372, 868, 884, 403, 899, 915, 434,
- 930, 946, 465, 961, 977, 496, 992, 1008,
- 497, 993, 1009, 466, 962, 978, 435, 931,
- 947, 404, 900, 916, 373, 869, 885, 342,
- 838, 854, 311, 807, 823, 280, 776, 792,
- 249, 745, 761, 218, 714, 730, 187, 683,
- 699, 156, 652, 668, 125, 621, 637, 94,
- 590, 606, 63, 559, 575, 95, 591, 607,
- 126, 622, 638, 157, 653, 669, 188, 684,
- 700, 219, 715, 731, 250, 746, 762, 281,
- 777, 793, 312, 808, 824, 343, 839, 855,
- 374, 870, 886, 405, 901, 917, 436, 932,
- 948, 467, 963, 979, 498, 994, 1010, 499,
- 995, 1011, 468, 964, 980, 437, 933, 949,
- 406, 902, 918, 375, 871, 887, 344, 840,
-
- 856, 313, 809, 825, 282, 778, 794, 251,
- 747, 763, 220, 716, 732, 189, 685, 701,
- 158, 654, 670, 127, 623, 639, 159, 655,
- 671, 190, 686, 702, 221, 717, 733, 252,
- 748, 764, 283, 779, 795, 314, 810, 826,
- 345, 841, 857, 376, 872, 888, 407, 903,
- 919, 438, 934, 950, 469, 965, 981, 500,
- 996, 1012, 501, 997, 1013, 470, 966, 982,
- 439, 935, 951, 408, 904, 920, 377, 873,
- 889, 346, 842, 858, 315, 811, 827, 284,
- 780, 796, 253, 749, 765, 222, 718, 734,
- 191, 687, 703, 223, 719, 735, 254, 750,
- 766, 285, 781, 797, 316, 812, 828, 347,
- 843, 859, 378, 874, 890, 409, 905, 921,
- 440, 936, 952, 471, 967, 983, 502, 998,
- 1014, 503, 999, 1015, 472, 968, 984, 441,
- 937, 953, 410, 906, 922, 379, 875, 891,
- 348, 844, 860, 317, 813, 829, 286, 782,
- 798, 255, 751, 767, 287, 783, 799, 318,
- 814, 830, 349, 845, 861, 380, 876, 892,
- 411, 907, 923, 442, 938, 954, 473, 969,
- 985, 504, 1000, 1016, 505, 1001, 1017, 474,
- 970, 986, 443, 939, 955, 412, 908, 924,
- 381, 877, 893, 350, 846, 862, 319, 815,
- 831, 351, 847, 863, 382, 878, 894, 413,
- 909, 925, 444, 940, 956, 475, 971, 987,
- 506, 1002, 1018, 507, 1003, 1019, 476, 972,
- 988, 445, 941, 957, 414, 910, 926, 383,
- 879, 895, 415, 911, 927, 446, 942, 958,
- 477, 973, 989, 508, 1004, 1020, 509, 1005,
- 1021, 478, 974, 990, 447, 943, 959, 479,
- 975, 991, 510, 1006, 1022, 511, 1007, 1023,
-};
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5,
- 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4, 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 225, 194, 163, 132,
- 101, 70, 39, 71, 102, 133, 164, 195,
- 226, 227, 196, 165, 134, 103, 135, 166,
- 197, 228, 229, 198, 167, 199, 230, 231,
-
- 8, 256, 264, 9, 257, 265, 40, 288, 296, 72, 320, 328,
- 41, 289, 297, 10, 258, 266, 11, 259, 267, 42, 290, 298,
- 73, 321, 329, 104, 352, 360, 136, 384, 392, 105, 353, 361,
- 74, 322, 330, 43, 291, 299, 12, 260, 268, 13, 261, 269,
- 44, 292, 300, 75, 323, 331, 106, 354, 362, 137, 385, 393,
- 168, 416, 424, 200, 448, 456, 169, 417, 425, 138, 386, 394,
- 107, 355, 363, 76, 324, 332, 45, 293, 301, 14, 262, 270,
- 15, 263, 271, 46, 294, 302, 77, 325, 333, 108, 356, 364,
- 139, 387, 395, 170, 418, 426, 201, 449, 457, 232, 480, 488,
- 233, 481, 489, 202, 450, 458, 171, 419, 427, 140, 388, 396,
- 109, 357, 365, 78, 326, 334, 47, 295, 303, 79, 327, 335,
- 110, 358, 366, 141, 389, 397, 172, 420, 428, 203, 451, 459,
- 234, 482, 490, 235, 483, 491, 204, 452, 460, 173, 421, 429,
- 142, 390, 398, 111, 359, 367, 143, 391, 399, 174, 422, 430,
- 205, 453, 461, 236, 484, 492, 237, 485, 493, 206, 454, 462,
- 175, 423, 431, 207, 455, 463, 238, 486, 494, 239, 487, 495,
-
- 16, 512, 528, 17, 513, 529, 18, 514,
- 530, 19, 515, 531, 20, 516, 532, 21,
- 517, 533, 22, 518, 534, 23, 519, 535,
- 24, 520, 536, 25, 521, 537, 26, 522,
- 538, 27, 523, 539, 28, 524, 540, 29,
- 525, 541, 30, 526, 542, 31, 527, 543,
- 48, 544, 560, 49, 545, 561, 50, 546,
- 562, 51, 547, 563, 52, 548, 564, 53,
- 549, 565, 54, 550, 566, 55, 551, 567,
- 56, 552, 568, 57, 553, 569, 58, 554,
- 570, 59, 555, 571, 60, 556, 572, 61,
- 557, 573, 62, 558, 574, 63, 559, 575,
- 80, 576, 592, 81, 577, 593, 82, 578,
- 594, 83, 579, 595, 84, 580, 596, 85,
- 581, 597, 86, 582, 598, 87, 583, 599,
- 88, 584, 600, 89, 585, 601, 90, 586,
- 602, 91, 587, 603, 92, 588, 604, 93,
- 589, 605, 94, 590, 606, 95, 591, 607,
- 112, 608, 624, 113, 609, 625, 114, 610,
- 626, 115, 611, 627, 116, 612, 628, 117,
- 613, 629, 118, 614, 630, 119, 615, 631,
- 120, 616, 632, 121, 617, 633, 122, 618,
- 634, 123, 619, 635, 124, 620, 636, 125,
- 621, 637, 126, 622, 638, 127, 623, 639,
- 144, 640, 656, 145, 641, 657, 146, 642,
- 658, 147, 643, 659, 148, 644, 660, 149,
- 645, 661, 150, 646, 662, 151, 647, 663,
- 152, 648, 664, 153, 649, 665, 154, 650,
- 666, 155, 651, 667, 156, 652, 668, 157,
- 653, 669, 158, 654, 670, 159, 655, 671,
- 176, 672, 688, 177, 673, 689, 178, 674,
- 690, 179, 675, 691, 180, 676, 692, 181,
- 677, 693, 182, 678, 694, 183, 679, 695,
- 184, 680, 696, 185, 681, 697, 186, 682,
- 698, 187, 683, 699, 188, 684, 700, 189,
- 685, 701, 190, 686, 702, 191, 687, 703,
- 208, 704, 720, 209, 705, 721, 210, 706,
- 722, 211, 707, 723, 212, 708, 724, 213,
- 709, 725, 214, 710, 726, 215, 711, 727,
- 216, 712, 728, 217, 713, 729, 218, 714,
- 730, 219, 715, 731, 220, 716, 732, 221,
- 717, 733, 222, 718, 734, 223, 719, 735,
- 240, 736, 752, 241, 737, 753, 242, 738,
- 754, 243, 739, 755, 244, 740, 756, 245,
- 741, 757, 246, 742, 758, 247, 743, 759,
- 248, 744, 760, 249, 745, 761, 250, 746,
- 762, 251, 747, 763, 252, 748, 764, 253,
- 749, 765, 254, 750, 766, 255, 751, 767,
- 272, 768, 784, 273, 769, 785, 274, 770,
- 786, 275, 771, 787, 276, 772, 788, 277,
- 773, 789, 278, 774, 790, 279, 775, 791,
- 280, 776, 792, 281, 777, 793, 282, 778,
- 794, 283, 779, 795, 284, 780, 796, 285,
- 781, 797, 286, 782, 798, 287, 783, 799,
- 304, 800, 816, 305, 801, 817, 306, 802,
- 818, 307, 803, 819, 308, 804, 820, 309,
- 805, 821, 310, 806, 822, 311, 807, 823,
- 312, 808, 824, 313, 809, 825, 314, 810,
- 826, 315, 811, 827, 316, 812, 828, 317,
- 813, 829, 318, 814, 830, 319, 815, 831,
- 336, 832, 848, 337, 833, 849, 338, 834,
- 850, 339, 835, 851, 340, 836, 852, 341,
- 837, 853, 342, 838, 854, 343, 839, 855,
- 344, 840, 856, 345, 841, 857, 346, 842,
- 858, 347, 843, 859, 348, 844, 860, 349,
- 845, 861, 350, 846, 862, 351, 847, 863,
- 368, 864, 880, 369, 865, 881, 370, 866,
- 882, 371, 867, 883, 372, 868, 884, 373,
- 869, 885, 374, 870, 886, 375, 871, 887,
- 376, 872, 888, 377, 873, 889, 378, 874,
- 890, 379, 875, 891, 380, 876, 892, 381,
- 877, 893, 382, 878, 894, 383, 879, 895,
- 400, 896, 912, 401, 897, 913, 402, 898,
- 914, 403, 899, 915, 404, 900, 916, 405,
- 901, 917, 406, 902, 918, 407, 903, 919,
- 408, 904, 920, 409, 905, 921, 410, 906,
- 922, 411, 907, 923, 412, 908, 924, 413,
- 909, 925, 414, 910, 926, 415, 911, 927,
- 432, 928, 944, 433, 929, 945, 434, 930,
- 946, 435, 931, 947, 436, 932, 948, 437,
- 933, 949, 438, 934, 950, 439, 935, 951,
- 440, 936, 952, 441, 937, 953, 442, 938,
- 954, 443, 939, 955, 444, 940, 956, 445,
- 941, 957, 446, 942, 958, 447, 943, 959,
- 464, 960, 976, 465, 961, 977, 466, 962,
- 978, 467, 963, 979, 468, 964, 980, 469,
- 965, 981, 470, 966, 982, 471, 967, 983,
- 472, 968, 984, 473, 969, 985, 474, 970,
- 986, 475, 971, 987, 476, 972, 988, 477,
- 973, 989, 478, 974, 990, 479, 975, 991,
- 496, 992, 1008, 497, 993, 1009, 498, 994,
- 1010, 499, 995, 1011, 500, 996, 1012, 501,
- 997, 1013, 502, 998, 1014, 503, 999, 1015,
- 504, 1000, 1016, 505, 1001, 1017, 506, 1002,
- 1018, 507, 1003, 1019, 508, 1004, 1020, 509,
- 1005, 1021, 510, 1006, 1022, 511, 1007, 1023,
-};
-#endif
-
-#else
-
DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
@@ -865,7 +247,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892,
923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023,
};
-#endif // CONFIG_DWTDCTHYBRID
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
@@ -937,145 +318,28 @@ vp9_extra_bit_struct vp9_extra_bits[12] = {
#include "vp9/common/vp9_default_coef_probs.h"
-#if CONFIG_NEWCOEFCONTEXT
-
-// Neighborhood 5-tuples for various scans and blocksizes,
-// in {top, left, topleft, topright, bottomleft} order
-// for each position in raster scan order.
-// -1 indicates the neighbor does not exist.
-DECLARE_ALIGNED(16, int,
- vp9_default_zig_zag1d_4x4_neighbors[16 * MAX_NEIGHBORS]);
-DECLARE_ALIGNED(16, int,
- vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
-DECLARE_ALIGNED(16, int,
- vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
-DECLARE_ALIGNED(16, int,
- vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]);
-DECLARE_ALIGNED(16, int,
- vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]);
-DECLARE_ALIGNED(16, int,
- vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]);
-
-static int find_in_scan(const int *scan, int l, int m) {
- int i, l2 = l * l;
- for (i = 0; i < l2; ++i) {
- if (scan[i] == m)
- return i;
- }
- return -1;
-}
-
-static void init_scan_neighbors(const int *scan, int l, int *neighbors) {
- int l2 = l * l;
- int m, n, i, j, k;
- for (n = 0; n < l2; ++n) {
- int locn = find_in_scan(scan, l, n);
- int z = -1;
- i = n / l;
- j = n % l;
- for (k = 0; k < MAX_NEIGHBORS; ++k)
- neighbors[MAX_NEIGHBORS * n + k] = -1;
- if (i - 1 >= 0) {
- m = (i - 1) * l + j;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n] = m;
- if (m == 0) z = 0;
- }
- }
- if (j - 1 >= 0) {
- m = i * l + j - 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 1] = m;
- if (m == 0) z = 1;
- }
- }
- if (i - 1 >= 0 && j - 1 >= 0) {
- m = (i - 1) * l + j - 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 2] = m;
- if (m == 0) z = 2;
- }
- }
- if (i - 1 >= 0 && j + 1 < l) {
- m = (i - 1) * l + j + 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 3] = m;
- if (m == 0) z = 3;
- }
- }
- if (i + 1 < l && j - 1 >= 0) {
- m = (i + 1) * l + j - 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 4] = m;
- if (m == 0) z = 4;
- }
- }
- if (z != -1) { // zero exists
- int v = 0;
- for (k = 0; k < MAX_NEIGHBORS; ++k)
- v += (neighbors[MAX_NEIGHBORS * n + k] > 0);
- if (v) {
- neighbors[MAX_NEIGHBORS * n + z] = -1;
- }
- }
- }
-}
-
-void vp9_init_neighbors() {
- init_scan_neighbors(vp9_default_zig_zag1d_4x4, 4,
- vp9_default_zig_zag1d_4x4_neighbors);
- init_scan_neighbors(vp9_row_scan_4x4, 4,
- vp9_row_scan_4x4_neighbors);
- init_scan_neighbors(vp9_col_scan_4x4, 4,
- vp9_col_scan_4x4_neighbors);
- init_scan_neighbors(vp9_default_zig_zag1d_8x8, 8,
- vp9_default_zig_zag1d_8x8_neighbors);
- init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16,
- vp9_default_zig_zag1d_16x16_neighbors);
- init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32,
- vp9_default_zig_zag1d_32x32_neighbors);
-}
-
-const int *vp9_get_coef_neighbors_handle(const int *scan) {
- if (scan == vp9_default_zig_zag1d_4x4) {
- return vp9_default_zig_zag1d_4x4_neighbors;
- } else if (scan == vp9_row_scan_4x4) {
- return vp9_row_scan_4x4_neighbors;
- } else if (scan == vp9_col_scan_4x4) {
- return vp9_col_scan_4x4_neighbors;
- } else if (scan == vp9_default_zig_zag1d_8x8) {
- return vp9_default_zig_zag1d_8x8_neighbors;
- } else if (scan == vp9_default_zig_zag1d_16x16) {
- return vp9_default_zig_zag1d_16x16_neighbors;
- } else if (scan == vp9_default_zig_zag1d_32x32) {
- return vp9_default_zig_zag1d_32x32_neighbors;
+// This function updates and then returns an AC coefficient context
+// This is currently a placeholder function to allow experimentation
+// using various context models based on the energy of earlier tokens
+// within the current block.
+//
+// For now it just returns the previously used context.
+int vp9_get_coef_context(int * recent_energy, int token) {
+ // int token_energy;
+ // int av_energy;
+
+ // Placeholder code for experiments with token energy
+ // as a coefficient context.
+ /*token_energy = ((token != DCT_EOB_TOKEN) ? token : 0);
+ if (token_energy) {
+ av_energy = (token_energy + *recent_energy + 1) >> 1;
+ } else {
+ av_energy = 0;
}
- return vp9_default_zig_zag1d_4x4_neighbors;
-}
+ *recent_energy = token_energy;*/
-int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc,
- const int *neigbor_handle, int rc) {
- static int neighbors_used = MAX_NEIGHBORS; // maximum is MAX_NEIGHBORS
- const int *nb = neigbor_handle + rc * MAX_NEIGHBORS;
- int i, v, val = 0, n = 0;
- for (i = 0; i < neighbors_used; ++i) {
- if (nb[i] == -1 || (nb[i] == 0 && nodc)) {
- continue;
- }
- v = abs(qcoeff_ptr[nb[i]]);
- val = (v > val ? v : val);
- n++;
- }
- if (n == 0)
- return 0;
- else if (val <= 1)
- return val;
- else if (val < 4)
- return 2;
- else
- return 3;
-}
-#endif /* CONFIG_NEWCOEFCONTEXT */
+ return vp9_prev_token_class[token];
+};
void vp9_default_coef_probs(VP9_COMMON *pc) {
vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4,
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 84e5255c2..1979638d4 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -106,9 +106,6 @@ typedef vp9_prob vp9_coeff_probs[COEF_BANDS][PREV_COEF_CONTEXTS]
#define SUBEXP_PARAM 4 /* Subexponential code parameter */
#define MODULUS_PARAM 13 /* Modulus parameter */
-extern DECLARE_ALIGNED(16, const uint8_t,
- vp9_prev_token_class[MAX_ENTROPY_TOKENS]);
-
struct VP9Common;
void vp9_default_coef_probs(struct VP9Common *);
extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]);
@@ -129,26 +126,5 @@ static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
-#if CONFIG_NEWCOEFCONTEXT
-
-#define MAX_NEIGHBORS 5
-#define NEWCOEFCONTEXT_BAND_COND(b) ((b) >= 1)
-void vp9_init_neighbors(void);
-
-const int *vp9_get_coef_neighbors_handle(const int *scan);
-int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc,
- const int *neigbor_handle, int rc);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_4x4_neighbors[
- 16 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_row_scan_4x4_neighbors[
- 16 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_col_scan_4x4_neighbors[
- 16 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[
- 64 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[
- 256 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[
- 1024 * MAX_NEIGHBORS]);
-#endif // CONFIG_NEWCOEFCONTEXT
+extern int vp9_get_coef_context(int * recent_energy, int token);
#endif // VP9_COMMON_VP9_ENTROPY_H_
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index 74fce7aad..c42aab1a5 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -98,7 +98,7 @@ static int left_block_mv(const MACROBLOCKD *xd,
b += 4;
}
- return (cur_mb->bmi + b - 1)->as_mv.first.as_int;
+ return (cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
static int left_block_second_mv(const MACROBLOCKD *xd,
@@ -117,8 +117,8 @@ static int left_block_second_mv(const MACROBLOCKD *xd,
}
return cur_mb->mbmi.second_ref_frame > 0 ?
- (cur_mb->bmi + b - 1)->as_mv.second.as_int :
- (cur_mb->bmi + b - 1)->as_mv.first.as_int;
+ (cur_mb->bmi + b - 1)->as_mv[1].as_int :
+ (cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
@@ -131,7 +131,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
b += 16;
}
- return (cur_mb->bmi + b - 4)->as_mv.first.as_int;
+ return (cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
@@ -146,8 +146,8 @@ static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
}
return cur_mb->mbmi.second_ref_frame > 0 ?
- (cur_mb->bmi + b - 4)->as_mv.second.as_int :
- (cur_mb->bmi + b - 4)->as_mv.first.as_int;
+ (cur_mb->bmi + b - 4)->as_mv[1].as_int :
+ (cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index 2f847dc78..2fec98e50 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -476,12 +476,13 @@ void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) {
}
}
-void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,
+void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr,
uint8_t *dst_ptr,
int pitch, int stride) {
int r, c;
- short tmp[16];
- vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);
+ int16_t dc = input_dc;
+ int16_t tmp[16];
+ vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1);
for (r = 0; r < 4; r++) {
for (c = 0; c < 4; c++) {
@@ -1152,8 +1153,6 @@ void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
*output = (out + 32) >> 6;
}
-
-#if !CONFIG_DWTDCTHYBRID
void idct32_1d(int16_t *input, int16_t *output) {
int16_t step1[32], step2[32];
int temp1, temp2;
@@ -1521,7 +1520,6 @@ void idct32_1d(int16_t *input, int16_t *output) {
output[31] = step1[0] - step1[31];
}
-
void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
int16_t out[32 * 32];
int16_t *outptr = &out[0];
@@ -1554,792 +1552,3 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
out = dct_const_round_shift(tmp);
*output = (out + 32) >> 6;
}
-
-#else // !CONFIG_DWTDCTHYBRID
-
-#if DWT_TYPE == 53
-
-// Note: block length must be even for this implementation
-static void synthesis_53_row(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, *a, *b;
- int n;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ -= (r + (*b) + 1) >> 1;
- r = *b++;
- }
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *x++ = ((r = *a++) + 1) >> 1;
- *x++ = *b++ + ((r + (*a) + 2) >> 2);
- }
- *x++ = ((r = *a) + 1) >> 1;
- *x++ = *b + ((r + 1) >> 1);
-}
-
-static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, *a, *b;
- int n;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ -= (r + (*b) + 1) >> 1;
- r = *b++;
- }
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- r = *a++;
- *x++ = r;
- *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1);
- }
- *x++ = *a;
- *x++ = ((*b) << 1) + *a;
-}
-
-static void dyadic_synthesize_53(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_53_col(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
- synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
- }
- }
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :
- -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);
- }
- }
-}
-
-#elif DWT_TYPE == 26
-
-// Note: block length must be even for this implementation
-static void synthesis_26_row(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, s, *a, *b;
- int i, n = length >> 1;
-
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ += (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b += (r - *a + 4) >> 3;
- }
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- s = *b++;
- r = *a++;
- *x++ = (r + s + 1) >> 1;
- *x++ = (r - s + 1) >> 1;
- }
-}
-
-static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, s, *a, *b;
- int i, n = length >> 1;
-
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ += (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b += (r - *a + 4) >> 3;
- }
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- s = *b++;
- r = *a++;
- *x++ = r + s;
- *x++ = r - s;
- }
-}
-
-static void dyadic_synthesize_26(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- int16_t buffer[2 * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_26_col(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
- synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
- }
- }
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :
- -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);
- }
- }
-}
-
-#elif DWT_TYPE == 97
-
-static void synthesis_97(int length, double *lowpass, double *highpass,
- double *x) {
- static const double a_predict1 = -1.586134342;
- static const double a_update1 = -0.05298011854;
- static const double a_predict2 = 0.8829110762;
- static const double a_update2 = 0.4435068522;
- static const double s_low = 1.149604398;
- static const double s_high = 1/1.149604398;
- static const double inv_s_low = 1 / s_low;
- static const double inv_s_high = 1 / s_high;
- int i;
- double y[DWT_MAX_LENGTH];
- // Undo pack and scale
- for (i = 0; i < length / 2; i++) {
- y[i * 2] = lowpass[i] * inv_s_low;
- y[i * 2 + 1] = highpass[i] * inv_s_high;
- }
- memcpy(x, y, sizeof(*y) * length);
- // Undo update 2
- for (i = 2; i < length; i += 2) {
- x[i] -= a_update2 * (x[i-1] + x[i+1]);
- }
- x[0] -= 2 * a_update2 * x[1];
- // Undo predict 2
- for (i = 1; i < length - 2; i += 2) {
- x[i] -= a_predict2 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] -= 2 * a_predict2 * x[length - 2];
- // Undo update 1
- for (i = 2; i < length; i += 2) {
- x[i] -= a_update1 * (x[i - 1] + x[i + 1]);
- }
- x[0] -= 2 * a_update1 * x[1];
- // Undo predict 1
- for (i = 1; i < length - 2; i += 2) {
- x[i] -= a_predict1 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] -= 2 * a_predict1 * x[length - 2];
-}
-
-static void dyadic_synthesize_97(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- double buffer[2 * DWT_MAX_LENGTH];
- double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_97(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- y[i * DWT_MAX_LENGTH + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
- synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]);
- }
- }
- for (i = 0; i < height; i++)
- for (j = 0; j < width; j++)
- x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] /
- (1 << DWT_PRECISION_BITS));
-}
-
-#endif // DWT_TYPE
-
-// TODO(debargha): Implement scaling differently so as not to have to use the
-// floating point 16x16 dct
-static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
-
- // step 1 and 2
- step[ 0] = input[0] + input[8];
- step[ 1] = input[0] - input[8];
-
- temp1 = input[4]*C12;
- temp2 = input[12]*C4;
-
- temp1 -= temp2;
- temp1 *= C8;
-
- step[ 2] = 2*(temp1);
-
- temp1 = input[4]*C4;
- temp2 = input[12]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- step[ 3] = 2*(temp1);
-
- temp1 = input[2]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] + input[10];
-
- step[ 4] = temp1 + temp2;
- step[ 5] = temp1 - temp2;
-
- temp1 = input[14]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] - input[10];
-
- step[ 6] = temp2 - temp1;
- step[ 7] = temp2 + temp1;
-
- // for odd input
- temp1 = input[3]*C12;
- temp2 = input[13]*C4;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[ 8] = 2*(temp1);
-
- temp1 = input[3]*C4;
- temp2 = input[13]*C12;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[ 9] = 2*(temp2);
-
- intermediate[10] = 2*(input[9]*C8);
- intermediate[11] = input[15] - input[1];
- intermediate[12] = input[15] + input[1];
- intermediate[13] = 2*((input[7]*C8));
-
- temp1 = input[11]*C12;
- temp2 = input[5]*C4;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[14] = 2*(temp2);
-
- temp1 = input[11]*C4;
- temp2 = input[5]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[15] = 2*(temp1);
-
- step[ 8] = intermediate[ 8] + intermediate[14];
- step[ 9] = intermediate[ 9] + intermediate[15];
- step[10] = intermediate[10] + intermediate[11];
- step[11] = intermediate[10] - intermediate[11];
- step[12] = intermediate[12] + intermediate[13];
- step[13] = intermediate[12] - intermediate[13];
- step[14] = intermediate[ 8] - intermediate[14];
- step[15] = intermediate[ 9] - intermediate[15];
-
- // step 3
- output[0] = step[ 0] + step[ 3];
- output[1] = step[ 1] + step[ 2];
- output[2] = step[ 1] - step[ 2];
- output[3] = step[ 0] - step[ 3];
-
- temp1 = step[ 4]*C14;
- temp2 = step[ 7]*C2;
- temp1 -= temp2;
- output[4] = (temp1);
-
- temp1 = step[ 4]*C2;
- temp2 = step[ 7]*C14;
- temp1 += temp2;
- output[7] = (temp1);
-
- temp1 = step[ 5]*C10;
- temp2 = step[ 6]*C6;
- temp1 -= temp2;
- output[5] = (temp1);
-
- temp1 = step[ 5]*C6;
- temp2 = step[ 6]*C10;
- temp1 += temp2;
- output[6] = (temp1);
-
- output[8] = step[ 8] + step[11];
- output[9] = step[ 9] + step[10];
- output[10] = step[ 9] - step[10];
- output[11] = step[ 8] - step[11];
- output[12] = step[12] + step[15];
- output[13] = step[13] + step[14];
- output[14] = step[13] - step[14];
- output[15] = step[12] - step[15];
-
- // output 4
- step[ 0] = output[0] + output[7];
- step[ 1] = output[1] + output[6];
- step[ 2] = output[2] + output[5];
- step[ 3] = output[3] + output[4];
- step[ 4] = output[3] - output[4];
- step[ 5] = output[2] - output[5];
- step[ 6] = output[1] - output[6];
- step[ 7] = output[0] - output[7];
-
- temp1 = output[8]*C7;
- temp2 = output[15]*C9;
- temp1 -= temp2;
- step[ 8] = (temp1);
-
- temp1 = output[9]*C11;
- temp2 = output[14]*C5;
- temp1 += temp2;
- step[ 9] = (temp1);
-
- temp1 = output[10]*C3;
- temp2 = output[13]*C13;
- temp1 -= temp2;
- step[10] = (temp1);
-
- temp1 = output[11]*C15;
- temp2 = output[12]*C1;
- temp1 += temp2;
- step[11] = (temp1);
-
- temp1 = output[11]*C1;
- temp2 = output[12]*C15;
- temp2 -= temp1;
- step[12] = (temp2);
-
- temp1 = output[10]*C13;
- temp2 = output[13]*C3;
- temp1 += temp2;
- step[13] = (temp1);
-
- temp1 = output[9]*C5;
- temp2 = output[14]*C11;
- temp2 -= temp1;
- step[14] = (temp2);
-
- temp1 = output[8]*C9;
- temp2 = output[15]*C7;
- temp1 += temp2;
- step[15] = (temp1);
-
- // step 5
- output[0] = (step[0] + step[15]);
- output[1] = (step[1] + step[14]);
- output[2] = (step[2] + step[13]);
- output[3] = (step[3] + step[12]);
- output[4] = (step[4] + step[11]);
- output[5] = (step[5] + step[10]);
- output[6] = (step[6] + step[ 9]);
- output[7] = (step[7] + step[ 8]);
-
- output[15] = (step[0] - step[15]);
- output[14] = (step[1] - step[14]);
- output[13] = (step[2] - step[13]);
- output[12] = (step[3] - step[12]);
- output[11] = (step[4] - step[11]);
- output[10] = (step[5] - step[10]);
- output[9] = (step[6] - step[ 9]);
- output[8] = (step[7] - step[ 8]);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch,
- int scale) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double out[16*16], out2[16*16];
- const int short_pitch = pitch >> 1;
- int i, j;
- // First transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j + i*short_pitch];
- butterfly_16x16_idct_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out[j + i*16] = temp_out[j];
- }
- // Then transform columns
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j*16 + i];
- butterfly_16x16_idct_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out2[j*16 + i] = temp_out[j];
- }
- for (i = 0; i < 16*16; ++i)
- output[i] = round(out2[i] / (128 >> scale));
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void idct8_1d_f(double *x) {
- int i, j;
- double t[8];
- static const double idctmat[64] = {
- 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127,
- 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064,
- 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064,
- -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098,
- 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162,
- -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127,
- 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098,
- 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162,
- 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098,
- 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162,
- 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161,
- -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127,
- 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065,
- -0.35355339059327, 0.49039264020162, -0.46193976625564, 0.2777851165098,
- 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127,
- 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064
- };
- for (i = 0; i < 8; ++i) {
- t[i] = 0;
- for (j = 0; j < 8; ++j)
- t[i] += idctmat[i * 8 + j] * x[j];
- }
- for (i = 0; i < 8; ++i) {
- x[i] = t[i];
- }
-}
-
-static void vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch,
- int scale) {
- double X[8 * 8], Y[8];
- int i, j;
- int shortpitch = pitch >> 1;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- X[i * 8 + j] = (double)coefs[i * shortpitch + j];
- }
- }
- for (i = 0; i < 8; i++)
- idct8_1d_f(X + 8 * i);
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; ++j)
- Y[j] = X[i + 8 * j];
- idct8_1d_f(Y);
- for (j = 0; j < 8; ++j)
- X[i + 8 * j] = Y[j];
- }
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale));
- }
- }
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#define multiply_bits(d, n) ((n) < 0 ? (d) >> (n) : (d) << (n))
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[8 * 8];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct8x8_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8,
- sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8,
- sizeof(*buffer2) * 8);
- }
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#endif
-
-#if CONFIG_TX64X64
-void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 64x64 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[64 * 64];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16);
- }
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 16; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
-#elif DWTDCT_TYPE == DWTDCT16X16
- vp9_short_idct16x16_c_f(input + 16, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16 * 64, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16 * 65, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
-
- // Copying and scaling highest bands into buffer2
- for (i = 0; i < 32; ++i) {
- for (j = 32; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 32; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
-#endif // DWTDCT_TYPE
-
-#if DWT_TYPE == 26
- dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(2, 64, 64, buffer2, 64, output, 64);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64);
-#endif
-}
-#endif // CONFIG_TX64X64
-#endif // !CONFIG_DWTDCTHYBRID
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index e7cfe207b..cb9a3db63 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -32,9 +32,9 @@ static void recon_dcblock_8x8(MACROBLOCKD *xd) {
void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
BLOCKD *b = &xd->block[block];
if (b->eob <= 1)
- xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);
+ xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
else
- xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);
+ xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
}
void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
@@ -44,7 +44,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
if (has_2nd_order) {
/* do 2nd order transform on the dc block */
- vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff);
+ xd->inv_2ndtxm4x4(blockd[24].dqcoeff, blockd[24].diff);
recon_dcblock(xd);
}
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 0b7d98a58..d93b7d5fb 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -177,6 +177,7 @@ extern "C"
int arnr_type;
int tile_columns;
+ int tile_rows;
struct vpx_fixed_buf two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 5e57228b4..6295514ea 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -279,8 +279,10 @@ typedef struct VP9Common {
int error_resilient_mode;
int frame_parallel_decoding_mode;
- int tile_columns;
- int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_idx;
+ int tile_columns, log2_tile_columns;
+ int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_col_idx;
+ int tile_rows, log2_tile_rows;
+ int cur_tile_mb_row_start, cur_tile_mb_row_end, cur_tile_row_idx;
} VP9_COMMON;
static int get_free_fb(VP9_COMMON *cm) {
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index d4435d872..b75525e2c 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -154,7 +154,7 @@ void vp9_build_inter_predictors_b(BLOCKD *d, int pitch,
int_mv mv;
ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
+ mv.as_int = d->bmi.as_mv[0].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -179,7 +179,7 @@ void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch,
int_mv mv;
ptr_base = *(d->base_second_pre);
- mv.as_int = d->bmi.as_mv.second.as_int;
+ mv.as_int = d->bmi.as_mv[1].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -197,7 +197,7 @@ void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
int_mv mv;
ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
+ mv.as_int = d->bmi.as_mv[0].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -222,7 +222,7 @@ void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd,
int_mv mv;
ptr_base = *(d->base_second_pre);
- mv.as_int = d->bmi.as_mv.second.as_int;
+ mv.as_int = d->bmi.as_mv[1].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -240,7 +240,7 @@ static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
int_mv mv;
ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
+ mv.as_int = d->bmi.as_mv[0].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -264,38 +264,38 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
int voffset = 20 + i * 2 + j;
int temp;
- temp = blockd[yoffset ].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 1].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 4].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 5].bmi.as_mv.first.as_mv.row;
+ temp = blockd[yoffset ].bmi.as_mv[0].as_mv.row
+ + blockd[yoffset + 1].bmi.as_mv[0].as_mv.row
+ + blockd[yoffset + 4].bmi.as_mv[0].as_mv.row
+ + blockd[yoffset + 5].bmi.as_mv[0].as_mv.row;
if (temp < 0) temp -= 4;
else temp += 4;
- xd->block[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
+ xd->block[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = blockd[yoffset ].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 1].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 4].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 5].bmi.as_mv.first.as_mv.col;
+ temp = blockd[yoffset ].bmi.as_mv[0].as_mv.col
+ + blockd[yoffset + 1].bmi.as_mv[0].as_mv.col
+ + blockd[yoffset + 4].bmi.as_mv[0].as_mv.col
+ + blockd[yoffset + 5].bmi.as_mv[0].as_mv.col;
if (temp < 0) temp -= 4;
else temp += 4;
- blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
- blockd[voffset].bmi.as_mv.first.as_mv.row =
- blockd[uoffset].bmi.as_mv.first.as_mv.row;
- blockd[voffset].bmi.as_mv.first.as_mv.col =
- blockd[uoffset].bmi.as_mv.first.as_mv.col;
+ blockd[voffset].bmi.as_mv[0].as_mv.row =
+ blockd[uoffset].bmi.as_mv[0].as_mv.row;
+ blockd[voffset].bmi.as_mv[0].as_mv.col =
+ blockd[uoffset].bmi.as_mv[0].as_mv.col;
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- temp = blockd[yoffset ].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 1].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 4].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 5].bmi.as_mv.second.as_mv.row;
+ temp = blockd[yoffset ].bmi.as_mv[1].as_mv.row
+ + blockd[yoffset + 1].bmi.as_mv[1].as_mv.row
+ + blockd[yoffset + 4].bmi.as_mv[1].as_mv.row
+ + blockd[yoffset + 5].bmi.as_mv[1].as_mv.row;
if (temp < 0) {
temp -= 4;
@@ -303,13 +303,13 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = blockd[yoffset ].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 1].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 4].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 5].bmi.as_mv.second.as_mv.col;
+ temp = blockd[yoffset ].bmi.as_mv[1].as_mv.col
+ + blockd[yoffset + 1].bmi.as_mv[1].as_mv.col
+ + blockd[yoffset + 4].bmi.as_mv[1].as_mv.col
+ + blockd[yoffset + 5].bmi.as_mv[1].as_mv.col;
if (temp < 0) {
temp -= 4;
@@ -317,13 +317,13 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
- blockd[voffset].bmi.as_mv.second.as_mv.row =
- blockd[uoffset].bmi.as_mv.second.as_mv.row;
- blockd[voffset].bmi.as_mv.second.as_mv.col =
- blockd[uoffset].bmi.as_mv.second.as_mv.col;
+ blockd[voffset].bmi.as_mv[1].as_mv.row =
+ blockd[uoffset].bmi.as_mv[1].as_mv.row;
+ blockd[voffset].bmi.as_mv[1].as_mv.col =
+ blockd[uoffset].bmi.as_mv[1].as_mv.col;
}
}
}
@@ -332,7 +332,7 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int)
build_inter_predictors2b(xd, d0, 8);
else {
vp9_build_inter_predictors_b(d0, 8, &xd->subpix);
@@ -717,15 +717,15 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
blockd[10].bmi = xd->mode_info_context->bmi[10];
if (mbmi->need_to_clamp_mvs) {
- clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[0].as_mv, xd);
if (mbmi->second_ref_frame > 0) {
- clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[1].as_mv, xd);
}
}
@@ -750,15 +750,15 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
if (mbmi->need_to_clamp_mvs) {
- clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[0].as_mv, xd);
if (mbmi->second_ref_frame > 0) {
- clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[1].as_mv, xd);
}
}
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int)
build_inter_predictors2b(xd, d0, 16);
else {
vp9_build_inter_predictors_b(d0, 16, &xd->subpix);
@@ -776,7 +776,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int)
build_inter_predictors2b(xd, d0, 8);
else {
vp9_build_inter_predictors_b(d0, 8, &xd->subpix);
@@ -803,44 +803,44 @@ void build_4x4uvmvs(MACROBLOCKD *xd) {
int temp;
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.row;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.row;
if (temp < 0) temp -= 4;
else temp += 4;
- blockd[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.col;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.col;
if (temp < 0) temp -= 4;
else temp += 4;
- blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
// if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd);
+ clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd);
// if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd);
+ clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd);
- blockd[voffset].bmi.as_mv.first.as_mv.row =
- blockd[uoffset].bmi.as_mv.first.as_mv.row;
- blockd[voffset].bmi.as_mv.first.as_mv.col =
- blockd[uoffset].bmi.as_mv.first.as_mv.col;
+ blockd[voffset].bmi.as_mv[0].as_mv.row =
+ blockd[uoffset].bmi.as_mv[0].as_mv.row;
+ blockd[voffset].bmi.as_mv[0].as_mv.col =
+ blockd[uoffset].bmi.as_mv[0].as_mv.col;
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.row;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.row;
if (temp < 0) {
temp -= 4;
@@ -848,13 +848,13 @@ void build_4x4uvmvs(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.col;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.col;
if (temp < 0) {
temp -= 4;
@@ -862,21 +862,21 @@ void build_4x4uvmvs(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
// if (mbmi->need_to_clamp_mvs)
clamp_uvmv_to_umv_border(
- &blockd[uoffset].bmi.as_mv.second.as_mv, xd);
+ &blockd[uoffset].bmi.as_mv[1].as_mv, xd);
// if (mbmi->need_to_clamp_mvs)
clamp_uvmv_to_umv_border(
- &blockd[uoffset].bmi.as_mv.second.as_mv, xd);
+ &blockd[uoffset].bmi.as_mv[1].as_mv, xd);
- blockd[voffset].bmi.as_mv.second.as_mv.row =
- blockd[uoffset].bmi.as_mv.second.as_mv.row;
- blockd[voffset].bmi.as_mv.second.as_mv.col =
- blockd[uoffset].bmi.as_mv.second.as_mv.col;
+ blockd[voffset].bmi.as_mv[1].as_mv.row =
+ blockd[uoffset].bmi.as_mv[1].as_mv.row;
+ blockd[voffset].bmi.as_mv[1].as_mv.col =
+ blockd[uoffset].bmi.as_mv[1].as_mv.col;
}
}
}
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 066989272..3bd1f250f 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -329,10 +329,15 @@ specialize vp9_dc_only_idct_add
if [ "$CONFIG_LOSSLESS" = "yes" ]; then
prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_inv_walsh4x4_1_x8
prototype void vp9_short_inv_walsh4x4_x8 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_inv_walsh4x4_x8
prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
+specialize vp9_dc_only_inv_walsh_add
prototype void vp9_short_inv_walsh4x4_1_lossless "int16_t *in, int16_t *out"
+specialize vp9_short_inv_walsh4x4_1_lossless
prototype void vp9_short_inv_walsh4x4_lossless "int16_t *in, int16_t *out"
+specialize vp9_short_inv_walsh4x4_lossless
fi
prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad"
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
new file mode 100644
index 000000000..29f89b618
--- /dev/null
+++ b/vp9/common/vp9_tile_common.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_tile_common.h"
+
+/* Computes the start and end macroblock offsets of tile tile_idx when n_mbs
+ * macroblocks are divided into (1 << log2_n_tiles) tiles.  Boundaries are
+ * placed on multiples of 4 macroblocks and clamped to n_mbs.  cm is not
+ * read. */
+static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
+                                 int *max_tile_off, int tile_idx,
+                                 int log2_n_tiles, int n_mbs) {
+  const int n_sbs = (n_mbs + 3) >> 2;  // groups of 4 MBs, rounded up
+  const int first_mb = ((tile_idx * n_sbs) >> log2_n_tiles) << 2;
+  const int last_mb = (((tile_idx + 1) * n_sbs) >> log2_n_tiles) << 2;
+
+  *min_tile_off = first_mb < n_mbs ? first_mb : n_mbs;
+  *max_tile_off = last_mb < n_mbs ? last_mb : n_mbs;
+}
+
+/* Makes tile column tile_col_idx the current one and stores its macroblock
+ * column bounds in cm->cur_tile_mb_col_start / cm->cur_tile_mb_col_end. */
+void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx) {
+  int *const col_start = &cm->cur_tile_mb_col_start;
+  int *const col_end = &cm->cur_tile_mb_col_end;
+
+  cm->cur_tile_col_idx = tile_col_idx;
+  vp9_get_tile_offsets(cm, col_start, col_end, tile_col_idx,
+                       cm->log2_tile_columns, cm->mb_cols);
+}
+
+/* Makes tile row tile_row_idx the current one and stores its macroblock row
+ * bounds in cm->cur_tile_mb_row_start / cm->cur_tile_mb_row_end. */
+void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx) {
+  int *const row_start = &cm->cur_tile_mb_row_start;
+  int *const row_end = &cm->cur_tile_mb_row_end;
+
+  cm->cur_tile_row_idx = tile_row_idx;
+  vp9_get_tile_offsets(cm, row_start, row_end, tile_row_idx,
+                       cm->log2_tile_rows, cm->mb_rows);
+}
+
+#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
+#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
+
+/* Reports the allowed range of log2(number of tile columns) for the frame
+ * width held in cm->mb_cols.
+ *
+ *   min_log2_n_tiles_ptr  receives the smallest log2 count for which
+ *                         (MAX_TILE_WIDTH_SBS << count) reaches sb_cols.
+ *   delta_log2_n_tiles    receives (largest allowed log2 count) - minimum.
+ */
+void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,
+                         int *delta_log2_n_tiles) {
+  const int sb_cols = (cm->mb_cols + 3) >> 2;
+  int min_log2_n_tiles, max_log2_n_tiles;
+
+  // Largest split that still leaves every tile at least MIN_TILE_WIDTH_SBS
+  // wide.  The condition tests the *next* split: stopping on the first split
+  // that is already too narrow would report a maximum that is one too large.
+  // Frames narrower than two minimum-width tiles yield 0.
+  for (max_log2_n_tiles = 0;
+       (sb_cols >> (max_log2_n_tiles + 1)) >= MIN_TILE_WIDTH_SBS;
+       max_log2_n_tiles++) {}
+  // Smallest split for which no tile has to exceed MAX_TILE_WIDTH_SBS.
+  for (min_log2_n_tiles = 0;
+       (MAX_TILE_WIDTH_SBS << min_log2_n_tiles) < sb_cols;
+       min_log2_n_tiles++) {}
+
+  *min_log2_n_tiles_ptr = min_log2_n_tiles;
+  *delta_log2_n_tiles = max_log2_n_tiles - min_log2_n_tiles;
+}
diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h
new file mode 100644
index 000000000..92bf50897
--- /dev/null
+++ b/vp9/common/vp9_tile_common.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_TILE_COMMON_H_
+#define VP9_COMMON_VP9_TILE_COMMON_H_
+
+#include "vp9/common/vp9_onyxc_int.h"
+
+#define MIN_TILE_WIDTH 256 // lower tile-column width bound; vp9_tile_common.c uses it >> 6
+#define MAX_TILE_WIDTH 4096 // upper tile-column width bound; vp9_tile_common.c uses it >> 6
+
+extern void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx);
+
+extern void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx);
+
+extern void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles,
+ int *delta_log2_n_tiles);
+
+#endif // VP9_COMMON_VP9_TILE_COMMON_H_
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 3e2346f29..fbc95b6ce 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -65,6 +65,20 @@ void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
unsigned int output_height,
const short *filter);
+void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
@@ -87,6 +101,14 @@ void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
dst += 8;
w -= 8;
}
+ while (w >= 4) {
+ vp9_filter_block1d4_h8_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
if (w) {
vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
@@ -117,6 +139,14 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride,
dst += 8;
w -= 8;
}
+ while (w >= 4) {
+ vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
if (w) {
vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
@@ -156,6 +186,15 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
h, filter_y);
return;
}
+ if (w == 4) {
+ vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d4_v8_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
+ }
}
vp9_convolve8_c(src, src_stride, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index c6d65e904..5f039454a 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -30,6 +30,124 @@
; unsigned int output_height,
; short *filter
;)
+global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v8_ssse3): ; 8-tap vertical filter, 4 pixels wide, one output row per loop pass
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ; five 16-byte slots, named by the defines below
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ; 64 in each 16-bit word: rounding term for the >> 7 below
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ; narrow the 16-bit taps to signed bytes
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ; copy the low 8 bytes into the high 8 bytes
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ; broadcast the rounding dword to all four lanes
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx ; rax = src + pitch, base address for rows D, F and H
+
+ lea rbx, [rdx + rdx*4] ; pitch * 5
+ add rbx, rdx ;pitch * 6
+
+.vp9_filter_block1d4_v8_ssse3_loop:
+ movd xmm0, [rsi] ;A
+ movd xmm1, [rsi + rdx] ;B
+ movd xmm2, [rsi + rdx * 2] ;C
+ movd xmm3, [rax + rdx * 2] ;D
+ movd xmm4, [rsi + rdx * 4] ;E
+ movd xmm5, [rax + rdx * 4] ;F
+
+ punpcklbw xmm0, xmm1 ;A B
+ punpcklbw xmm2, xmm3 ;C D
+ punpcklbw xmm4, xmm5 ;E F
+
+ movd xmm6, [rsi + rbx] ;G
+ movd xmm7, [rax + rbx] ;H
+
+ pmaddubsw xmm0, k0k1 ; per pixel: A*k0 + B*k1 as a 16-bit sum
+ pmaddubsw xmm2, k2k3
+ punpcklbw xmm6, xmm7 ;G H
+ pmaddubsw xmm4, k4k5
+ pmaddubsw xmm6, k6k7
+
+ paddsw xmm0, xmm2 ; saturating adds combine the four partial sums and krd
+ paddsw xmm0, krd
+ paddsw xmm4, xmm6
+ paddsw xmm0, xmm4
+
+ psraw xmm0, 7 ; arithmetic shift right by 7 after the +64 rounding
+ packuswb xmm0, xmm0 ; saturate to unsigned bytes
+
+ add rsi, rdx ; advance both row bases by one source row
+ add rax, rdx
+
+ movd [rdi], xmm0 ; store 4 output pixels
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d4_v8_ssse3_loop
+
+ add rsp, 16*5
+ pop rsp ; NOTE(review): presumably restores the rsp saved by ALIGN_STACK - confirm against the macro
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d8_v8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
+; short *filter
+;)
global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
sym(vp9_filter_block1d8_v8_ssse3):
push rbp
@@ -289,6 +407,110 @@ sym(vp9_filter_block1d16_v8_ssse3):
pop rbp
ret
+;void vp9_filter_block1d4_h8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; short *filter
+;)
+global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h8_ssse3): ; 8-tap horizontal filter, stores 4 pixels per row, one row per loop pass
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx ; NOTE(review): presumably needed for the GLOBAL(shuf_*) references below - confirm
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ; five 16-byte slots, named by the defines below
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ; 64 in each 16-bit word: rounding term for the >> 7 below
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ; narrow the 16-bit taps to signed bytes
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ; copy the low 8 bytes into the high 8 bytes
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ; broadcast the rounding dword to all four lanes
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+.filter_block1d4_h8_rowloop_ssse3:
+ movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
+
+ movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
+ punpcklqdq xmm0, xmm3 ; xmm0 = 16 source bytes, offsets -3..12
+
+ movdqa xmm1, xmm0
+ pshufb xmm0, [GLOBAL(shuf_t0t1)] ; NOTE(review): shuf_* tables are defined elsewhere; presumably they pair the bytes for each tap pair - confirm
+ pmaddubsw xmm0, k0k1
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf_t2t3)]
+ pmaddubsw xmm1, k2k3
+
+ movdqa xmm4, xmm2
+ pshufb xmm2, [GLOBAL(shuf_t4t5)]
+ pmaddubsw xmm2, k4k5
+
+ pshufb xmm4, [GLOBAL(shuf_t6t7)]
+ pmaddubsw xmm4, k6k7
+
+ paddsw xmm0, xmm1 ; saturating adds combine the four partial sums and krd
+ paddsw xmm0, xmm4
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
+ psraw xmm0, 7 ; arithmetic shift right by 7 after the +64 rounding
+ packuswb xmm0, xmm0 ; saturate to unsigned bytes
+
+ lea rsi, [rsi + rax] ; next source row
+ movd [rdi], xmm0 ; only the low 4 bytes of the result are written
+
+ lea rdi, [rdi + rdx] ; next output row
+ dec rcx
+ jnz .filter_block1d4_h8_rowloop_ssse3
+
+ add rsp, 16*5
+ pop rsp ; NOTE(review): presumably restores the rsp saved by ALIGN_STACK - confirm against the macro
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
;void vp9_filter_block1d8_h8_ssse3
;(
; unsigned char *src_ptr,
@@ -340,7 +562,7 @@ sym(vp9_filter_block1d8_h8_ssse3):
pshufd xmm5, xmm5, 0
movdqa k4k5, xmm2
movdqa k6k7, xmm3
-; movdqa krd, xmm5
+ movdqa krd, xmm5
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rdx, dword ptr arg(3) ;output_pitch
@@ -349,10 +571,7 @@ sym(vp9_filter_block1d8_h8_ssse3):
.filter_block1d8_h8_rowloop_ssse3:
movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
-; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
-;note: if we create a k0_k7 filter, we can save a pshufb
-; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
punpcklqdq xmm0, xmm3
movdqa xmm1, xmm0
@@ -371,9 +590,9 @@ sym(vp9_filter_block1d8_h8_ssse3):
pmaddubsw xmm4, k6k7
paddsw xmm0, xmm1
- paddsw xmm0, xmm2
- paddsw xmm0, xmm5
paddsw xmm0, xmm4
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -456,10 +675,7 @@ sym(vp9_filter_block1d16_h8_ssse3):
.filter_block1d16_h8_rowloop_ssse3:
movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
-; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
-;note: if we create a k0_k7 filter, we can save a pshufb
-; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
punpcklqdq xmm0, xmm3
movdqa xmm1, xmm0
@@ -486,10 +702,7 @@ sym(vp9_filter_block1d16_h8_ssse3):
movq xmm3, [rsi + 5]
-; movq xmm7, [rsi + 12]
movq xmm7, [rsi + 13]
-;note: same as above
-; punpcklbw xmm3, xmm7
punpcklqdq xmm3, xmm7
movdqa xmm1, xmm3
@@ -508,9 +721,9 @@ sym(vp9_filter_block1d16_h8_ssse3):
pmaddubsw xmm4, k6k7
paddsw xmm3, xmm1
+ paddsw xmm3, xmm4
paddsw xmm3, xmm2
paddsw xmm3, krd
- paddsw xmm3, xmm4
psraw xmm3, 7
packuswb xmm3, xmm3
punpcklqdq xmm0, xmm3
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 5d6a4a717..316bda33b 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -1041,9 +1041,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]];
do {
- mi->bmi[ *fill_offset].as_mv.first.as_int = blockmv.as_int;
+ mi->bmi[ *fill_offset].as_mv[0].as_int = blockmv.as_int;
if (mbmi->second_ref_frame > 0)
- mi->bmi[ *fill_offset].as_mv.second.as_int = secondmv.as_int;
+ mi->bmi[ *fill_offset].as_mv[1].as_int = secondmv.as_int;
fill_offset++;
} while (--fill_count);
}
@@ -1051,8 +1051,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
} while (++j < num_p);
}
- mv->as_int = mi->bmi[15].as_mv.first.as_int;
- mbmi->mv[1].as_int = mi->bmi[15].as_mv.second.as_int;
+ mv->as_int = mi->bmi[15].as_mv[0].as_int;
+ mbmi->mv[1].as_int = mi->bmi[15].as_mv[1].as_int;
break; /* done with SPLITMV */
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 9f4db6bf7..facd761f0 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -31,6 +31,7 @@
#include "vp9/decoder/vp9_dboolhuff.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9_rtcd.h"
#include <assert.h>
@@ -123,38 +124,30 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
xd->block[i].dequant = pc->Y1dequant[QIndex];
}
+ xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1;
+ xd->inv_txm4x4 = vp9_short_idct4x4llm;
+ xd->inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1;
+ xd->inv_2ndtxm4x4 = vp9_short_inv_walsh4x4;
+ xd->itxm_add = vp9_dequant_idct_add;
+ xd->dc_only_itxm_add = vp9_dc_only_idct_add_c;
+ xd->dc_itxm_add = vp9_dequant_dc_idct_add;
+ xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block;
+ xd->itxm_add_y_block = vp9_dequant_idct_add_y_block;
+ xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block;
#if CONFIG_LOSSLESS
- if (!QIndex) {
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
- pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
- pbi->idct_add = vp9_dequant_idct_add_lossless_c;
- pbi->dc_idct_add = vp9_dequant_dc_idct_add_lossless_c;
- pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
- pbi->idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
- pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
- } else {
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
- pbi->idct_add = vp9_dequant_idct_add;
- pbi->dc_idct_add = vp9_dequant_dc_idct_add;
- pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
- pbi->idct_add_y_block = vp9_dequant_idct_add_y_block;
- pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block;
+ if (xd->lossless) {
+ assert(QIndex == 0);
+ xd->inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8;
+ xd->inv_txm4x4 = vp9_short_inv_walsh4x4_x8;
+ xd->inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+ xd->inv_2ndtxm4x4 = vp9_short_inv_walsh4x4_lossless;
+ xd->itxm_add = vp9_dequant_idct_add_lossless_c;
+ xd->dc_only_itxm_add = vp9_dc_only_inv_walsh_add_c;
+ xd->dc_itxm_add = vp9_dequant_dc_idct_add_lossless_c;
+ xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
+ xd->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
+ xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
}
-#else
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
- pbi->idct_add = vp9_dequant_idct_add;
- pbi->dc_idct_add = vp9_dequant_dc_idct_add;
- pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
- pbi->idct_add_y_block = vp9_dequant_idct_add_y_block;
- pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block;
#endif
for (i = 16; i < 24; i++) {
@@ -345,15 +338,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
int i8x8mode = b->bmi.as_mode.first;
b = &xd->block[16 + i];
vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
b = &xd->block[20 + i];
vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
}
} else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs + 16);
} else {
@@ -400,17 +393,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
*(b->base_dst) + b->dst, 16,
b->dst_stride, b->eob);
} else {
- vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
}
}
b = &xd->block[16 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
b = &xd->block[20 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
}
} else if (mode == B_PRED) {
@@ -434,8 +427,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
*(b->base_dst) + b->dst, 16, b->dst_stride,
b->eob);
} else {
- vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
}
}
if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
@@ -444,7 +437,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->above_context->y2 = 0;
xd->left_context->y2 = 0;
vp9_build_intra_predictors_mbuv(xd);
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
@@ -453,13 +446,13 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->eobs + 16);
} else if (mode == SPLITMV || get_2nd_order_usage(xd) == 0) {
assert(get_2nd_order_usage(xd) == 0);
- pbi->idct_add_y_block(xd->qcoeff,
+ xd->itxm_add_y_block(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
xd->dst.y_buffer,
xd->dst.y_stride,
xd->eobs);
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
@@ -496,8 +489,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
*(b->base_dst) + b->dst, 16,
b->dst_stride, b->eob);
} else {
- vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
}
}
} else {
@@ -505,7 +498,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
assert(get_2nd_order_usage(xd) == 1);
vp9_dequantize_b(b);
if (xd->eobs[24] > 1) {
- vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
((int *)b->qcoeff)[1] = 0;
((int *)b->qcoeff)[2] = 0;
@@ -515,11 +508,11 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
((int *)b->qcoeff)[6] = 0;
((int *)b->qcoeff)[7] = 0;
} else {
- xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
}
vp9_dequantize_b(b);
- pbi->dc_idct_add_y_block(xd->qcoeff,
+ xd->dc_itxm_add_y_block(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
xd->dst.y_buffer,
@@ -527,7 +520,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->eobs,
xd->block[24].diff);
}
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
@@ -645,7 +638,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ x_idx * 16 + (i & 3) * 4,
xd->dst.y_stride, xd->dst.y_stride, b->eob);
} else {
- vp9_dequant_idct_add_c(
+ xd->itxm_add(
b->qcoeff, b->dequant,
xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+ x_idx * 16 + (i & 3) * 4,
@@ -657,7 +650,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
} else if (get_2nd_order_usage(xd) == 1) {
vp9_dequantize_b(b);
if (xd->eobs[24] > 1) {
- vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
((int *)b->qcoeff)[1] = 0;
((int *)b->qcoeff)[2] = 0;
@@ -667,7 +660,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
((int *)b->qcoeff)[6] = 0;
((int *)b->qcoeff)[7] = 0;
} else {
- xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
}
vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(
@@ -1534,17 +1527,24 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
pc->sb64_coded = vp9_read_literal(&header_bc, 8);
pc->sb32_coded = vp9_read_literal(&header_bc, 8);
-
- /* Read the loop filter level and type */
- pc->txfm_mode = vp9_read_literal(&header_bc, 2);
- if (pc->txfm_mode == 3)
- pc->txfm_mode += vp9_read_bit(&header_bc);
- if (pc->txfm_mode == TX_MODE_SELECT) {
- pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
- pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
- pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+#if CONFIG_LOSSLESS
+ xd->lossless = vp9_read_bit(&header_bc);
+ if (xd->lossless) {
+ pc->txfm_mode = ONLY_4X4;
+ }
+ else
+#endif
+ {
+ /* Read the transform mode (plus the tx-size probabilities when selectable) */
+ pc->txfm_mode = vp9_read_literal(&header_bc, 2);
+ if (pc->txfm_mode == 3)
+ pc->txfm_mode += vp9_read_bit(&header_bc);
+ if (pc->txfm_mode == TX_MODE_SELECT) {
+ pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
+ pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
+ pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+ }
}
-
pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);
pc->filter_level = vp9_read_literal(&header_bc, 6);
pc->sharpness_level = vp9_read_literal(&header_bc, 3);
@@ -1775,78 +1775,91 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
/* tile info */
{
- int log2_tile_cols;
const unsigned char *data_ptr = data + first_partition_length_in_bytes;
- int tile, mb_start, mb_end;
+ int tile_row, tile_col, delta_log2_tiles;
- log2_tile_cols = vp9_read_bit(&header_bc);
- if (log2_tile_cols) {
- log2_tile_cols += vp9_read_bit(&header_bc);
+ vp9_get_tile_n_bits(pc, &pc->log2_tile_columns, &delta_log2_tiles);
+ while (delta_log2_tiles--) {
+ if (vp9_read_bit(&header_bc)) {
+ pc->log2_tile_columns++;
+ } else {
+ break;
+ }
}
- pc->tile_columns = 1 << log2_tile_cols;
+ pc->log2_tile_rows = vp9_read_bit(&header_bc);
+ if (pc->log2_tile_rows)
+ pc->log2_tile_rows += vp9_read_bit(&header_bc);
+ pc->tile_columns = 1 << pc->log2_tile_columns;
+ pc->tile_rows = 1 << pc->log2_tile_rows;
vpx_memset(pc->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
if (pbi->oxcf.inv_tile_order) {
- const unsigned char *data_ptr2[4];
+ const int n_cols = pc->tile_columns;
+ const unsigned char *data_ptr2[4][1 << 6];
BOOL_DECODER UNINITIALIZED_IS_SAFE(bc_bak);
- data_ptr2[0] = data_ptr;
- for (tile = 1; tile < pc->tile_columns; tile++) {
- int size = data_ptr2[tile - 1][0] + (data_ptr2[tile - 1][1] << 8) +
- (data_ptr2[tile - 1][2] << 16) + (data_ptr2[tile - 1][3] << 24);
- data_ptr2[tile - 1] += 4;
- data_ptr2[tile] = data_ptr2[tile - 1] + size;
+ // pre-initialize the offsets, we're going to read in inverse order
+ data_ptr2[0][0] = data_ptr;
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ if (tile_row) {
+ int size = data_ptr2[tile_row - 1][n_cols - 1][0] +
+ (data_ptr2[tile_row - 1][n_cols - 1][1] << 8) +
+ (data_ptr2[tile_row - 1][n_cols - 1][2] << 16) +
+ (data_ptr2[tile_row - 1][n_cols - 1][3] << 24);
+ data_ptr2[tile_row - 1][n_cols - 1] += 4;
+ data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][n_cols - 1] + size;
+ }
+
+ for (tile_col = 1; tile_col < n_cols; tile_col++) {
+ int size = data_ptr2[tile_row][tile_col - 1][0] +
+ (data_ptr2[tile_row][tile_col - 1][1] << 8) +
+ (data_ptr2[tile_row][tile_col - 1][2] << 16) +
+ (data_ptr2[tile_row][tile_col - 1][3] << 24);
+ data_ptr2[tile_row][tile_col - 1] += 4;
+ data_ptr2[tile_row][tile_col] =
+ data_ptr2[tile_row][tile_col - 1] + size;
+ }
}
- for (mb_end = pc->mb_cols, tile = pc->tile_columns - 1;
- tile >= 0; tile--) {
- // calculate end of tile column
- const int sb_cols = (pc->mb_cols + 3) >> 2;
- const int sb_start = (sb_cols * tile) >> log2_tile_cols;
- mb_start = ((sb_start << 2) > pc->mb_cols) ?
- pc->mb_cols : (sb_start << 2);
-
- pc->cur_tile_idx = tile;
- pc->cur_tile_mb_col_start = mb_start;
- pc->cur_tile_mb_col_end = mb_end;
-
- setup_token_decoder(pbi, data_ptr2[tile], &residual_bc);
-
- /* Decode a row of superblocks */
- for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) {
- decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
+
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(pc, tile_row);
+ for (tile_col = n_cols - 1; tile_col >= 0; tile_col--) {
+ vp9_get_tile_col_offsets(pc, tile_col);
+ setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], &residual_bc);
+
+ /* Decode a row of superblocks */
+ for (mb_row = pc->cur_tile_mb_row_start;
+ mb_row < pc->cur_tile_mb_row_end; mb_row += 4) {
+ decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
+ }
+ if (tile_row == pc->tile_rows - 1 && tile_col == n_cols - 1)
+ bc_bak = residual_bc;
}
- mb_end = mb_start;
- if (tile == pc->tile_columns - 1)
- bc_bak = residual_bc;
}
residual_bc = bc_bak;
} else {
- for (mb_start = 0, tile = 0; tile < pc->tile_columns; tile++) {
- // calculate end of tile column
- const int sb_cols = (pc->mb_cols + 3) >> 2;
- const int sb_end = (sb_cols * (tile + 1)) >> log2_tile_cols;
- mb_end = ((sb_end << 2) > pc->mb_cols) ? pc->mb_cols : (sb_end << 2);
-
- pc->cur_tile_idx = tile;
- pc->cur_tile_mb_col_start = mb_start;
- pc->cur_tile_mb_col_end = mb_end;
-
- if (tile < pc->tile_columns - 1)
- setup_token_decoder(pbi, data_ptr + 4, &residual_bc);
- else
- setup_token_decoder(pbi, data_ptr, &residual_bc);
-
- /* Decode a row of superblocks */
- for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) {
- decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
- }
- mb_start = mb_end;
- if (tile < pc->tile_columns - 1) {
- int size = data_ptr[0] + (data_ptr[1] << 8) + (data_ptr[2] << 16) +
- (data_ptr[3] << 24);
- data_ptr += 4 + size;
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(pc, tile_row);
+ for (tile_col = 0; tile_col < pc->tile_columns; tile_col++) {
+ vp9_get_tile_col_offsets(pc, tile_col);
+
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1)
+ setup_token_decoder(pbi, data_ptr + 4, &residual_bc);
+ else
+ setup_token_decoder(pbi, data_ptr, &residual_bc);
+
+ /* Decode a row of superblocks */
+ for (mb_row = pc->cur_tile_mb_row_start;
+ mb_row < pc->cur_tile_mb_row_end; mb_row += 4) {
+ decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
+ }
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) {
+ int size = data_ptr[0] + (data_ptr[1] << 8) + (data_ptr[2] << 16) +
+ (data_ptr[3] << 24);
+ data_ptr += 4 + size;
+ }
}
}
}
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index 1f64767fa..92a9df84c 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -357,21 +357,17 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
if (eob) {
input[0] = input[0] * dq[0] / 2;
-#if !CONFIG_DWTDCTHYBRID
if (eob == 1) {
vp9_short_idct1_32x32_c(input, output);
add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32);
input[0] = 0;
} else {
-#endif
for (i = 1; i < 1024; i++)
input[i] = input[i] * dq[1] / 2;
vp9_short_idct32x32_c(input, output, 64);
vpx_memset(input, 0, 2048);
add_residual(output, pred, pitch, dest, stride, 32, 32);
-#if !CONFIG_DWTDCTHYBRID
}
-#endif
}
}
diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h
index 2edbd6a3a..b7efb44f1 100644
--- a/vp9/decoder/vp9_dequantize.h
+++ b/vp9/decoder/vp9_dequantize.h
@@ -42,20 +42,6 @@ extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *
uint16_t *eobs);
#endif
-typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq,
- unsigned char *pred, unsigned char *output, int pitch, int stride);
-typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq,
- unsigned char *pred, unsigned char *output, int pitch, int stride, int dc);
-
-typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
- unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs,
- const int16_t *dc);
-typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
- unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs);
-typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq,
- unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride,
- uint16_t *eobs);
-
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride, uint16_t eobs);
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index acf69d906..bfdb486b8 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -63,24 +63,11 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) {
return decode_bool(br, 128) ? -value_to_sign : value_to_sign;
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#define INCREMENT_COUNT(token) \
- do { \
- coef_counts[type][coef_bands[c]][pn][token]++; \
- pn = pt = vp9_prev_token_class[token]; \
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(coef_bands[c + 1])) \
- pn = vp9_get_coef_neighbor_context( \
- qcoeff_ptr, nodc, neighbors, scan[c + 1]); \
- } while (0)
-#else
-#define PT pt
#define INCREMENT_COUNT(token) \
do { \
- coef_counts[type][coef_bands[c]][pt][token]++; \
- pt = vp9_prev_token_class[token]; \
+ coef_counts[type][coef_bands[c]][pt][token]++; \
+ pt = vp9_get_coef_context(&recent_energy, token); \
} while (0)
-#endif /* CONFIG_NEWCOEFCONTEXT */
#define WRITE_COEF_CONTINUE(val, token) \
{ \
@@ -108,10 +95,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
const int lidx = vp9_block2left[txfm_size][block_idx];
ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0;
FRAME_CONTEXT *const fc = &dx->common.fc;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
-#endif
+ int recent_energy = 0;
int nodc = (type == PLANE_TYPE_Y_NO_DC);
int pt, c = nodc;
vp9_coeff_probs *coef_probs;
@@ -192,15 +176,11 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
}
VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec);
-#if CONFIG_NEWCOEFCONTEXT
- pn = pt;
- neighbors = vp9_get_coef_neighbors_handle(scan);
-#endif
while (1) {
int val;
const uint8_t *cat6 = cat6_prob;
if (c >= seg_eob) break;
- prob = coef_probs[type][coef_bands[c]][PT];
+ prob = coef_probs[type][coef_bands[c]][pt];
if (!vp9_read(br, prob[EOB_CONTEXT_NODE]))
break;
SKIP_START:
@@ -208,7 +188,7 @@ SKIP_START:
if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
++c;
- prob = coef_probs[type][coef_bands[c]][PT];
+ prob = coef_probs[type][coef_bands[c]][pt];
goto SKIP_START;
}
// ONE_CONTEXT_NODE_0_
@@ -272,7 +252,7 @@ SKIP_START:
}
if (c < seg_eob)
- coef_counts[type][coef_bands[c]][PT][DCT_EOB_TOKEN]++;
+ coef_counts[type][coef_bands[c]][pt][DCT_EOB_TOKEN]++;
A0[aidx] = L0[lidx] = (c > !type);
if (txfm_size >= TX_8X8 && type != PLANE_TYPE_Y2) {
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index b350e4d68..80b301931 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -51,9 +51,9 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
if (*eobs++ > 1)
- vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]);
+ xd->dc_itxm_add(q, dq, dst, dst, stride, stride, dc[0]);
else
- vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride);
+ xd->dc_only_itxm_add(dc[0], dst, dst, stride, stride);
q += 16;
dst += 4;
@@ -168,9 +168,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1) {
- vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride);
+ xd->itxm_add(q, dq, dstu, dstu, stride, stride);
} else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride);
+ xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride);
((int *)q)[0] = 0;
}
@@ -184,9 +184,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1) {
- vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride);
+ xd->itxm_add(q, dq, dstv, dstv, stride, stride);
} else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride);
+ xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride);
((int *)q)[0] = 0;
}
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index e04b9f5e4..0b0b90356 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -70,12 +70,6 @@ typedef struct VP9Decompressor {
DETOK detoken;
- vp9_dequant_idct_add_fn_t idct_add;
- vp9_dequant_dc_idct_add_fn_t dc_idct_add;
- vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
- vp9_dequant_idct_add_y_block_fn_t idct_add_y_block;
- vp9_dequant_idct_add_uv_block_fn_t idct_add_uv_block;
-
int refresh_frame_flags;
vp9_prob prob_skip_false;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index a3c407865..257ddb2c5 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -14,6 +14,7 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/common/vp9_systemdependent.h"
#include <assert.h>
@@ -1088,14 +1089,15 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
}
static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
- TOKENEXTRA **tok) {
+ TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
VP9_COMMON *const c = &cpi->common;
const int mis = c->mode_info_stride;
- MODE_INFO *m, *m_ptr = c->mi + c->cur_tile_mb_col_start;
+ MODE_INFO *m, *m_ptr = c->mi;
int i, mb_row, mb_col;
- TOKENEXTRA *tok_end = *tok + cpi->tok_count;
- for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) {
+ m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis;
+ for (mb_row = c->cur_tile_mb_row_start;
+ mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) {
m = m_ptr;
for (mb_col = c->cur_tile_mb_col_start;
mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) {
@@ -1667,7 +1669,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_write_literal(&header_bc, pc->sb64_coded, 8);
pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
vp9_write_literal(&header_bc, pc->sb32_coded, 8);
-
+#if CONFIG_LOSSLESS
+ vp9_write_bit(&header_bc, cpi->oxcf.lossless);
+ if (cpi->oxcf.lossless) {
+ pc->txfm_mode = ONLY_4X4;
+ }
+ else
+#endif
{
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
@@ -2026,9 +2034,22 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
}
/* tiling */
- vp9_write(&header_bc, pc->tile_columns > 1, 128);
- if (pc->tile_columns > 1) {
- vp9_write(&header_bc, pc->tile_columns > 2, 128);
+ {
+ int min_log2_tiles, delta_log2_tiles, n_tile_bits, n;
+
+ vp9_get_tile_n_bits(pc, &min_log2_tiles, &delta_log2_tiles);
+ n_tile_bits = pc->log2_tile_columns - min_log2_tiles;
+ for (n = 0; n < delta_log2_tiles; n++) {
+ if (n_tile_bits--) {
+ vp9_write_bit(&header_bc, 1);
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ break;
+ }
+ }
+ vp9_write_bit(&header_bc, pc->log2_tile_rows != 0);
+ if (pc->log2_tile_rows != 0)
+ vp9_write_bit(&header_bc, pc->log2_tile_rows != 1);
}
vp9_stop_encode(&header_bc);
@@ -2058,41 +2079,45 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
}
{
- int mb_start = 0, tile;
- int total_size = 0;
+ int tile_row, tile_col, total_size = 0;
unsigned char *data_ptr = cx_data + header_bc.pos;
- TOKENEXTRA *tok = cpi->tok;
-
- for (tile = 0; tile < pc->tile_columns; tile++) {
- // calculate end of tile column
- const int sb_cols = (pc->mb_cols + 3) >> 2;
- const int sb_end = (sb_cols * (tile + 1)) >> cpi->oxcf.tile_columns;
- const int mb_end = ((sb_end << 2) > pc->mb_cols) ?
- pc->mb_cols : (sb_end << 2);
-
- pc->cur_tile_idx = tile;
- pc->cur_tile_mb_col_start = mb_start;
- pc->cur_tile_mb_col_end = mb_end;
-
- if (tile < pc->tile_columns - 1)
- vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
- else
- vp9_start_encode(&residual_bc, data_ptr + total_size);
- write_modes(cpi, &residual_bc, &tok);
- vp9_stop_encode(&residual_bc);
- if (tile < pc->tile_columns - 1) {
- /* size of this tile */
- data_ptr[total_size + 0] = residual_bc.pos;
- data_ptr[total_size + 1] = residual_bc.pos >> 8;
- data_ptr[total_size + 2] = residual_bc.pos >> 16;
- data_ptr[total_size + 3] = residual_bc.pos >> 24;
- total_size += 4;
- }
+ TOKENEXTRA *tok[1 << 6], *tok_end;
+
+ tok[0] = cpi->tok;
+ for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
+ tok[tile_col] = tok[tile_col - 1] + cpi->tok_count[tile_col - 1];
+
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(pc, tile_row);
+ tok_end = cpi->tok + cpi->tok_count[0];
+ for (tile_col = 0; tile_col < pc->tile_columns;
+ tile_col++, tok_end += cpi->tok_count[tile_col]) {
+ vp9_get_tile_col_offsets(pc, tile_col);
+
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1)
+ vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
+ else
+ vp9_start_encode(&residual_bc, data_ptr + total_size);
+ write_modes(cpi, &residual_bc, &tok[tile_col], tok_end);
+ vp9_stop_encode(&residual_bc);
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) {
+ /* size of this tile */
+ data_ptr[total_size + 0] = residual_bc.pos;
+ data_ptr[total_size + 1] = residual_bc.pos >> 8;
+ data_ptr[total_size + 2] = residual_bc.pos >> 16;
+ data_ptr[total_size + 3] = residual_bc.pos >> 24;
+ total_size += 4;
+ }
- mb_start = mb_end;
- total_size += residual_bc.pos;
+ total_size += residual_bc.pos;
+ }
}
+ assert((unsigned int)(tok[0] - cpi->tok) == cpi->tok_count[0]);
+ for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
+ assert((unsigned int)(tok[tile_col] - tok[tile_col - 1]) ==
+ cpi->tok_count[tile_col]);
+
*size += total_size;
}
}
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 91d4c4530..d5110c810 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -169,14 +169,14 @@ typedef struct macroblock {
PICK_MODE_CONTEXT sb32_context[4];
PICK_MODE_CONTEXT sb64_context;
- void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);
- void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_2ndtxm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_2ndtxm2x2)(int16_t *input, int16_t *output, int pitch);
void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
- void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch);
- void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch);
void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 7af044fe4..746648291 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -323,6 +323,8 @@ static const int16_t adst_i16[256] = {
};
#endif
+#define NEW_FDCT8x8 1
+#if !NEW_FDCT8x8
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
static const int xC3S5 = 13623;
@@ -560,6 +562,7 @@ void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
op++;
}
}
+#endif
void vp9_short_fhaar2x2_c(short *input, short *output, int pitch) {
/* [1 1; 1 -1] orthogonal transform */
@@ -836,6 +839,79 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
}
+#if NEW_FDCT8x8
+static void fdct8_1d(int16_t *input, int16_t *output) {  // 8-point 1-D forward transform: reads input[0..7], writes output[0..7]
+ int16_t step[8];  // intermediate butterfly values
+ int temp1, temp2;  // int-width products, narrowed via dct_const_round_shift()
+
+ // Stage 1: step[0..3] = sums of mirrored input pairs, step[4..7] = their differences
+ step[0] = input[0] + input[7];
+ step[1] = input[1] + input[6];
+ step[2] = input[2] + input[5];
+ step[3] = input[3] + input[4];
+ step[4] = input[3] - input[4];
+ step[5] = input[2] - input[5];
+ step[6] = input[1] - input[6];
+ step[7] = input[0] - input[7];
+
+ fdct4_1d(step, step);  // in-place call; helper is outside this hunk - presumably a 4-point transform of step[0..3] that leaves step[4..7] alone (TODO confirm)
+
+ // Stage 2: output[4..7] is used as scratch here; all four are overwritten with final values in stage 4
+ output[4] = step[4];
+ temp1 = (-step[5] + step[6]) * cospi_16_64;
+ temp2 = (step[6] + step[5]) * cospi_16_64;
+ output[5] = dct_const_round_shift(temp1);
+ output[6] = dct_const_round_shift(temp2);
+ output[7] = step[7];
+
+ // Stage 3: sum/difference butterflies on the scratch values, back into step[4..7]
+ step[4] = output[4] + output[5];
+ step[5] = -output[5] + output[4];
+ step[6] = -output[6] + output[7];
+ step[7] = output[7] + output[6];
+
+ // Stage 4: even outputs come from step[0..3] (step[1] -> output[2], step[2] -> output[4]); odd outputs from step[4..7]
+ output[0] = step[0];
+ output[4] = step[2];
+ output[2] = step[1];
+ output[6] = step[3];
+
+ temp1 = step[4] * cospi_28_64 + step[7] * cospi_4_64;
+ temp2 = step[5] * cospi_12_64 + step[6] * cospi_20_64;
+ output[1] = dct_const_round_shift(temp1);
+ output[5] = dct_const_round_shift(temp2);
+ temp1 = step[6] * cospi_12_64 + step[5] * -cospi_20_64;
+ temp2 = step[7] * cospi_28_64 + step[4] * -cospi_4_64;
+ output[3] = dct_const_round_shift(temp1);
+ output[7] = dct_const_round_shift(temp2);
+}
+
+void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) {  // 2-D 8x8 forward transform: fdct8_1d() over columns, then over rows
+ int shortpitch = pitch >> 1;  // input row stride in int16_t elements; presumably pitch is given in bytes (TODO confirm)
+ int i, j;
+ int16_t out[64];  // column-pass result, 8x8 row-major
+ int16_t temp_in[8], temp_out[8];  // one column/row in, one transformed column/row out
+
+ // First transform columns (input is pre-scaled by 4 via << 2)
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++)
+ temp_in[j] = input[j * shortpitch + i] << 2;  // gather column i
+ fdct8_1d(temp_in, temp_out);
+ for (j = 0; j < 8; j++)
+ out[j * 8 + i] = temp_out[j];  // scatter back into column i of out
+ }
+
+ // Then transform rows (final coefficients are halved via >> 1)
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j + i * 8];  // gather row i
+ fdct8_1d(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = temp_out[j] >> 1;  // output is written as a contiguous 8x8 block (stride 8)
+ }
+}
+#endif
+
void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
int i;
int a1, b1, c1, d1;
@@ -1395,8 +1471,6 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) {
#undef ROUNDING
#endif
-#if !CONFIG_DWTDCTHYBRID
-
#define TEST_INT_32x32_DCT 1
#if !TEST_INT_32x32_DCT
@@ -2134,706 +2208,3 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
}
#endif
-
-#else // CONFIG_DWTDCTHYBRID
-
-#if DWT_TYPE == 53
-
-// Note: block length must be even for this implementation
-static void analysis_53_row(int length, short *x,
- short *lowpass, short *highpass) {
- int n;
- short r, *a, *b;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++) << 1;
- *b++ = *x - ((r + x[1] + 1) >> 1);
- x++;
- }
- *a = (r = *x++) << 1;
- *b = *x - r;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
- }
-}
-
-static void analysis_53_col(int length, short *x,
- short *lowpass, short *highpass) {
- int n;
- short r, *a, *b;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++);
- *b++ = (((*x) << 1) - (r + x[1]) + 2) >> 2;
- x++;
- }
- *a = (r = *x++);
- *b = (*x - r + 1) >> 1;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
- }
-}
-
-static void dyadic_analyze_53(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
- analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = c[i * pitch_c + j];
- analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i];
- }
- }
-}
-
-#elif DWT_TYPE == 26
-
-static void analysis_26_row(int length, short *x,
- short *lowpass, short *highpass) {
- int i, n;
- short r, s, *a, *b;
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- r = *x++;
- s = *x++;
- *a++ = r + s;
- *b++ = r - s;
- }
- n = length >> 1;
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ -= (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b -= (r - *a + 4) >> 3;
- }
-}
-
-static void analysis_26_col(int length, short *x,
- short *lowpass, short *highpass) {
- int i, n;
- short r, s, *a, *b;
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- r = *x++;
- s = *x++;
- *a++ = (r + s + 1) >> 1;
- *b++ = (r - s + 1) >> 1;
- }
- n = length >> 1;
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ -= (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b -= (r - *a + 4) >> 3;
- }
-}
-
-static void dyadic_analyze_26(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
- analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = c[i * pitch_c + j];
- analysis_26_col(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i];
- }
- }
-}
-
-#elif DWT_TYPE == 97
-
-static void analysis_97(int length, double *x,
- double *lowpass, double *highpass) {
- static const double a_predict1 = -1.586134342;
- static const double a_update1 = -0.05298011854;
- static const double a_predict2 = 0.8829110762;
- static const double a_update2 = 0.4435068522;
- static const double s_low = 1.149604398;
- static const double s_high = 1/1.149604398;
- int i;
- double y[DWT_MAX_LENGTH];
- // Predict 1
- for (i = 1; i < length - 2; i += 2) {
- x[i] += a_predict1 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] += 2 * a_predict1 * x[length - 2];
- // Update 1
- for (i = 2; i < length; i += 2) {
- x[i] += a_update1 * (x[i - 1] + x[i + 1]);
- }
- x[0] += 2 * a_update1 * x[1];
- // Predict 2
- for (i = 1; i < length - 2; i += 2) {
- x[i] += a_predict2 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] += 2 * a_predict2 * x[length - 2];
- // Update 2
- for (i = 2; i < length; i += 2) {
- x[i] += a_update2 * (x[i - 1] + x[i + 1]);
- }
- x[0] += 2 * a_update2 * x[1];
- memcpy(y, x, sizeof(*y) * length);
- // Scale and pack
- for (i = 0; i < length / 2; i++) {
- lowpass[i] = y[2 * i] * s_low;
- highpass[i] = y[2 * i + 1] * s_high;
- }
-}
-
-static void dyadic_analyze_97(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- double buffer[2 * DWT_MAX_LENGTH];
- double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
- analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH],
- &y[i * DWT_MAX_LENGTH] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = y[i * DWT_MAX_LENGTH + j];
- analysis_97(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = round(buffer[i]);
- }
- }
-}
-
-#endif // DWT_TYPE
-
-// TODO(debargha): Implement the scaling differently so as not to have to
-// use the floating point dct
-static void dct16x16_1d_f(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
- // step 1
- step[ 0] = input[0] + input[15];
- step[ 1] = input[1] + input[14];
- step[ 2] = input[2] + input[13];
- step[ 3] = input[3] + input[12];
- step[ 4] = input[4] + input[11];
- step[ 5] = input[5] + input[10];
- step[ 6] = input[6] + input[ 9];
- step[ 7] = input[7] + input[ 8];
- step[ 8] = input[7] - input[ 8];
- step[ 9] = input[6] - input[ 9];
- step[10] = input[5] - input[10];
- step[11] = input[4] - input[11];
- step[12] = input[3] - input[12];
- step[13] = input[2] - input[13];
- step[14] = input[1] - input[14];
- step[15] = input[0] - input[15];
-
- // step 2
- output[0] = step[0] + step[7];
- output[1] = step[1] + step[6];
- output[2] = step[2] + step[5];
- output[3] = step[3] + step[4];
- output[4] = step[3] - step[4];
- output[5] = step[2] - step[5];
- output[6] = step[1] - step[6];
- output[7] = step[0] - step[7];
-
- temp1 = step[ 8]*C7;
- temp2 = step[15]*C9;
- output[ 8] = temp1 + temp2;
-
- temp1 = step[ 9]*C11;
- temp2 = step[14]*C5;
- output[ 9] = temp1 - temp2;
-
- temp1 = step[10]*C3;
- temp2 = step[13]*C13;
- output[10] = temp1 + temp2;
-
- temp1 = step[11]*C15;
- temp2 = step[12]*C1;
- output[11] = temp1 - temp2;
-
- temp1 = step[11]*C1;
- temp2 = step[12]*C15;
- output[12] = temp2 + temp1;
-
- temp1 = step[10]*C13;
- temp2 = step[13]*C3;
- output[13] = temp2 - temp1;
-
- temp1 = step[ 9]*C5;
- temp2 = step[14]*C11;
- output[14] = temp2 + temp1;
-
- temp1 = step[ 8]*C9;
- temp2 = step[15]*C7;
- output[15] = temp2 - temp1;
-
- // step 3
- step[ 0] = output[0] + output[3];
- step[ 1] = output[1] + output[2];
- step[ 2] = output[1] - output[2];
- step[ 3] = output[0] - output[3];
-
- temp1 = output[4]*C14;
- temp2 = output[7]*C2;
- step[ 4] = temp1 + temp2;
-
- temp1 = output[5]*C10;
- temp2 = output[6]*C6;
- step[ 5] = temp1 + temp2;
-
- temp1 = output[5]*C6;
- temp2 = output[6]*C10;
- step[ 6] = temp2 - temp1;
-
- temp1 = output[4]*C2;
- temp2 = output[7]*C14;
- step[ 7] = temp2 - temp1;
-
- step[ 8] = output[ 8] + output[11];
- step[ 9] = output[ 9] + output[10];
- step[10] = output[ 9] - output[10];
- step[11] = output[ 8] - output[11];
-
- step[12] = output[12] + output[15];
- step[13] = output[13] + output[14];
- step[14] = output[13] - output[14];
- step[15] = output[12] - output[15];
-
- // step 4
- output[ 0] = (step[ 0] + step[ 1]);
- output[ 8] = (step[ 0] - step[ 1]);
-
- temp1 = step[2]*C12;
- temp2 = step[3]*C4;
- temp1 = temp1 + temp2;
- output[ 4] = 2*(temp1*C8);
-
- temp1 = step[2]*C4;
- temp2 = step[3]*C12;
- temp1 = temp2 - temp1;
- output[12] = 2*(temp1*C8);
-
- output[ 2] = 2*((step[4] + step[ 5])*C8);
- output[14] = 2*((step[7] - step[ 6])*C8);
-
- temp1 = step[4] - step[5];
- temp2 = step[6] + step[7];
- output[ 6] = (temp1 + temp2);
- output[10] = (temp1 - temp2);
-
- intermediate[8] = step[8] + step[14];
- intermediate[9] = step[9] + step[15];
-
- temp1 = intermediate[8]*C12;
- temp2 = intermediate[9]*C4;
- temp1 = temp1 - temp2;
- output[3] = 2*(temp1*C8);
-
- temp1 = intermediate[8]*C4;
- temp2 = intermediate[9]*C12;
- temp1 = temp2 + temp1;
- output[13] = 2*(temp1*C8);
-
- output[ 9] = 2*((step[10] + step[11])*C8);
-
- intermediate[11] = step[10] - step[11];
- intermediate[12] = step[12] + step[13];
- intermediate[13] = step[12] - step[13];
- intermediate[14] = step[ 8] - step[14];
- intermediate[15] = step[ 9] - step[15];
-
- output[15] = (intermediate[11] + intermediate[12]);
- output[ 1] = -(intermediate[11] - intermediate[12]);
-
- output[ 7] = 2*(intermediate[13]*C8);
-
- temp1 = intermediate[14]*C12;
- temp2 = intermediate[15]*C4;
- temp1 = temp1 - temp2;
- output[11] = -2*(temp1*C8);
-
- temp1 = intermediate[14]*C4;
- temp2 = intermediate[15]*C12;
- temp1 = temp2 + temp1;
- output[ 5] = 2*(temp1*C8);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch,
- int scale) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[256];
- // First transform columns
- for (i = 0; i < 16; i++) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct16x16_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; j++)
- output[j*16 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = output[j + i*16];
- dct16x16_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i*16] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 256; i++)
- out[i] = (short)round(output[i] / (2 << scale));
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) {
- int j1, i, j, k;
- float b[8];
- float b1[8];
- float d[8][8];
- float f0 = (float) .7071068;
- float f1 = (float) .4903926;
- float f2 = (float) .4619398;
- float f3 = (float) .4157348;
- float f4 = (float) .3535534;
- float f5 = (float) .2777851;
- float f6 = (float) .1913417;
- float f7 = (float) .0975452;
- pitch = pitch / 2;
- for (i = 0, k = 0; i < 8; i++, k += pitch) {
- for (j = 0; j < 8; j++) {
- b[j] = (float)(block[k + j] << (3 - scale));
- }
- /* Horizontal transform */
- for (j = 0; j < 4; j++) {
- j1 = 7 - j;
- b1[j] = b[j] + b[j1];
- b1[j1] = b[j] - b[j1];
- }
- b[0] = b1[0] + b1[3];
- b[1] = b1[1] + b1[2];
- b[2] = b1[1] - b1[2];
- b[3] = b1[0] - b1[3];
- b[4] = b1[4];
- b[5] = (b1[6] - b1[5]) * f0;
- b[6] = (b1[6] + b1[5]) * f0;
- b[7] = b1[7];
- d[i][0] = (b[0] + b[1]) * f4;
- d[i][4] = (b[0] - b[1]) * f4;
- d[i][2] = b[2] * f6 + b[3] * f2;
- d[i][6] = b[3] * f6 - b[2] * f2;
- b1[4] = b[4] + b[5];
- b1[7] = b[7] + b[6];
- b1[5] = b[4] - b[5];
- b1[6] = b[7] - b[6];
- d[i][1] = b1[4] * f7 + b1[7] * f1;
- d[i][5] = b1[5] * f3 + b1[6] * f5;
- d[i][7] = b1[7] * f7 - b1[4] * f1;
- d[i][3] = b1[6] * f3 - b1[5] * f5;
- }
- /* Vertical transform */
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 4; j++) {
- j1 = 7 - j;
- b1[j] = d[j][i] + d[j1][i];
- b1[j1] = d[j][i] - d[j1][i];
- }
- b[0] = b1[0] + b1[3];
- b[1] = b1[1] + b1[2];
- b[2] = b1[1] - b1[2];
- b[3] = b1[0] - b1[3];
- b[4] = b1[4];
- b[5] = (b1[6] - b1[5]) * f0;
- b[6] = (b1[6] + b1[5]) * f0;
- b[7] = b1[7];
- d[0][i] = (b[0] + b[1]) * f4;
- d[4][i] = (b[0] - b[1]) * f4;
- d[2][i] = b[2] * f6 + b[3] * f2;
- d[6][i] = b[3] * f6 - b[2] * f2;
- b1[4] = b[4] + b[5];
- b1[7] = b[7] + b[6];
- b1[5] = b[4] - b[5];
- b1[6] = b[7] - b[6];
- d[1][i] = b1[4] * f7 + b1[7] * f1;
- d[5][i] = b1[5] * f3 + b1[6] * f5;
- d[7][i] = b1[7] * f7 - b1[4] * f1;
- d[3][i] = b1[6] * f3 - b1[5] * f5;
- }
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- *(coefs + j + i * 8) = (short) floor(d[i][j] + 0.5);
- }
- }
- return;
-}
-
-#define divide_bits(d, n) ((n) < 0 ? (d) << (n) : (d) >> (n))
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
-}
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
- vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16);
-}
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[8 * 8];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(2, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(2, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(2, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct8x8_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 8, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 32 * 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 32 * 8, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 33 * 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 33 * 8, buffer + i * 8, sizeof(short) * 8);
-
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
-}
-
-#endif
-
-#if CONFIG_TX64X64
-void vp9_short_fdct64x64_c(short *input, short *out, int pitch) {
- // assume out is a 64x64 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(2, 64, 64, input, short_pitch, out, 64);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(2, 64, 64, input, short_pitch, out, 64);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(2, 64, 64, input, short_pitch, out, 64);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16);
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 48; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 16; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
-#elif DWTDCT_TYPE == DWTDCT16X16
- vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 64 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 64 * 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 65 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 65 * 16, buffer + i * 16, sizeof(short) * 16);
-
- // There is no dct used on the highest bands for now.
- // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS
- // TODO(debargha): experiment with turning these coeffs to 0
- for (i = 0; i < 32; ++i) {
- for (j = 32; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 32; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
-#endif // DWTDCT_TYPE
-}
-#endif // CONFIG_TX64X64
-#endif // CONFIG_DWTDCTHYBRID
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 927a1b901..fe33f2ebf 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -28,6 +28,7 @@
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9_rtcd.h"
#include <stdio.h>
@@ -1230,8 +1231,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
MACROBLOCK *const x = &cpi->mb;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
-
- TOKENEXTRA *tp = cpi->tok;
int totalrate;
// printf("encode_frame_internal frame %d (%d)\n",
@@ -1312,26 +1311,19 @@ static void encode_frame_internal(VP9_COMP *cpi) {
{
// Take tiles into account and give start/end MB
- int tile, mb_start = 0;
+ int tile_col;
+ TOKENEXTRA *tp = cpi->tok;
- for (tile = 0; tile < cm->tile_columns; tile++) {
- // calculate end of tile column
- const int sb_cols = (cm->mb_cols + 3) >> 2;
- const int sb_end = (sb_cols * (tile + 1)) >> cpi->oxcf.tile_columns;
- const int mb_end = ((sb_end << 2) > cm->mb_cols) ?
- cm->mb_cols : (sb_end << 2);
+ for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
+ TOKENEXTRA *tp_old = tp;
// For each row of SBs in the frame
- cm->cur_tile_idx = tile;
- cm->cur_tile_mb_col_start = mb_start;
- cm->cur_tile_mb_col_end = mb_end;
+ vp9_get_tile_col_offsets(cm, tile_col);
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) {
encode_sb_row(cpi, mb_row, &tp, &totalrate);
}
- mb_start = mb_end;
+ cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
}
-
- cpi->tok_count = (unsigned int)(tp - cpi->tok);
}
vpx_usec_timer_mark(&emr_timer);
@@ -1543,8 +1535,10 @@ void vp9_encode_frame(VP9_COMP *cpi) {
/* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
#if CONFIG_LOSSLESS
+ cpi->mb.e_mbd.lossless = 0;
if (cpi->oxcf.lossless) {
txfm_type = ONLY_4X4;
+ cpi->mb.e_mbd.lossless = 1;
} else
#endif
/* FIXME (rbultje)
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 1dd30130a..a52763080 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -62,7 +62,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
#endif
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b) ;
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
}
@@ -165,7 +165,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
tx_type, 8, xd->block[idx].eob);
#endif
} else {
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
}
@@ -183,13 +183,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
#endif
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
i++;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
}
@@ -222,7 +222,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib,
vp9_subtract_b(be, b, 8);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 16);
x->quantize_b_4x4(be, b);
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 52eabf129..12082a88d 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -188,11 +188,11 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
assert(has_2nd_order == 0);
vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
} else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
- x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+ x->fwd_txm8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
i++;
} else {
- x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
+ x->fwd_txm4x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
}
}
@@ -202,7 +202,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
build_dcblock_4x4(x);
// do 2nd order transform on the dc block
- x->short_walsh4x4(&x->block[24].src_diff[0],
+ x->fwd_2ndtxm4x4(&x->block[24].src_diff[0],
&x->block[24].coeff[0], 8);
} else {
vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@@ -213,7 +213,7 @@ void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i += 2) {
- x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+ x->fwd_txm8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 16);
}
}
@@ -253,7 +253,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
assert(has_2nd_order == 0);
vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ x->fwd_txm8x8(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
}
}
@@ -264,7 +264,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
assert(has_2nd_order == 0);
vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ x->fwd_txm8x8(&x->block[i].src_diff[0],
&x->block[i + 2].coeff[0], 32);
}
}
@@ -274,7 +274,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
build_dcblock_8x8(x);
// do 2nd order transform on the dc block
- x->short_fhaar2x2(&x->block[24].src_diff[0],
+ x->fwd_2ndtxm2x2(&x->block[24].src_diff[0],
&x->block[24].coeff[0], 8);
} else {
vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@@ -285,7 +285,7 @@ void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i += 4) {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ x->fwd_txm8x8(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 16);
}
}
@@ -303,7 +303,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) {
if (tx_type != DCT_DCT) {
vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
} else {
- x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
+ x->fwd_txm16x16(&x->block[0].src_diff[0],
&x->block[0].coeff[0], 32);
}
}
@@ -321,9 +321,9 @@ void vp9_transform_sby_32x32(MACROBLOCK *x) {
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
vp9_clear_system_state();
- x->vp9_short_fdct16x16(x_sb->src_diff + 1024,
+ x->fwd_txm16x16(x_sb->src_diff + 1024,
x_sb->coeff + 1024, 32);
- x->vp9_short_fdct16x16(x_sb->src_diff + 1280,
+ x->fwd_txm16x16(x_sb->src_diff + 1280,
x_sb->coeff + 1280, 32);
}
@@ -361,6 +361,13 @@ static const int plane_rd_mult[4] = {
}\
}
+// Placeholder for now: yields vp9_get_coef_context() for `token` starting from
+// zero recent energy; may ultimately need to scan previous tokens instead.
+static int trellis_get_coeff_context(int token) {
+ int recent_energy = 0;  // re-zeroed on every call, so no history is carried over
+ return vp9_get_coef_context(&recent_energy, token);
+}
+
static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int tx_size) {
@@ -380,9 +387,6 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
int err_mult = plane_rd_mult[type];
int default_eob;
int const *scan, *bands;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
-#endif
switch (tx_size) {
default:
@@ -424,9 +428,6 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
default_eob = 256;
break;
}
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
-#endif
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
rdmult = mb->rdmult * err_mult;
@@ -459,12 +460,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
/* Consider both possible successor states. */
if (next < default_eob) {
band = bands[i + 1];
- pt = vp9_prev_token_class[t0];
-#if CONFIG_NEWCOEFCONTEXT
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
-#endif
+ pt = trellis_get_coeff_context(t0);
rate0 +=
mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
rate1 +=
@@ -512,34 +508,12 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
if (next < default_eob) {
band = bands[i + 1];
if (t0 != DCT_EOB_TOKEN) {
-#if CONFIG_NEWCOEFCONTEXT
- int tmp = qcoeff_ptr[scan[i]];
- qcoeff_ptr[scan[i]] = x;
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
- else
- pt = vp9_prev_token_class[t0];
- qcoeff_ptr[scan[i]] = tmp;
-#else
- pt = vp9_prev_token_class[t0];
-#endif
+ pt = trellis_get_coeff_context(t0);
rate0 += mb->token_costs[tx_size][type][band][pt][
tokens[next][0].token];
}
if (t1 != DCT_EOB_TOKEN) {
-#if CONFIG_NEWCOEFCONTEXT
- int tmp = qcoeff_ptr[scan[i]];
- qcoeff_ptr[scan[i]] = x;
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
- else
- pt = vp9_prev_token_class[t1];
- qcoeff_ptr[scan[i]] = tmp;
-#else
- pt = vp9_prev_token_class[t1];
-#endif
+ pt = trellis_get_coeff_context(t1);
rate1 += mb->token_costs[tx_size][type][band][pt][
tokens[next][1].token];
}
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 4694a92c6..3791737d2 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1546,7 +1546,7 @@ int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
int bestsad = INT_MAX;
int r, c;
@@ -1641,7 +1641,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
@@ -1770,7 +1770,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index ad5fe7819..3e5940f55 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -23,6 +23,7 @@
#include "vp9/common/vp9_extend.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
@@ -752,10 +753,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->quarter_pixel_search = 1;
sf->half_pixel_search = 1;
sf->iterative_sub_pixel = 1;
-#if CONFIG_LOSSLESS
- sf->optimize_coefficients = 0;
-#else
sf->optimize_coefficients = 1;
+#if CONFIG_LOSSLESS
+ if (cpi->oxcf.lossless)
+ sf->optimize_coefficients = 0;
#endif
sf->no_skip_block4x4_search = 1;
sf->first_step = 0;
@@ -840,20 +841,18 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
}
}
- cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;
- cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;
- cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
+ cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16;
+ cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8;
+ cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
+ cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4;
+ cpi->mb.fwd_2ndtxm2x2 = vp9_short_fhaar2x2;
#if CONFIG_LOSSLESS
if (cpi->oxcf.lossless) {
- cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;
+ cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8;
+ cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8;
+ cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4_lossless;
}
#endif
@@ -949,7 +948,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
-
vpx_free(cpi->tok);
{
@@ -1107,6 +1105,22 @@ rescale(int val, int num, int denom) {
return (int)(llval * llnum / llden);
}
+static void set_tile_limits(VP9_COMP *cpi) {  // Sets cm's tile counts from cpi->oxcf.
+ VP9_COMMON *const cm = &cpi->common;
+ int min_log2_tiles, max_log2_tiles;
+ // Seed both log2 tile counts straight from the encoder configuration.
+ cm->log2_tile_columns = cpi->oxcf.tile_columns;
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
+ // Clamp the column count only; log2_tile_rows is used exactly as configured.
+ vp9_get_tile_n_bits(cm, &min_log2_tiles, &max_log2_tiles);
+ max_log2_tiles += min_log2_tiles;  // NOTE(review): presumably the helper returns max as an offset above min - confirm
+ if (cm->log2_tile_columns < min_log2_tiles)
+ cm->log2_tile_columns = min_log2_tiles;
+ else if (cm->log2_tile_columns > max_log2_tiles)
+ cm->log2_tile_columns = max_log2_tiles;
+ cm->tile_columns = 1 << cm->log2_tile_columns;  // actual counts are powers of two
+ cm->tile_rows = 1 << cm->log2_tile_rows;
+}
static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
@@ -1145,7 +1159,7 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->gld_fb_idx = 1;
cpi->alt_fb_idx = 2;
- cm->tile_columns = 1 << cpi->oxcf.tile_columns;
+ set_tile_limits(cpi);
#if VP9_TEMPORAL_ALT_REF
{
@@ -1206,18 +1220,18 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm;
+ cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1;
+ cpi->mb.e_mbd.inv_2ndtxm4x4 = vp9_short_inv_walsh4x4;
#if CONFIG_LOSSLESS
cpi->oxcf.lossless = oxcf->lossless;
if (cpi->oxcf.lossless) {
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8;
+ cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+ cpi->mb.e_mbd.inv_2ndtxm4x4 = vp9_short_inv_walsh4x4_lossless;
}
#endif
@@ -1372,7 +1386,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->last_frame_distortion = 0;
#endif
- cm->tile_columns = 1 << cpi->oxcf.tile_columns;
+ set_tile_limits(cpi);
}
#define M_LOG2_E 0.693147180559945309417
@@ -2619,10 +2633,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// For 2 Pass Only used where GF/ARF prediction quality
// is above a threshold
cpi->zbin_mode_boost = 0;
-#if CONFIG_LOSSLESS
- cpi->zbin_mode_boost_enabled = FALSE;
-#else
cpi->zbin_mode_boost_enabled = TRUE;
+#if CONFIG_LOSSLESS
+ if (cpi->oxcf.lossless)
+ cpi->zbin_mode_boost_enabled = FALSE;
#endif
if (cpi->gfu_boost <= 400) {
cpi->zbin_mode_boost_enabled = FALSE;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 7acaef472..1476de4da 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -347,7 +347,7 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tok;
- unsigned int tok_count;
+ unsigned int tok_count[1 << 6];
unsigned int frames_since_key;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index b5dbef0b3..e66db7499 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -228,43 +228,71 @@ void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) {
}
void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int zero_run = 0;
- int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int16_t *coeff_ptr = b->coeff;
- int16_t *zbin_ptr = b->zbin;
- int16_t *round_ptr = b->round;
- int16_t *quant_ptr = b->quant;
- uint8_t *quant_shift_ptr = b->quant_shift;
int16_t *qcoeff_ptr = d->qcoeff;
int16_t *dqcoeff_ptr = d->dqcoeff;
- int16_t *dequant_ptr = d->dequant;
- int zbin_oq_value = b->zbin_extra;
vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
- eob = -1;
-
if (!b->skip_block) {
- for (i = 0; i < 64; i++) {
+ int i, rc, eob;
+ int zbin;
+ int x, y, z, sz;
+ int zero_run;
+ int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
+ int16_t *coeff_ptr = b->coeff;
+ int16_t *zbin_ptr = b->zbin;
+ int16_t *round_ptr = b->round;
+ int16_t *quant_ptr = b->quant;
+ uint8_t *quant_shift_ptr = b->quant_shift;
+ int16_t *dequant_ptr = d->dequant;
+ int zbin_oq_value = b->zbin_extra;
+
+ eob = -1;
+
+ // Special case for DC as it is the one triggering access in various
+ // tables: {zbin, quant, quant_shift, dequant}_ptr[rc != 0]
+ {
+ z = coeff_ptr[0];
+ zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value);
+ zero_run = 1;
+
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ if (x >= zbin) {
+ x += (round_ptr[0]);
+ y = ((int)(((int)(x * quant_ptr[0]) >> 16) + x))
+ >> quant_shift_ptr[0]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[0] = x; // write to destination
+ dqcoeff_ptr[0] = x * dequant_ptr[0]; // dequantized value
+
+ if (y) {
+ eob = 0; // last nonzero coeffs
+ zero_run = 0;
+ }
+ }
+ }
+ for (i = 1; i < 64; i++) {
rc = vp9_default_zig_zag1d_8x8[i];
z = coeff_ptr[rc];
- zbin = (zbin_ptr[rc != 0] + zbin_boost_ptr[zero_run] + zbin_oq_value);
- zero_run += (zero_run < 15);
+ zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value);
+ // The original code was incrementing zero_run while keeping it at
+ // maximum 15 by adding "(zero_run < 15)". The same is achieved by
+ // removing the opposite of the sign mask of "(zero_run - 15)".
+ zero_run -= (zero_run - 15) >> 31;
sz = (z >> 31); // sign of z
x = (z ^ sz) - sz; // x = abs(z)
if (x >= zbin) {
x += (round_ptr[rc != 0]);
- y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
- >> quant_shift_ptr[rc != 0]; // quantize (x)
+ y = ((int)(((int)(x * quant_ptr[1]) >> 16) + x))
+ >> quant_shift_ptr[1]; // quantize (x)
x = (y ^ sz) - sz; // get the sign back
qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value
+ dqcoeff_ptr[rc] = x * dequant_ptr[1]; // dequantized value
if (y) {
eob = i; // last nonzero coeffs
@@ -272,8 +300,10 @@ void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
}
}
}
+ d->eob = eob + 1;
+ } else {
+ d->eob = 0;
}
- d->eob = eob + 1;
}
void vp9_quantize_mby_8x8(MACROBLOCK *x) {
@@ -460,18 +490,14 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
14, 16, 20, 24, 28, 32, 36, 40 };
-
- int qrounding_factor = 48;
-
for (Q = 0; Q < QINDEX_RANGE; Q++) {
int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;
+ int qrounding_factor = 48;
#if CONFIG_LOSSLESS
- if (cpi->oxcf.lossless) {
- if (Q == 0) {
- qzbin_factor = 64;
- qrounding_factor = 64;
- }
+ if (cpi->oxcf.lossless && Q == 0) {
+ qzbin_factor = 64;
+ qrounding_factor = 64;
}
#endif
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 29893b819..8385a1872 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -419,11 +419,6 @@ int vp9_uvsse(MACROBLOCK *x) {
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#else
-#define PT pt
-#endif
static INLINE int cost_coeffs(MACROBLOCK *mb,
BLOCKD *b, PLANE_TYPE type,
ENTROPY_CONTEXT *a,
@@ -443,11 +438,6 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
(tx_type == DCT_DCT) ? mb->token_costs[tx_size][type] :
mb->hybrid_token_costs[tx_size][type];
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
-#endif
-
ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
switch (tx_size) {
@@ -495,50 +485,34 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
}
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
- pn = pt;
-#endif
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
seg_eob = 0;
if (tx_type != DCT_DCT) {
+ int recent_energy = 0;
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp9_dct_value_tokens_ptr[v].Token;
- cost += token_costs[band[c]][PT][t];
+ cost += token_costs[band[c]][pt][t];
cost += vp9_dct_value_cost_ptr[v];
- pt = vp9_prev_token_class[t];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
-#endif
+ pt = vp9_get_coef_context(&recent_energy, t);
}
if (c < seg_eob)
cost += mb->hybrid_token_costs[tx_size][type][band[c]]
- [PT][DCT_EOB_TOKEN];
+ [pt][DCT_EOB_TOKEN];
} else {
+ int recent_energy = 0;
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp9_dct_value_tokens_ptr[v].Token;
cost += token_costs[band[c]][pt][t];
cost += vp9_dct_value_cost_ptr[v];
- pt = vp9_prev_token_class[t];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
-#endif
+ pt = vp9_get_coef_context(&recent_energy, t);
}
if (c < seg_eob)
cost += mb->token_costs[tx_size][type][band[c]]
- [PT][DCT_EOB_TOKEN];
+ [pt][DCT_EOB_TOKEN];
}
// is eob first coefficient;
@@ -698,7 +672,8 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
// TODO(jingning) is it possible to quickly determine whether to force
// trailing coefficients to be zero, instead of running trellis
// optimization in the rate-distortion optimization loop?
- if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
+ if (mb->optimize &&
+ xd->mode_info_context->mbmi.mode < I8X8_PRED)
vp9_optimize_mby_16x16(mb);
d = vp9_mbblock_error(mb, 0);
@@ -859,21 +834,18 @@ static void super_block_yrd_32x32(MACROBLOCK *x,
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
MACROBLOCKD * const xd = &x->e_mbd;
SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
-#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
+#if DEBUG_ERROR
int16_t out[1024];
#endif
vp9_transform_sby_32x32(x);
vp9_quantize_sby_32x32(x);
-#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
+#if DEBUG_ERROR
vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
#endif
-#if !CONFIG_DWTDCTHYBRID
*distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
-#else
- *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4;
-#endif
+
#if DEBUG_ERROR
printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
@@ -1140,7 +1112,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
vp9_ht_quantize_b_4x4(be, b, tx_type);
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
}
@@ -1176,7 +1148,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
#endif
else
- xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
+ xd->inv_txm4x4(best_dqcoeff, b->diff, 32);
vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -1440,7 +1412,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
if (tx_type != DCT_DCT)
vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
else
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
// compute quantization mse of 8x8 block
@@ -1474,11 +1446,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
vp9_ht_quantize_b_4x4(be, b, tx_type);
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
do_two = 1;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
}
distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
@@ -2166,17 +2138,17 @@ static int labels2mode(
}
break;
case LEFT4X4:
- this_mv->as_int = col ? d[-1].bmi.as_mv.first.as_int :
+ this_mv->as_int = col ? d[-1].bmi.as_mv[0].as_int :
left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = col ? d[-1].bmi.as_mv.second.as_int :
+ this_second_mv->as_int = col ? d[-1].bmi.as_mv[1].as_int :
left_block_second_mv(xd, mic, i);
break;
case ABOVE4X4:
- this_mv->as_int = row ? d[-4].bmi.as_mv.first.as_int :
+ this_mv->as_int = row ? d[-4].bmi.as_mv[0].as_int :
above_block_mv(mic, i, mis);
if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = row ? d[-4].bmi.as_mv.second.as_int :
+ this_second_mv->as_int = row ? d[-4].bmi.as_mv[1].as_int :
above_block_second_mv(mic, i, mis);
break;
case ZERO4X4:
@@ -2192,10 +2164,10 @@ static int labels2mode(
int_mv left_mv, left_second_mv;
left_second_mv.as_int = 0;
- left_mv.as_int = col ? d[-1].bmi.as_mv.first.as_int :
+ left_mv.as_int = col ? d[-1].bmi.as_mv[0].as_int :
left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
- left_second_mv.as_int = col ? d[-1].bmi.as_mv.second.as_int :
+ left_second_mv.as_int = col ? d[-1].bmi.as_mv[1].as_int :
left_block_second_mv(xd, mic, i);
if (left_mv.as_int == this_mv->as_int &&
@@ -2212,9 +2184,9 @@ static int labels2mode(
#endif
}
- d->bmi.as_mv.first.as_int = this_mv->as_int;
+ d->bmi.as_mv[0].as_int = this_mv->as_int;
if (mbmi->second_ref_frame > 0)
- d->bmi.as_mv.second.as_int = this_second_mv->as_int;
+ d->bmi.as_mv[1].as_int = this_second_mv->as_int;
x->partition_info->bmi[i].mode = m;
x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
@@ -2248,7 +2220,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
if (xd->mode_info_context->mbmi.second_ref_frame > 0)
vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix);
vp9_subtract_b(be, bd, 16);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, bd);
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
*distortion += thisdistortion;
@@ -2300,7 +2272,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
if (otherrd) {
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
x->quantize_b_8x8(be2, bd2);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
otherdist += thisdistortion;
@@ -2312,7 +2284,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
for (j = 0; j < 4; j += 2) {
bd = &xd->block[ib + iblock[j]];
be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
*distortion += thisdistortion;
@@ -2330,7 +2302,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
for (j = 0; j < 4; j += 2) {
BLOCKD *bd = &xd->block[ib + iblock[j]];
BLOCK *be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
otherdist += thisdistortion;
@@ -2344,7 +2316,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
TX_4X4);
}
}
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
x->quantize_b_8x8(be2, bd2);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
@@ -2500,9 +2472,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
// use previous block's result as next block's MV predictor.
if (segmentation == PARTITIONING_4X4 && i > 0) {
- bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int;
+ bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv[0].as_int;
if (i == 4 || i == 8 || i == 12)
- bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int;
+ bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv[0].as_int;
step_param = 2;
}
}
@@ -2541,11 +2513,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (thissme < bestsme) {
bestsme = thissme;
- mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int;
+ mode_mv[NEW4X4].as_int = e->bmi.as_mv[0].as_int;
} else {
/* The full search result is actually worse so re-instate the
* previous best vector */
- e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int;
+ e->bmi.as_mv[0].as_int = mode_mv[NEW4X4].as_int;
}
}
}
@@ -2885,9 +2857,9 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < 16; i++) {
BLOCKD *bd = &x->e_mbd.block[i];
- bd->bmi.as_mv.first.as_int = bsi.mvs[i].as_int;
+ bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int;
if (mbmi->second_ref_frame > 0)
- bd->bmi.as_mv.second.as_int = bsi.second_mvs[i].as_int;
+ bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int;
bd->eob = bsi.eobs[i];
}
@@ -3307,8 +3279,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->nmvjointcost, x->mvcost,
&dis, &sse);
}
- d->bmi.as_mv.first.as_int = tmp_mv.as_int;
- frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv.first.as_int;
+ d->bmi.as_mv[0].as_int = tmp_mv.as_int;
+ frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv[0].as_int;
// Add the new motion vector cost to our rolling cost variable
*rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
@@ -4251,10 +4223,12 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (best_mbmode.mode == SPLITMV) {
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv.first.as_int = best_bmodes[i].as_mv.first.as_int;
+ xd->mode_info_context->bmi[i].as_mv[0].as_int =
+ best_bmodes[i].as_mv[0].as_int;
if (mbmi->second_ref_frame > 0)
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv.second.as_int = best_bmodes[i].as_mv.second.as_int;
+ xd->mode_info_context->bmi[i].as_mv[1].as_int =
+ best_bmodes[i].as_mv[1].as_int;
vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 17d8f25bd..b125a486e 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -13,6 +13,7 @@
#include "vpx_mem/vpx_mem.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_tile_common.h"
void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x) {
int mb_row, mb_col;
@@ -254,7 +255,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
int t_pred_cost = INT_MAX;
int i;
- int tile, mb_row, mb_col, mb_start = 0;
+ int tile_col, mb_row, mb_col;
int temporal_predictor_count[PREDICTION_PROBS][2];
int no_pred_segcounts[MAX_MB_SEGMENTS];
@@ -282,21 +283,13 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
// First of all generate stats regarding how well the last segment map
// predicts this one
- for (tile = 0; tile < cm->tile_columns; tile++) {
- // calculate end of tile column
- const int sb_cols = (cm->mb_cols + 3) >> 2;
- const int sb_end = (sb_cols * (tile + 1)) >> cpi->oxcf.tile_columns;
- const int mb_end = ((sb_end << 2) > cm->mb_cols) ?
- cm->mb_cols : (sb_end << 2);
-
- cm->cur_tile_idx = tile;
- cm->cur_tile_mb_col_start = mb_start;
- cm->cur_tile_mb_col_end = mb_end;
-
- mi_ptr = cm->mi + mb_start;
+ for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
+ vp9_get_tile_col_offsets(cm, tile_col);
+ mi_ptr = cm->mi + cm->cur_tile_mb_col_start;
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
mi = mi_ptr;
- for (mb_col = mb_start; mb_col < mb_end; mb_col += 4, mi += 4) {
+ for (mb_col = cm->cur_tile_mb_col_start;
+ mb_col < cm->cur_tile_mb_col_end; mb_col += 4, mi += 4) {
if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, 4, mb_row, mb_col);
@@ -338,8 +331,6 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
}
}
}
-
- mb_start = mb_end;
}
// Work out probability tree for coding segments without prediction
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 7bca01e05..164709009 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -171,7 +171,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
/*cpi->sf.search_method == HEX*/
// TODO Check that the 16x16 vf & sdf are selected here
// Ignore mv costing by sending NULL pointer instead of cost arrays
- bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first,
+ bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv[0],
step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16],
NULL, NULL, NULL, NULL,
&best_ref_mv1);
@@ -183,7 +183,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
int distortion;
unsigned int sse;
// Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first,
+ bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv[0],
&best_ref_mv1,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
@@ -263,8 +263,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
if (cpi->frames[frame] == NULL)
continue;
- mbd->block[0].bmi.as_mv.first.as_mv.row = 0;
- mbd->block[0].bmi.as_mv.first.as_mv.col = 0;
+ mbd->block[0].bmi.as_mv[0].as_mv.row = 0;
+ mbd->block[0].bmi.as_mv[0].as_mv.col = 0;
if (frame == alt_ref_index) {
filter_weight = 2;
@@ -297,8 +297,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
cpi->frames[frame]->u_buffer + mb_uv_offset,
cpi->frames[frame]->v_buffer + mb_uv_offset,
cpi->frames[frame]->y_stride,
- mbd->block[0].bmi.as_mv.first.as_mv.row,
- mbd->block[0].bmi.as_mv.first.as_mv.col,
+ mbd->block[0].bmi.as_mv[0].as_mv.row,
+ mbd->block[0].bmi.as_mv[0].as_mv.col,
predictor);
// Apply the filter (YUV)
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 2dedb1a51..12fee9037 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -100,12 +100,6 @@ static void fill_value_tokens() {
vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#else
-#define PT pt
-#endif
-
static void tokenize_b(VP9_COMP *cpi,
MACROBLOCKD *xd,
const int ib,
@@ -115,6 +109,7 @@ static void tokenize_b(VP9_COMP *cpi,
int dry_run) {
int pt; /* near block/prev token context index */
int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
+ int recent_energy = 0;
const BLOCKD * const b = xd->block + ib;
const int eob = b->eob; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
@@ -126,10 +121,6 @@ static void tokenize_b(VP9_COMP *cpi,
vp9_coeff_probs *probs;
const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type(xd, b) : DCT_DCT;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
-#endif
ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context +
vp9_block2above[tx_size][ib];
@@ -228,10 +219,6 @@ static void tokenize_b(VP9_COMP *cpi,
}
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
- pn = pt;
-#endif
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
seg_eob = 0;
@@ -252,21 +239,15 @@ static void tokenize_b(VP9_COMP *cpi,
}
t->Token = token;
- t->context_tree = probs[type][band][PT];
+ t->context_tree = probs[type][band][pt];
t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
(band > 1 && type == PLANE_TYPE_Y_NO_DC));
assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
- ++counts[type][band][PT][token];
+ ++counts[type][band][pt][token];
}
- pt = vp9_prev_token_class[token];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(bands[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
-#endif
+
+ pt = vp9_get_coef_context(&recent_energy, token);
++t;
} while (c < eob && ++c < seg_eob);
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index d8d95a136..eb152f521 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -59,6 +59,8 @@ VP9_COMMON_SRCS-yes += common/vp9_setupintrarecon.h
VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.h
VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h
VP9_COMMON_SRCS-yes += common/vp9_textblit.h
+VP9_COMMON_SRCS-yes += common/vp9_tile_common.h
+VP9_COMMON_SRCS-yes += common/vp9_tile_common.c
VP9_COMMON_SRCS-yes += common/vp9_treecoder.h
VP9_COMMON_SRCS-yes += common/vp9_invtrans.c
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 0b8677285..81f02ee6b 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -27,6 +27,7 @@ struct vp8_extracfg {
unsigned int Sharpness;
unsigned int static_thresh;
unsigned int tile_columns;
+ unsigned int tile_rows;
unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */
unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */
unsigned int arnr_type; /* alt_ref filter type */
@@ -54,7 +55,8 @@ static const struct extraconfig_map extracfg_map[] = {
0, /* noise_sensitivity */
0, /* Sharpness */
0, /* static_thresh */
- VP8_ONE_TILE_COLUMN, /* tile_columns */
+ 0, /* tile_columns */
+ 0, /* tile_rows */
0, /* arnr_max_frames */
3, /* arnr_strength */
3, /* arnr_type*/
@@ -171,8 +173,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
- RANGE_CHECK(vp8_cfg, tile_columns,
- VP8_ONE_TILE_COLUMN, VP8_FOUR_TILE_COLUMNS);
+ RANGE_CHECK(vp8_cfg, tile_columns, 0, 6);
+ RANGE_CHECK(vp8_cfg, tile_rows, 0, 2);
RANGE_CHECK_HI(vp8_cfg, Sharpness, 7);
RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
@@ -310,6 +312,7 @@ static vpx_codec_err_t set_vp8e_config(VP9_CONFIG *oxcf,
oxcf->tuning = vp8_cfg.tuning;
oxcf->tile_columns = vp8_cfg.tile_columns;
+ oxcf->tile_rows = vp8_cfg.tile_rows;
#if CONFIG_LOSSLESS
oxcf->lossless = vp8_cfg.lossless;
@@ -417,6 +420,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
MAP(VP8E_SET_SHARPNESS, xcfg.Sharpness);
MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh);
MAP(VP9E_SET_TILE_COLUMNS, xcfg.tile_columns);
+ MAP(VP9E_SET_TILE_ROWS, xcfg.tile_rows);
MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames);
MAP(VP8E_SET_ARNR_STRENGTH, xcfg.arnr_strength);
@@ -1007,6 +1011,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = {
{VP8E_SET_SHARPNESS, set_param},
{VP8E_SET_STATIC_THRESHOLD, set_param},
{VP9E_SET_TILE_COLUMNS, set_param},
+ {VP9E_SET_TILE_ROWS, set_param},
{VP8E_GET_LAST_QUANTIZER, get_param},
{VP8E_GET_LAST_QUANTIZER_64, get_param},
{VP8E_SET_ARNR_MAXFRAMES, set_param},