diff options
Diffstat (limited to 'vp9')
40 files changed, 1015 insertions, 2882 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 01fa63fdb..c3d6dae93 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -219,8 +219,4 @@ void vp9_initialize_common() { vp9_entropy_mode_init(); vp9_entropy_mv_init(); - -#if CONFIG_NEWCOEFCONTEXT - vp9_init_neighbors(); -#endif } diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 3351e6928..054d58dba 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -47,18 +47,6 @@ void vpx_log(const char *format, ...); #define MAX_MV_REFS 9 #define MAX_MV_REF_CANDIDATES 4 -#if CONFIG_DWTDCTHYBRID -#define DWT_MAX_LENGTH 64 -#define DWT_TYPE 26 // 26/53/97 -#define DWT_PRECISION_BITS 2 -#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) - -#define DWTDCT16X16 0 -#define DWTDCT16X16_LEAN 1 -#define DWTDCT8X8 2 -#define DWTDCT_TYPE DWTDCT16X16_LEAN -#endif - typedef struct { int r, c; } POS; @@ -218,10 +206,7 @@ union b_mode_info { B_PREDICTION_MODE context; #endif } as_mode; - struct { - int_mv first; - int_mv second; - } as_mv; + int_mv as_mv[2]; // first, second inter predictor motion vectors }; typedef enum { @@ -386,11 +371,28 @@ typedef struct macroblockd { unsigned int frames_since_golden; unsigned int frames_till_alt_ref_frame; +#if CONFIG_LOSSLESS + int lossless; +#endif /* Inverse transform function pointers. 
*/ - void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch); - void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch); - void (*inv_walsh4x4_1)(int16_t *in, int16_t *out); - void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out); + void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch); + void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch); + void (*inv_2ndtxm4x4_1)(int16_t *in, int16_t *out); + void (*inv_2ndtxm4x4)(int16_t *in, int16_t *out); + void (*itxm_add)(int16_t *input, const int16_t *dq, + uint8_t *pred, uint8_t *output, int pitch, int stride); + void (*dc_itxm_add)(int16_t *input, const int16_t *dq, + uint8_t *pred, uint8_t *output, int pitch, int stride, int dc); + void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr, + uint8_t *dst_ptr, int pitch, int stride); + void (*dc_itxm_add_y_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, + const int16_t *dc); + void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs); + void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride, + uint16_t *eobs); struct subpix_fn_table subpix; @@ -501,6 +503,10 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { int ib = (int)(b - xd->block); if (ib >= 16) return tx_type; +#if CONFIG_LOSSLESS + if (xd->lossless) + return DCT_DCT; +#endif // TODO(rbultje, debargha): Explore ADST usage for superblocks if (xd->mode_info_context->mbmi.sb_type) return tx_type; diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index f21f1d84e..b87c410df 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -7,12 +7,15 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "vp9/common/vp9_convolve.h" + #include <assert.h> #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" #include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" #define VP9_FILTER_WEIGHT 128 #define VP9_FILTER_SHIFT 7 @@ -293,9 +296,21 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - convolve_avg_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8); + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_avg(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameters should be removed! */ + NULL, 0, /* These unused parameters should be removed! */ + w, h); } void vp9_convolve_copy(const uint8_t *src, int src_stride, diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c index 5ea7736b7..1953d60c6 100644 --- a/vp9/common/vp9_debugmodes.c +++ b/vp9/common/vp9_debugmodes.c @@ -129,8 +129,8 @@ void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); bindex = (b_row & 3) * 4 + (b_col & 3); fprintf(mvs, "%3d:%-3d ", - mi[mb_index].bmi[bindex].as_mv.first.as_mv.row, - mi[mb_index].bmi[bindex].as_mv.first.as_mv.col); + mi[mb_index].bmi[bindex].as_mv[0].as_mv.row, + mi[mb_index].bmi[bindex].as_mv[0].as_mv.col); } diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 352e17c0c..e21eaba83 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -143,624 +143,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 237, 252, 253, 238, 223, 239, 254, 255, }; -#if CONFIG_DWTDCTHYBRID - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN 
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { - 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, - 65, 96, 128, 97, 66, 35, 4, 5, - 36, 67, 98, 129, 160, 192, 161, 130, - 99, 68, 37, 6, 7, 38, 69, 100, - 131, 162, 193, 224, 256, 225, 194, 163, - 132, 101, 70, 39, 8, 9, 40, 71, - 102, 133, 164, 195, 226, 257, 288, 320, - 289, 258, 227, 196, 165, 134, 103, 72, - 41, 10, 11, 42, 73, 104, 135, 166, - 197, 228, 259, 290, 321, 352, 384, 353, - 322, 291, 260, 229, 198, 167, 136, 105, - 74, 43, 12, 13, 44, 75, 106, 137, - 168, 199, 230, 261, 292, 323, 354, 385, - 416, 448, 417, 386, 355, 324, 293, 262, - 231, 200, 169, 138, 107, 76, 45, 14, - 15, 46, 77, 108, 139, 170, 201, 232, - 263, 294, 325, 356, 387, 418, 449, 
480, - 481, 450, 419, 388, 357, 326, 295, 264, - 233, 202, 171, 140, 109, 78, 47, 79, - 110, 141, 172, 203, 234, 265, 296, 327, - 358, 389, 420, 451, 482, 483, 452, 421, - 390, 359, 328, 297, 266, 235, 204, 173, - 142, 111, 143, 174, 205, 236, 267, 298, - 329, 360, 391, 422, 453, 484, 485, 454, - 423, 392, 361, 330, 299, 268, 237, 206, - 175, 207, 238, 269, 300, 331, 362, 393, - 424, 455, 486, 487, 456, 425, 394, 363, - 332, 301, 270, 239, 271, 302, 333, 364, - 395, 426, 457, 488, 489, 458, 427, 396, - 365, 334, 303, 335, 366, 397, 428, 459, - 490, 491, 460, 429, 398, 367, 399, 430, - 461, 492, 493, 462, 431, 463, 494, 495, - - 16, 512, 528, 17, 513, 529, 48, 544, - 560, 80, 576, 592, 49, 545, 561, 18, - 514, 530, 19, 515, 531, 50, 546, 562, - 81, 577, 593, 112, 608, 624, 144, 640, - 656, 113, 609, 625, 82, 578, 594, 51, - 547, 563, 20, 516, 532, 21, 517, 533, - 52, 548, 564, 83, 579, 595, 114, 610, - 626, 145, 641, 657, 176, 672, 688, 208, - 704, 720, 177, 673, 689, 146, 642, 658, - 115, 611, 627, 84, 580, 596, 53, 549, - 565, 22, 518, 534, 23, 519, 535, 54, - 550, 566, 85, 581, 597, 116, 612, 628, - 147, 643, 659, 178, 674, 690, 209, 705, - 721, 240, 736, 752, 272, 768, 784, 241, - 737, 753, 210, 706, 722, 179, 675, 691, - 148, 644, 660, 117, 613, 629, 86, 582, - 598, 55, 551, 567, 24, 520, 536, 25, - 521, 537, 56, 552, 568, 87, 583, 599, - 118, 614, 630, 149, 645, 661, 180, 676, - 692, 211, 707, 723, 242, 738, 754, 273, - 769, 785, 304, 800, 816, 336, 832, 848, - 305, 801, 817, 274, 770, 786, 243, 739, - 755, 212, 708, 724, 181, 677, 693, 150, - 646, 662, 119, 615, 631, 88, 584, 600, - 57, 553, 569, 26, 522, 538, 27, 523, - 539, 58, 554, 570, 89, 585, 601, 120, - 616, 632, 151, 647, 663, 182, 678, 694, - 213, 709, 725, 244, 740, 756, 275, 771, - 787, 306, 802, 818, 337, 833, 849, 368, - 864, 880, 400, 896, 912, 369, 865, 881, - 338, 834, 850, 307, 803, 819, 276, 772, - 788, 245, 741, 757, 214, 710, 726, 183, - - 679, 695, 152, 648, 664, 121, 617, 633, - 90, 586, 
602, 59, 555, 571, 28, 524, - 540, 29, 525, 541, 60, 556, 572, 91, - 587, 603, 122, 618, 634, 153, 649, 665, - 184, 680, 696, 215, 711, 727, 246, 742, - 758, 277, 773, 789, 308, 804, 820, 339, - 835, 851, 370, 866, 882, 401, 897, 913, - 432, 928, 944, 464, 960, 976, 433, 929, - 945, 402, 898, 914, 371, 867, 883, 340, - 836, 852, 309, 805, 821, 278, 774, 790, - 247, 743, 759, 216, 712, 728, 185, 681, - 697, 154, 650, 666, 123, 619, 635, 92, - 588, 604, 61, 557, 573, 30, 526, 542, - 31, 527, 543, 62, 558, 574, 93, 589, - 605, 124, 620, 636, 155, 651, 667, 186, - 682, 698, 217, 713, 729, 248, 744, 760, - 279, 775, 791, 310, 806, 822, 341, 837, - 853, 372, 868, 884, 403, 899, 915, 434, - 930, 946, 465, 961, 977, 496, 992, 1008, - 497, 993, 1009, 466, 962, 978, 435, 931, - 947, 404, 900, 916, 373, 869, 885, 342, - 838, 854, 311, 807, 823, 280, 776, 792, - 249, 745, 761, 218, 714, 730, 187, 683, - 699, 156, 652, 668, 125, 621, 637, 94, - 590, 606, 63, 559, 575, 95, 591, 607, - 126, 622, 638, 157, 653, 669, 188, 684, - 700, 219, 715, 731, 250, 746, 762, 281, - 777, 793, 312, 808, 824, 343, 839, 855, - 374, 870, 886, 405, 901, 917, 436, 932, - 948, 467, 963, 979, 498, 994, 1010, 499, - 995, 1011, 468, 964, 980, 437, 933, 949, - 406, 902, 918, 375, 871, 887, 344, 840, - - 856, 313, 809, 825, 282, 778, 794, 251, - 747, 763, 220, 716, 732, 189, 685, 701, - 158, 654, 670, 127, 623, 639, 159, 655, - 671, 190, 686, 702, 221, 717, 733, 252, - 748, 764, 283, 779, 795, 314, 810, 826, - 345, 841, 857, 376, 872, 888, 407, 903, - 919, 438, 934, 950, 469, 965, 981, 500, - 996, 1012, 501, 997, 1013, 470, 966, 982, - 439, 935, 951, 408, 904, 920, 377, 873, - 889, 346, 842, 858, 315, 811, 827, 284, - 780, 796, 253, 749, 765, 222, 718, 734, - 191, 687, 703, 223, 719, 735, 254, 750, - 766, 285, 781, 797, 316, 812, 828, 347, - 843, 859, 378, 874, 890, 409, 905, 921, - 440, 936, 952, 471, 967, 983, 502, 998, - 1014, 503, 999, 1015, 472, 968, 984, 441, - 937, 953, 410, 906, 922, 379, 875, 891, 
- 348, 844, 860, 317, 813, 829, 286, 782, - 798, 255, 751, 767, 287, 783, 799, 318, - 814, 830, 349, 845, 861, 380, 876, 892, - 411, 907, 923, 442, 938, 954, 473, 969, - 985, 504, 1000, 1016, 505, 1001, 1017, 474, - 970, 986, 443, 939, 955, 412, 908, 924, - 381, 877, 893, 350, 846, 862, 319, 815, - 831, 351, 847, 863, 382, 878, 894, 413, - 909, 925, 444, 940, 956, 475, 971, 987, - 506, 1002, 1018, 507, 1003, 1019, 476, 972, - 988, 445, 941, 957, 414, 910, 926, 383, - 879, 895, 415, 911, 927, 446, 942, 958, - 477, 973, 989, 508, 1004, 1020, 509, 1005, - 1021, 478, 974, 990, 447, 943, 959, 479, - 975, 991, 510, 1006, 1022, 511, 1007, 1023, -}; - -#elif DWTDCT_TYPE == DWTDCT16X16 - -DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { - 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, - 6, 6, 6, - 6, - 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -DECLARE_ALIGNED(16, const 
int, vp9_default_zig_zag1d_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, - 65, 96, 128, 97, 66, 35, 4, - 16, 512, 528, - 5, - 36, 67, 98, 129, 160, 192, 161, 130, - 99, 68, 37, 6, 7, 38, 69, 100, - 131, 162, 193, 224, 256, 225, 194, 163, - 132, 101, 70, 39, 8, 9, 40, 71, - 102, 133, 164, 195, 226, 257, 288, 320, - 289, 258, 227, 196, 165, 134, 103, 72, - 41, 10, 11, 42, 73, 104, 135, 166, - 197, 228, 259, 290, 321, 352, 384, 353, - 322, 291, 260, 229, 198, 167, 136, 105, - 74, 43, 12, 13, 44, 75, 106, 137, - 168, 199, 230, 261, 292, 323, 354, 385, - 416, 448, 417, 386, 355, 324, 293, 262, - 231, 200, 169, 138, 107, 76, 45, 14, - 15, 46, 77, 108, 139, 170, 201, 232, - 263, 294, 325, 356, 387, 418, 449, 480, - 481, 450, 419, 388, 357, 326, 295, 264, - 233, 202, 171, 140, 109, 78, 47, 79, - 110, 141, 172, 203, 234, 265, 296, 327, - 358, 389, 420, 451, 482, 483, 452, 421, - 390, 359, 328, 297, 266, 235, 204, 173, - 142, 111, 143, 174, 205, 236, 267, 298, - 329, 360, 391, 422, 453, 484, 485, 454, - 423, 392, 361, 330, 299, 268, 237, 206, - 175, 207, 238, 269, 300, 331, 362, 393, - 424, 455, 486, 487, 456, 425, 394, 363, - 332, 301, 270, 239, 271, 302, 333, 364, - 395, 426, 457, 488, 489, 458, 427, 396, - 365, 334, 303, 335, 366, 397, 428, 459, - 490, 491, 460, 429, 398, 367, 399, 430, - 461, 492, 493, 462, 431, 463, 494, 495, - - 17, 513, 529, 48, 544, - 560, 80, 576, 592, 49, 545, 561, 18, - 514, 530, 19, 515, 531, 50, 546, 562, - 81, 577, 593, 112, 608, 624, 144, 640, - 656, 113, 609, 625, 82, 578, 594, 51, - 547, 563, 20, 516, 532, 21, 517, 533, - 52, 548, 564, 83, 579, 595, 114, 610, - 626, 145, 641, 657, 176, 672, 688, 208, - 704, 720, 177, 673, 689, 146, 642, 658, - 115, 611, 627, 84, 580, 596, 53, 549, - 565, 22, 518, 534, 23, 519, 535, 54, - 550, 566, 85, 581, 597, 116, 612, 628, - 147, 643, 659, 178, 674, 690, 209, 705, - 721, 240, 736, 752, 272, 768, 784, 241, - 737, 753, 210, 706, 722, 179, 675, 691, - 148, 644, 660, 117, 613, 629, 86, 582, - 598, 55, 551, 
567, 24, 520, 536, 25, - 521, 537, 56, 552, 568, 87, 583, 599, - 118, 614, 630, 149, 645, 661, 180, 676, - 692, 211, 707, 723, 242, 738, 754, 273, - 769, 785, 304, 800, 816, 336, 832, 848, - 305, 801, 817, 274, 770, 786, 243, 739, - 755, 212, 708, 724, 181, 677, 693, 150, - 646, 662, 119, 615, 631, 88, 584, 600, - 57, 553, 569, 26, 522, 538, 27, 523, - 539, 58, 554, 570, 89, 585, 601, 120, - 616, 632, 151, 647, 663, 182, 678, 694, - 213, 709, 725, 244, 740, 756, 275, 771, - 787, 306, 802, 818, 337, 833, 849, 368, - 864, 880, 400, 896, 912, 369, 865, 881, - 338, 834, 850, 307, 803, 819, 276, 772, - 788, 245, 741, 757, 214, 710, 726, 183, - - 679, 695, 152, 648, 664, 121, 617, 633, - 90, 586, 602, 59, 555, 571, 28, 524, - 540, 29, 525, 541, 60, 556, 572, 91, - 587, 603, 122, 618, 634, 153, 649, 665, - 184, 680, 696, 215, 711, 727, 246, 742, - 758, 277, 773, 789, 308, 804, 820, 339, - 835, 851, 370, 866, 882, 401, 897, 913, - 432, 928, 944, 464, 960, 976, 433, 929, - 945, 402, 898, 914, 371, 867, 883, 340, - 836, 852, 309, 805, 821, 278, 774, 790, - 247, 743, 759, 216, 712, 728, 185, 681, - 697, 154, 650, 666, 123, 619, 635, 92, - 588, 604, 61, 557, 573, 30, 526, 542, - 31, 527, 543, 62, 558, 574, 93, 589, - 605, 124, 620, 636, 155, 651, 667, 186, - 682, 698, 217, 713, 729, 248, 744, 760, - 279, 775, 791, 310, 806, 822, 341, 837, - 853, 372, 868, 884, 403, 899, 915, 434, - 930, 946, 465, 961, 977, 496, 992, 1008, - 497, 993, 1009, 466, 962, 978, 435, 931, - 947, 404, 900, 916, 373, 869, 885, 342, - 838, 854, 311, 807, 823, 280, 776, 792, - 249, 745, 761, 218, 714, 730, 187, 683, - 699, 156, 652, 668, 125, 621, 637, 94, - 590, 606, 63, 559, 575, 95, 591, 607, - 126, 622, 638, 157, 653, 669, 188, 684, - 700, 219, 715, 731, 250, 746, 762, 281, - 777, 793, 312, 808, 824, 343, 839, 855, - 374, 870, 886, 405, 901, 917, 436, 932, - 948, 467, 963, 979, 498, 994, 1010, 499, - 995, 1011, 468, 964, 980, 437, 933, 949, - 406, 902, 918, 375, 871, 887, 344, 840, - - 856, 313, 809, 
825, 282, 778, 794, 251, - 747, 763, 220, 716, 732, 189, 685, 701, - 158, 654, 670, 127, 623, 639, 159, 655, - 671, 190, 686, 702, 221, 717, 733, 252, - 748, 764, 283, 779, 795, 314, 810, 826, - 345, 841, 857, 376, 872, 888, 407, 903, - 919, 438, 934, 950, 469, 965, 981, 500, - 996, 1012, 501, 997, 1013, 470, 966, 982, - 439, 935, 951, 408, 904, 920, 377, 873, - 889, 346, 842, 858, 315, 811, 827, 284, - 780, 796, 253, 749, 765, 222, 718, 734, - 191, 687, 703, 223, 719, 735, 254, 750, - 766, 285, 781, 797, 316, 812, 828, 347, - 843, 859, 378, 874, 890, 409, 905, 921, - 440, 936, 952, 471, 967, 983, 502, 998, - 1014, 503, 999, 1015, 472, 968, 984, 441, - 937, 953, 410, 906, 922, 379, 875, 891, - 348, 844, 860, 317, 813, 829, 286, 782, - 798, 255, 751, 767, 287, 783, 799, 318, - 814, 830, 349, 845, 861, 380, 876, 892, - 411, 907, 923, 442, 938, 954, 473, 969, - 985, 504, 1000, 1016, 505, 1001, 1017, 474, - 970, 986, 443, 939, 955, 412, 908, 924, - 381, 877, 893, 350, 846, 862, 319, 815, - 831, 351, 847, 863, 382, 878, 894, 413, - 909, 925, 444, 940, 956, 475, 971, 987, - 506, 1002, 1018, 507, 1003, 1019, 476, 972, - 988, 445, 941, 957, 414, 910, 926, 383, - 879, 895, 415, 911, 927, 446, 942, 958, - 477, 973, 989, 508, 1004, 1020, 509, 1005, - 1021, 478, 974, 990, 447, 943, 959, 479, - 975, 991, 510, 1006, 1022, 511, 1007, 1023, -}; - -#elif DWTDCT_TYPE == DWTDCT8X8 - -DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { - 0, 1, 2, 3, 5, 4, 4, 5, - 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - - 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, - 65, 96, 128, 97, 66, 35, 4, 5, - 36, 67, 98, 129, 160, 192, 161, 130, - 99, 68, 37, 6, 7, 38, 69, 100, - 131, 162, 193, 224, 225, 194, 163, 132, - 101, 70, 39, 71, 102, 133, 164, 195, - 226, 227, 196, 165, 134, 103, 135, 166, - 197, 228, 229, 198, 167, 199, 230, 231, - - 8, 256, 264, 9, 257, 265, 40, 288, 296, 72, 320, 328, - 41, 289, 297, 10, 258, 266, 11, 259, 267, 42, 290, 298, - 73, 321, 329, 104, 352, 360, 136, 384, 392, 105, 353, 361, - 74, 322, 330, 43, 291, 299, 12, 260, 268, 13, 261, 269, - 44, 292, 300, 75, 323, 331, 106, 354, 362, 137, 385, 393, - 168, 416, 424, 200, 448, 456, 169, 417, 425, 138, 386, 394, - 107, 355, 363, 76, 324, 332, 45, 293, 301, 14, 262, 270, - 15, 263, 271, 46, 294, 302, 77, 325, 333, 108, 356, 364, - 139, 387, 395, 170, 418, 426, 201, 449, 457, 232, 480, 488, - 233, 481, 489, 202, 450, 458, 171, 419, 427, 140, 388, 396, - 109, 357, 365, 78, 326, 334, 47, 295, 303, 79, 327, 335, - 110, 358, 366, 141, 389, 397, 172, 420, 428, 203, 451, 459, - 234, 482, 490, 235, 483, 491, 204, 452, 460, 173, 421, 429, - 142, 390, 398, 111, 359, 367, 143, 391, 399, 174, 422, 430, - 205, 453, 461, 236, 484, 492, 237, 485, 493, 206, 454, 462, - 175, 423, 431, 207, 455, 463, 238, 486, 494, 239, 
487, 495, - - 16, 512, 528, 17, 513, 529, 18, 514, - 530, 19, 515, 531, 20, 516, 532, 21, - 517, 533, 22, 518, 534, 23, 519, 535, - 24, 520, 536, 25, 521, 537, 26, 522, - 538, 27, 523, 539, 28, 524, 540, 29, - 525, 541, 30, 526, 542, 31, 527, 543, - 48, 544, 560, 49, 545, 561, 50, 546, - 562, 51, 547, 563, 52, 548, 564, 53, - 549, 565, 54, 550, 566, 55, 551, 567, - 56, 552, 568, 57, 553, 569, 58, 554, - 570, 59, 555, 571, 60, 556, 572, 61, - 557, 573, 62, 558, 574, 63, 559, 575, - 80, 576, 592, 81, 577, 593, 82, 578, - 594, 83, 579, 595, 84, 580, 596, 85, - 581, 597, 86, 582, 598, 87, 583, 599, - 88, 584, 600, 89, 585, 601, 90, 586, - 602, 91, 587, 603, 92, 588, 604, 93, - 589, 605, 94, 590, 606, 95, 591, 607, - 112, 608, 624, 113, 609, 625, 114, 610, - 626, 115, 611, 627, 116, 612, 628, 117, - 613, 629, 118, 614, 630, 119, 615, 631, - 120, 616, 632, 121, 617, 633, 122, 618, - 634, 123, 619, 635, 124, 620, 636, 125, - 621, 637, 126, 622, 638, 127, 623, 639, - 144, 640, 656, 145, 641, 657, 146, 642, - 658, 147, 643, 659, 148, 644, 660, 149, - 645, 661, 150, 646, 662, 151, 647, 663, - 152, 648, 664, 153, 649, 665, 154, 650, - 666, 155, 651, 667, 156, 652, 668, 157, - 653, 669, 158, 654, 670, 159, 655, 671, - 176, 672, 688, 177, 673, 689, 178, 674, - 690, 179, 675, 691, 180, 676, 692, 181, - 677, 693, 182, 678, 694, 183, 679, 695, - 184, 680, 696, 185, 681, 697, 186, 682, - 698, 187, 683, 699, 188, 684, 700, 189, - 685, 701, 190, 686, 702, 191, 687, 703, - 208, 704, 720, 209, 705, 721, 210, 706, - 722, 211, 707, 723, 212, 708, 724, 213, - 709, 725, 214, 710, 726, 215, 711, 727, - 216, 712, 728, 217, 713, 729, 218, 714, - 730, 219, 715, 731, 220, 716, 732, 221, - 717, 733, 222, 718, 734, 223, 719, 735, - 240, 736, 752, 241, 737, 753, 242, 738, - 754, 243, 739, 755, 244, 740, 756, 245, - 741, 757, 246, 742, 758, 247, 743, 759, - 248, 744, 760, 249, 745, 761, 250, 746, - 762, 251, 747, 763, 252, 748, 764, 253, - 749, 765, 254, 750, 766, 255, 751, 767, - 272, 768, 784, 
273, 769, 785, 274, 770, - 786, 275, 771, 787, 276, 772, 788, 277, - 773, 789, 278, 774, 790, 279, 775, 791, - 280, 776, 792, 281, 777, 793, 282, 778, - 794, 283, 779, 795, 284, 780, 796, 285, - 781, 797, 286, 782, 798, 287, 783, 799, - 304, 800, 816, 305, 801, 817, 306, 802, - 818, 307, 803, 819, 308, 804, 820, 309, - 805, 821, 310, 806, 822, 311, 807, 823, - 312, 808, 824, 313, 809, 825, 314, 810, - 826, 315, 811, 827, 316, 812, 828, 317, - 813, 829, 318, 814, 830, 319, 815, 831, - 336, 832, 848, 337, 833, 849, 338, 834, - 850, 339, 835, 851, 340, 836, 852, 341, - 837, 853, 342, 838, 854, 343, 839, 855, - 344, 840, 856, 345, 841, 857, 346, 842, - 858, 347, 843, 859, 348, 844, 860, 349, - 845, 861, 350, 846, 862, 351, 847, 863, - 368, 864, 880, 369, 865, 881, 370, 866, - 882, 371, 867, 883, 372, 868, 884, 373, - 869, 885, 374, 870, 886, 375, 871, 887, - 376, 872, 888, 377, 873, 889, 378, 874, - 890, 379, 875, 891, 380, 876, 892, 381, - 877, 893, 382, 878, 894, 383, 879, 895, - 400, 896, 912, 401, 897, 913, 402, 898, - 914, 403, 899, 915, 404, 900, 916, 405, - 901, 917, 406, 902, 918, 407, 903, 919, - 408, 904, 920, 409, 905, 921, 410, 906, - 922, 411, 907, 923, 412, 908, 924, 413, - 909, 925, 414, 910, 926, 415, 911, 927, - 432, 928, 944, 433, 929, 945, 434, 930, - 946, 435, 931, 947, 436, 932, 948, 437, - 933, 949, 438, 934, 950, 439, 935, 951, - 440, 936, 952, 441, 937, 953, 442, 938, - 954, 443, 939, 955, 444, 940, 956, 445, - 941, 957, 446, 942, 958, 447, 943, 959, - 464, 960, 976, 465, 961, 977, 466, 962, - 978, 467, 963, 979, 468, 964, 980, 469, - 965, 981, 470, 966, 982, 471, 967, 983, - 472, 968, 984, 473, 969, 985, 474, 970, - 986, 475, 971, 987, 476, 972, 988, 477, - 973, 989, 478, 974, 990, 479, 975, 991, - 496, 992, 1008, 497, 993, 1009, 498, 994, - 1010, 499, 995, 1011, 500, 996, 1012, 501, - 997, 1013, 502, 998, 1014, 503, 999, 1015, - 504, 1000, 1016, 505, 1001, 1017, 506, 1002, - 1018, 507, 1003, 1019, 508, 1004, 1020, 509, - 1005, 1021, 510, 1006, 
1022, 511, 1007, 1023, -}; -#endif - -#else - DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, @@ -865,7 +247,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, }; -#endif // CONFIG_DWTDCTHYBRID /* Array indices are identical to previously-existing CONTEXT_NODE indices */ @@ -937,145 +318,28 @@ vp9_extra_bit_struct vp9_extra_bits[12] = { #include "vp9/common/vp9_default_coef_probs.h" -#if CONFIG_NEWCOEFCONTEXT - -// Neighborhood 5-tuples for various scans and blocksizes, -// in {top, left, topleft, topright, bottomleft} order -// for each position in raster scan order. -// -1 indicates the neighbor does not exist. 
-DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_4x4_neighbors[16 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]); -DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]); - -static int find_in_scan(const int *scan, int l, int m) { - int i, l2 = l * l; - for (i = 0; i < l2; ++i) { - if (scan[i] == m) - return i; - } - return -1; -} - -static void init_scan_neighbors(const int *scan, int l, int *neighbors) { - int l2 = l * l; - int m, n, i, j, k; - for (n = 0; n < l2; ++n) { - int locn = find_in_scan(scan, l, n); - int z = -1; - i = n / l; - j = n % l; - for (k = 0; k < MAX_NEIGHBORS; ++k) - neighbors[MAX_NEIGHBORS * n + k] = -1; - if (i - 1 >= 0) { - m = (i - 1) * l + j; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n] = m; - if (m == 0) z = 0; - } - } - if (j - 1 >= 0) { - m = i * l + j - 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 1] = m; - if (m == 0) z = 1; - } - } - if (i - 1 >= 0 && j - 1 >= 0) { - m = (i - 1) * l + j - 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 2] = m; - if (m == 0) z = 2; - } - } - if (i - 1 >= 0 && j + 1 < l) { - m = (i - 1) * l + j + 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 3] = m; - if (m == 0) z = 3; - } - } - if (i + 1 < l && j - 1 >= 0) { - m = (i + 1) * l + j - 1; - if (find_in_scan(scan, l, m) < locn) { - neighbors[MAX_NEIGHBORS * n + 4] = m; - if (m == 0) z = 4; - } - } - if (z != -1) { // zero exists - int v = 0; - for (k = 0; k < MAX_NEIGHBORS; ++k) - v += (neighbors[MAX_NEIGHBORS * n + k] > 0); - if (v) { - neighbors[MAX_NEIGHBORS * n + z] = -1; - } - } - } -} - -void 
vp9_init_neighbors() { - init_scan_neighbors(vp9_default_zig_zag1d_4x4, 4, - vp9_default_zig_zag1d_4x4_neighbors); - init_scan_neighbors(vp9_row_scan_4x4, 4, - vp9_row_scan_4x4_neighbors); - init_scan_neighbors(vp9_col_scan_4x4, 4, - vp9_col_scan_4x4_neighbors); - init_scan_neighbors(vp9_default_zig_zag1d_8x8, 8, - vp9_default_zig_zag1d_8x8_neighbors); - init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16, - vp9_default_zig_zag1d_16x16_neighbors); - init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32, - vp9_default_zig_zag1d_32x32_neighbors); -} - -const int *vp9_get_coef_neighbors_handle(const int *scan) { - if (scan == vp9_default_zig_zag1d_4x4) { - return vp9_default_zig_zag1d_4x4_neighbors; - } else if (scan == vp9_row_scan_4x4) { - return vp9_row_scan_4x4_neighbors; - } else if (scan == vp9_col_scan_4x4) { - return vp9_col_scan_4x4_neighbors; - } else if (scan == vp9_default_zig_zag1d_8x8) { - return vp9_default_zig_zag1d_8x8_neighbors; - } else if (scan == vp9_default_zig_zag1d_16x16) { - return vp9_default_zig_zag1d_16x16_neighbors; - } else if (scan == vp9_default_zig_zag1d_32x32) { - return vp9_default_zig_zag1d_32x32_neighbors; +// This function updates and then returns n AC coefficient context +// This is currently a placeholder function to allow experimentation +// using various context models based on the energy earlier tokens +// within the current block. +// +// For now it just returns the previously used context. +int vp9_get_coef_context(int * recent_energy, int token) { + // int token_energy; + // int av_energy; + + // Placeholder code for experiments with token energy + // as a coefficient context. + /*token_energy = ((token != DCT_EOB_TOKEN) ? 
token : 0); + if (token_energy) { + av_energy = (token_energy + *recent_energy + 1) >> 1; + } else { + av_energy = 0; } - return vp9_default_zig_zag1d_4x4_neighbors; -} + *recent_energy = token_energy;*/ -int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc, - const int *neigbor_handle, int rc) { - static int neighbors_used = MAX_NEIGHBORS; // maximum is MAX_NEIGHBORS - const int *nb = neigbor_handle + rc * MAX_NEIGHBORS; - int i, v, val = 0, n = 0; - for (i = 0; i < neighbors_used; ++i) { - if (nb[i] == -1 || (nb[i] == 0 && nodc)) { - continue; - } - v = abs(qcoeff_ptr[nb[i]]); - val = (v > val ? v : val); - n++; - } - if (n == 0) - return 0; - else if (val <= 1) - return val; - else if (val < 4) - return 2; - else - return 3; -} -#endif /* CONFIG_NEWCOEFCONTEXT */ + return vp9_prev_token_class[token]; +}; void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4, diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 84e5255c2..1979638d4 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -106,9 +106,6 @@ typedef vp9_prob vp9_coeff_probs[COEF_BANDS][PREV_COEF_CONTEXTS] #define SUBEXP_PARAM 4 /* Subexponential code parameter */ #define MODULUS_PARAM 13 /* Modulus parameter */ -extern DECLARE_ALIGNED(16, const uint8_t, - vp9_prev_token_class[MAX_ENTROPY_TOKENS]); - struct VP9Common; void vp9_default_coef_probs(struct VP9Common *); extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]); @@ -129,26 +126,5 @@ static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } -#if CONFIG_NEWCOEFCONTEXT - -#define MAX_NEIGHBORS 5 -#define NEWCOEFCONTEXT_BAND_COND(b) ((b) >= 1) -void vp9_init_neighbors(void); - -const int *vp9_get_coef_neighbors_handle(const int *scan); -int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc, - const int *neigbor_handle, int rc); -extern 
DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_4x4_neighbors[ - 16 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_row_scan_4x4_neighbors[ - 16 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_col_scan_4x4_neighbors[ - 16 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[ - 64 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[ - 256 * MAX_NEIGHBORS]); -extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[ - 1024 * MAX_NEIGHBORS]); -#endif // CONFIG_NEWCOEFCONTEXT +extern int vp9_get_coef_context(int * recent_energy, int token); #endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index 74fce7aad..c42aab1a5 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h @@ -98,7 +98,7 @@ static int left_block_mv(const MACROBLOCKD *xd, b += 4; } - return (cur_mb->bmi + b - 1)->as_mv.first.as_int; + return (cur_mb->bmi + b - 1)->as_mv[0].as_int; } static int left_block_second_mv(const MACROBLOCKD *xd, @@ -117,8 +117,8 @@ static int left_block_second_mv(const MACROBLOCKD *xd, } return cur_mb->mbmi.second_ref_frame > 0 ? - (cur_mb->bmi + b - 1)->as_mv.second.as_int : - (cur_mb->bmi + b - 1)->as_mv.first.as_int; + (cur_mb->bmi + b - 1)->as_mv[1].as_int : + (cur_mb->bmi + b - 1)->as_mv[0].as_int; } static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { @@ -131,7 +131,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { b += 16; } - return (cur_mb->bmi + b - 4)->as_mv.first.as_int; + return (cur_mb->bmi + b - 4)->as_mv[0].as_int; } static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { @@ -146,8 +146,8 @@ static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) } return cur_mb->mbmi.second_ref_frame > 0 ? 
- (cur_mb->bmi + b - 4)->as_mv.second.as_int : - (cur_mb->bmi + b - 4)->as_mv.first.as_int; + (cur_mb->bmi + b - 4)->as_mv[1].as_int : + (cur_mb->bmi + b - 4)->as_mv[0].as_int; } static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 2f847dc78..2fec98e50 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -476,12 +476,13 @@ void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) { } } -void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr, +void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride) { int r, c; - short tmp[16]; - vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1); + int16_t dc = input_dc; + int16_t tmp[16]; + vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1); for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { @@ -1152,8 +1153,6 @@ void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) { *output = (out + 32) >> 6; } - -#if !CONFIG_DWTDCTHYBRID void idct32_1d(int16_t *input, int16_t *output) { int16_t step1[32], step2[32]; int temp1, temp2; @@ -1521,7 +1520,6 @@ void idct32_1d(int16_t *input, int16_t *output) { output[31] = step1[0] - step1[31]; } - void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { int16_t out[32 * 32]; int16_t *outptr = &out[0]; @@ -1554,792 +1552,3 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { out = dct_const_round_shift(tmp); *output = (out + 32) >> 6; } - -#else // !CONFIG_DWTDCTHYBRID - -#if DWT_TYPE == 53 - -// Note: block length must be even for this implementation -static void synthesis_53_row(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, *a, *b; - int n; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ -= (r + (*b) + 1) >> 1; - r = *b++; - } - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *x++ 
= ((r = *a++) + 1) >> 1; - *x++ = *b++ + ((r + (*a) + 2) >> 2); - } - *x++ = ((r = *a) + 1) >> 1; - *x++ = *b + ((r + 1) >> 1); -} - -static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, *a, *b; - int n; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ -= (r + (*b) + 1) >> 1; - r = *b++; - } - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - r = *a++; - *x++ = r; - *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1); - } - *x++ = *a; - *x++ = ((*b) << 1) + *a; -} - -static void dyadic_synthesize_53(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { - int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * DWT_MAX_LENGTH]; - - th[0] = hh; - tw[0] = hw; - for (i = 1; i <= levels; i++) { - th[i] = (th[i - 1] + 1) >> 1; - tw[i] = (tw[i - 1] + 1) >> 1; - } - for (lv = levels - 1; lv >= 0; lv--) { - nh = th[lv]; - nw = tw[lv]; - hh = th[lv + 1]; - hw = tw[lv + 1]; - if ((nh < 2) || (nw < 2)) continue; - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i] = c[i * pitch_c + j]; - synthesis_53_col(nh, buffer, buffer + hh, buffer + nh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i + nh]; - } - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer)); - synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]); - } - } - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ? 
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) : - -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS); - } - } -} - -#elif DWT_TYPE == 26 - -// Note: block length must be even for this implementation -static void synthesis_26_row(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, s, *a, *b; - int i, n = length >> 1; - - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ += (r - a[1] + 4) >> 3; - r = *a++; - } - *b += (r - *a + 4) >> 3; - } - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - s = *b++; - r = *a++; - *x++ = (r + s + 1) >> 1; - *x++ = (r - s + 1) >> 1; - } -} - -static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass, - int16_t *x) { - int16_t r, s, *a, *b; - int i, n = length >> 1; - - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ += (r - a[1] + 4) >> 3; - r = *a++; - } - *b += (r - *a + 4) >> 3; - } - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - s = *b++; - r = *a++; - *x++ = r + s; - *x++ = r - s; - } -} - -static void dyadic_synthesize_26(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { - int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - int16_t buffer[2 * DWT_MAX_LENGTH]; - - th[0] = hh; - tw[0] = hw; - for (i = 1; i <= levels; i++) { - th[i] = (th[i - 1] + 1) >> 1; - tw[i] = (tw[i - 1] + 1) >> 1; - } - for (lv = levels - 1; lv >= 0; lv--) { - nh = th[lv]; - nw = tw[lv]; - hh = th[lv + 1]; - hw = tw[lv + 1]; - if ((nh < 2) || (nw < 2)) continue; - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i] = c[i * pitch_c + j]; - synthesis_26_col(nh, buffer, buffer + hh, buffer + nh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i + nh]; - } - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer)); - synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]); - 
} - } - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ? - ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) : - -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS); - } - } -} - -#elif DWT_TYPE == 97 - -static void synthesis_97(int length, double *lowpass, double *highpass, - double *x) { - static const double a_predict1 = -1.586134342; - static const double a_update1 = -0.05298011854; - static const double a_predict2 = 0.8829110762; - static const double a_update2 = 0.4435068522; - static const double s_low = 1.149604398; - static const double s_high = 1/1.149604398; - static const double inv_s_low = 1 / s_low; - static const double inv_s_high = 1 / s_high; - int i; - double y[DWT_MAX_LENGTH]; - // Undo pack and scale - for (i = 0; i < length / 2; i++) { - y[i * 2] = lowpass[i] * inv_s_low; - y[i * 2 + 1] = highpass[i] * inv_s_high; - } - memcpy(x, y, sizeof(*y) * length); - // Undo update 2 - for (i = 2; i < length; i += 2) { - x[i] -= a_update2 * (x[i-1] + x[i+1]); - } - x[0] -= 2 * a_update2 * x[1]; - // Undo predict 2 - for (i = 1; i < length - 2; i += 2) { - x[i] -= a_predict2 * (x[i - 1] + x[i + 1]); - } - x[length - 1] -= 2 * a_predict2 * x[length - 2]; - // Undo update 1 - for (i = 2; i < length; i += 2) { - x[i] -= a_update1 * (x[i - 1] + x[i + 1]); - } - x[0] -= 2 * a_update1 * x[1]; - // Undo predict 1 - for (i = 1; i < length - 2; i += 2) { - x[i] -= a_predict1 * (x[i - 1] + x[i + 1]); - } - x[length - 1] -= 2 * a_predict1 * x[length - 2]; -} - -static void dyadic_synthesize_97(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { - int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - double buffer[2 * DWT_MAX_LENGTH]; - double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; - - th[0] = hh; - tw[0] = hw; - for (i = 1; i <= levels; i++) { - th[i] = (th[i - 1] + 1) >> 1; - tw[i] = (tw[i - 1] + 1) >> 1; - } - for (lv = levels - 
1; lv >= 0; lv--) { - nh = th[lv]; - nw = tw[lv]; - hh = th[lv + 1]; - hw = tw[lv + 1]; - if ((nh < 2) || (nw < 2)) continue; - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i] = c[i * pitch_c + j]; - synthesis_97(nh, buffer, buffer + hh, buffer + nh); - for (i = 0; i < nh; i++) - y[i * DWT_MAX_LENGTH + j] = buffer[i + nh]; - } - for (i = 0; i < nh; i++) { - memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer)); - synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]); - } - } - for (i = 0; i < height; i++) - for (j = 0; j < width; j++) - x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] / - (1 << DWT_PRECISION_BITS)); -} - -#endif // DWT_TYPE - -// TODO(debargha): Implement scaling differently so as not to have to use the -// floating point 16x16 dct -static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) { - static const double C1 = 0.995184726672197; - static const double C2 = 0.98078528040323; - static const double C3 = 0.956940335732209; - static const double C4 = 0.923879532511287; - static const double C5 = 0.881921264348355; - static const double C6 = 0.831469612302545; - static const double C7 = 0.773010453362737; - static const double C8 = 0.707106781186548; - static const double C9 = 0.634393284163646; - static const double C10 = 0.555570233019602; - static const double C11 = 0.471396736825998; - static const double C12 = 0.38268343236509; - static const double C13 = 0.290284677254462; - static const double C14 = 0.195090322016128; - static const double C15 = 0.098017140329561; - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double step[16]; - double intermediate[16]; - double temp1, temp2; - - - // step 1 and 2 - step[ 0] = input[0] + input[8]; - step[ 1] = input[0] - input[8]; - - temp1 = input[4]*C12; - temp2 = input[12]*C4; - - temp1 -= temp2; - temp1 *= C8; - - step[ 2] = 2*(temp1); - - temp1 = input[4]*C4; - temp2 = input[12]*C12; - temp1 += temp2; - temp1 = (temp1); - 
temp1 *= C8; - step[ 3] = 2*(temp1); - - temp1 = input[2]*C8; - temp1 = 2*(temp1); - temp2 = input[6] + input[10]; - - step[ 4] = temp1 + temp2; - step[ 5] = temp1 - temp2; - - temp1 = input[14]*C8; - temp1 = 2*(temp1); - temp2 = input[6] - input[10]; - - step[ 6] = temp2 - temp1; - step[ 7] = temp2 + temp1; - - // for odd input - temp1 = input[3]*C12; - temp2 = input[13]*C4; - temp1 += temp2; - temp1 = (temp1); - temp1 *= C8; - intermediate[ 8] = 2*(temp1); - - temp1 = input[3]*C4; - temp2 = input[13]*C12; - temp2 -= temp1; - temp2 = (temp2); - temp2 *= C8; - intermediate[ 9] = 2*(temp2); - - intermediate[10] = 2*(input[9]*C8); - intermediate[11] = input[15] - input[1]; - intermediate[12] = input[15] + input[1]; - intermediate[13] = 2*((input[7]*C8)); - - temp1 = input[11]*C12; - temp2 = input[5]*C4; - temp2 -= temp1; - temp2 = (temp2); - temp2 *= C8; - intermediate[14] = 2*(temp2); - - temp1 = input[11]*C4; - temp2 = input[5]*C12; - temp1 += temp2; - temp1 = (temp1); - temp1 *= C8; - intermediate[15] = 2*(temp1); - - step[ 8] = intermediate[ 8] + intermediate[14]; - step[ 9] = intermediate[ 9] + intermediate[15]; - step[10] = intermediate[10] + intermediate[11]; - step[11] = intermediate[10] - intermediate[11]; - step[12] = intermediate[12] + intermediate[13]; - step[13] = intermediate[12] - intermediate[13]; - step[14] = intermediate[ 8] - intermediate[14]; - step[15] = intermediate[ 9] - intermediate[15]; - - // step 3 - output[0] = step[ 0] + step[ 3]; - output[1] = step[ 1] + step[ 2]; - output[2] = step[ 1] - step[ 2]; - output[3] = step[ 0] - step[ 3]; - - temp1 = step[ 4]*C14; - temp2 = step[ 7]*C2; - temp1 -= temp2; - output[4] = (temp1); - - temp1 = step[ 4]*C2; - temp2 = step[ 7]*C14; - temp1 += temp2; - output[7] = (temp1); - - temp1 = step[ 5]*C10; - temp2 = step[ 6]*C6; - temp1 -= temp2; - output[5] = (temp1); - - temp1 = step[ 5]*C6; - temp2 = step[ 6]*C10; - temp1 += temp2; - output[6] = (temp1); - - output[8] = step[ 8] + step[11]; - output[9] = 
step[ 9] + step[10]; - output[10] = step[ 9] - step[10]; - output[11] = step[ 8] - step[11]; - output[12] = step[12] + step[15]; - output[13] = step[13] + step[14]; - output[14] = step[13] - step[14]; - output[15] = step[12] - step[15]; - - // output 4 - step[ 0] = output[0] + output[7]; - step[ 1] = output[1] + output[6]; - step[ 2] = output[2] + output[5]; - step[ 3] = output[3] + output[4]; - step[ 4] = output[3] - output[4]; - step[ 5] = output[2] - output[5]; - step[ 6] = output[1] - output[6]; - step[ 7] = output[0] - output[7]; - - temp1 = output[8]*C7; - temp2 = output[15]*C9; - temp1 -= temp2; - step[ 8] = (temp1); - - temp1 = output[9]*C11; - temp2 = output[14]*C5; - temp1 += temp2; - step[ 9] = (temp1); - - temp1 = output[10]*C3; - temp2 = output[13]*C13; - temp1 -= temp2; - step[10] = (temp1); - - temp1 = output[11]*C15; - temp2 = output[12]*C1; - temp1 += temp2; - step[11] = (temp1); - - temp1 = output[11]*C1; - temp2 = output[12]*C15; - temp2 -= temp1; - step[12] = (temp2); - - temp1 = output[10]*C13; - temp2 = output[13]*C3; - temp1 += temp2; - step[13] = (temp1); - - temp1 = output[9]*C5; - temp2 = output[14]*C11; - temp2 -= temp1; - step[14] = (temp2); - - temp1 = output[8]*C9; - temp2 = output[15]*C7; - temp1 += temp2; - step[15] = (temp1); - - // step 5 - output[0] = (step[0] + step[15]); - output[1] = (step[1] + step[14]); - output[2] = (step[2] + step[13]); - output[3] = (step[3] + step[12]); - output[4] = (step[4] + step[11]); - output[5] = (step[5] + step[10]); - output[6] = (step[6] + step[ 9]); - output[7] = (step[7] + step[ 8]); - - output[15] = (step[0] - step[15]); - output[14] = (step[1] - step[14]); - output[13] = (step[2] - step[13]); - output[12] = (step[3] - step[12]); - output[11] = (step[4] - step[11]); - output[10] = (step[5] - step[10]); - output[9] = (step[6] - step[ 9]); - output[8] = (step[7] - step[ 8]); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -static void vp9_short_idct16x16_c_f(int16_t 
*input, int16_t *output, int pitch, - int scale) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double out[16*16], out2[16*16]; - const int short_pitch = pitch >> 1; - int i, j; - // First transform rows - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = input[j + i*short_pitch]; - butterfly_16x16_idct_1d_f(temp_in, temp_out); - for (j = 0; j < 16; ++j) - out[j + i*16] = temp_out[j]; - } - // Then transform columns - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - butterfly_16x16_idct_1d_f(temp_in, temp_out); - for (j = 0; j < 16; ++j) - out2[j*16 + i] = temp_out[j]; - } - for (i = 0; i < 16*16; ++i) - output[i] = round(out2[i] / (128 >> scale)); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -static void idct8_1d_f(double *x) { - int i, j; - double t[8]; - static const double idctmat[64] = { - 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127, - 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064, - 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064, - -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098, - 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162, - -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127, - 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098, - 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162, - 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098, - 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162, - 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161, - -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127, - 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065, - -0.35355339059327, 
0.49039264020162, -0.46193976625564, 0.2777851165098, - 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127, - 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064 - }; - for (i = 0; i < 8; ++i) { - t[i] = 0; - for (j = 0; j < 8; ++j) - t[i] += idctmat[i * 8 + j] * x[j]; - } - for (i = 0; i < 8; ++i) { - x[i] = t[i]; - } -} - -static void vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch, - int scale) { - double X[8 * 8], Y[8]; - int i, j; - int shortpitch = pitch >> 1; - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - X[i * 8 + j] = (double)coefs[i * shortpitch + j]; - } - } - for (i = 0; i < 8; i++) - idct8_1d_f(X + 8 * i); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; ++j) - Y[j] = X[i + 8 * j]; - idct8_1d_f(Y); - for (j = 0; j < 8; ++j) - X[i + 8 * j] = Y[j]; - } - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale)); - } - } - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -#define multiply_bits(d, n) ((n) < 0 ? 
(d) >> (n) : (d) << (n)) - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - -void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 32x32 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[16 * 16]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[32 * 32]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct16x16_c_f(input, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); - } - for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } -#if DWT_TYPE == 26 - dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); -#endif -} - -#elif DWTDCT_TYPE == DWTDCT16X16 - -void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 32x32 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[16 * 16]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[32 * 32]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct16x16_c_f(input, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; 
++i) { - vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16, - sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16, - sizeof(*buffer2) * 16); - } -#if DWT_TYPE == 26 - dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); -#endif -} - -#elif DWTDCT_TYPE == DWTDCT8X8 - -void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 32x32 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[8 * 8]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[32 * 32]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct8x8_c_f(input, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8); - } - vp9_short_idct8x8_c_f(input + 8, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8); - } - vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - 
vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8, - sizeof(*buffer2) * 8); - } - vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch, - 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) { - vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8, - sizeof(*buffer2) * 8); - } - for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 32 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); - } - } -#if DWT_TYPE == 26 - dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32); -#endif -} - -#endif - -#if CONFIG_TX64X64 -void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { - // assume output is a 64x64 buffer - // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - int16_t buffer[16 * 16]; - // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - int16_t buffer2[64 * 64]; - // Note: pitch is in bytes, short_pitch is in short units - const int short_pitch = pitch >> 1; - int i, j; - - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the idct16x16 function - vp9_short_idct16x16_c_f(input, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16); - } -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - for (i = 0; i < 16; ++i) { - for (j = 16; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 16; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } -#elif DWTDCT_TYPE == DWTDCT16X16 - 
vp9_short_idct16x16_c_f(input + 16, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64 + 16, buffer + i * 16, sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64 + 16 * 64, buffer + i * 16, - sizeof(*buffer2) * 16); - } - vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch, - 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 64 + 16 * 65, buffer + i * 16, - sizeof(*buffer2) * 16); - } - - // Copying and scaling highest bands into buffer2 - for (i = 0; i < 32; ++i) { - for (j = 32; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 32; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - buffer2[i * 64 + j] = - multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); - } - } -#endif // DWTDCT_TYPE - -#if DWT_TYPE == 26 - dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64); -#elif DWT_TYPE == 97 - dyadic_synthesize_97(2, 64, 64, buffer2, 64, output, 64); -#elif DWT_TYPE == 53 - dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64); -#endif -} -#endif // CONFIG_TX64X64 -#endif // !CONFIG_DWTDCTHYBRID diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index e7cfe207b..cb9a3db63 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -32,9 +32,9 @@ static void recon_dcblock_8x8(MACROBLOCKD *xd) { void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) { BLOCKD *b = &xd->block[block]; if (b->eob <= 1) - xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch); + xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch); else - xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch); + xd->inv_txm4x4(b->dqcoeff, b->diff, pitch); } void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { @@ -44,7 +44,7 @@ void 
vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { if (has_2nd_order) { /* do 2nd order transform on the dc block */ - vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff); + xd->inv_2ndtxm4x4(blockd[24].dqcoeff, blockd[24].diff); recon_dcblock(xd); } diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index 0b7d98a58..d93b7d5fb 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -177,6 +177,7 @@ extern "C" int arnr_type; int tile_columns; + int tile_rows; struct vpx_fixed_buf two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 5e57228b4..6295514ea 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -279,8 +279,10 @@ typedef struct VP9Common { int error_resilient_mode; int frame_parallel_decoding_mode; - int tile_columns; - int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_idx; + int tile_columns, log2_tile_columns; + int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_col_idx; + int tile_rows, log2_tile_rows; + int cur_tile_mb_row_start, cur_tile_mb_row_end, cur_tile_row_idx; } VP9_COMMON; static int get_free_fb(VP9_COMMON *cm) { diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index d4435d872..b75525e2c 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -154,7 +154,7 @@ void vp9_build_inter_predictors_b(BLOCKD *d, int pitch, int_mv mv; ptr_base = *(d->base_pre); - mv.as_int = d->bmi.as_mv.first.as_int; + mv.as_int = d->bmi.as_mv[0].as_int; ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + (mv.as_mv.col >> 3); @@ -179,7 +179,7 @@ void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch, int_mv mv; ptr_base = *(d->base_second_pre); - mv.as_int = d->bmi.as_mv.second.as_int; + mv.as_int = d->bmi.as_mv[1].as_int; ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + (mv.as_mv.col >> 3); @@ -197,7 +197,7 @@ void 
vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { int_mv mv; ptr_base = *(d->base_pre); - mv.as_int = d->bmi.as_mv.first.as_int; + mv.as_int = d->bmi.as_mv[0].as_int; ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + (mv.as_mv.col >> 3); @@ -222,7 +222,7 @@ void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd, int_mv mv; ptr_base = *(d->base_second_pre); - mv.as_int = d->bmi.as_mv.second.as_int; + mv.as_int = d->bmi.as_mv[1].as_int; ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + (mv.as_mv.col >> 3); @@ -240,7 +240,7 @@ static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { int_mv mv; ptr_base = *(d->base_pre); - mv.as_int = d->bmi.as_mv.first.as_int; + mv.as_int = d->bmi.as_mv[0].as_int; ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride + (mv.as_mv.col >> 3); @@ -264,38 +264,38 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { int voffset = 20 + i * 2 + j; int temp; - temp = blockd[yoffset ].bmi.as_mv.first.as_mv.row - + blockd[yoffset + 1].bmi.as_mv.first.as_mv.row - + blockd[yoffset + 4].bmi.as_mv.first.as_mv.row - + blockd[yoffset + 5].bmi.as_mv.first.as_mv.row; + temp = blockd[yoffset ].bmi.as_mv[0].as_mv.row + + blockd[yoffset + 1].bmi.as_mv[0].as_mv.row + + blockd[yoffset + 4].bmi.as_mv[0].as_mv.row + + blockd[yoffset + 5].bmi.as_mv[0].as_mv.row; if (temp < 0) temp -= 4; else temp += 4; - xd->block[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) & + xd->block[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) & xd->fullpixel_mask; - temp = blockd[yoffset ].bmi.as_mv.first.as_mv.col - + blockd[yoffset + 1].bmi.as_mv.first.as_mv.col - + blockd[yoffset + 4].bmi.as_mv.first.as_mv.col - + blockd[yoffset + 5].bmi.as_mv.first.as_mv.col; + temp = blockd[yoffset ].bmi.as_mv[0].as_mv.col + + blockd[yoffset + 1].bmi.as_mv[0].as_mv.col + + blockd[yoffset + 4].bmi.as_mv[0].as_mv.col + + blockd[yoffset + 5].bmi.as_mv[0].as_mv.col; if (temp < 0) temp -= 4; else temp += 4; - 
blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) & + blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) & xd->fullpixel_mask; - blockd[voffset].bmi.as_mv.first.as_mv.row = - blockd[uoffset].bmi.as_mv.first.as_mv.row; - blockd[voffset].bmi.as_mv.first.as_mv.col = - blockd[uoffset].bmi.as_mv.first.as_mv.col; + blockd[voffset].bmi.as_mv[0].as_mv.row = + blockd[uoffset].bmi.as_mv[0].as_mv.row; + blockd[voffset].bmi.as_mv[0].as_mv.col = + blockd[uoffset].bmi.as_mv[0].as_mv.col; if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - temp = blockd[yoffset ].bmi.as_mv.second.as_mv.row - + blockd[yoffset + 1].bmi.as_mv.second.as_mv.row - + blockd[yoffset + 4].bmi.as_mv.second.as_mv.row - + blockd[yoffset + 5].bmi.as_mv.second.as_mv.row; + temp = blockd[yoffset ].bmi.as_mv[1].as_mv.row + + blockd[yoffset + 1].bmi.as_mv[1].as_mv.row + + blockd[yoffset + 4].bmi.as_mv[1].as_mv.row + + blockd[yoffset + 5].bmi.as_mv[1].as_mv.row; if (temp < 0) { temp -= 4; @@ -303,13 +303,13 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { temp += 4; } - blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) & + blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) & xd->fullpixel_mask; - temp = blockd[yoffset ].bmi.as_mv.second.as_mv.col - + blockd[yoffset + 1].bmi.as_mv.second.as_mv.col - + blockd[yoffset + 4].bmi.as_mv.second.as_mv.col - + blockd[yoffset + 5].bmi.as_mv.second.as_mv.col; + temp = blockd[yoffset ].bmi.as_mv[1].as_mv.col + + blockd[yoffset + 1].bmi.as_mv[1].as_mv.col + + blockd[yoffset + 4].bmi.as_mv[1].as_mv.col + + blockd[yoffset + 5].bmi.as_mv[1].as_mv.col; if (temp < 0) { temp -= 4; @@ -317,13 +317,13 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { temp += 4; } - blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) & + blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) & xd->fullpixel_mask; - blockd[voffset].bmi.as_mv.second.as_mv.row = - blockd[uoffset].bmi.as_mv.second.as_mv.row; - blockd[voffset].bmi.as_mv.second.as_mv.col = - 
blockd[uoffset].bmi.as_mv.second.as_mv.col; + blockd[voffset].bmi.as_mv[1].as_mv.row = + blockd[uoffset].bmi.as_mv[1].as_mv.row; + blockd[voffset].bmi.as_mv[1].as_mv.col = + blockd[uoffset].bmi.as_mv[1].as_mv.col; } } } @@ -332,7 +332,7 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { BLOCKD *d0 = &blockd[i]; BLOCKD *d1 = &blockd[i + 1]; - if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int) + if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int) build_inter_predictors2b(xd, d0, 8); else { vp9_build_inter_predictors_b(d0, 8, &xd->subpix); @@ -717,15 +717,15 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) { blockd[10].bmi = xd->mode_info_context->bmi[10]; if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.first.as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.first.as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.first.as_mv, xd); - clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.first.as_mv, xd); + clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[0].as_mv, xd); + clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[0].as_mv, xd); + clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[0].as_mv, xd); + clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[0].as_mv, xd); if (mbmi->second_ref_frame > 0) { - clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.second.as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.second.as_mv, xd); - clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.second.as_mv, xd); - clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.second.as_mv, xd); + clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[1].as_mv, xd); + clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[1].as_mv, xd); + clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[1].as_mv, xd); + clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[1].as_mv, xd); } } @@ -750,15 +750,15 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) { blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[i + 
0].bmi.as_mv.first.as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.first.as_mv, xd); + clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[0].as_mv, xd); + clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[0].as_mv, xd); if (mbmi->second_ref_frame > 0) { - clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.second.as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.second.as_mv, xd); + clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[1].as_mv, xd); + clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[1].as_mv, xd); } } - if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int) + if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int) build_inter_predictors2b(xd, d0, 16); else { vp9_build_inter_predictors_b(d0, 16, &xd->subpix); @@ -776,7 +776,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) { BLOCKD *d0 = &blockd[i]; BLOCKD *d1 = &blockd[i + 1]; - if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int) + if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int) build_inter_predictors2b(xd, d0, 8); else { vp9_build_inter_predictors_b(d0, 8, &xd->subpix); @@ -803,44 +803,44 @@ void build_4x4uvmvs(MACROBLOCKD *xd) { int temp; - temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.row - + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.row - + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.row - + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.row; + temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.row + + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.row + + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.row + + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.row; if (temp < 0) temp -= 4; else temp += 4; - blockd[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) & + blockd[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) & xd->fullpixel_mask; - temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.col - + xd->mode_info_context->bmi[yoffset + 
1].as_mv.first.as_mv.col - + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.col - + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.col; + temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.col + + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.col + + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.col + + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.col; if (temp < 0) temp -= 4; else temp += 4; - blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) & + blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) & xd->fullpixel_mask; // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd); + clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd); // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd); + clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd); - blockd[voffset].bmi.as_mv.first.as_mv.row = - blockd[uoffset].bmi.as_mv.first.as_mv.row; - blockd[voffset].bmi.as_mv.first.as_mv.col = - blockd[uoffset].bmi.as_mv.first.as_mv.col; + blockd[voffset].bmi.as_mv[0].as_mv.row = + blockd[uoffset].bmi.as_mv[0].as_mv.row; + blockd[voffset].bmi.as_mv[0].as_mv.col = + blockd[uoffset].bmi.as_mv[0].as_mv.col; if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.row - + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.row - + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.row - + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.row; + temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.row + + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.row + + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.row + + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.row; if (temp < 0) { temp -= 4; @@ -848,13 +848,13 @@ void 
build_4x4uvmvs(MACROBLOCKD *xd) { temp += 4; } - blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) & + blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) & xd->fullpixel_mask; - temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.col - + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.col - + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.col - + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.col; + temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.col + + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.col + + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.col + + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.col; if (temp < 0) { temp -= 4; @@ -862,21 +862,21 @@ void build_4x4uvmvs(MACROBLOCKD *xd) { temp += 4; } - blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) & + blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) & xd->fullpixel_mask; // if (mbmi->need_to_clamp_mvs) clamp_uvmv_to_umv_border( - &blockd[uoffset].bmi.as_mv.second.as_mv, xd); + &blockd[uoffset].bmi.as_mv[1].as_mv, xd); // if (mbmi->need_to_clamp_mvs) clamp_uvmv_to_umv_border( - &blockd[uoffset].bmi.as_mv.second.as_mv, xd); + &blockd[uoffset].bmi.as_mv[1].as_mv, xd); - blockd[voffset].bmi.as_mv.second.as_mv.row = - blockd[uoffset].bmi.as_mv.second.as_mv.row; - blockd[voffset].bmi.as_mv.second.as_mv.col = - blockd[uoffset].bmi.as_mv.second.as_mv.col; + blockd[voffset].bmi.as_mv[1].as_mv.row = + blockd[uoffset].bmi.as_mv[1].as_mv.row; + blockd[voffset].bmi.as_mv[1].as_mv.col = + blockd[uoffset].bmi.as_mv[1].as_mv.col; } } } diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 066989272..3bd1f250f 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -329,10 +329,15 @@ specialize vp9_dc_only_idct_add if [ "$CONFIG_LOSSLESS" = "yes" ]; then prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch" +specialize 
vp9_short_inv_walsh4x4_1_x8 prototype void vp9_short_inv_walsh4x4_x8 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_inv_walsh4x4_x8 prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" +specialize vp9_dc_only_inv_walsh_add prototype void vp9_short_inv_walsh4x4_1_lossless "int16_t *in, int16_t *out" +specialize vp9_short_inv_walsh4x4_1_lossless prototype void vp9_short_inv_walsh4x4_lossless "int16_t *in, int16_t *out" +specialize vp9_short_inv_walsh4x4_lossless fi prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad" diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c new file mode 100644 index 000000000..29f89b618 --- /dev/null +++ b/vp9/common/vp9_tile_common.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_tile_common.h" + +static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off, + int *max_tile_off, int tile_idx, + int log2_n_tiles, int n_mbs) { + const int n_sbs = (n_mbs + 3) >> 2; + const int sb_off1 = (tile_idx * n_sbs) >> log2_n_tiles; + const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles; + + *min_tile_off = (sb_off1 << 2) > n_mbs ? n_mbs : (sb_off1 << 2); + *max_tile_off = (sb_off2 << 2) > n_mbs ? 
n_mbs : (sb_off2 << 2); +} + +void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx) { + cm->cur_tile_col_idx = tile_col_idx; + vp9_get_tile_offsets(cm, &cm->cur_tile_mb_col_start, + &cm->cur_tile_mb_col_end, tile_col_idx, + cm->log2_tile_columns, cm->mb_cols); +} + +void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx) { + cm->cur_tile_row_idx = tile_row_idx; + vp9_get_tile_offsets(cm, &cm->cur_tile_mb_row_start, + &cm->cur_tile_mb_row_end, tile_row_idx, + cm->log2_tile_rows, cm->mb_rows); +} + +#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6) +#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6) + +void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr, + int *delta_log2_n_tiles) { + const int sb_cols = (cm->mb_cols + 3) >> 2; + int min_log2_n_tiles, max_log2_n_tiles; + + for (max_log2_n_tiles = 0; + (sb_cols >> max_log2_n_tiles) >= MIN_TILE_WIDTH_SBS; + max_log2_n_tiles++) {} + for (min_log2_n_tiles = 0; + (MAX_TILE_WIDTH_SBS << min_log2_n_tiles) < sb_cols; + min_log2_n_tiles++) {} + + *min_log2_n_tiles_ptr = min_log2_n_tiles; + *delta_log2_n_tiles = max_log2_n_tiles - min_log2_n_tiles; +} diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h new file mode 100644 index 000000000..92bf50897 --- /dev/null +++ b/vp9/common/vp9_tile_common.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_TILE_COMMON_H_ +#define VP9_COMMON_VP9_TILE_COMMON_H_ + +#include "vp9/common/vp9_onyxc_int.h" + +#define MIN_TILE_WIDTH 256 +#define MAX_TILE_WIDTH 4096 + +extern void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx); + +extern void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx); + +extern void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles, + int *delta_log2_n_tiles); + +#endif // VP9_COMMON_VP9_TILE_COMMON_H_ diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index 3e2346f29..fbc95b6ce 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -65,6 +65,20 @@ void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr, unsigned int output_height, const short *filter); +void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + +void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -87,6 +101,14 @@ void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride, dst += 8; w -= 8; } + while (w >= 4) { + vp9_filter_block1d4_h8_ssse3(src, src_stride, + dst, dst_stride, + h, filter_x); + src += 4; + dst += 4; + w -= 4; + } } if (w) { vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride, @@ -117,6 +139,14 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride, dst += 8; w -= 8; } + while (w >= 4) { + vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride, + dst, dst_stride, + h, filter_y); + src += 4; + dst += 4; + w -= 4; + } } if (w) { vp9_convolve8_vert_c(src, src_stride, dst, dst_stride, @@ 
-156,6 +186,15 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride, h, filter_y); return; } + if (w == 4) { + vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride, + fdata2, 16, + h + 7, filter_x); + vp9_filter_block1d4_v8_ssse3(fdata2, 16, + dst, dst_stride, + h, filter_y); + return; + } } vp9_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm index c6d65e904..5f039454a 100644 --- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm +++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm @@ -30,6 +30,124 @@ ; unsigned int output_height, ; short *filter ;) +global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE +sym(vp9_filter_block1d4_v8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + mov rdx, arg(5) ;filter ptr + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + mov rcx, 0x0400040 + + movdqa xmm4, [rdx] ;load filters + movd xmm5, rcx + packsswb xmm4, xmm4 + pshuflw xmm0, xmm4, 0b ;k0_k1 + pshuflw xmm1, xmm4, 01010101b ;k2_k3 + pshuflw xmm2, xmm4, 10101010b ;k4_k5 + pshuflw xmm3, xmm4, 11111111b ;k6_k7 + + punpcklqdq xmm0, xmm0 + punpcklqdq xmm1, xmm1 + punpcklqdq xmm2, xmm2 + punpcklqdq xmm3, xmm3 + + movdqa k0k1, xmm0 + movdqa k2k3, xmm1 + pshufd xmm5, xmm5, 0 + movdqa k4k5, xmm2 + movdqa k6k7, xmm3 + movdqa krd, xmm5 + + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ;out_pitch +%endif + mov rax, rsi + movsxd rcx, DWORD PTR arg(4) ;output_height + add rax, rdx + + lea rbx, [rdx + rdx*4] + add rbx, rdx ;pitch * 6 + +.vp9_filter_block1d4_v8_ssse3_loop: + movd xmm0, [rsi] ;A + movd xmm1, [rsi + rdx] ;B + movd xmm2, [rsi + rdx * 2] 
;C + movd xmm3, [rax + rdx * 2] ;D + movd xmm4, [rsi + rdx * 4] ;E + movd xmm5, [rax + rdx * 4] ;F + + punpcklbw xmm0, xmm1 ;A B + punpcklbw xmm2, xmm3 ;C D + punpcklbw xmm4, xmm5 ;E F + + movd xmm6, [rsi + rbx] ;G + movd xmm7, [rax + rbx] ;H + + pmaddubsw xmm0, k0k1 + pmaddubsw xmm2, k2k3 + punpcklbw xmm6, xmm7 ;G H + pmaddubsw xmm4, k4k5 + pmaddubsw xmm6, k6k7 + + paddsw xmm0, xmm2 + paddsw xmm0, krd + paddsw xmm4, xmm6 + paddsw xmm0, xmm4 + + psraw xmm0, 7 + packuswb xmm0, xmm0 + + add rsi, rdx + add rax, rdx + + movd [rdi], xmm0 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;out_pitch +%else + add rdi, r8 +%endif + dec rcx + jnz .vp9_filter_block1d4_v8_ssse3_loop + + add rsp, 16*5 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp9_filter_block1d8_v8_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; short *filter +;) global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE sym(vp9_filter_block1d8_v8_ssse3): push rbp @@ -289,6 +407,110 @@ sym(vp9_filter_block1d16_v8_ssse3): pop rbp ret +;void vp9_filter_block1d4_h8_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE +sym(vp9_filter_block1d4_h8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16*5 + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + %define krd [rsp + 16*4] + + mov rdx, arg(5) ;filter ptr + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + mov rcx, 0x0400040 + + movdqa xmm4, [rdx] ;load filters + movd xmm5, rcx + packsswb xmm4, xmm4 + pshuflw xmm0, xmm4, 0b ;k0_k1 + pshuflw xmm1, 
xmm4, 01010101b ;k2_k3 + pshuflw xmm2, xmm4, 10101010b ;k4_k5 + pshuflw xmm3, xmm4, 11111111b ;k6_k7 + + punpcklqdq xmm0, xmm0 + punpcklqdq xmm1, xmm1 + punpcklqdq xmm2, xmm2 + punpcklqdq xmm3, xmm3 + + movdqa k0k1, xmm0 + movdqa k2k3, xmm1 + pshufd xmm5, xmm5, 0 + movdqa k4k5, xmm2 + movdqa k6k7, xmm3 + movdqa krd, xmm5 + + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rdx, dword ptr arg(3) ;output_pitch + movsxd rcx, dword ptr arg(4) ;output_height + +.filter_block1d4_h8_rowloop_ssse3: + movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 + + movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 + punpcklqdq xmm0, xmm3 + + movdqa xmm1, xmm0 + pshufb xmm0, [GLOBAL(shuf_t0t1)] + pmaddubsw xmm0, k0k1 + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf_t2t3)] + pmaddubsw xmm1, k2k3 + + movdqa xmm4, xmm2 + pshufb xmm2, [GLOBAL(shuf_t4t5)] + pmaddubsw xmm2, k4k5 + + pshufb xmm4, [GLOBAL(shuf_t6t7)] + pmaddubsw xmm4, k6k7 + + paddsw xmm0, xmm1 + paddsw xmm0, xmm4 + paddsw xmm0, xmm2 + paddsw xmm0, krd + psraw xmm0, 7 + packuswb xmm0, xmm0 + + lea rsi, [rsi + rax] + movd [rdi], xmm0 + + lea rdi, [rdi + rdx] + dec rcx + jnz .filter_block1d4_h8_rowloop_ssse3 + + add rsp, 16*5 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + ;void vp9_filter_block1d8_h8_ssse3 ;( ; unsigned char *src_ptr, @@ -340,7 +562,7 @@ sym(vp9_filter_block1d8_h8_ssse3): pshufd xmm5, xmm5, 0 movdqa k4k5, xmm2 movdqa k6k7, xmm3 -; movdqa krd, xmm5 + movdqa krd, xmm5 movsxd rax, dword ptr arg(1) ;src_pixels_per_line movsxd rdx, dword ptr arg(3) ;output_pitch @@ -349,10 +571,7 @@ sym(vp9_filter_block1d8_h8_ssse3): .filter_block1d8_h8_rowloop_ssse3: movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 -; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 -;note: if we create a k0_k7 filter, we can save a pshufb -; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11 punpcklqdq xmm0, xmm3 movdqa xmm1, xmm0 @@ -371,9 +590,9 @@ 
sym(vp9_filter_block1d8_h8_ssse3): pmaddubsw xmm4, k6k7 paddsw xmm0, xmm1 - paddsw xmm0, xmm2 - paddsw xmm0, xmm5 paddsw xmm0, xmm4 + paddsw xmm0, xmm2 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 @@ -456,10 +675,7 @@ sym(vp9_filter_block1d16_h8_ssse3): .filter_block1d16_h8_rowloop_ssse3: movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 -; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 -;note: if we create a k0_k7 filter, we can save a pshufb -; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11 punpcklqdq xmm0, xmm3 movdqa xmm1, xmm0 @@ -486,10 +702,7 @@ sym(vp9_filter_block1d16_h8_ssse3): movq xmm3, [rsi + 5] -; movq xmm7, [rsi + 12] movq xmm7, [rsi + 13] -;note: same as above -; punpcklbw xmm3, xmm7 punpcklqdq xmm3, xmm7 movdqa xmm1, xmm3 @@ -508,9 +721,9 @@ sym(vp9_filter_block1d16_h8_ssse3): pmaddubsw xmm4, k6k7 paddsw xmm3, xmm1 + paddsw xmm3, xmm4 paddsw xmm3, xmm2 paddsw xmm3, krd - paddsw xmm3, xmm4 psraw xmm3, 7 packuswb xmm3, xmm3 punpcklqdq xmm0, xmm3 diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 5d6a4a717..316bda33b 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -1041,9 +1041,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]]; do { - mi->bmi[ *fill_offset].as_mv.first.as_int = blockmv.as_int; + mi->bmi[ *fill_offset].as_mv[0].as_int = blockmv.as_int; if (mbmi->second_ref_frame > 0) - mi->bmi[ *fill_offset].as_mv.second.as_int = secondmv.as_int; + mi->bmi[ *fill_offset].as_mv[1].as_int = secondmv.as_int; fill_offset++; } while (--fill_count); } @@ -1051,8 +1051,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } while (++j < num_p); } - mv->as_int = mi->bmi[15].as_mv.first.as_int; - mbmi->mv[1].as_int = mi->bmi[15].as_mv.second.as_int; + mv->as_int = mi->bmi[15].as_mv[0].as_int; + mbmi->mv[1].as_int = 
mi->bmi[15].as_mv[1].as_int; break; /* done with SPLITMV */ diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 9f4db6bf7..facd761f0 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -31,6 +31,7 @@ #include "vp9/decoder/vp9_dboolhuff.h" #include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_tile_common.h" #include "vp9_rtcd.h" #include <assert.h> @@ -123,38 +124,30 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->block[i].dequant = pc->Y1dequant[QIndex]; } + xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1; + xd->inv_txm4x4 = vp9_short_idct4x4llm; + xd->inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1; + xd->inv_2ndtxm4x4 = vp9_short_inv_walsh4x4; + xd->itxm_add = vp9_dequant_idct_add; + xd->dc_only_itxm_add = vp9_dc_only_idct_add_c; + xd->dc_itxm_add = vp9_dequant_dc_idct_add; + xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block; + xd->itxm_add_y_block = vp9_dequant_idct_add_y_block; + xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; #if CONFIG_LOSSLESS - if (!QIndex) { - pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8; - pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless; - pbi->idct_add = vp9_dequant_idct_add_lossless_c; - pbi->dc_idct_add = vp9_dequant_dc_idct_add_lossless_c; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; - } else { - pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = 
vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block; + if (xd->lossless) { + assert(QIndex == 0); + xd->inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; + xd->inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + xd->inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless; + xd->inv_2ndtxm4x4 = vp9_short_inv_walsh4x4_lossless; + xd->itxm_add = vp9_dequant_idct_add_lossless_c; + xd->dc_only_itxm_add = vp9_dc_only_inv_walsh_add_c; + xd->dc_itxm_add = vp9_dequant_dc_idct_add_lossless_c; + xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c; + xd->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; + xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; } -#else - pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; - pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block; #endif for (i = 16; i < 24; i++) { @@ -345,15 +338,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, int i8x8mode = b->bmi.as_mode.first; b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - pbi->idct_add_uv_block(xd->qcoeff + 16 * 
16, xd->block[16].dequant, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); } else { @@ -400,17 +393,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); } } else if (mode == B_PRED) { @@ -434,8 +427,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } if (!xd->mode_info_context->mbmi.mb_skip_coeff) { @@ -444,7 +437,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context->y2 = 0; xd->left_context->y2 = 0; vp9_build_intra_predictors_mbuv(xd); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -453,13 +446,13 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->eobs + 16); } else if (mode == SPLITMV || get_2nd_order_usage(xd) == 0) { assert(get_2nd_order_usage(xd) == 0); - pbi->idct_add_y_block(xd->qcoeff, + 
xd->itxm_add_y_block(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -496,8 +489,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } } else { @@ -505,7 +498,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, assert(get_2nd_order_usage(xd) == 1); vp9_dequantize_b(b); if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); + xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff); ((int *)b->qcoeff)[0] = 0; ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; @@ -515,11 +508,11 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[6] = 0; ((int *)b->qcoeff)[7] = 0; } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); + xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff); ((int *)b->qcoeff)[0] = 0; } vp9_dequantize_b(b); - pbi->dc_idct_add_y_block(xd->qcoeff, + xd->dc_itxm_add_y_block(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, @@ -527,7 +520,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->eobs, xd->block[24].diff); } - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -645,7 +638,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 3) * 4, xd->dst.y_stride, xd->dst.y_stride, b->eob); } else { - vp9_dequant_idct_add_c( + xd->itxm_add( b->qcoeff, b->dequant, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, @@ -657,7 +650,7 @@ 
static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, } else if (get_2nd_order_usage(xd) == 1) { vp9_dequantize_b(b); if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); + xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff); ((int *)b->qcoeff)[0] = 0; ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; @@ -667,7 +660,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[6] = 0; ((int *)b->qcoeff)[7] = 0; } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); + xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff); ((int *)b->qcoeff)[0] = 0; } vp9_dequant_dc_idct_add_y_block_4x4_inplace_c( @@ -1534,17 +1527,24 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->sb64_coded = vp9_read_literal(&header_bc, 8); pc->sb32_coded = vp9_read_literal(&header_bc, 8); - - /* Read the loop filter level and type */ - pc->txfm_mode = vp9_read_literal(&header_bc, 2); - if (pc->txfm_mode == 3) - pc->txfm_mode += vp9_read_bit(&header_bc); - if (pc->txfm_mode == TX_MODE_SELECT) { - pc->prob_tx[0] = vp9_read_literal(&header_bc, 8); - pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); - pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); +#if CONFIG_LOSSLESS + xd->lossless = vp9_read_bit(&header_bc); + if (xd->lossless) { + pc->txfm_mode = ONLY_4X4; + } + else +#endif + { + /* Read the loop filter level and type */ + pc->txfm_mode = vp9_read_literal(&header_bc, 2); + if (pc->txfm_mode == 3) + pc->txfm_mode += vp9_read_bit(&header_bc); + if (pc->txfm_mode == TX_MODE_SELECT) { + pc->prob_tx[0] = vp9_read_literal(&header_bc, 8); + pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); + pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); + } } - pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc); pc->filter_level = vp9_read_literal(&header_bc, 6); pc->sharpness_level = vp9_read_literal(&header_bc, 3); @@ -1775,78 +1775,91 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { /* tile info */ { - int 
log2_tile_cols; const unsigned char *data_ptr = data + first_partition_length_in_bytes; - int tile, mb_start, mb_end; + int tile_row, tile_col, delta_log2_tiles; - log2_tile_cols = vp9_read_bit(&header_bc); - if (log2_tile_cols) { - log2_tile_cols += vp9_read_bit(&header_bc); + vp9_get_tile_n_bits(pc, &pc->log2_tile_columns, &delta_log2_tiles); + while (delta_log2_tiles--) { + if (vp9_read_bit(&header_bc)) { + pc->log2_tile_columns++; + } else { + break; + } } - pc->tile_columns = 1 << log2_tile_cols; + pc->log2_tile_rows = vp9_read_bit(&header_bc); + if (pc->log2_tile_rows) + pc->log2_tile_rows += vp9_read_bit(&header_bc); + pc->tile_columns = 1 << pc->log2_tile_columns; + pc->tile_rows = 1 << pc->log2_tile_rows; vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); if (pbi->oxcf.inv_tile_order) { - const unsigned char *data_ptr2[4]; + const int n_cols = pc->tile_columns; + const unsigned char *data_ptr2[4][1 << 6]; BOOL_DECODER UNINITIALIZED_IS_SAFE(bc_bak); - data_ptr2[0] = data_ptr; - for (tile = 1; tile < pc->tile_columns; tile++) { - int size = data_ptr2[tile - 1][0] + (data_ptr2[tile - 1][1] << 8) + - (data_ptr2[tile - 1][2] << 16) + (data_ptr2[tile - 1][3] << 24); - data_ptr2[tile - 1] += 4; - data_ptr2[tile] = data_ptr2[tile - 1] + size; + // pre-initialize the offsets, we're going to read in inverse order + data_ptr2[0][0] = data_ptr; + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + if (tile_row) { + int size = data_ptr2[tile_row - 1][n_cols - 1][0] + + (data_ptr2[tile_row - 1][n_cols - 1][1] << 8) + + (data_ptr2[tile_row - 1][n_cols - 1][2] << 16) + + (data_ptr2[tile_row - 1][n_cols - 1][3] << 24); + data_ptr2[tile_row - 1][n_cols - 1] += 4; + data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][n_cols - 1] + size; + } + + for (tile_col = 1; tile_col < n_cols; tile_col++) { + int size = data_ptr2[tile_row][tile_col - 1][0] + + (data_ptr2[tile_row][tile_col - 1][1] << 8) + + (data_ptr2[tile_row][tile_col - 1][2] << 16) 
+ + (data_ptr2[tile_row][tile_col - 1][3] << 24); + data_ptr2[tile_row][tile_col - 1] += 4; + data_ptr2[tile_row][tile_col] = + data_ptr2[tile_row][tile_col - 1] + size; + } } - for (mb_end = pc->mb_cols, tile = pc->tile_columns - 1; - tile >= 0; tile--) { - // calculate end of tile column - const int sb_cols = (pc->mb_cols + 3) >> 2; - const int sb_start = (sb_cols * tile) >> log2_tile_cols; - mb_start = ((sb_start << 2) > pc->mb_cols) ? - pc->mb_cols : (sb_start << 2); - - pc->cur_tile_idx = tile; - pc->cur_tile_mb_col_start = mb_start; - pc->cur_tile_mb_col_end = mb_end; - - setup_token_decoder(pbi, data_ptr2[tile], &residual_bc); - - /* Decode a row of superblocks */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) { - decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + for (tile_col = n_cols - 1; tile_col >= 0; tile_col--) { + vp9_get_tile_col_offsets(pc, tile_col); + setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], &residual_bc); + + /* Decode a row of superblocks */ + for (mb_row = pc->cur_tile_mb_row_start; + mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { + decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + } + if (tile_row == pc->tile_rows - 1 && tile_col == n_cols - 1) + bc_bak = residual_bc; } - mb_end = mb_start; - if (tile == pc->tile_columns - 1) - bc_bak = residual_bc; } residual_bc = bc_bak; } else { - for (mb_start = 0, tile = 0; tile < pc->tile_columns; tile++) { - // calculate end of tile column - const int sb_cols = (pc->mb_cols + 3) >> 2; - const int sb_end = (sb_cols * (tile + 1)) >> log2_tile_cols; - mb_end = ((sb_end << 2) > pc->mb_cols) ? 
pc->mb_cols : (sb_end << 2); - - pc->cur_tile_idx = tile; - pc->cur_tile_mb_col_start = mb_start; - pc->cur_tile_mb_col_end = mb_end; - - if (tile < pc->tile_columns - 1) - setup_token_decoder(pbi, data_ptr + 4, &residual_bc); - else - setup_token_decoder(pbi, data_ptr, &residual_bc); - - /* Decode a row of superblocks */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) { - decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); - } - mb_start = mb_end; - if (tile < pc->tile_columns - 1) { - int size = data_ptr[0] + (data_ptr[1] << 8) + (data_ptr[2] << 16) + - (data_ptr[3] << 24); - data_ptr += 4 + size; + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + for (tile_col = 0; tile_col < pc->tile_columns; tile_col++) { + vp9_get_tile_col_offsets(pc, tile_col); + + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) + setup_token_decoder(pbi, data_ptr + 4, &residual_bc); + else + setup_token_decoder(pbi, data_ptr, &residual_bc); + + /* Decode a row of superblocks */ + for (mb_row = pc->cur_tile_mb_row_start; + mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { + decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); + } + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) { + int size = data_ptr[0] + (data_ptr[1] << 8) + (data_ptr[2] << 16) + + (data_ptr[3] << 24); + data_ptr += 4 + size; + } } } } diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 1f64767fa..92a9df84c 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -357,21 +357,17 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, if (eob) { input[0] = input[0] * dq[0] / 2; -#if !CONFIG_DWTDCTHYBRID if (eob == 1) { vp9_short_idct1_32x32_c(input, output); add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32); input[0] = 0; } else { -#endif for (i = 1; i < 1024; i++) input[i] = input[i] * dq[1] / 2; vp9_short_idct32x32_c(input, output, 64); 
vpx_memset(input, 0, 2048); add_residual(output, pred, pitch, dest, stride, 32, 32); -#if !CONFIG_DWTDCTHYBRID } -#endif } } diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 2edbd6a3a..b7efb44f1 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -42,20 +42,6 @@ extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t * uint16_t *eobs); #endif -typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride); -typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride, int dc); - -typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs, - const int16_t *dc); -typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs); -typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride, - uint16_t *eobs); - void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, uint16_t eobs); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index acf69d906..bfdb486b8 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -63,24 +63,11 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { return decode_bool(br, 128) ? 
-value_to_sign : value_to_sign; } -#if CONFIG_NEWCOEFCONTEXT -#define PT pn -#define INCREMENT_COUNT(token) \ - do { \ - coef_counts[type][coef_bands[c]][pn][token]++; \ - pn = pt = vp9_prev_token_class[token]; \ - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(coef_bands[c + 1])) \ - pn = vp9_get_coef_neighbor_context( \ - qcoeff_ptr, nodc, neighbors, scan[c + 1]); \ - } while (0) -#else -#define PT pt #define INCREMENT_COUNT(token) \ do { \ - coef_counts[type][coef_bands[c]][pt][token]++; \ - pt = vp9_prev_token_class[token]; \ + coef_counts[type][coef_bands[c]][pt][token]++; \ + pt = vp9_get_coef_context(&recent_energy, token); \ } while (0) -#endif /* CONFIG_NEWCOEFCONTEXT */ #define WRITE_COEF_CONTINUE(val, token) \ { \ @@ -108,10 +95,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, const int lidx = vp9_block2left[txfm_size][block_idx]; ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0; FRAME_CONTEXT *const fc = &dx->common.fc; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; - int pn; -#endif + int recent_energy = 0; int nodc = (type == PLANE_TYPE_Y_NO_DC); int pt, c = nodc; vp9_coeff_probs *coef_probs; @@ -192,15 +176,11 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, } VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec); -#if CONFIG_NEWCOEFCONTEXT - pn = pt; - neighbors = vp9_get_coef_neighbors_handle(scan); -#endif while (1) { int val; const uint8_t *cat6 = cat6_prob; if (c >= seg_eob) break; - prob = coef_probs[type][coef_bands[c]][PT]; + prob = coef_probs[type][coef_bands[c]][pt]; if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break; SKIP_START: @@ -208,7 +188,7 @@ SKIP_START: if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; - prob = coef_probs[type][coef_bands[c]][PT]; + prob = coef_probs[type][coef_bands[c]][pt]; goto SKIP_START; } // ONE_CONTEXT_NODE_0_ @@ -272,7 +252,7 @@ SKIP_START: } if (c < seg_eob) - coef_counts[type][coef_bands[c]][PT][DCT_EOB_TOKEN]++; + 
coef_counts[type][coef_bands[c]][pt][DCT_EOB_TOKEN]++; A0[aidx] = L0[lidx] = (c > !type); if (txfm_size >= TX_8X8 && type != PLANE_TYPE_Y2) { diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index b350e4d68..80b301931 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -51,9 +51,9 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { if (*eobs++ > 1) - vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]); + xd->dc_itxm_add(q, dq, dst, dst, stride, stride, dc[0]); else - vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride); + xd->dc_only_itxm_add(dc[0], dst, dst, stride, stride); q += 16; dst += 4; @@ -168,9 +168,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride); + xd->itxm_add(q, dq, dstu, dstu, stride, stride); } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride); + xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride); ((int *)q)[0] = 0; } @@ -184,9 +184,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride); + xd->itxm_add(q, dq, dstv, dstv, stride, stride); } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride); + xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride); ((int *)q)[0] = 0; } diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index e04b9f5e4..0b0b90356 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -70,12 +70,6 @@ typedef struct VP9Decompressor { DETOK detoken; - vp9_dequant_idct_add_fn_t idct_add; - vp9_dequant_dc_idct_add_fn_t dc_idct_add; - vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block; - 
vp9_dequant_idct_add_y_block_fn_t idct_add_y_block; - vp9_dequant_idct_add_uv_block_fn_t idct_add_uv_block; - int refresh_frame_flags; vp9_prob prob_skip_false; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index a3c407865..257ddb2c5 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -14,6 +14,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_findnearmv.h" +#include "vp9/common/vp9_tile_common.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/common/vp9_systemdependent.h" #include <assert.h> @@ -1088,14 +1089,15 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, } static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, - TOKENEXTRA **tok) { + TOKENEXTRA **tok, TOKENEXTRA *tok_end) { VP9_COMMON *const c = &cpi->common; const int mis = c->mode_info_stride; - MODE_INFO *m, *m_ptr = c->mi + c->cur_tile_mb_col_start; + MODE_INFO *m, *m_ptr = c->mi; int i, mb_row, mb_col; - TOKENEXTRA *tok_end = *tok + cpi->tok_count; - for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) { + m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis; + for (mb_row = c->cur_tile_mb_row_start; + mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) { m = m_ptr; for (mb_col = c->cur_tile_mb_col_start; mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) { @@ -1667,7 +1669,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_write_literal(&header_bc, pc->sb64_coded, 8); pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]); vp9_write_literal(&header_bc, pc->sb32_coded, 8); - +#if CONFIG_LOSSLESS + vp9_write_bit(&header_bc, cpi->oxcf.lossless); + if (cpi->oxcf.lossless) { + pc->txfm_mode = ONLY_4X4; + } + else +#endif { if (pc->txfm_mode == TX_MODE_SELECT) { pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] + @@ -2026,9 +2034,22 @@ void vp9_pack_bitstream(VP9_COMP *cpi, 
unsigned char *dest, } /* tiling */ - vp9_write(&header_bc, pc->tile_columns > 1, 128); - if (pc->tile_columns > 1) { - vp9_write(&header_bc, pc->tile_columns > 2, 128); + { + int min_log2_tiles, delta_log2_tiles, n_tile_bits, n; + + vp9_get_tile_n_bits(pc, &min_log2_tiles, &delta_log2_tiles); + n_tile_bits = pc->log2_tile_columns - min_log2_tiles; + for (n = 0; n < delta_log2_tiles; n++) { + if (n_tile_bits--) { + vp9_write_bit(&header_bc, 1); + } else { + vp9_write_bit(&header_bc, 0); + break; + } + } + vp9_write_bit(&header_bc, pc->log2_tile_rows != 0); + if (pc->log2_tile_rows != 0) + vp9_write_bit(&header_bc, pc->log2_tile_rows != 1); } vp9_stop_encode(&header_bc); @@ -2058,41 +2079,45 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } { - int mb_start = 0, tile; - int total_size = 0; + int tile_row, tile_col, total_size = 0; unsigned char *data_ptr = cx_data + header_bc.pos; - TOKENEXTRA *tok = cpi->tok; - - for (tile = 0; tile < pc->tile_columns; tile++) { - // calculate end of tile column - const int sb_cols = (pc->mb_cols + 3) >> 2; - const int sb_end = (sb_cols * (tile + 1)) >> cpi->oxcf.tile_columns; - const int mb_end = ((sb_end << 2) > pc->mb_cols) ? 
- pc->mb_cols : (sb_end << 2); - - pc->cur_tile_idx = tile; - pc->cur_tile_mb_col_start = mb_start; - pc->cur_tile_mb_col_end = mb_end; - - if (tile < pc->tile_columns - 1) - vp9_start_encode(&residual_bc, data_ptr + total_size + 4); - else - vp9_start_encode(&residual_bc, data_ptr + total_size); - write_modes(cpi, &residual_bc, &tok); - vp9_stop_encode(&residual_bc); - if (tile < pc->tile_columns - 1) { - /* size of this tile */ - data_ptr[total_size + 0] = residual_bc.pos; - data_ptr[total_size + 1] = residual_bc.pos >> 8; - data_ptr[total_size + 2] = residual_bc.pos >> 16; - data_ptr[total_size + 3] = residual_bc.pos >> 24; - total_size += 4; - } + TOKENEXTRA *tok[1 << 6], *tok_end; + + tok[0] = cpi->tok; + for (tile_col = 1; tile_col < pc->tile_columns; tile_col++) + tok[tile_col] = tok[tile_col - 1] + cpi->tok_count[tile_col - 1]; + + for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(pc, tile_row); + tok_end = cpi->tok + cpi->tok_count[0]; + for (tile_col = 0; tile_col < pc->tile_columns; + tile_col++, tok_end += cpi->tok_count[tile_col]) { + vp9_get_tile_col_offsets(pc, tile_col); + + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) + vp9_start_encode(&residual_bc, data_ptr + total_size + 4); + else + vp9_start_encode(&residual_bc, data_ptr + total_size); + write_modes(cpi, &residual_bc, &tok[tile_col], tok_end); + vp9_stop_encode(&residual_bc); + if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) { + /* size of this tile */ + data_ptr[total_size + 0] = residual_bc.pos; + data_ptr[total_size + 1] = residual_bc.pos >> 8; + data_ptr[total_size + 2] = residual_bc.pos >> 16; + data_ptr[total_size + 3] = residual_bc.pos >> 24; + total_size += 4; + } - mb_start = mb_end; - total_size += residual_bc.pos; + total_size += residual_bc.pos; + } } + assert((unsigned int)(tok[0] - cpi->tok) == cpi->tok_count[0]); + for (tile_col = 1; tile_col < pc->tile_columns; tile_col++) + assert((unsigned 
int)(tok[tile_col] - tok[tile_col - 1]) == + cpi->tok_count[tile_col]); + *size += total_size; } } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 91d4c4530..d5110c810 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -169,14 +169,14 @@ typedef struct macroblock { PICK_MODE_CONTEXT sb32_context[4]; PICK_MODE_CONTEXT sb64_context; - void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch); - void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch); - void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch); + void (*fwd_2ndtxm4x4)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch); + void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); + void (*fwd_2ndtxm2x2)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d); void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1); - void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch); - void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch); - void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d); void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d); void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d); diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 7af044fe4..746648291 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -323,6 +323,8 @@ static const int16_t adst_i16[256] = { }; #endif +#define NEW_FDCT8x8 1 +#if !NEW_FDCT8x8 static const int xC1S7 = 16069; static const int xC2S6 = 15137; static const int xC3S5 = 13623; @@ -560,6 +562,7 @@ void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) { op++; } } +#endif void vp9_short_fhaar2x2_c(short *input, short *output, int 
pitch) { /* [1 1; 1 -1] orthogonal transform */ @@ -836,6 +839,79 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch) vp9_short_fdct4x4_c(input + 4, output + 16, pitch); } +#if NEW_FDCT8x8 +static void fdct8_1d(int16_t *input, int16_t *output) { + int16_t step[8]; + int temp1, temp2; + + // stage 1 + step[0] = input[0] + input[7]; + step[1] = input[1] + input[6]; + step[2] = input[2] + input[5]; + step[3] = input[3] + input[4]; + step[4] = input[3] - input[4]; + step[5] = input[2] - input[5]; + step[6] = input[1] - input[6]; + step[7] = input[0] - input[7]; + + fdct4_1d(step, step); + + // Stage 2 + output[4] = step[4]; + temp1 = (-step[5] + step[6]) * cospi_16_64; + temp2 = (step[6] + step[5]) * cospi_16_64; + output[5] = dct_const_round_shift(temp1); + output[6] = dct_const_round_shift(temp2); + output[7] = step[7]; + + // Stage 3 + step[4] = output[4] + output[5]; + step[5] = -output[5] + output[4]; + step[6] = -output[6] + output[7]; + step[7] = output[7] + output[6]; + + // Stage 4 + output[0] = step[0]; + output[4] = step[2]; + output[2] = step[1]; + output[6] = step[3]; + + temp1 = step[4] * cospi_28_64 + step[7] * cospi_4_64; + temp2 = step[5] * cospi_12_64 + step[6] * cospi_20_64; + output[1] = dct_const_round_shift(temp1); + output[5] = dct_const_round_shift(temp2); + temp1 = step[6] * cospi_12_64 + step[5] * -cospi_20_64; + temp2 = step[7] * cospi_28_64 + step[4] * -cospi_4_64; + output[3] = dct_const_round_shift(temp1); + output[7] = dct_const_round_shift(temp2); +} + +void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) { + int shortpitch = pitch >> 1; + int i, j; + int16_t out[64]; + int16_t temp_in[8], temp_out[8]; + + // First transform columns + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) + temp_in[j] = input[j * shortpitch + i] << 2; + fdct8_1d(temp_in, temp_out); + for (j = 0; j < 8; j++) + out[j * 8 + i] = temp_out[j]; + } + + // Then transform rows + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + 
temp_in[j] = out[j + i * 8]; + fdct8_1d(temp_in, temp_out); + for (j = 0; j < 8; ++j) + output[j + i * 8] = temp_out[j] >> 1; + } +} +#endif + void vp9_short_walsh4x4_c(short *input, short *output, int pitch) { int i; int a1, b1, c1, d1; @@ -1395,8 +1471,6 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { #undef ROUNDING #endif -#if !CONFIG_DWTDCTHYBRID - #define TEST_INT_32x32_DCT 1 #if !TEST_INT_32x32_DCT @@ -2134,706 +2208,3 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { } #endif - -#else // CONFIG_DWTDCTHYBRID - -#if DWT_TYPE == 53 - -// Note: block length must be even for this implementation -static void analysis_53_row(int length, short *x, - short *lowpass, short *highpass) { - int n; - short r, *a, *b; - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *a++ = (r = *x++) << 1; - *b++ = *x - ((r + x[1] + 1) >> 1); - x++; - } - *a = (r = *x++) << 1; - *b = *x - r; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ += (r + (*b) + 1) >> 1; - r = *b++; - } -} - -static void analysis_53_col(int length, short *x, - short *lowpass, short *highpass) { - int n; - short r, *a, *b; - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *a++ = (r = *x++); - *b++ = (((*x) << 1) - (r + x[1]) + 2) >> 2; - x++; - } - *a = (r = *x++); - *b = (*x - r + 1) >> 1; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ += (r + (*b) + 1) >> 1; - r = *b++; - } -} - -static void dyadic_analyze_53(int levels, int width, int height, - short *x, int pitch_x, short *c, int pitch_c) { - int lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * DWT_MAX_LENGTH]; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; - } - } - for (lv = 0; lv < levels; lv++) { - nh = hh; - hh = (hh + 1) >> 1; - nw = hw; - hw = (hw + 1) >> 1; - if ((nh < 2) || (nw < 2)) 
return; - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); - analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw); - } - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i + nh] = c[i * pitch_c + j]; - analysis_53_col(nh, buffer + nh, buffer, buffer + hh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i]; - } - } -} - -#elif DWT_TYPE == 26 - -static void analysis_26_row(int length, short *x, - short *lowpass, short *highpass) { - int i, n; - short r, s, *a, *b; - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - r = *x++; - s = *x++; - *a++ = r + s; - *b++ = r - s; - } - n = length >> 1; - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ -= (r - a[1] + 4) >> 3; - r = *a++; - } - *b -= (r - *a + 4) >> 3; - } -} - -static void analysis_26_col(int length, short *x, - short *lowpass, short *highpass) { - int i, n; - short r, s, *a, *b; - a = lowpass; - b = highpass; - for (i = length >> 1; i; i--) { - r = *x++; - s = *x++; - *a++ = (r + s + 1) >> 1; - *b++ = (r - s + 1) >> 1; - } - n = length >> 1; - if (n >= 4) { - a = lowpass; - b = highpass; - r = *lowpass; - while (--n) { - *b++ -= (r - a[1] + 4) >> 3; - r = *a++; - } - *b -= (r - *a + 4) >> 3; - } -} - -static void dyadic_analyze_26(int levels, int width, int height, - short *x, int pitch_x, short *c, int pitch_c) { - int lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * DWT_MAX_LENGTH]; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; - } - } - for (lv = 0; lv < levels; lv++) { - nh = hh; - hh = (hh + 1) >> 1; - nw = hw; - hw = (hw + 1) >> 1; - if ((nh < 2) || (nw < 2)) return; - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); - analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw); - } - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i + 
nh] = c[i * pitch_c + j]; - analysis_26_col(nh, buffer + nh, buffer, buffer + hh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = buffer[i]; - } - } -} - -#elif DWT_TYPE == 97 - -static void analysis_97(int length, double *x, - double *lowpass, double *highpass) { - static const double a_predict1 = -1.586134342; - static const double a_update1 = -0.05298011854; - static const double a_predict2 = 0.8829110762; - static const double a_update2 = 0.4435068522; - static const double s_low = 1.149604398; - static const double s_high = 1/1.149604398; - int i; - double y[DWT_MAX_LENGTH]; - // Predict 1 - for (i = 1; i < length - 2; i += 2) { - x[i] += a_predict1 * (x[i - 1] + x[i + 1]); - } - x[length - 1] += 2 * a_predict1 * x[length - 2]; - // Update 1 - for (i = 2; i < length; i += 2) { - x[i] += a_update1 * (x[i - 1] + x[i + 1]); - } - x[0] += 2 * a_update1 * x[1]; - // Predict 2 - for (i = 1; i < length - 2; i += 2) { - x[i] += a_predict2 * (x[i - 1] + x[i + 1]); - } - x[length - 1] += 2 * a_predict2 * x[length - 2]; - // Update 2 - for (i = 2; i < length; i += 2) { - x[i] += a_update2 * (x[i - 1] + x[i + 1]); - } - x[0] += 2 * a_update2 * x[1]; - memcpy(y, x, sizeof(*y) * length); - // Scale and pack - for (i = 0; i < length / 2; i++) { - lowpass[i] = y[2 * i] * s_low; - highpass[i] = y[2 * i + 1] * s_high; - } -} - -static void dyadic_analyze_97(int levels, int width, int height, - short *x, int pitch_x, short *c, int pitch_c) { - int lv, i, j, nh, nw, hh = height, hw = width; - double buffer[2 * DWT_MAX_LENGTH]; - double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; - } - } - for (lv = 0; lv < levels; lv++) { - nh = hh; - hh = (hh + 1) >> 1; - nw = hw; - hw = (hw + 1) >> 1; - if ((nh < 2) || (nw < 2)) return; - for (i = 0; i < nh; i++) { - memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer)); - analysis_97(nw, buffer, &y[i * 
DWT_MAX_LENGTH], - &y[i * DWT_MAX_LENGTH] + hw); - } - for (j = 0; j < nw; j++) { - for (i = 0; i < nh; i++) - buffer[i + nh] = y[i * DWT_MAX_LENGTH + j]; - analysis_97(nh, buffer + nh, buffer, buffer + hh); - for (i = 0; i < nh; i++) - c[i * pitch_c + j] = round(buffer[i]); - } - } -} - -#endif // DWT_TYPE - -// TODO(debargha): Implement the scaling differently so as not to have to -// use the floating point dct -static void dct16x16_1d_f(double input[16], double output[16]) { - static const double C1 = 0.995184726672197; - static const double C2 = 0.98078528040323; - static const double C3 = 0.956940335732209; - static const double C4 = 0.923879532511287; - static const double C5 = 0.881921264348355; - static const double C6 = 0.831469612302545; - static const double C7 = 0.773010453362737; - static const double C8 = 0.707106781186548; - static const double C9 = 0.634393284163646; - static const double C10 = 0.555570233019602; - static const double C11 = 0.471396736825998; - static const double C12 = 0.38268343236509; - static const double C13 = 0.290284677254462; - static const double C14 = 0.195090322016128; - static const double C15 = 0.098017140329561; - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double step[16]; - double intermediate[16]; - double temp1, temp2; - - // step 1 - step[ 0] = input[0] + input[15]; - step[ 1] = input[1] + input[14]; - step[ 2] = input[2] + input[13]; - step[ 3] = input[3] + input[12]; - step[ 4] = input[4] + input[11]; - step[ 5] = input[5] + input[10]; - step[ 6] = input[6] + input[ 9]; - step[ 7] = input[7] + input[ 8]; - step[ 8] = input[7] - input[ 8]; - step[ 9] = input[6] - input[ 9]; - step[10] = input[5] - input[10]; - step[11] = input[4] - input[11]; - step[12] = input[3] - input[12]; - step[13] = input[2] - input[13]; - step[14] = input[1] - input[14]; - step[15] = input[0] - input[15]; - - // step 2 - output[0] = step[0] + step[7]; - output[1] = step[1] + step[6]; - output[2] = step[2] + 
step[5]; - output[3] = step[3] + step[4]; - output[4] = step[3] - step[4]; - output[5] = step[2] - step[5]; - output[6] = step[1] - step[6]; - output[7] = step[0] - step[7]; - - temp1 = step[ 8]*C7; - temp2 = step[15]*C9; - output[ 8] = temp1 + temp2; - - temp1 = step[ 9]*C11; - temp2 = step[14]*C5; - output[ 9] = temp1 - temp2; - - temp1 = step[10]*C3; - temp2 = step[13]*C13; - output[10] = temp1 + temp2; - - temp1 = step[11]*C15; - temp2 = step[12]*C1; - output[11] = temp1 - temp2; - - temp1 = step[11]*C1; - temp2 = step[12]*C15; - output[12] = temp2 + temp1; - - temp1 = step[10]*C13; - temp2 = step[13]*C3; - output[13] = temp2 - temp1; - - temp1 = step[ 9]*C5; - temp2 = step[14]*C11; - output[14] = temp2 + temp1; - - temp1 = step[ 8]*C9; - temp2 = step[15]*C7; - output[15] = temp2 - temp1; - - // step 3 - step[ 0] = output[0] + output[3]; - step[ 1] = output[1] + output[2]; - step[ 2] = output[1] - output[2]; - step[ 3] = output[0] - output[3]; - - temp1 = output[4]*C14; - temp2 = output[7]*C2; - step[ 4] = temp1 + temp2; - - temp1 = output[5]*C10; - temp2 = output[6]*C6; - step[ 5] = temp1 + temp2; - - temp1 = output[5]*C6; - temp2 = output[6]*C10; - step[ 6] = temp2 - temp1; - - temp1 = output[4]*C2; - temp2 = output[7]*C14; - step[ 7] = temp2 - temp1; - - step[ 8] = output[ 8] + output[11]; - step[ 9] = output[ 9] + output[10]; - step[10] = output[ 9] - output[10]; - step[11] = output[ 8] - output[11]; - - step[12] = output[12] + output[15]; - step[13] = output[13] + output[14]; - step[14] = output[13] - output[14]; - step[15] = output[12] - output[15]; - - // step 4 - output[ 0] = (step[ 0] + step[ 1]); - output[ 8] = (step[ 0] - step[ 1]); - - temp1 = step[2]*C12; - temp2 = step[3]*C4; - temp1 = temp1 + temp2; - output[ 4] = 2*(temp1*C8); - - temp1 = step[2]*C4; - temp2 = step[3]*C12; - temp1 = temp2 - temp1; - output[12] = 2*(temp1*C8); - - output[ 2] = 2*((step[4] + step[ 5])*C8); - output[14] = 2*((step[7] - step[ 6])*C8); - - temp1 = step[4] - step[5]; 
- temp2 = step[6] + step[7]; - output[ 6] = (temp1 + temp2); - output[10] = (temp1 - temp2); - - intermediate[8] = step[8] + step[14]; - intermediate[9] = step[9] + step[15]; - - temp1 = intermediate[8]*C12; - temp2 = intermediate[9]*C4; - temp1 = temp1 - temp2; - output[3] = 2*(temp1*C8); - - temp1 = intermediate[8]*C4; - temp2 = intermediate[9]*C12; - temp1 = temp2 + temp1; - output[13] = 2*(temp1*C8); - - output[ 9] = 2*((step[10] + step[11])*C8); - - intermediate[11] = step[10] - step[11]; - intermediate[12] = step[12] + step[13]; - intermediate[13] = step[12] - step[13]; - intermediate[14] = step[ 8] - step[14]; - intermediate[15] = step[ 9] - step[15]; - - output[15] = (intermediate[11] + intermediate[12]); - output[ 1] = -(intermediate[11] - intermediate[12]); - - output[ 7] = 2*(intermediate[13]*C8); - - temp1 = intermediate[14]*C12; - temp2 = intermediate[15]*C4; - temp1 = temp1 - temp2; - output[11] = -2*(temp1*C8); - - temp1 = intermediate[14]*C4; - temp2 = intermediate[15]*C12; - temp1 = temp2 + temp1; - output[ 5] = 2*(temp1*C8); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch, - int scale) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - int shortpitch = pitch >> 1; - int i, j; - double output[256]; - // First transform columns - for (i = 0; i < 16; i++) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; j++) - temp_in[j] = input[j*shortpitch + i]; - dct16x16_1d_f(temp_in, temp_out); - for (j = 0; j < 16; j++) - output[j*16 + i] = temp_out[j]; - } - // Then transform rows - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = output[j + i*16]; - dct16x16_1d_f(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j + i*16] = temp_out[j]; - } - // Scale by some magic number - for (i = 0; i < 256; i++) - out[i] = (short)round(output[i] / (2 << scale)); - } - 
vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) { - int j1, i, j, k; - float b[8]; - float b1[8]; - float d[8][8]; - float f0 = (float) .7071068; - float f1 = (float) .4903926; - float f2 = (float) .4619398; - float f3 = (float) .4157348; - float f4 = (float) .3535534; - float f5 = (float) .2777851; - float f6 = (float) .1913417; - float f7 = (float) .0975452; - pitch = pitch / 2; - for (i = 0, k = 0; i < 8; i++, k += pitch) { - for (j = 0; j < 8; j++) { - b[j] = (float)(block[k + j] << (3 - scale)); - } - /* Horizontal transform */ - for (j = 0; j < 4; j++) { - j1 = 7 - j; - b1[j] = b[j] + b[j1]; - b1[j1] = b[j] - b[j1]; - } - b[0] = b1[0] + b1[3]; - b[1] = b1[1] + b1[2]; - b[2] = b1[1] - b1[2]; - b[3] = b1[0] - b1[3]; - b[4] = b1[4]; - b[5] = (b1[6] - b1[5]) * f0; - b[6] = (b1[6] + b1[5]) * f0; - b[7] = b1[7]; - d[i][0] = (b[0] + b[1]) * f4; - d[i][4] = (b[0] - b[1]) * f4; - d[i][2] = b[2] * f6 + b[3] * f2; - d[i][6] = b[3] * f6 - b[2] * f2; - b1[4] = b[4] + b[5]; - b1[7] = b[7] + b[6]; - b1[5] = b[4] - b[5]; - b1[6] = b[7] - b[6]; - d[i][1] = b1[4] * f7 + b1[7] * f1; - d[i][5] = b1[5] * f3 + b1[6] * f5; - d[i][7] = b1[7] * f7 - b1[4] * f1; - d[i][3] = b1[6] * f3 - b1[5] * f5; - } - /* Vertical transform */ - for (i = 0; i < 8; i++) { - for (j = 0; j < 4; j++) { - j1 = 7 - j; - b1[j] = d[j][i] + d[j1][i]; - b1[j1] = d[j][i] - d[j1][i]; - } - b[0] = b1[0] + b1[3]; - b[1] = b1[1] + b1[2]; - b[2] = b1[1] - b1[2]; - b[3] = b1[0] - b1[3]; - b[4] = b1[4]; - b[5] = (b1[6] - b1[5]) * f0; - b[6] = (b1[6] + b1[5]) * f0; - b[7] = b1[7]; - d[0][i] = (b[0] + b[1]) * f4; - d[4][i] = (b[0] - b[1]) * f4; - d[2][i] = b[2] * f6 + b[3] * f2; - d[6][i] = b[3] * f6 - b[2] * f2; - b1[4] = b[4] + b[5]; - b1[7] = b[7] + b[6]; - b1[5] = b[4] - b[5]; - b1[6] = b[7] - b[6]; - d[1][i] = b1[4] * f7 + b1[7] * f1; - d[5][i] = b1[5] * f3 + b1[6] * f5; - d[7][i] = b1[7] * f7 - b1[4] * f1; - d[3][i] = 
b1[6] * f3 - b1[5] * f5; - } - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - *(coefs + j + i * 8) = (short) floor(d[i][j] + 0.5); - } - } - return; -} - -#define divide_bits(d, n) ((n) < 0 ? (d) << (n) : (d) >> (n)) - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - -void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { - // assume out is a 32x32 buffer - short buffer[16 * 16]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 97 - dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 53 - dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32); -#endif - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the dct16x16 function - vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); - for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } -} - -#elif DWTDCT_TYPE == DWTDCT16X16 - -void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { - // assume out is a 32x32 buffer - short buffer[16 * 16]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 97 - dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 53 - dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32); -#endif - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the dct16x16 function - vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); - 
vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16); -} - -#elif DWTDCT_TYPE == DWTDCT8X8 - -void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { - // assume out is a 32x32 buffer - short buffer[8 * 8]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(2, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 97 - dyadic_analyze_97(2, 32, 32, input, short_pitch, out, 32); -#elif DWT_TYPE == 53 - dyadic_analyze_53(2, 32, 32, input, short_pitch, out, 32); -#endif - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the dct16x16 function - vp9_short_fdct8x8_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32, buffer + i * 8, sizeof(short) * 8); - - vp9_short_fdct8x8_c_f(out + 8, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32 + 8, buffer + i * 8, sizeof(short) * 8); - - vp9_short_fdct8x8_c_f(out + 32 * 8, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32 + 32 * 8, buffer + i * 8, sizeof(short) * 8); - - vp9_short_fdct8x8_c_f(out + 33 * 8, buffer, 64, 1 + DWT_PRECISION_BITS); - for (i = 0; i < 8; ++i) - vpx_memcpy(out + i * 32 + 33 * 8, buffer + i * 8, sizeof(short) * 8); - - for (i = 0; i < 16; ++i) { - for (j = 16; j < 32; ++j) { - out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } - for (i = 16; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - out[i * 32 + j] 
= divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); - } - } -} - -#endif - -#if CONFIG_TX64X64 -void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { - // assume out is a 64x64 buffer - short buffer[16 * 16]; - int i, j; - const int short_pitch = pitch >> 1; -#if DWT_TYPE == 26 - dyadic_analyze_26(2, 64, 64, input, short_pitch, out, 64); -#elif DWT_TYPE == 97 - dyadic_analyze_97(2, 64, 64, input, short_pitch, out, 64); -#elif DWT_TYPE == 53 - dyadic_analyze_53(2, 64, 64, input, short_pitch, out, 64); -#endif - // TODO(debargha): Implement more efficiently by adding output pitch - // argument to the dct16x16 function - vp9_short_fdct16x16_c_f(out, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16); - -#if DWTDCT_TYPE == DWTDCT16X16_LEAN - for (i = 0; i < 16; ++i) { - for (j = 16; j < 48; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 16; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } -#elif DWTDCT_TYPE == DWTDCT16X16 - vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 64 * 16, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64 + 64 * 16, buffer + i * 16, sizeof(short) * 16); - - vp9_short_fdct16x16_c_f(out + 65 * 16, buffer, 128, 2 + DWT_PRECISION_BITS); - for (i = 0; i < 16; ++i) - vpx_memcpy(out + i * 64 + 65 * 16, buffer + i * 16, sizeof(short) * 16); - - // There is no dct used on the highest bands for now. 
- // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS - // TODO(debargha): experiment with turning these coeffs to 0 - for (i = 0; i < 32; ++i) { - for (j = 32; j < 64; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } - for (i = 32; i < 64; ++i) { - for (j = 0; j < 64; ++j) { - out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); - } - } -#endif // DWTDCT_TYPE -} -#endif // CONFIG_TX64X64 -#endif // CONFIG_DWTDCTHYBRID diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 927a1b901..fe33f2ebf 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -28,6 +28,7 @@ #include "vp9/common/vp9_findnearmv.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_tile_common.h" #include "vp9/encoder/vp9_tokenize.h" #include "vp9_rtcd.h" #include <stdio.h> @@ -1230,8 +1231,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - - TOKENEXTRA *tp = cpi->tok; int totalrate; // printf("encode_frame_internal frame %d (%d)\n", @@ -1312,26 +1311,19 @@ static void encode_frame_internal(VP9_COMP *cpi) { { // Take tiles into account and give start/end MB - int tile, mb_start = 0; + int tile_col; + TOKENEXTRA *tp = cpi->tok; - for (tile = 0; tile < cm->tile_columns; tile++) { - // calculate end of tile column - const int sb_cols = (cm->mb_cols + 3) >> 2; - const int sb_end = (sb_cols * (tile + 1)) >> cpi->oxcf.tile_columns; - const int mb_end = ((sb_end << 2) > cm->mb_cols) ? 
- cm->mb_cols : (sb_end << 2); + for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { + TOKENEXTRA *tp_old = tp; // For each row of SBs in the frame - cm->cur_tile_idx = tile; - cm->cur_tile_mb_col_start = mb_start; - cm->cur_tile_mb_col_end = mb_end; + vp9_get_tile_col_offsets(cm, tile_col); for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) { encode_sb_row(cpi, mb_row, &tp, &totalrate); } - mb_start = mb_end; + cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old); } - - cpi->tok_count = (unsigned int)(tp - cpi->tok); } vpx_usec_timer_mark(&emr_timer); @@ -1543,8 +1535,10 @@ void vp9_encode_frame(VP9_COMP *cpi) { /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */ #if CONFIG_LOSSLESS + cpi->mb.e_mbd.lossless = 0; if (cpi->oxcf.lossless) { txfm_type = ONLY_4X4; + cpi->mb.e_mbd.lossless = 1; } else #endif /* FIXME (rbultje) diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 1dd30130a..a52763080 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -62,7 +62,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob); #endif } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b) ; vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32); } @@ -165,7 +165,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { tx_type, 8, xd->block[idx].eob); #endif } else { - x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); } @@ -183,13 +183,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob); #endif } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + 
x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, b, b + 1); vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32); i++; } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); } @@ -222,7 +222,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, vp9_subtract_b(be, b, 8); - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16); + x->fwd_txm4x4(be->src_diff, be->coeff, 16); x->quantize_b_4x4(be, b); vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 52eabf129..12082a88d 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -188,11 +188,11 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) { assert(has_2nd_order == 0); vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4); } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) { - x->vp9_short_fdct8x4(&x->block[i].src_diff[0], + x->fwd_txm8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); i++; } else { - x->vp9_short_fdct4x4(&x->block[i].src_diff[0], + x->fwd_txm4x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); } } @@ -202,7 +202,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) { build_dcblock_4x4(x); // do 2nd order transform on the dc block - x->short_walsh4x4(&x->block[24].src_diff[0], + x->fwd_2ndtxm4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); } else { vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0])); @@ -213,7 +213,7 @@ void vp9_transform_mbuv_4x4(MACROBLOCK *x) { int i; for (i = 16; i < 24; i += 2) { - x->vp9_short_fdct8x4(&x->block[i].src_diff[0], + x->fwd_txm8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); } } @@ -253,7 +253,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { assert(has_2nd_order == 0); vp9_fht_c(b->src_diff, 32, b->coeff, 
tx_type, 8); } else { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); } } @@ -264,7 +264,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { assert(has_2nd_order == 0); vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8); } else { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i + 2].coeff[0], 32); } } @@ -274,7 +274,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { build_dcblock_8x8(x); // do 2nd order transform on the dc block - x->short_fhaar2x2(&x->block[24].src_diff[0], + x->fwd_2ndtxm2x2(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); } else { vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0])); @@ -285,7 +285,7 @@ void vp9_transform_mbuv_8x8(MACROBLOCK *x) { int i; for (i = 16; i < 24; i += 4) { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); } } @@ -303,7 +303,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) { if (tx_type != DCT_DCT) { vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16); } else { - x->vp9_short_fdct16x16(&x->block[0].src_diff[0], + x->fwd_txm16x16(&x->block[0].src_diff[0], &x->block[0].coeff[0], 32); } } @@ -321,9 +321,9 @@ void vp9_transform_sby_32x32(MACROBLOCK *x) { void vp9_transform_sbuv_16x16(MACROBLOCK *x) { SUPERBLOCK * const x_sb = &x->sb_coeff_data; vp9_clear_system_state(); - x->vp9_short_fdct16x16(x_sb->src_diff + 1024, + x->fwd_txm16x16(x_sb->src_diff + 1024, x_sb->coeff + 1024, 32); - x->vp9_short_fdct16x16(x_sb->src_diff + 1280, + x->fwd_txm16x16(x_sb->src_diff + 1280, x_sb->coeff + 1280, 32); } @@ -361,6 +361,13 @@ static const int plane_rd_mult[4] = { }\ } +// This function is a place holder for now but may ultimately need +// to scan previous tokens to work out the correct context. 
+static int trellis_get_coeff_context(int token) { + int recent_energy = 0; + return vp9_get_coef_context(&recent_energy, token); +} + static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int tx_size) { @@ -380,9 +387,6 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, int err_mult = plane_rd_mult[type]; int default_eob; int const *scan, *bands; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; -#endif switch (tx_size) { default: @@ -424,9 +428,6 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, default_eob = 256; break; } -#if CONFIG_NEWCOEFCONTEXT - neighbors = vp9_get_coef_neighbors_handle(scan); -#endif /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; @@ -459,12 +460,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, /* Consider both possible successor states. */ if (next < default_eob) { band = bands[i + 1]; - pt = vp9_prev_token_class[t0]; -#if CONFIG_NEWCOEFCONTEXT - if (NEWCOEFCONTEXT_BAND_COND(band)) - pt = vp9_get_coef_neighbor_context( - qcoeff_ptr, i0, neighbors, scan[i + 1]); -#endif + pt = trellis_get_coeff_context(t0); rate0 += mb->token_costs[tx_size][type][band][pt][tokens[next][0].token]; rate1 += @@ -512,34 +508,12 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, if (next < default_eob) { band = bands[i + 1]; if (t0 != DCT_EOB_TOKEN) { -#if CONFIG_NEWCOEFCONTEXT - int tmp = qcoeff_ptr[scan[i]]; - qcoeff_ptr[scan[i]] = x; - if (NEWCOEFCONTEXT_BAND_COND(band)) - pt = vp9_get_coef_neighbor_context( - qcoeff_ptr, i0, neighbors, scan[i + 1]); - else - pt = vp9_prev_token_class[t0]; - qcoeff_ptr[scan[i]] = tmp; -#else - pt = vp9_prev_token_class[t0]; -#endif + pt = trellis_get_coeff_context(t0); rate0 += mb->token_costs[tx_size][type][band][pt][ tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { -#if CONFIG_NEWCOEFCONTEXT - int tmp = qcoeff_ptr[scan[i]]; - qcoeff_ptr[scan[i]] = x; - 
if (NEWCOEFCONTEXT_BAND_COND(band)) - pt = vp9_get_coef_neighbor_context( - qcoeff_ptr, i0, neighbors, scan[i + 1]); - else - pt = vp9_prev_token_class[t1]; - qcoeff_ptr[scan[i]] = tmp; -#else - pt = vp9_prev_token_class[t1]; -#endif + pt = trellis_get_coeff_context(t1); rate1 += mb->token_costs[tx_size][type][band][pt][ tokens[next][1].token]; } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 4694a92c6..3791737d2 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1546,7 +1546,7 @@ int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int in_what_stride = d->pre_stride; int mv_stride = d->pre_stride; uint8_t *bestaddress; - int_mv *best_mv = &d->bmi.as_mv.first; + int_mv *best_mv = &d->bmi.as_mv[0]; int_mv this_mv; int bestsad = INT_MAX; int r, c; @@ -1641,7 +1641,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int in_what_stride = d->pre_stride; int mv_stride = d->pre_stride; uint8_t *bestaddress; - int_mv *best_mv = &d->bmi.as_mv.first; + int_mv *best_mv = &d->bmi.as_mv[0]; int_mv this_mv; unsigned int bestsad = INT_MAX; int r, c; @@ -1770,7 +1770,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int in_what_stride = d->pre_stride; int mv_stride = d->pre_stride; uint8_t *bestaddress; - int_mv *best_mv = &d->bmi.as_mv.first; + int_mv *best_mv = &d->bmi.as_mv[0]; int_mv this_mv; unsigned int bestsad = INT_MAX; int r, c; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index ad5fe7819..3e5940f55 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -23,6 +23,7 @@ #include "vp9/common/vp9_extend.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_tile_common.h" #include "vp9/encoder/vp9_segmentation.h" #include "./vp9_rtcd.h" #include "./vpx_scale_rtcd.h" @@ -752,10 +753,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) { 
sf->quarter_pixel_search = 1; sf->half_pixel_search = 1; sf->iterative_sub_pixel = 1; -#if CONFIG_LOSSLESS - sf->optimize_coefficients = 0; -#else sf->optimize_coefficients = 1; +#if CONFIG_LOSSLESS + if (cpi->oxcf.lossless) + sf->optimize_coefficients = 0; #endif sf->no_skip_block4x4_search = 1; sf->first_step = 0; @@ -840,20 +841,18 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } } - cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16; - cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8; - cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4; - cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4; - cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2; + cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16; + cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8; + cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; + cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; + cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4; + cpi->mb.fwd_2ndtxm2x2 = vp9_short_fhaar2x2; #if CONFIG_LOSSLESS if (cpi->oxcf.lossless) { - cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8; - cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4; - cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless; + cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8; + cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8; + cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4_lossless; } #endif @@ -949,7 +948,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); - vpx_free(cpi->tok); { @@ -1107,6 +1105,22 @@ rescale(int val, int num, int denom) { return (int)(llval * llnum / llden); } +static void set_tile_limits(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + int min_log2_tiles, max_log2_tiles; + + cm->log2_tile_columns = cpi->oxcf.tile_columns; + cm->log2_tile_rows = cpi->oxcf.tile_rows; + + vp9_get_tile_n_bits(cm, &min_log2_tiles, &max_log2_tiles); + 
max_log2_tiles += min_log2_tiles; + if (cm->log2_tile_columns < min_log2_tiles) + cm->log2_tile_columns = min_log2_tiles; + else if (cm->log2_tile_columns > max_log2_tiles) + cm->log2_tile_columns = max_log2_tiles; + cm->tile_columns = 1 << cm->log2_tile_columns; + cm->tile_rows = 1 << cm->log2_tile_rows; +} static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { VP9_COMP *cpi = (VP9_COMP *)(ptr); @@ -1145,7 +1159,7 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->gld_fb_idx = 1; cpi->alt_fb_idx = 2; - cm->tile_columns = 1 << cpi->oxcf.tile_columns; + set_tile_limits(cpi); #if VP9_TEMPORAL_ALT_REF { @@ -1206,18 +1220,18 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; - cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_idct4x4llm; - cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; - cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm; + cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1; + cpi->mb.e_mbd.inv_2ndtxm4x4 = vp9_short_inv_walsh4x4; #if CONFIG_LOSSLESS cpi->oxcf.lossless = oxcf->lossless; if (cpi->oxcf.lossless) { - cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8; - cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8; - cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless; - cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless; + cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; + cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8; + cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless; + cpi->mb.e_mbd.inv_2ndtxm4x4 = vp9_short_inv_walsh4x4_lossless; } #endif @@ -1372,7 +1386,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->last_frame_distortion = 0; #endif 
- cm->tile_columns = 1 << cpi->oxcf.tile_columns; + set_tile_limits(cpi); } #define M_LOG2_E 0.693147180559945309417 @@ -2619,10 +2633,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // For 2 Pass Only used where GF/ARF prediction quality // is above a threshold cpi->zbin_mode_boost = 0; -#if CONFIG_LOSSLESS - cpi->zbin_mode_boost_enabled = FALSE; -#else cpi->zbin_mode_boost_enabled = TRUE; +#if CONFIG_LOSSLESS + if (cpi->oxcf.lossless) + cpi->zbin_mode_boost_enabled = FALSE; #endif if (cpi->gfu_boost <= 400) { cpi->zbin_mode_boost_enabled = FALSE; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7acaef472..1476de4da 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -347,7 +347,7 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG last_frame_uf; TOKENEXTRA *tok; - unsigned int tok_count; + unsigned int tok_count[1 << 6]; unsigned int frames_since_key; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index b5dbef0b3..e66db7499 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -228,43 +228,71 @@ void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) { } void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - int zero_run = 0; - int16_t *zbin_boost_ptr = b->zrun_zbin_boost; - int16_t *coeff_ptr = b->coeff; - int16_t *zbin_ptr = b->zbin; - int16_t *round_ptr = b->round; - int16_t *quant_ptr = b->quant; - uint8_t *quant_shift_ptr = b->quant_shift; int16_t *qcoeff_ptr = d->qcoeff; int16_t *dqcoeff_ptr = d->dqcoeff; - int16_t *dequant_ptr = d->dequant; - int zbin_oq_value = b->zbin_extra; vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t)); vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t)); - eob = -1; - if (!b->skip_block) { - for (i = 0; i < 64; i++) { + int i, rc, eob; + int zbin; + int x, y, z, sz; + int zero_run; + int16_t *zbin_boost_ptr = b->zrun_zbin_boost; + int16_t *coeff_ptr = b->coeff; + int16_t *zbin_ptr = 
b->zbin; + int16_t *round_ptr = b->round; + int16_t *quant_ptr = b->quant; + uint8_t *quant_shift_ptr = b->quant_shift; + int16_t *dequant_ptr = d->dequant; + int zbin_oq_value = b->zbin_extra; + + eob = -1; + + // Special case for DC as it is the one triggering access in various + // tables: {zbin, quant, quant_shift, dequant}_ptr[rc != 0] + { + z = coeff_ptr[0]; + zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value); + zero_run = 1; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) { + x += (round_ptr[0]); + y = ((int)(((int)(x * quant_ptr[0]) >> 16) + x)) + >> quant_shift_ptr[0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[0] = x; // write to destination + dqcoeff_ptr[0] = x * dequant_ptr[0]; // dequantized value + + if (y) { + eob = 0; // last nonzero coeffs + zero_run = 0; + } + } + } + for (i = 1; i < 64; i++) { rc = vp9_default_zig_zag1d_8x8[i]; z = coeff_ptr[rc]; - zbin = (zbin_ptr[rc != 0] + zbin_boost_ptr[zero_run] + zbin_oq_value); - zero_run += (zero_run < 15); + zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value); + // The original code was incrementing zero_run while keeping it at + // maximum 15 by adding "(zero_run < 15)". The same is achieved by + // removing the opposite of the sign mask of "(zero_run - 15)". 
+ zero_run -= (zero_run - 15) >> 31; sz = (z >> 31); // sign of z x = (z ^ sz) - sz; // x = abs(z) if (x >= zbin) { x += (round_ptr[rc != 0]); - y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) - >> quant_shift_ptr[rc != 0]; // quantize (x) + y = ((int)(((int)(x * quant_ptr[1]) >> 16) + x)) + >> quant_shift_ptr[1]; // quantize (x) x = (y ^ sz) - sz; // get the sign back qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[1]; // dequantized value if (y) { eob = i; // last nonzero coeffs @@ -272,8 +300,10 @@ void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { } } } + d->eob = eob + 1; + } else { + d->eob = 0; } - d->eob = eob + 1; } void vp9_quantize_mby_8x8(MACROBLOCK *x) { @@ -460,18 +490,14 @@ void vp9_init_quantizer(VP9_COMP *cpi) { static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40 }; - - int qrounding_factor = 48; - for (Q = 0; Q < QINDEX_RANGE; Q++) { int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80; + int qrounding_factor = 48; #if CONFIG_LOSSLESS - if (cpi->oxcf.lossless) { - if (Q == 0) { - qzbin_factor = 64; - qrounding_factor = 64; - } + if (cpi->oxcf.lossless && Q == 0) { + qzbin_factor = 64; + qrounding_factor = 64; } #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 29893b819..8385a1872 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -419,11 +419,6 @@ int vp9_uvsse(MACROBLOCK *x) { } -#if CONFIG_NEWCOEFCONTEXT -#define PT pn -#else -#define PT pt -#endif static INLINE int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, ENTROPY_CONTEXT *a, @@ -443,11 +438,6 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = (tx_type == DCT_DCT) ? 
mb->token_costs[tx_size][type] : mb->hybrid_token_costs[tx_size][type]; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; - int pn; -#endif - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; switch (tx_size) { @@ -495,50 +485,34 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); -#if CONFIG_NEWCOEFCONTEXT - neighbors = vp9_get_coef_neighbors_handle(scan); - pn = pt; -#endif if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; if (tx_type != DCT_DCT) { + int recent_energy = 0; for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; - cost += token_costs[band[c]][PT][t]; + cost += token_costs[band[c]][pt][t]; cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; -#if CONFIG_NEWCOEFCONTEXT - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) - pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); - else - pn = pt; -#endif + pt = vp9_get_coef_context(&recent_energy, t); } if (c < seg_eob) cost += mb->hybrid_token_costs[tx_size][type][band[c]] - [PT][DCT_EOB_TOKEN]; + [pt][DCT_EOB_TOKEN]; } else { + int recent_energy = 0; for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; cost += token_costs[band[c]][pt][t]; cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; -#if CONFIG_NEWCOEFCONTEXT - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) - pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); - else - pn = pt; -#endif + pt = vp9_get_coef_context(&recent_energy, t); } if (c < seg_eob) cost += mb->token_costs[tx_size][type][band[c]] - [PT][DCT_EOB_TOKEN]; + [pt][DCT_EOB_TOKEN]; } // is eob first coefficient; @@ -698,7 +672,8 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, // TODO(jingning) is it possible to quickly determine whether to force // trailing 
coefficients to be zero, instead of running trellis // optimization in the rate-distortion optimization loop? - if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) + if (mb->optimize && + xd->mode_info_context->mbmi.mode < I8X8_PRED) vp9_optimize_mby_16x16(mb); d = vp9_mbblock_error(mb, 0); @@ -859,21 +834,18 @@ static void super_block_yrd_32x32(MACROBLOCK *x, SUPERBLOCK * const x_sb = &x->sb_coeff_data; MACROBLOCKD * const xd = &x->e_mbd; SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; -#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID +#if DEBUG_ERROR int16_t out[1024]; #endif vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); -#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID +#if DEBUG_ERROR vp9_short_idct32x32(xd_sb->dqcoeff, out, 64); #endif -#if !CONFIG_DWTDCTHYBRID *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024); -#else - *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4; -#endif + #if DEBUG_ERROR printf("IDCT/FDCT error 32x32: %d (d: %d)\n", vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion); @@ -1140,7 +1112,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4); vp9_ht_quantize_b_4x4(be, b, tx_type); } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); } @@ -1176,7 +1148,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob); #endif else - xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32); + xd->inv_txm4x4(best_dqcoeff, b->diff, 32); vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -1440,7 +1412,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, if (tx_type != DCT_DCT) vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8); else - x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + 
x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); // compute quantization mse of 8x8 block @@ -1474,11 +1446,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); vp9_ht_quantize_b_4x4(be, b, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, b, b + 1); do_two = 1; } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); } distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); @@ -2166,17 +2138,17 @@ static int labels2mode( } break; case LEFT4X4: - this_mv->as_int = col ? d[-1].bmi.as_mv.first.as_int : + this_mv->as_int = col ? d[-1].bmi.as_mv[0].as_int : left_block_mv(xd, mic, i); if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = col ? d[-1].bmi.as_mv.second.as_int : + this_second_mv->as_int = col ? d[-1].bmi.as_mv[1].as_int : left_block_second_mv(xd, mic, i); break; case ABOVE4X4: - this_mv->as_int = row ? d[-4].bmi.as_mv.first.as_int : + this_mv->as_int = row ? d[-4].bmi.as_mv[0].as_int : above_block_mv(mic, i, mis); if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = row ? d[-4].bmi.as_mv.second.as_int : + this_second_mv->as_int = row ? d[-4].bmi.as_mv[1].as_int : above_block_second_mv(mic, i, mis); break; case ZERO4X4: @@ -2192,10 +2164,10 @@ static int labels2mode( int_mv left_mv, left_second_mv; left_second_mv.as_int = 0; - left_mv.as_int = col ? d[-1].bmi.as_mv.first.as_int : + left_mv.as_int = col ? d[-1].bmi.as_mv[0].as_int : left_block_mv(xd, mic, i); if (mbmi->second_ref_frame > 0) - left_second_mv.as_int = col ? d[-1].bmi.as_mv.second.as_int : + left_second_mv.as_int = col ? 
d[-1].bmi.as_mv[1].as_int : left_block_second_mv(xd, mic, i); if (left_mv.as_int == this_mv->as_int && @@ -2212,9 +2184,9 @@ static int labels2mode( #endif } - d->bmi.as_mv.first.as_int = this_mv->as_int; + d->bmi.as_mv[0].as_int = this_mv->as_int; if (mbmi->second_ref_frame > 0) - d->bmi.as_mv.second.as_int = this_second_mv->as_int; + d->bmi.as_mv[1].as_int = this_second_mv->as_int; x->partition_info->bmi[i].mode = m; x->partition_info->bmi[i].mv.as_int = this_mv->as_int; @@ -2248,7 +2220,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, if (xd->mode_info_context->mbmi.second_ref_frame > 0) vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix); vp9_subtract_b(be, bd, 16); - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, bd); thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); *distortion += thisdistortion; @@ -2300,7 +2272,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { if (otherrd) { - x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32); + x->fwd_txm8x8(be->src_diff, be2->coeff, 32); x->quantize_b_8x8(be2, bd2); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); otherdist += thisdistortion; @@ -2312,7 +2284,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, for (j = 0; j < 4; j += 2) { bd = &xd->block[ib + iblock[j]]; be = &x->block[ib + iblock[j]]; - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); *distortion += thisdistortion; @@ -2330,7 +2302,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, for (j = 0; j < 4; j += 2) { BLOCKD *bd = &xd->block[ib + iblock[j]]; BLOCK *be = &x->block[ib + iblock[j]]; - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->fwd_txm8x4(be->src_diff, be->coeff, 32); 
x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); otherdist += thisdistortion; @@ -2344,7 +2316,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, TX_4X4); } } - x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32); + x->fwd_txm8x8(be->src_diff, be2->coeff, 32); x->quantize_b_8x8(be2, bd2); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; @@ -2500,9 +2472,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // use previous block's result as next block's MV predictor. if (segmentation == PARTITIONING_4X4 && i > 0) { - bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int; + bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv[0].as_int; if (i == 4 || i == 8 || i == 12) - bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int; + bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv[0].as_int; step_param = 2; } } @@ -2541,11 +2513,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (thissme < bestsme) { bestsme = thissme; - mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int; + mode_mv[NEW4X4].as_int = e->bmi.as_mv[0].as_int; } else { /* The full search result is actually worse so re-instate the * previous best vector */ - e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int; + e->bmi.as_mv[0].as_int = mode_mv[NEW4X4].as_int; } } } @@ -2885,9 +2857,9 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < 16; i++) { BLOCKD *bd = &x->e_mbd.block[i]; - bd->bmi.as_mv.first.as_int = bsi.mvs[i].as_int; + bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int; if (mbmi->second_ref_frame > 0) - bd->bmi.as_mv.second.as_int = bsi.second_mvs[i].as_int; + bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int; bd->eob = bsi.eobs[i]; } @@ -3307,8 +3279,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, &dis, &sse); } - d->bmi.as_mv.first.as_int = 
tmp_mv.as_int; - frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv.first.as_int; + d->bmi.as_mv[0].as_int = tmp_mv.as_int; + frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv[0].as_int; // Add the new motion vector cost to our rolling cost variable *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0], @@ -4251,10 +4223,12 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (best_mbmode.mode == SPLITMV) { for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].as_mv.first.as_int = best_bmodes[i].as_mv.first.as_int; + xd->mode_info_context->bmi[i].as_mv[0].as_int = + best_bmodes[i].as_mv[0].as_int; if (mbmi->second_ref_frame > 0) for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].as_mv.second.as_int = best_bmodes[i].as_mv.second.as_int; + xd->mode_info_context->bmi[i].as_mv[1].as_int = + best_bmodes[i].as_mv[1].as_int; vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 17d8f25bd..b125a486e 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -13,6 +13,7 @@ #include "vpx_mem/vpx_mem.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_tile_common.h" void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x) { int mb_row, mb_col; @@ -254,7 +255,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { int t_pred_cost = INT_MAX; int i; - int tile, mb_row, mb_col, mb_start = 0; + int tile_col, mb_row, mb_col; int temporal_predictor_count[PREDICTION_PROBS][2]; int no_pred_segcounts[MAX_MB_SEGMENTS]; @@ -282,21 +283,13 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // First of all generate stats regarding how well the last segment map // predicts this one - for (tile = 0; tile < cm->tile_columns; tile++) { - // calculate end of tile column - const int sb_cols = (cm->mb_cols + 3) >> 2; - const int sb_end = (sb_cols * (tile + 1)) >> 
cpi->oxcf.tile_columns; - const int mb_end = ((sb_end << 2) > cm->mb_cols) ? - cm->mb_cols : (sb_end << 2); - - cm->cur_tile_idx = tile; - cm->cur_tile_mb_col_start = mb_start; - cm->cur_tile_mb_col_end = mb_end; - - mi_ptr = cm->mi + mb_start; + for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { + vp9_get_tile_col_offsets(cm, tile_col); + mi_ptr = cm->mi + cm->cur_tile_mb_col_start; for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { mi = mi_ptr; - for (mb_col = mb_start; mb_col < mb_end; mb_col += 4, mi += 4) { + for (mb_col = cm->cur_tile_mb_col_start; + mb_col < cm->cur_tile_mb_col_end; mb_col += 4, mi += 4) { if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, 4, mb_row, mb_col); @@ -338,8 +331,6 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { } } } - - mb_start = mb_end; } // Work out probability tree for coding segments without prediction diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 7bca01e05..164709009 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -171,7 +171,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, /*cpi->sf.search_method == HEX*/ // TODO Check that the 16x16 vf & sdf are selected here // Ignore mv costing by sending NULL pointer instead of cost arrays - bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first, + bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv[0], step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16], NULL, NULL, NULL, NULL, &best_ref_mv1); @@ -183,7 +183,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int distortion; unsigned int sse; // Ignore mv costing by sending NULL pointer instead of cost array - bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first, + bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv[0], &best_ref_mv1, 
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], @@ -263,8 +263,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (cpi->frames[frame] == NULL) continue; - mbd->block[0].bmi.as_mv.first.as_mv.row = 0; - mbd->block[0].bmi.as_mv.first.as_mv.col = 0; + mbd->block[0].bmi.as_mv[0].as_mv.row = 0; + mbd->block[0].bmi.as_mv[0].as_mv.col = 0; if (frame == alt_ref_index) { filter_weight = 2; @@ -297,8 +297,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->frames[frame]->u_buffer + mb_uv_offset, cpi->frames[frame]->v_buffer + mb_uv_offset, cpi->frames[frame]->y_stride, - mbd->block[0].bmi.as_mv.first.as_mv.row, - mbd->block[0].bmi.as_mv.first.as_mv.col, + mbd->block[0].bmi.as_mv[0].as_mv.row, + mbd->block[0].bmi.as_mv[0].as_mv.col, predictor); // Apply the filter (YUV) diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 2dedb1a51..12fee9037 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -100,12 +100,6 @@ static void fill_value_tokens() { vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } -#if CONFIG_NEWCOEFCONTEXT -#define PT pn -#else -#define PT pt -#endif - static void tokenize_b(VP9_COMP *cpi, MACROBLOCKD *xd, const int ib, @@ -115,6 +109,7 @@ static void tokenize_b(VP9_COMP *cpi, int dry_run) { int pt; /* near block/prev token context index */ int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; + int recent_energy = 0; const BLOCKD * const b = xd->block + ib; const int eob = b->eob; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ @@ -126,10 +121,6 @@ static void tokenize_b(VP9_COMP *cpi, vp9_coeff_probs *probs; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
get_tx_type(xd, b) : DCT_DCT; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors; - int pn; -#endif ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; @@ -228,10 +219,6 @@ static void tokenize_b(VP9_COMP *cpi, } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); -#if CONFIG_NEWCOEFCONTEXT - neighbors = vp9_get_coef_neighbors_handle(scan); - pn = pt; -#endif if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; @@ -252,21 +239,15 @@ static void tokenize_b(VP9_COMP *cpi, } t->Token = token; - t->context_tree = probs[type][band][PT]; + t->context_tree = probs[type][band][pt]; t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) || (band > 1 && type == PLANE_TYPE_Y_NO_DC)); assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); if (!dry_run) { - ++counts[type][band][PT][token]; + ++counts[type][band][pt][token]; } - pt = vp9_prev_token_class[token]; -#if CONFIG_NEWCOEFCONTEXT - if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(bands[c + 1])) - pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); - else - pn = pt; -#endif + + pt = vp9_get_coef_context(&recent_energy, token); ++t; } while (c < eob && ++c < seg_eob); diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index d8d95a136..eb152f521 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -59,6 +59,8 @@ VP9_COMMON_SRCS-yes += common/vp9_setupintrarecon.h VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.h VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h VP9_COMMON_SRCS-yes += common/vp9_textblit.h +VP9_COMMON_SRCS-yes += common/vp9_tile_common.h +VP9_COMMON_SRCS-yes += common/vp9_tile_common.c VP9_COMMON_SRCS-yes += common/vp9_treecoder.h VP9_COMMON_SRCS-yes += common/vp9_invtrans.c VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 0b8677285..81f02ee6b 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -27,6 
+27,7 @@ struct vp8_extracfg { unsigned int Sharpness; unsigned int static_thresh; unsigned int tile_columns; + unsigned int tile_rows; unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */ unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */ unsigned int arnr_type; /* alt_ref filter type */ @@ -54,7 +55,8 @@ static const struct extraconfig_map extracfg_map[] = { 0, /* noise_sensitivity */ 0, /* Sharpness */ 0, /* static_thresh */ - VP8_ONE_TILE_COLUMN, /* tile_columns */ + 0, /* tile_columns */ + 0, /* tile_rows */ 0, /* arnr_max_frames */ 3, /* arnr_strength */ 3, /* arnr_type*/ @@ -171,8 +173,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); - RANGE_CHECK(vp8_cfg, tile_columns, - VP8_ONE_TILE_COLUMN, VP8_FOUR_TILE_COLUMNS); + RANGE_CHECK(vp8_cfg, tile_columns, 0, 6); + RANGE_CHECK(vp8_cfg, tile_rows, 0, 2); RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); @@ -310,6 +312,7 @@ static vpx_codec_err_t set_vp8e_config(VP9_CONFIG *oxcf, oxcf->tuning = vp8_cfg.tuning; oxcf->tile_columns = vp8_cfg.tile_columns; + oxcf->tile_rows = vp8_cfg.tile_rows; #if CONFIG_LOSSLESS oxcf->lossless = vp8_cfg.lossless; @@ -417,6 +420,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, MAP(VP8E_SET_SHARPNESS, xcfg.Sharpness); MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh); MAP(VP9E_SET_TILE_COLUMNS, xcfg.tile_columns); + MAP(VP9E_SET_TILE_ROWS, xcfg.tile_rows); MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames); MAP(VP8E_SET_ARNR_STRENGTH, xcfg.arnr_strength); @@ -1007,6 +1011,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { {VP8E_SET_SHARPNESS, set_param}, {VP8E_SET_STATIC_THRESHOLD, set_param}, {VP9E_SET_TILE_COLUMNS, set_param}, + {VP9E_SET_TILE_ROWS, set_param}, {VP8E_GET_LAST_QUANTIZER, get_param}, {VP8E_GET_LAST_QUANTIZER_64, get_param}, {VP8E_SET_ARNR_MAXFRAMES, set_param}, |