summary | refs | log | tree | commit | diff
path: root/vp9/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/common')
-rw-r--r-- vp9/common/vp9_blockd.h 19
-rw-r--r-- vp9/common/vp9_convolve.c 21
-rw-r--r-- vp9/common/vp9_debugmodes.c 4
-rw-r--r-- vp9/common/vp9_entropy.c 619
-rw-r--r-- vp9/common/vp9_findnearmv.h 12
-rw-r--r-- vp9/common/vp9_idct.h 8
-rw-r--r-- vp9/common/vp9_idctllm.c 889
-rw-r--r-- vp9/common/vp9_invtrans.c 9
-rw-r--r-- vp9/common/vp9_onyxc_int.h 2
-rw-r--r-- vp9/common/vp9_reconinter.c 160
-rw-r--r-- vp9/common/vp9_rtcd_defs.sh 7
-rw-r--r-- vp9/common/vp9_tile_common.c 43
-rw-r--r-- vp9/common/vp9_tile_common.h 25
-rw-r--r-- vp9/common/x86/vp9_asm_stubs.c 39
-rw-r--r-- vp9/common/x86/vp9_subpixel_8t_ssse3.asm 239
15 files changed, 558 insertions, 1538 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index b0c1bfa08..82678d6b6 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -47,18 +47,6 @@ void vpx_log(const char *format, ...);
#define MAX_MV_REFS 9
#define MAX_MV_REF_CANDIDATES 4
-#if CONFIG_DWTDCTHYBRID
-#define DWT_MAX_LENGTH 64
-#define DWT_TYPE 26 // 26/53/97
-#define DWT_PRECISION_BITS 2
-#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2)
-
-#define DWTDCT16X16 0
-#define DWTDCT16X16_LEAN 1
-#define DWTDCT8X8 2
-#define DWTDCT_TYPE DWTDCT16X16_LEAN
-#endif
-
typedef struct {
int r, c;
} POS;
@@ -218,10 +206,7 @@ union b_mode_info {
B_PREDICTION_MODE context;
#endif
} as_mode;
- struct {
- int_mv first;
- int_mv second;
- } as_mv;
+ int_mv as_mv[2]; // first, second inter predictor motion vectors
};
typedef enum {
@@ -425,7 +410,7 @@ typedef struct macroblockd {
#define ACTIVE_HT8 300
-#define ACTIVE_HT16 300
+#define ACTIVE_HT16 0
// convert MB_PREDICTION_MODE to B_PREDICTION_MODE
static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index f21f1d84e..b87c410df 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -7,12 +7,15 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "vp9/common/vp9_convolve.h"
+
#include <assert.h>
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
@@ -293,9 +296,21 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- convolve_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h, 8);
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_avg(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameters should be removed! */
+ NULL, 0, /* These unused parameters should be removed! */
+ w, h);
}
void vp9_convolve_copy(const uint8_t *src, int src_stride,
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index 5ea7736b7..1953d60c6 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -129,8 +129,8 @@ void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols,
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
fprintf(mvs, "%3d:%-3d ",
- mi[mb_index].bmi[bindex].as_mv.first.as_mv.row,
- mi[mb_index].bmi[bindex].as_mv.first.as_mv.col);
+ mi[mb_index].bmi[bindex].as_mv[0].as_mv.row,
+ mi[mb_index].bmi[bindex].as_mv[0].as_mv.col);
}
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 352e17c0c..03f89ac87 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -143,624 +143,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = {
237, 252, 253, 238, 223, 239, 254, 255,
};
-#if CONFIG_DWTDCTHYBRID
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4, 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 256, 225, 194, 163,
- 132, 101, 70, 39, 8, 9, 40, 71,
- 102, 133, 164, 195, 226, 257, 288, 320,
- 289, 258, 227, 196, 165, 134, 103, 72,
- 41, 10, 11, 42, 73, 104, 135, 166,
- 197, 228, 259, 290, 321, 352, 384, 353,
- 322, 291, 260, 229, 198, 167, 136, 105,
- 74, 43, 12, 13, 44, 75, 106, 137,
- 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 448, 417, 386, 355, 324, 293, 262,
- 231, 200, 169, 138, 107, 76, 45, 14,
- 15, 46, 77, 108, 139, 170, 201, 232,
- 263, 294, 325, 356, 387, 418, 449, 480,
- 481, 450, 419, 388, 357, 326, 295, 264,
- 233, 202, 171, 140, 109, 78, 47, 79,
- 110, 141, 172, 203, 234, 265, 296, 327,
- 358, 389, 420, 451, 482, 483, 452, 421,
- 390, 359, 328, 297, 266, 235, 204, 173,
- 142, 111, 143, 174, 205, 236, 267, 298,
- 329, 360, 391, 422, 453, 484, 485, 454,
- 423, 392, 361, 330, 299, 268, 237, 206,
- 175, 207, 238, 269, 300, 331, 362, 393,
- 424, 455, 486, 487, 456, 425, 394, 363,
- 332, 301, 270, 239, 271, 302, 333, 364,
- 395, 426, 457, 488, 489, 458, 427, 396,
- 365, 334, 303, 335, 366, 397, 428, 459,
- 490, 491, 460, 429, 398, 367, 399, 430,
- 461, 492, 493, 462, 431, 463, 494, 495,
-
- 16, 512, 528, 17, 513, 529, 48, 544,
- 560, 80, 576, 592, 49, 545, 561, 18,
- 514, 530, 19, 515, 531, 50, 546, 562,
- 81, 577, 593, 112, 608, 624, 144, 640,
- 656, 113, 609, 625, 82, 578, 594, 51,
- 547, 563, 20, 516, 532, 21, 517, 533,
- 52, 548, 564, 83, 579, 595, 114, 610,
- 626, 145, 641, 657, 176, 672, 688, 208,
- 704, 720, 177, 673, 689, 146, 642, 658,
- 115, 611, 627, 84, 580, 596, 53, 549,
- 565, 22, 518, 534, 23, 519, 535, 54,
- 550, 566, 85, 581, 597, 116, 612, 628,
- 147, 643, 659, 178, 674, 690, 209, 705,
- 721, 240, 736, 752, 272, 768, 784, 241,
- 737, 753, 210, 706, 722, 179, 675, 691,
- 148, 644, 660, 117, 613, 629, 86, 582,
- 598, 55, 551, 567, 24, 520, 536, 25,
- 521, 537, 56, 552, 568, 87, 583, 599,
- 118, 614, 630, 149, 645, 661, 180, 676,
- 692, 211, 707, 723, 242, 738, 754, 273,
- 769, 785, 304, 800, 816, 336, 832, 848,
- 305, 801, 817, 274, 770, 786, 243, 739,
- 755, 212, 708, 724, 181, 677, 693, 150,
- 646, 662, 119, 615, 631, 88, 584, 600,
- 57, 553, 569, 26, 522, 538, 27, 523,
- 539, 58, 554, 570, 89, 585, 601, 120,
- 616, 632, 151, 647, 663, 182, 678, 694,
- 213, 709, 725, 244, 740, 756, 275, 771,
- 787, 306, 802, 818, 337, 833, 849, 368,
- 864, 880, 400, 896, 912, 369, 865, 881,
- 338, 834, 850, 307, 803, 819, 276, 772,
- 788, 245, 741, 757, 214, 710, 726, 183,
-
- 679, 695, 152, 648, 664, 121, 617, 633,
- 90, 586, 602, 59, 555, 571, 28, 524,
- 540, 29, 525, 541, 60, 556, 572, 91,
- 587, 603, 122, 618, 634, 153, 649, 665,
- 184, 680, 696, 215, 711, 727, 246, 742,
- 758, 277, 773, 789, 308, 804, 820, 339,
- 835, 851, 370, 866, 882, 401, 897, 913,
- 432, 928, 944, 464, 960, 976, 433, 929,
- 945, 402, 898, 914, 371, 867, 883, 340,
- 836, 852, 309, 805, 821, 278, 774, 790,
- 247, 743, 759, 216, 712, 728, 185, 681,
- 697, 154, 650, 666, 123, 619, 635, 92,
- 588, 604, 61, 557, 573, 30, 526, 542,
- 31, 527, 543, 62, 558, 574, 93, 589,
- 605, 124, 620, 636, 155, 651, 667, 186,
- 682, 698, 217, 713, 729, 248, 744, 760,
- 279, 775, 791, 310, 806, 822, 341, 837,
- 853, 372, 868, 884, 403, 899, 915, 434,
- 930, 946, 465, 961, 977, 496, 992, 1008,
- 497, 993, 1009, 466, 962, 978, 435, 931,
- 947, 404, 900, 916, 373, 869, 885, 342,
- 838, 854, 311, 807, 823, 280, 776, 792,
- 249, 745, 761, 218, 714, 730, 187, 683,
- 699, 156, 652, 668, 125, 621, 637, 94,
- 590, 606, 63, 559, 575, 95, 591, 607,
- 126, 622, 638, 157, 653, 669, 188, 684,
- 700, 219, 715, 731, 250, 746, 762, 281,
- 777, 793, 312, 808, 824, 343, 839, 855,
- 374, 870, 886, 405, 901, 917, 436, 932,
- 948, 467, 963, 979, 498, 994, 1010, 499,
- 995, 1011, 468, 964, 980, 437, 933, 949,
- 406, 902, 918, 375, 871, 887, 344, 840,
-
- 856, 313, 809, 825, 282, 778, 794, 251,
- 747, 763, 220, 716, 732, 189, 685, 701,
- 158, 654, 670, 127, 623, 639, 159, 655,
- 671, 190, 686, 702, 221, 717, 733, 252,
- 748, 764, 283, 779, 795, 314, 810, 826,
- 345, 841, 857, 376, 872, 888, 407, 903,
- 919, 438, 934, 950, 469, 965, 981, 500,
- 996, 1012, 501, 997, 1013, 470, 966, 982,
- 439, 935, 951, 408, 904, 920, 377, 873,
- 889, 346, 842, 858, 315, 811, 827, 284,
- 780, 796, 253, 749, 765, 222, 718, 734,
- 191, 687, 703, 223, 719, 735, 254, 750,
- 766, 285, 781, 797, 316, 812, 828, 347,
- 843, 859, 378, 874, 890, 409, 905, 921,
- 440, 936, 952, 471, 967, 983, 502, 998,
- 1014, 503, 999, 1015, 472, 968, 984, 441,
- 937, 953, 410, 906, 922, 379, 875, 891,
- 348, 844, 860, 317, 813, 829, 286, 782,
- 798, 255, 751, 767, 287, 783, 799, 318,
- 814, 830, 349, 845, 861, 380, 876, 892,
- 411, 907, 923, 442, 938, 954, 473, 969,
- 985, 504, 1000, 1016, 505, 1001, 1017, 474,
- 970, 986, 443, 939, 955, 412, 908, 924,
- 381, 877, 893, 350, 846, 862, 319, 815,
- 831, 351, 847, 863, 382, 878, 894, 413,
- 909, 925, 444, 940, 956, 475, 971, 987,
- 506, 1002, 1018, 507, 1003, 1019, 476, 972,
- 988, 445, 941, 957, 414, 910, 926, 383,
- 879, 895, 415, 911, 927, 446, 942, 958,
- 477, 973, 989, 508, 1004, 1020, 509, 1005,
- 1021, 478, 974, 990, 447, 943, 959, 479,
- 975, 991, 510, 1006, 1022, 511, 1007, 1023,
-};
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6,
- 6, 6, 6,
- 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4,
- 16, 512, 528,
- 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 256, 225, 194, 163,
- 132, 101, 70, 39, 8, 9, 40, 71,
- 102, 133, 164, 195, 226, 257, 288, 320,
- 289, 258, 227, 196, 165, 134, 103, 72,
- 41, 10, 11, 42, 73, 104, 135, 166,
- 197, 228, 259, 290, 321, 352, 384, 353,
- 322, 291, 260, 229, 198, 167, 136, 105,
- 74, 43, 12, 13, 44, 75, 106, 137,
- 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 448, 417, 386, 355, 324, 293, 262,
- 231, 200, 169, 138, 107, 76, 45, 14,
- 15, 46, 77, 108, 139, 170, 201, 232,
- 263, 294, 325, 356, 387, 418, 449, 480,
- 481, 450, 419, 388, 357, 326, 295, 264,
- 233, 202, 171, 140, 109, 78, 47, 79,
- 110, 141, 172, 203, 234, 265, 296, 327,
- 358, 389, 420, 451, 482, 483, 452, 421,
- 390, 359, 328, 297, 266, 235, 204, 173,
- 142, 111, 143, 174, 205, 236, 267, 298,
- 329, 360, 391, 422, 453, 484, 485, 454,
- 423, 392, 361, 330, 299, 268, 237, 206,
- 175, 207, 238, 269, 300, 331, 362, 393,
- 424, 455, 486, 487, 456, 425, 394, 363,
- 332, 301, 270, 239, 271, 302, 333, 364,
- 395, 426, 457, 488, 489, 458, 427, 396,
- 365, 334, 303, 335, 366, 397, 428, 459,
- 490, 491, 460, 429, 398, 367, 399, 430,
- 461, 492, 493, 462, 431, 463, 494, 495,
-
- 17, 513, 529, 48, 544,
- 560, 80, 576, 592, 49, 545, 561, 18,
- 514, 530, 19, 515, 531, 50, 546, 562,
- 81, 577, 593, 112, 608, 624, 144, 640,
- 656, 113, 609, 625, 82, 578, 594, 51,
- 547, 563, 20, 516, 532, 21, 517, 533,
- 52, 548, 564, 83, 579, 595, 114, 610,
- 626, 145, 641, 657, 176, 672, 688, 208,
- 704, 720, 177, 673, 689, 146, 642, 658,
- 115, 611, 627, 84, 580, 596, 53, 549,
- 565, 22, 518, 534, 23, 519, 535, 54,
- 550, 566, 85, 581, 597, 116, 612, 628,
- 147, 643, 659, 178, 674, 690, 209, 705,
- 721, 240, 736, 752, 272, 768, 784, 241,
- 737, 753, 210, 706, 722, 179, 675, 691,
- 148, 644, 660, 117, 613, 629, 86, 582,
- 598, 55, 551, 567, 24, 520, 536, 25,
- 521, 537, 56, 552, 568, 87, 583, 599,
- 118, 614, 630, 149, 645, 661, 180, 676,
- 692, 211, 707, 723, 242, 738, 754, 273,
- 769, 785, 304, 800, 816, 336, 832, 848,
- 305, 801, 817, 274, 770, 786, 243, 739,
- 755, 212, 708, 724, 181, 677, 693, 150,
- 646, 662, 119, 615, 631, 88, 584, 600,
- 57, 553, 569, 26, 522, 538, 27, 523,
- 539, 58, 554, 570, 89, 585, 601, 120,
- 616, 632, 151, 647, 663, 182, 678, 694,
- 213, 709, 725, 244, 740, 756, 275, 771,
- 787, 306, 802, 818, 337, 833, 849, 368,
- 864, 880, 400, 896, 912, 369, 865, 881,
- 338, 834, 850, 307, 803, 819, 276, 772,
- 788, 245, 741, 757, 214, 710, 726, 183,
-
- 679, 695, 152, 648, 664, 121, 617, 633,
- 90, 586, 602, 59, 555, 571, 28, 524,
- 540, 29, 525, 541, 60, 556, 572, 91,
- 587, 603, 122, 618, 634, 153, 649, 665,
- 184, 680, 696, 215, 711, 727, 246, 742,
- 758, 277, 773, 789, 308, 804, 820, 339,
- 835, 851, 370, 866, 882, 401, 897, 913,
- 432, 928, 944, 464, 960, 976, 433, 929,
- 945, 402, 898, 914, 371, 867, 883, 340,
- 836, 852, 309, 805, 821, 278, 774, 790,
- 247, 743, 759, 216, 712, 728, 185, 681,
- 697, 154, 650, 666, 123, 619, 635, 92,
- 588, 604, 61, 557, 573, 30, 526, 542,
- 31, 527, 543, 62, 558, 574, 93, 589,
- 605, 124, 620, 636, 155, 651, 667, 186,
- 682, 698, 217, 713, 729, 248, 744, 760,
- 279, 775, 791, 310, 806, 822, 341, 837,
- 853, 372, 868, 884, 403, 899, 915, 434,
- 930, 946, 465, 961, 977, 496, 992, 1008,
- 497, 993, 1009, 466, 962, 978, 435, 931,
- 947, 404, 900, 916, 373, 869, 885, 342,
- 838, 854, 311, 807, 823, 280, 776, 792,
- 249, 745, 761, 218, 714, 730, 187, 683,
- 699, 156, 652, 668, 125, 621, 637, 94,
- 590, 606, 63, 559, 575, 95, 591, 607,
- 126, 622, 638, 157, 653, 669, 188, 684,
- 700, 219, 715, 731, 250, 746, 762, 281,
- 777, 793, 312, 808, 824, 343, 839, 855,
- 374, 870, 886, 405, 901, 917, 436, 932,
- 948, 467, 963, 979, 498, 994, 1010, 499,
- 995, 1011, 468, 964, 980, 437, 933, 949,
- 406, 902, 918, 375, 871, 887, 344, 840,
-
- 856, 313, 809, 825, 282, 778, 794, 251,
- 747, 763, 220, 716, 732, 189, 685, 701,
- 158, 654, 670, 127, 623, 639, 159, 655,
- 671, 190, 686, 702, 221, 717, 733, 252,
- 748, 764, 283, 779, 795, 314, 810, 826,
- 345, 841, 857, 376, 872, 888, 407, 903,
- 919, 438, 934, 950, 469, 965, 981, 500,
- 996, 1012, 501, 997, 1013, 470, 966, 982,
- 439, 935, 951, 408, 904, 920, 377, 873,
- 889, 346, 842, 858, 315, 811, 827, 284,
- 780, 796, 253, 749, 765, 222, 718, 734,
- 191, 687, 703, 223, 719, 735, 254, 750,
- 766, 285, 781, 797, 316, 812, 828, 347,
- 843, 859, 378, 874, 890, 409, 905, 921,
- 440, 936, 952, 471, 967, 983, 502, 998,
- 1014, 503, 999, 1015, 472, 968, 984, 441,
- 937, 953, 410, 906, 922, 379, 875, 891,
- 348, 844, 860, 317, 813, 829, 286, 782,
- 798, 255, 751, 767, 287, 783, 799, 318,
- 814, 830, 349, 845, 861, 380, 876, 892,
- 411, 907, 923, 442, 938, 954, 473, 969,
- 985, 504, 1000, 1016, 505, 1001, 1017, 474,
- 970, 986, 443, 939, 955, 412, 908, 924,
- 381, 877, 893, 350, 846, 862, 319, 815,
- 831, 351, 847, 863, 382, 878, 894, 413,
- 909, 925, 444, 940, 956, 475, 971, 987,
- 506, 1002, 1018, 507, 1003, 1019, 476, 972,
- 988, 445, 941, 957, 414, 910, 926, 383,
- 879, 895, 415, 911, 927, 446, 942, 958,
- 477, 973, 989, 508, 1004, 1020, 509, 1005,
- 1021, 478, 974, 990, 447, 943, 959, 479,
- 975, 991, 510, 1006, 1022, 511, 1007, 1023,
-};
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5,
- 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4, 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 225, 194, 163, 132,
- 101, 70, 39, 71, 102, 133, 164, 195,
- 226, 227, 196, 165, 134, 103, 135, 166,
- 197, 228, 229, 198, 167, 199, 230, 231,
-
- 8, 256, 264, 9, 257, 265, 40, 288, 296, 72, 320, 328,
- 41, 289, 297, 10, 258, 266, 11, 259, 267, 42, 290, 298,
- 73, 321, 329, 104, 352, 360, 136, 384, 392, 105, 353, 361,
- 74, 322, 330, 43, 291, 299, 12, 260, 268, 13, 261, 269,
- 44, 292, 300, 75, 323, 331, 106, 354, 362, 137, 385, 393,
- 168, 416, 424, 200, 448, 456, 169, 417, 425, 138, 386, 394,
- 107, 355, 363, 76, 324, 332, 45, 293, 301, 14, 262, 270,
- 15, 263, 271, 46, 294, 302, 77, 325, 333, 108, 356, 364,
- 139, 387, 395, 170, 418, 426, 201, 449, 457, 232, 480, 488,
- 233, 481, 489, 202, 450, 458, 171, 419, 427, 140, 388, 396,
- 109, 357, 365, 78, 326, 334, 47, 295, 303, 79, 327, 335,
- 110, 358, 366, 141, 389, 397, 172, 420, 428, 203, 451, 459,
- 234, 482, 490, 235, 483, 491, 204, 452, 460, 173, 421, 429,
- 142, 390, 398, 111, 359, 367, 143, 391, 399, 174, 422, 430,
- 205, 453, 461, 236, 484, 492, 237, 485, 493, 206, 454, 462,
- 175, 423, 431, 207, 455, 463, 238, 486, 494, 239, 487, 495,
-
- 16, 512, 528, 17, 513, 529, 18, 514,
- 530, 19, 515, 531, 20, 516, 532, 21,
- 517, 533, 22, 518, 534, 23, 519, 535,
- 24, 520, 536, 25, 521, 537, 26, 522,
- 538, 27, 523, 539, 28, 524, 540, 29,
- 525, 541, 30, 526, 542, 31, 527, 543,
- 48, 544, 560, 49, 545, 561, 50, 546,
- 562, 51, 547, 563, 52, 548, 564, 53,
- 549, 565, 54, 550, 566, 55, 551, 567,
- 56, 552, 568, 57, 553, 569, 58, 554,
- 570, 59, 555, 571, 60, 556, 572, 61,
- 557, 573, 62, 558, 574, 63, 559, 575,
- 80, 576, 592, 81, 577, 593, 82, 578,
- 594, 83, 579, 595, 84, 580, 596, 85,
- 581, 597, 86, 582, 598, 87, 583, 599,
- 88, 584, 600, 89, 585, 601, 90, 586,
- 602, 91, 587, 603, 92, 588, 604, 93,
- 589, 605, 94, 590, 606, 95, 591, 607,
- 112, 608, 624, 113, 609, 625, 114, 610,
- 626, 115, 611, 627, 116, 612, 628, 117,
- 613, 629, 118, 614, 630, 119, 615, 631,
- 120, 616, 632, 121, 617, 633, 122, 618,
- 634, 123, 619, 635, 124, 620, 636, 125,
- 621, 637, 126, 622, 638, 127, 623, 639,
- 144, 640, 656, 145, 641, 657, 146, 642,
- 658, 147, 643, 659, 148, 644, 660, 149,
- 645, 661, 150, 646, 662, 151, 647, 663,
- 152, 648, 664, 153, 649, 665, 154, 650,
- 666, 155, 651, 667, 156, 652, 668, 157,
- 653, 669, 158, 654, 670, 159, 655, 671,
- 176, 672, 688, 177, 673, 689, 178, 674,
- 690, 179, 675, 691, 180, 676, 692, 181,
- 677, 693, 182, 678, 694, 183, 679, 695,
- 184, 680, 696, 185, 681, 697, 186, 682,
- 698, 187, 683, 699, 188, 684, 700, 189,
- 685, 701, 190, 686, 702, 191, 687, 703,
- 208, 704, 720, 209, 705, 721, 210, 706,
- 722, 211, 707, 723, 212, 708, 724, 213,
- 709, 725, 214, 710, 726, 215, 711, 727,
- 216, 712, 728, 217, 713, 729, 218, 714,
- 730, 219, 715, 731, 220, 716, 732, 221,
- 717, 733, 222, 718, 734, 223, 719, 735,
- 240, 736, 752, 241, 737, 753, 242, 738,
- 754, 243, 739, 755, 244, 740, 756, 245,
- 741, 757, 246, 742, 758, 247, 743, 759,
- 248, 744, 760, 249, 745, 761, 250, 746,
- 762, 251, 747, 763, 252, 748, 764, 253,
- 749, 765, 254, 750, 766, 255, 751, 767,
- 272, 768, 784, 273, 769, 785, 274, 770,
- 786, 275, 771, 787, 276, 772, 788, 277,
- 773, 789, 278, 774, 790, 279, 775, 791,
- 280, 776, 792, 281, 777, 793, 282, 778,
- 794, 283, 779, 795, 284, 780, 796, 285,
- 781, 797, 286, 782, 798, 287, 783, 799,
- 304, 800, 816, 305, 801, 817, 306, 802,
- 818, 307, 803, 819, 308, 804, 820, 309,
- 805, 821, 310, 806, 822, 311, 807, 823,
- 312, 808, 824, 313, 809, 825, 314, 810,
- 826, 315, 811, 827, 316, 812, 828, 317,
- 813, 829, 318, 814, 830, 319, 815, 831,
- 336, 832, 848, 337, 833, 849, 338, 834,
- 850, 339, 835, 851, 340, 836, 852, 341,
- 837, 853, 342, 838, 854, 343, 839, 855,
- 344, 840, 856, 345, 841, 857, 346, 842,
- 858, 347, 843, 859, 348, 844, 860, 349,
- 845, 861, 350, 846, 862, 351, 847, 863,
- 368, 864, 880, 369, 865, 881, 370, 866,
- 882, 371, 867, 883, 372, 868, 884, 373,
- 869, 885, 374, 870, 886, 375, 871, 887,
- 376, 872, 888, 377, 873, 889, 378, 874,
- 890, 379, 875, 891, 380, 876, 892, 381,
- 877, 893, 382, 878, 894, 383, 879, 895,
- 400, 896, 912, 401, 897, 913, 402, 898,
- 914, 403, 899, 915, 404, 900, 916, 405,
- 901, 917, 406, 902, 918, 407, 903, 919,
- 408, 904, 920, 409, 905, 921, 410, 906,
- 922, 411, 907, 923, 412, 908, 924, 413,
- 909, 925, 414, 910, 926, 415, 911, 927,
- 432, 928, 944, 433, 929, 945, 434, 930,
- 946, 435, 931, 947, 436, 932, 948, 437,
- 933, 949, 438, 934, 950, 439, 935, 951,
- 440, 936, 952, 441, 937, 953, 442, 938,
- 954, 443, 939, 955, 444, 940, 956, 445,
- 941, 957, 446, 942, 958, 447, 943, 959,
- 464, 960, 976, 465, 961, 977, 466, 962,
- 978, 467, 963, 979, 468, 964, 980, 469,
- 965, 981, 470, 966, 982, 471, 967, 983,
- 472, 968, 984, 473, 969, 985, 474, 970,
- 986, 475, 971, 987, 476, 972, 988, 477,
- 973, 989, 478, 974, 990, 479, 975, 991,
- 496, 992, 1008, 497, 993, 1009, 498, 994,
- 1010, 499, 995, 1011, 500, 996, 1012, 501,
- 997, 1013, 502, 998, 1014, 503, 999, 1015,
- 504, 1000, 1016, 505, 1001, 1017, 506, 1002,
- 1018, 507, 1003, 1019, 508, 1004, 1020, 509,
- 1005, 1021, 510, 1006, 1022, 511, 1007, 1023,
-};
-#endif
-
-#else
-
DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
@@ -865,7 +247,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892,
923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023,
};
-#endif // CONFIG_DWTDCTHYBRID
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index 74fce7aad..c42aab1a5 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -98,7 +98,7 @@ static int left_block_mv(const MACROBLOCKD *xd,
b += 4;
}
- return (cur_mb->bmi + b - 1)->as_mv.first.as_int;
+ return (cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
static int left_block_second_mv(const MACROBLOCKD *xd,
@@ -117,8 +117,8 @@ static int left_block_second_mv(const MACROBLOCKD *xd,
}
return cur_mb->mbmi.second_ref_frame > 0 ?
- (cur_mb->bmi + b - 1)->as_mv.second.as_int :
- (cur_mb->bmi + b - 1)->as_mv.first.as_int;
+ (cur_mb->bmi + b - 1)->as_mv[1].as_int :
+ (cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
@@ -131,7 +131,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
b += 16;
}
- return (cur_mb->bmi + b - 4)->as_mv.first.as_int;
+ return (cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
@@ -146,8 +146,8 @@ static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
}
return cur_mb->mbmi.second_ref_frame > 0 ?
- (cur_mb->bmi + b - 4)->as_mv.second.as_int :
- (cur_mb->bmi + b - 4)->as_mv.first.as_int;
+ (cur_mb->bmi + b - 4)->as_mv[1].as_int :
+ (cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 680a20627..01e8ea3c2 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -50,6 +50,14 @@ static const int cospi_29_64 = 2404;
static const int cospi_30_64 = 1606;
static const int cospi_31_64 = 804;
+#if CONFIG_INTHT4X4
+// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
+static const int sinpi_1_9 = 5283;
+static const int sinpi_2_9 = 9929;
+static const int sinpi_3_9 = 13377;
+static const int sinpi_4_9 = 15212;
+#endif
+
static INLINE int dct_const_round_shift(int input) {
int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
assert((rv <= INT16_MAX) && (rv >= INT16_MIN));
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index b27b34cf2..548805726 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -494,7 +494,6 @@ void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,
}
#endif
-
void idct4_1d(int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;
@@ -651,6 +650,100 @@ void vp9_short_idct8x8_c(int16_t *input, int16_t *output, int pitch) {
}
}
+#if CONFIG_INTHT4X4
+static void iadst4_1d(int16_t *input, int16_t *output) {
+ int x0, x1, x2, x3;
+ int s0, s1, s2, s3, s4, s5, s6, s7;
+
+ x0 = input[0];
+ x1 = input[1];
+ x2 = input[2];
+ x3 = input[3];
+
+ if (!(x0 | x1 | x2 | x3)) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
+
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_2_9 * x0;
+ s2 = sinpi_3_9 * x1;
+ s3 = sinpi_4_9 * x2;
+ s4 = sinpi_1_9 * x2;
+ s5 = sinpi_2_9 * x3;
+ s6 = sinpi_4_9 * x3;
+ s7 = x0 - x2 + x3;
+
+ x0 = s0 + s3 + s5;
+ x1 = s1 - s4 - s6;
+ x2 = sinpi_3_9 * s7;
+ x3 = s2;
+
+ s0 = x0 + x3;
+ s1 = x1 + x3;
+ s2 = x2;
+ s3 = x0 + x1 - x3;
+
+ // 1-D transform scaling factor is sqrt(2).
+ // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+ // + 1b (addition) = 29b.
+ // Hence the output bit depth is 15b.
+ output[0] = dct_const_round_shift(s0);
+ output[1] = dct_const_round_shift(s1);
+ output[2] = dct_const_round_shift(s2);
+ output[3] = dct_const_round_shift(s3);
+}
+
+void vp9_short_iht4x4_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[16];
+ int16_t *outptr = &out[0];
+ const int short_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+
+ void (*invr)(int16_t*, int16_t*);
+ void (*invc)(int16_t*, int16_t*);
+
+ switch (tx_type) {
+ case ADST_ADST:
+ invc = &iadst4_1d;
+ invr = &iadst4_1d;
+ break;
+ case ADST_DCT:
+ invc = &iadst4_1d;
+ invr = &idct4_1d;
+ break;
+ case DCT_ADST:
+ invc = &idct4_1d;
+ invr = &iadst4_1d;
+ break;
+ case DCT_DCT:
+ invc = &idct4_1d;
+ invr = &idct4_1d;
+ break;
+ default:
+ assert(0);
+ }
+
+ // inverse transform row vectors
+ for (i = 0; i < 4; ++i) {
+ invr(input, outptr);
+ input += 4;
+ outptr += 4;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j * 4 + i];
+ invc(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j * short_pitch + i] = (temp_out[j] + 8) >> 4;
+ }
+}
+#endif
+
#if CONFIG_INTHT
static void iadst8_1d(int16_t *input, int16_t *output) {
int x0, x1, x2, x3, x4, x5, x6, x7;
@@ -733,7 +826,7 @@ static void iadst8_1d(int16_t *input, int16_t *output) {
}
void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
- TX_TYPE tx_type, int pitch) {
+ int pitch, TX_TYPE tx_type) {
int16_t out[8 * 8];
int16_t *outptr = &out[0];
const int short_pitch = pitch >> 1;
@@ -1059,8 +1152,6 @@ void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
*output = (out + 32) >> 6;
}
-
-#if !CONFIG_DWTDCTHYBRID
void idct32_1d(int16_t *input, int16_t *output) {
int16_t step1[32], step2[32];
int temp1, temp2;
@@ -1428,7 +1519,6 @@ void idct32_1d(int16_t *input, int16_t *output) {
output[31] = step1[0] - step1[31];
}
-
void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
int16_t out[32 * 32];
int16_t *outptr = &out[0];
@@ -1461,792 +1551,3 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
out = dct_const_round_shift(tmp);
*output = (out + 32) >> 6;
}
-
-#else // !CONFIG_DWTDCTHYBRID
-
-#if DWT_TYPE == 53
-
-// Note: block length must be even for this implementation
-static void synthesis_53_row(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, *a, *b;
- int n;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ -= (r + (*b) + 1) >> 1;
- r = *b++;
- }
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *x++ = ((r = *a++) + 1) >> 1;
- *x++ = *b++ + ((r + (*a) + 2) >> 2);
- }
- *x++ = ((r = *a) + 1) >> 1;
- *x++ = *b + ((r + 1) >> 1);
-}
-
-static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, *a, *b;
- int n;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ -= (r + (*b) + 1) >> 1;
- r = *b++;
- }
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- r = *a++;
- *x++ = r;
- *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1);
- }
- *x++ = *a;
- *x++ = ((*b) << 1) + *a;
-}
-
-static void dyadic_synthesize_53(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_53_col(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
- synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
- }
- }
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :
- -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);
- }
- }
-}
-
-#elif DWT_TYPE == 26
-
-// Note: block length must be even for this implementation
-static void synthesis_26_row(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, s, *a, *b;
- int i, n = length >> 1;
-
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ += (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b += (r - *a + 4) >> 3;
- }
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- s = *b++;
- r = *a++;
- *x++ = (r + s + 1) >> 1;
- *x++ = (r - s + 1) >> 1;
- }
-}
-
-static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, s, *a, *b;
- int i, n = length >> 1;
-
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ += (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b += (r - *a + 4) >> 3;
- }
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- s = *b++;
- r = *a++;
- *x++ = r + s;
- *x++ = r - s;
- }
-}
-
-static void dyadic_synthesize_26(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- int16_t buffer[2 * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_26_col(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
- synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
- }
- }
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :
- -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);
- }
- }
-}
-
-#elif DWT_TYPE == 97
-
-static void synthesis_97(int length, double *lowpass, double *highpass,
- double *x) {
- static const double a_predict1 = -1.586134342;
- static const double a_update1 = -0.05298011854;
- static const double a_predict2 = 0.8829110762;
- static const double a_update2 = 0.4435068522;
- static const double s_low = 1.149604398;
- static const double s_high = 1/1.149604398;
- static const double inv_s_low = 1 / s_low;
- static const double inv_s_high = 1 / s_high;
- int i;
- double y[DWT_MAX_LENGTH];
- // Undo pack and scale
- for (i = 0; i < length / 2; i++) {
- y[i * 2] = lowpass[i] * inv_s_low;
- y[i * 2 + 1] = highpass[i] * inv_s_high;
- }
- memcpy(x, y, sizeof(*y) * length);
- // Undo update 2
- for (i = 2; i < length; i += 2) {
- x[i] -= a_update2 * (x[i-1] + x[i+1]);
- }
- x[0] -= 2 * a_update2 * x[1];
- // Undo predict 2
- for (i = 1; i < length - 2; i += 2) {
- x[i] -= a_predict2 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] -= 2 * a_predict2 * x[length - 2];
- // Undo update 1
- for (i = 2; i < length; i += 2) {
- x[i] -= a_update1 * (x[i - 1] + x[i + 1]);
- }
- x[0] -= 2 * a_update1 * x[1];
- // Undo predict 1
- for (i = 1; i < length - 2; i += 2) {
- x[i] -= a_predict1 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] -= 2 * a_predict1 * x[length - 2];
-}
-
-static void dyadic_synthesize_97(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- double buffer[2 * DWT_MAX_LENGTH];
- double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_97(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- y[i * DWT_MAX_LENGTH + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
- synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]);
- }
- }
- for (i = 0; i < height; i++)
- for (j = 0; j < width; j++)
- x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] /
- (1 << DWT_PRECISION_BITS));
-}
-
-#endif // DWT_TYPE
-
-// TODO(debargha): Implement scaling differently so as not to have to use the
-// floating point 16x16 dct
-static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
-
- // step 1 and 2
- step[ 0] = input[0] + input[8];
- step[ 1] = input[0] - input[8];
-
- temp1 = input[4]*C12;
- temp2 = input[12]*C4;
-
- temp1 -= temp2;
- temp1 *= C8;
-
- step[ 2] = 2*(temp1);
-
- temp1 = input[4]*C4;
- temp2 = input[12]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- step[ 3] = 2*(temp1);
-
- temp1 = input[2]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] + input[10];
-
- step[ 4] = temp1 + temp2;
- step[ 5] = temp1 - temp2;
-
- temp1 = input[14]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] - input[10];
-
- step[ 6] = temp2 - temp1;
- step[ 7] = temp2 + temp1;
-
- // for odd input
- temp1 = input[3]*C12;
- temp2 = input[13]*C4;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[ 8] = 2*(temp1);
-
- temp1 = input[3]*C4;
- temp2 = input[13]*C12;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[ 9] = 2*(temp2);
-
- intermediate[10] = 2*(input[9]*C8);
- intermediate[11] = input[15] - input[1];
- intermediate[12] = input[15] + input[1];
- intermediate[13] = 2*((input[7]*C8));
-
- temp1 = input[11]*C12;
- temp2 = input[5]*C4;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[14] = 2*(temp2);
-
- temp1 = input[11]*C4;
- temp2 = input[5]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[15] = 2*(temp1);
-
- step[ 8] = intermediate[ 8] + intermediate[14];
- step[ 9] = intermediate[ 9] + intermediate[15];
- step[10] = intermediate[10] + intermediate[11];
- step[11] = intermediate[10] - intermediate[11];
- step[12] = intermediate[12] + intermediate[13];
- step[13] = intermediate[12] - intermediate[13];
- step[14] = intermediate[ 8] - intermediate[14];
- step[15] = intermediate[ 9] - intermediate[15];
-
- // step 3
- output[0] = step[ 0] + step[ 3];
- output[1] = step[ 1] + step[ 2];
- output[2] = step[ 1] - step[ 2];
- output[3] = step[ 0] - step[ 3];
-
- temp1 = step[ 4]*C14;
- temp2 = step[ 7]*C2;
- temp1 -= temp2;
- output[4] = (temp1);
-
- temp1 = step[ 4]*C2;
- temp2 = step[ 7]*C14;
- temp1 += temp2;
- output[7] = (temp1);
-
- temp1 = step[ 5]*C10;
- temp2 = step[ 6]*C6;
- temp1 -= temp2;
- output[5] = (temp1);
-
- temp1 = step[ 5]*C6;
- temp2 = step[ 6]*C10;
- temp1 += temp2;
- output[6] = (temp1);
-
- output[8] = step[ 8] + step[11];
- output[9] = step[ 9] + step[10];
- output[10] = step[ 9] - step[10];
- output[11] = step[ 8] - step[11];
- output[12] = step[12] + step[15];
- output[13] = step[13] + step[14];
- output[14] = step[13] - step[14];
- output[15] = step[12] - step[15];
-
- // output 4
- step[ 0] = output[0] + output[7];
- step[ 1] = output[1] + output[6];
- step[ 2] = output[2] + output[5];
- step[ 3] = output[3] + output[4];
- step[ 4] = output[3] - output[4];
- step[ 5] = output[2] - output[5];
- step[ 6] = output[1] - output[6];
- step[ 7] = output[0] - output[7];
-
- temp1 = output[8]*C7;
- temp2 = output[15]*C9;
- temp1 -= temp2;
- step[ 8] = (temp1);
-
- temp1 = output[9]*C11;
- temp2 = output[14]*C5;
- temp1 += temp2;
- step[ 9] = (temp1);
-
- temp1 = output[10]*C3;
- temp2 = output[13]*C13;
- temp1 -= temp2;
- step[10] = (temp1);
-
- temp1 = output[11]*C15;
- temp2 = output[12]*C1;
- temp1 += temp2;
- step[11] = (temp1);
-
- temp1 = output[11]*C1;
- temp2 = output[12]*C15;
- temp2 -= temp1;
- step[12] = (temp2);
-
- temp1 = output[10]*C13;
- temp2 = output[13]*C3;
- temp1 += temp2;
- step[13] = (temp1);
-
- temp1 = output[9]*C5;
- temp2 = output[14]*C11;
- temp2 -= temp1;
- step[14] = (temp2);
-
- temp1 = output[8]*C9;
- temp2 = output[15]*C7;
- temp1 += temp2;
- step[15] = (temp1);
-
- // step 5
- output[0] = (step[0] + step[15]);
- output[1] = (step[1] + step[14]);
- output[2] = (step[2] + step[13]);
- output[3] = (step[3] + step[12]);
- output[4] = (step[4] + step[11]);
- output[5] = (step[5] + step[10]);
- output[6] = (step[6] + step[ 9]);
- output[7] = (step[7] + step[ 8]);
-
- output[15] = (step[0] - step[15]);
- output[14] = (step[1] - step[14]);
- output[13] = (step[2] - step[13]);
- output[12] = (step[3] - step[12]);
- output[11] = (step[4] - step[11]);
- output[10] = (step[5] - step[10]);
- output[9] = (step[6] - step[ 9]);
- output[8] = (step[7] - step[ 8]);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch,
- int scale) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double out[16*16], out2[16*16];
- const int short_pitch = pitch >> 1;
- int i, j;
- // First transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j + i*short_pitch];
- butterfly_16x16_idct_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out[j + i*16] = temp_out[j];
- }
- // Then transform columns
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j*16 + i];
- butterfly_16x16_idct_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out2[j*16 + i] = temp_out[j];
- }
- for (i = 0; i < 16*16; ++i)
- output[i] = round(out2[i] / (128 >> scale));
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void idct8_1d_f(double *x) {
- int i, j;
- double t[8];
- static const double idctmat[64] = {
- 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127,
- 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064,
- 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064,
- -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098,
- 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162,
- -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127,
- 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098,
- 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162,
- 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098,
- 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162,
- 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161,
- -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127,
- 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065,
- -0.35355339059327, 0.49039264020162, -0.46193976625564, 0.2777851165098,
- 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127,
- 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064
- };
- for (i = 0; i < 8; ++i) {
- t[i] = 0;
- for (j = 0; j < 8; ++j)
- t[i] += idctmat[i * 8 + j] * x[j];
- }
- for (i = 0; i < 8; ++i) {
- x[i] = t[i];
- }
-}
-
-static void vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch,
- int scale) {
- double X[8 * 8], Y[8];
- int i, j;
- int shortpitch = pitch >> 1;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- X[i * 8 + j] = (double)coefs[i * shortpitch + j];
- }
- }
- for (i = 0; i < 8; i++)
- idct8_1d_f(X + 8 * i);
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; ++j)
- Y[j] = X[i + 8 * j];
- idct8_1d_f(Y);
- for (j = 0; j < 8; ++j)
- X[i + 8 * j] = Y[j];
- }
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale));
- }
- }
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#define multiply_bits(d, n) ((n) < 0 ? (d) >> (n) : (d) << (n))
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[8 * 8];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct8x8_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8,
- sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8,
- sizeof(*buffer2) * 8);
- }
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#endif
-
-#if CONFIG_TX64X64
-void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 64x64 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[64 * 64];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16);
- }
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 16; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
-#elif DWTDCT_TYPE == DWTDCT16X16
- vp9_short_idct16x16_c_f(input + 16, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16 * 64, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16 * 65, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
-
- // Copying and scaling highest bands into buffer2
- for (i = 0; i < 32; ++i) {
- for (j = 32; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 32; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
-#endif // DWTDCT_TYPE
-
-#if DWT_TYPE == 26
- dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(2, 64, 64, buffer2, 64, output, 64);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64);
-#endif
-}
-#endif // CONFIG_TX64X64
-#endif // !CONFIG_DWTDCTHYBRID
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index 241a5bcb7..233ffd8a7 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -51,8 +51,13 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
for (i = 0; i < 16; i++) {
TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
+#if CONFIG_INTHT4X4
+ vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff,
+ 32, tx_type);
+#else
vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,
tx_type, 4, xd->block[i].eob);
+#endif
} else {
vp9_inverse_transform_b_4x4(xd, i, 32);
}
@@ -93,7 +98,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
if (tx_type != DCT_DCT) {
#if CONFIG_INTHT
vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,
- tx_type, 32);
+ 32, tx_type);
#else
vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
xd->block[i].eob);
@@ -108,7 +113,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
if (tx_type != DCT_DCT) {
#if CONFIG_INTHT
vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
- tx_type, 32);
+ 32, tx_type);
#else
vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
xd->block[i + 2].eob);
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 5e57228b4..a333a4b02 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -279,7 +279,7 @@ typedef struct VP9Common {
int error_resilient_mode;
int frame_parallel_decoding_mode;
- int tile_columns;
+ int tile_columns, log2_tile_columns;
int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_idx;
} VP9_COMMON;
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index d4435d872..b75525e2c 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -154,7 +154,7 @@ void vp9_build_inter_predictors_b(BLOCKD *d, int pitch,
int_mv mv;
ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
+ mv.as_int = d->bmi.as_mv[0].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -179,7 +179,7 @@ void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch,
int_mv mv;
ptr_base = *(d->base_second_pre);
- mv.as_int = d->bmi.as_mv.second.as_int;
+ mv.as_int = d->bmi.as_mv[1].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -197,7 +197,7 @@ void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
int_mv mv;
ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
+ mv.as_int = d->bmi.as_mv[0].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -222,7 +222,7 @@ void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd,
int_mv mv;
ptr_base = *(d->base_second_pre);
- mv.as_int = d->bmi.as_mv.second.as_int;
+ mv.as_int = d->bmi.as_mv[1].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -240,7 +240,7 @@ static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
int_mv mv;
ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
+ mv.as_int = d->bmi.as_mv[0].as_int;
ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
(mv.as_mv.col >> 3);
@@ -264,38 +264,38 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
int voffset = 20 + i * 2 + j;
int temp;
- temp = blockd[yoffset ].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 1].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 4].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 5].bmi.as_mv.first.as_mv.row;
+ temp = blockd[yoffset ].bmi.as_mv[0].as_mv.row
+ + blockd[yoffset + 1].bmi.as_mv[0].as_mv.row
+ + blockd[yoffset + 4].bmi.as_mv[0].as_mv.row
+ + blockd[yoffset + 5].bmi.as_mv[0].as_mv.row;
if (temp < 0) temp -= 4;
else temp += 4;
- xd->block[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
+ xd->block[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = blockd[yoffset ].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 1].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 4].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 5].bmi.as_mv.first.as_mv.col;
+ temp = blockd[yoffset ].bmi.as_mv[0].as_mv.col
+ + blockd[yoffset + 1].bmi.as_mv[0].as_mv.col
+ + blockd[yoffset + 4].bmi.as_mv[0].as_mv.col
+ + blockd[yoffset + 5].bmi.as_mv[0].as_mv.col;
if (temp < 0) temp -= 4;
else temp += 4;
- blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
- blockd[voffset].bmi.as_mv.first.as_mv.row =
- blockd[uoffset].bmi.as_mv.first.as_mv.row;
- blockd[voffset].bmi.as_mv.first.as_mv.col =
- blockd[uoffset].bmi.as_mv.first.as_mv.col;
+ blockd[voffset].bmi.as_mv[0].as_mv.row =
+ blockd[uoffset].bmi.as_mv[0].as_mv.row;
+ blockd[voffset].bmi.as_mv[0].as_mv.col =
+ blockd[uoffset].bmi.as_mv[0].as_mv.col;
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- temp = blockd[yoffset ].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 1].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 4].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 5].bmi.as_mv.second.as_mv.row;
+ temp = blockd[yoffset ].bmi.as_mv[1].as_mv.row
+ + blockd[yoffset + 1].bmi.as_mv[1].as_mv.row
+ + blockd[yoffset + 4].bmi.as_mv[1].as_mv.row
+ + blockd[yoffset + 5].bmi.as_mv[1].as_mv.row;
if (temp < 0) {
temp -= 4;
@@ -303,13 +303,13 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = blockd[yoffset ].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 1].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 4].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 5].bmi.as_mv.second.as_mv.col;
+ temp = blockd[yoffset ].bmi.as_mv[1].as_mv.col
+ + blockd[yoffset + 1].bmi.as_mv[1].as_mv.col
+ + blockd[yoffset + 4].bmi.as_mv[1].as_mv.col
+ + blockd[yoffset + 5].bmi.as_mv[1].as_mv.col;
if (temp < 0) {
temp -= 4;
@@ -317,13 +317,13 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
- blockd[voffset].bmi.as_mv.second.as_mv.row =
- blockd[uoffset].bmi.as_mv.second.as_mv.row;
- blockd[voffset].bmi.as_mv.second.as_mv.col =
- blockd[uoffset].bmi.as_mv.second.as_mv.col;
+ blockd[voffset].bmi.as_mv[1].as_mv.row =
+ blockd[uoffset].bmi.as_mv[1].as_mv.row;
+ blockd[voffset].bmi.as_mv[1].as_mv.col =
+ blockd[uoffset].bmi.as_mv[1].as_mv.col;
}
}
}
@@ -332,7 +332,7 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int)
build_inter_predictors2b(xd, d0, 8);
else {
vp9_build_inter_predictors_b(d0, 8, &xd->subpix);
@@ -717,15 +717,15 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
blockd[10].bmi = xd->mode_info_context->bmi[10];
if (mbmi->need_to_clamp_mvs) {
- clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[0].as_mv, xd);
if (mbmi->second_ref_frame > 0) {
- clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[10].bmi.as_mv[1].as_mv, xd);
}
}
@@ -750,15 +750,15 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
if (mbmi->need_to_clamp_mvs) {
- clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[0].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[0].as_mv, xd);
if (mbmi->second_ref_frame > 0) {
- clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[1].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv[1].as_mv, xd);
}
}
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int)
build_inter_predictors2b(xd, d0, 16);
else {
vp9_build_inter_predictors_b(d0, 16, &xd->subpix);
@@ -776,7 +776,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ if (d0->bmi.as_mv[0].as_int == d1->bmi.as_mv[0].as_int)
build_inter_predictors2b(xd, d0, 8);
else {
vp9_build_inter_predictors_b(d0, 8, &xd->subpix);
@@ -803,44 +803,44 @@ void build_4x4uvmvs(MACROBLOCKD *xd) {
int temp;
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.row;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.row;
if (temp < 0) temp -= 4;
else temp += 4;
- blockd[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.col;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.col;
if (temp < 0) temp -= 4;
else temp += 4;
- blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
// if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd);
+ clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd);
// if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd);
+ clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd);
- blockd[voffset].bmi.as_mv.first.as_mv.row =
- blockd[uoffset].bmi.as_mv.first.as_mv.row;
- blockd[voffset].bmi.as_mv.first.as_mv.col =
- blockd[uoffset].bmi.as_mv.first.as_mv.col;
+ blockd[voffset].bmi.as_mv[0].as_mv.row =
+ blockd[uoffset].bmi.as_mv[0].as_mv.row;
+ blockd[voffset].bmi.as_mv[0].as_mv.col =
+ blockd[uoffset].bmi.as_mv[0].as_mv.col;
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.row;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.row
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.row;
if (temp < 0) {
temp -= 4;
@@ -848,13 +848,13 @@ void build_4x4uvmvs(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) &
xd->fullpixel_mask;
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.col;
+ temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.col
+ + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.col;
if (temp < 0) {
temp -= 4;
@@ -862,21 +862,21 @@ void build_4x4uvmvs(MACROBLOCKD *xd) {
temp += 4;
}
- blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
+ blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) &
xd->fullpixel_mask;
// if (mbmi->need_to_clamp_mvs)
clamp_uvmv_to_umv_border(
- &blockd[uoffset].bmi.as_mv.second.as_mv, xd);
+ &blockd[uoffset].bmi.as_mv[1].as_mv, xd);
// if (mbmi->need_to_clamp_mvs)
clamp_uvmv_to_umv_border(
- &blockd[uoffset].bmi.as_mv.second.as_mv, xd);
+ &blockd[uoffset].bmi.as_mv[1].as_mv, xd);
- blockd[voffset].bmi.as_mv.second.as_mv.row =
- blockd[uoffset].bmi.as_mv.second.as_mv.row;
- blockd[voffset].bmi.as_mv.second.as_mv.col =
- blockd[uoffset].bmi.as_mv.second.as_mv.col;
+ blockd[voffset].bmi.as_mv[1].as_mv.row =
+ blockd[uoffset].bmi.as_mv[1].as_mv.row;
+ blockd[voffset].bmi.as_mv[1].as_mv.col =
+ blockd[uoffset].bmi.as_mv[1].as_mv.col;
}
}
}
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 02f8b6614..3bd1f250f 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -300,10 +300,15 @@ prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
specialize vp9_short_idct1_32x32
#if CONFIG_INTHT
-prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int tx_type, int pitch"
+prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int pitch, int tx_type"
specialize vp9_short_iht8x8
#endif
+#if CONFIG_INTHT4X4
+prototype void vp9_short_iht4x4 "int16_t *input, int16_t *output, int pitch, int tx_type"
+specialize vp9_short_iht4x4
+#endif
+
prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"
specialize vp9_ihtllm
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
new file mode 100644
index 000000000..02e0d1461
--- /dev/null
+++ b/vp9/common/vp9_tile_common.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_tile_common.h"
+
+// Reports the macroblock-column span of the tile selected by
+// cm->cur_tile_idx. Tile boundaries are placed on groups of four
+// macroblock columns and then clamped to cm->mb_cols.
+//   min_tile_off - receives the tile's starting macroblock column
+//   max_tile_off - receives the tile's ending macroblock column
+void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
+                          int *max_tile_off) {
+  const int total_mb_cols = cm->mb_cols;
+  const int total_sb_cols = (total_mb_cols + 3) >> 2;
+  const int shift = cm->log2_tile_columns;
+  const int idx = cm->cur_tile_idx;
+  int first_mb = ((idx * total_sb_cols) >> shift) << 2;
+  int last_mb = (((idx + 1) * total_sb_cols) >> shift) << 2;
+
+  // The rounded-up group count can overshoot the real frame width.
+  if (first_mb > total_mb_cols) {
+    first_mb = total_mb_cols;
+  }
+  if (last_mb > total_mb_cols) {
+    last_mb = total_mb_cols;
+  }
+
+  *min_tile_off = first_mb;
+  *max_tile_off = last_mb;
+}
+
+#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
+#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
+
+// Computes the allowed range of log2(number of tile columns) for the
+// frame width held in cm->mb_cols.
+//   min_log2_n_tiles_ptr - receives the smallest log2 count for which no
+//                          tile is wider than MAX_TILE_WIDTH_SBS
+//   delta_log2_n_tiles   - receives (largest allowed log2 count) minus
+//                          (smallest allowed log2 count)
+void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,
+                         int *delta_log2_n_tiles) {
+  const int sb_cols = (cm->mb_cols + 3) >> 2;
+  int min_log2_n_tiles, max_log2_n_tiles;
+
+  // This loop stops at the first shift that would make a tile narrower
+  // than MIN_TILE_WIDTH_SBS, i.e. one past the largest valid value.
+  for (max_log2_n_tiles = 0;
+       (sb_cols >> max_log2_n_tiles) >= MIN_TILE_WIDTH_SBS;
+       max_log2_n_tiles++) {}
+  // Step back to the last shift that still honours the minimum width;
+  // frames narrower than one minimum-width tile stay at 0.
+  if (max_log2_n_tiles > 0) {
+    max_log2_n_tiles--;
+  }
+
+  for (min_log2_n_tiles = 0;
+       (MAX_TILE_WIDTH_SBS << min_log2_n_tiles) < sb_cols;
+       min_log2_n_tiles++) {}
+
+  *min_log2_n_tiles_ptr = min_log2_n_tiles;
+  *delta_log2_n_tiles = max_log2_n_tiles - min_log2_n_tiles;
+}
diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h
new file mode 100644
index 000000000..653b6b4f6
--- /dev/null
+++ b/vp9/common/vp9_tile_common.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_TILE_COMMON_H_
+#define VP9_COMMON_VP9_TILE_COMMON_H_
+
+#include "vp9/common/vp9_onyxc_int.h"  // NOTE(review): presumably where VP9_COMMON is declared - confirm
+
+#define MIN_TILE_WIDTH 256  // vp9_tile_common.c shifts this right by 6 to get MIN_TILE_WIDTH_SBS
+#define MAX_TILE_WIDTH 4096  // vp9_tile_common.c shifts this right by 6 to get MAX_TILE_WIDTH_SBS
+
+extern void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
+ int *max_tile_off);  // writes the current tile's start/end offsets, clamped to cm->mb_cols
+
+extern void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles,
+ int *delta_log2_n_tiles);  // writes the minimum log2 tile count and the max-minus-min spread
+
+#endif // VP9_COMMON_VP9_TILE_COMMON_H_
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 3e2346f29..fbc95b6ce 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -65,6 +65,20 @@ void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
unsigned int output_height,
const short *filter);
+void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,  // defined in x86/vp9_subpixel_8t_ssse3.asm
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,  // defined in x86/vp9_subpixel_8t_ssse3.asm
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
@@ -87,6 +101,14 @@ void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
dst += 8;
w -= 8;
}
+ while (w >= 4) {
+ vp9_filter_block1d4_h8_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
if (w) {
vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
@@ -117,6 +139,14 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride,
dst += 8;
w -= 8;
}
+ while (w >= 4) {
+ vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
if (w) {
vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
@@ -156,6 +186,15 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
h, filter_y);
return;
}
+ if (w == 4) {
+ vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d4_v8_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
+ }
}
vp9_convolve8_c(src, src_stride, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index c6d65e904..5f039454a 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -30,6 +30,124 @@
; unsigned int output_height,
; short *filter
;)
+global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v8_ssse3): ; 8-tap vertical filter, 4 pixels per row; args 0-5: src_ptr, src pitch, output_ptr, out_pitch, output_height, filter
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ; room for five 16-byte constants: k0k1, k2k3, k4k5, k6k7, krd
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ; 64 in each 16-bit half: rounding term for the final >> 7
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ; 16-bit taps -> signed bytes (saturating)
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ; copy the low 8 bytes into the high half
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ; replicate the rounding dword across xmm5
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx ; rax = src + pitch, base address for rows D, F and H
+
+ lea rbx, [rdx + rdx*4] ; pitch * 5
+ add rbx, rdx ;pitch * 6
+
+.vp9_filter_block1d4_v8_ssse3_loop:
+ movd xmm0, [rsi] ;A
+ movd xmm1, [rsi + rdx] ;B
+ movd xmm2, [rsi + rdx * 2] ;C
+ movd xmm3, [rax + rdx * 2] ;D
+ movd xmm4, [rsi + rdx * 4] ;E
+ movd xmm5, [rax + rdx * 4] ;F
+
+ punpcklbw xmm0, xmm1 ;A B
+ punpcklbw xmm2, xmm3 ;C D
+ punpcklbw xmm4, xmm5 ;E F
+
+ movd xmm6, [rsi + rbx] ;G
+ movd xmm7, [rax + rbx] ;H
+
+ pmaddubsw xmm0, k0k1 ; A*k0 + B*k1 as 16-bit sums
+ pmaddubsw xmm2, k2k3 ; C*k2 + D*k3
+ punpcklbw xmm6, xmm7 ;G H
+ pmaddubsw xmm4, k4k5 ; E*k4 + F*k5
+ pmaddubsw xmm6, k6k7 ; G*k6 + H*k7
+
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd ; add the rounding term
+ paddsw xmm4, xmm6
+ paddsw xmm0, xmm4
+
+ psraw xmm0, 7 ; scale the sum back down (>> 7)
+ packuswb xmm0, xmm0 ; clamp to unsigned bytes
+
+ add rsi, rdx ; move both row bases down one source row
+ add rax, rdx
+
+ movd [rdi], xmm0 ; write 4 output pixels
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx ; one output row finished
+ jnz .vp9_filter_block1d4_v8_ssse3_loop
+
+ add rsp, 16*5
+ pop rsp
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d8_v8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
+; short *filter
+;)
global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
sym(vp9_filter_block1d8_v8_ssse3):
push rbp
@@ -289,6 +407,110 @@ sym(vp9_filter_block1d16_v8_ssse3):
pop rbp
ret
+;void vp9_filter_block1d4_h8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; short *filter
+;)
+global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h8_ssse3): ; 8-tap horizontal filter, 4 output pixels per row
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ; room for five 16-byte constants: k0k1, k2k3, k4k5, k6k7, krd
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ; 64 in each 16-bit half: rounding term for the final >> 7
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ; 16-bit taps -> signed bytes (saturating)
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ; copy the low 8 bytes into the high half
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ; replicate the rounding dword across xmm5
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+.filter_block1d4_h8_rowloop_ssse3:
+ movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
+
+ movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
+ punpcklqdq xmm0, xmm3 ; xmm0 = 16 source bytes, offsets -3 .. 12
+
+ movdqa xmm1, xmm0 ; keep an unshuffled copy for the remaining tap pairs
+ pshufb xmm0, [GLOBAL(shuf_t0t1)] ; arrange bytes for taps 0/1 (table presumably defined elsewhere in this file)
+ pmaddubsw xmm0, k0k1
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf_t2t3)] ; arrange bytes for taps 2/3
+ pmaddubsw xmm1, k2k3
+
+ movdqa xmm4, xmm2
+ pshufb xmm2, [GLOBAL(shuf_t4t5)] ; arrange bytes for taps 4/5
+ pmaddubsw xmm2, k4k5
+
+ pshufb xmm4, [GLOBAL(shuf_t6t7)] ; arrange bytes for taps 6/7
+ pmaddubsw xmm4, k6k7
+
+ paddsw xmm0, xmm1
+ paddsw xmm0, xmm4
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd ; add the rounding term
+ psraw xmm0, 7 ; scale the sum back down (>> 7)
+ packuswb xmm0, xmm0 ; clamp to unsigned bytes
+
+ lea rsi, [rsi + rax] ; next source row
+ movd [rdi], xmm0 ; write 4 output pixels
+
+ lea rdi, [rdi + rdx] ; next output row
+ dec rcx ; one output row finished
+ jnz .filter_block1d4_h8_rowloop_ssse3
+
+ add rsp, 16*5
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
;void vp9_filter_block1d8_h8_ssse3
;(
; unsigned char *src_ptr,
@@ -340,7 +562,7 @@ sym(vp9_filter_block1d8_h8_ssse3):
pshufd xmm5, xmm5, 0
movdqa k4k5, xmm2
movdqa k6k7, xmm3
-; movdqa krd, xmm5
+ movdqa krd, xmm5
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rdx, dword ptr arg(3) ;output_pitch
@@ -349,10 +571,7 @@ sym(vp9_filter_block1d8_h8_ssse3):
.filter_block1d8_h8_rowloop_ssse3:
movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
-; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
-;note: if we create a k0_k7 filter, we can save a pshufb
-; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
punpcklqdq xmm0, xmm3
movdqa xmm1, xmm0
@@ -371,9 +590,9 @@ sym(vp9_filter_block1d8_h8_ssse3):
pmaddubsw xmm4, k6k7
paddsw xmm0, xmm1
- paddsw xmm0, xmm2
- paddsw xmm0, xmm5
paddsw xmm0, xmm4
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -456,10 +675,7 @@ sym(vp9_filter_block1d16_h8_ssse3):
.filter_block1d16_h8_rowloop_ssse3:
movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
-; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
-;note: if we create a k0_k7 filter, we can save a pshufb
-; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
punpcklqdq xmm0, xmm3
movdqa xmm1, xmm0
@@ -486,10 +702,7 @@ sym(vp9_filter_block1d16_h8_ssse3):
movq xmm3, [rsi + 5]
-; movq xmm7, [rsi + 12]
movq xmm7, [rsi + 13]
-;note: same as above
-; punpcklbw xmm3, xmm7
punpcklqdq xmm3, xmm7
movdqa xmm1, xmm3
@@ -508,9 +721,9 @@ sym(vp9_filter_block1d16_h8_ssse3):
pmaddubsw xmm4, k6k7
paddsw xmm3, xmm1
+ paddsw xmm3, xmm4
paddsw xmm3, xmm2
paddsw xmm3, krd
- paddsw xmm3, xmm4
psraw xmm3, 7
packuswb xmm3, xmm3
punpcklqdq xmm0, xmm3