diff options
author | Deb Mukherjee <debargha@google.com> | 2013-01-09 06:26:54 -0800 |
---|---|---|
committer | Deb Mukherjee <debargha@google.com> | 2013-01-12 16:00:53 -0800 |
commit | 516db21c2c903a7d9b0b5fc156277e9bb538ade9 (patch) | |
tree | b7c3fd5a4a85d039edcee4579f7f03390972b8b0 /vp9/common | |
parent | e42b280e1170d485af47000f411ff45d56af33bd (diff) | |
download | libvpx-516db21c2c903a7d9b0b5fc156277e9bb538ade9.tar libvpx-516db21c2c903a7d9b0b5fc156277e9bb538ade9.tar.gz libvpx-516db21c2c903a7d9b0b5fc156277e9bb538ade9.tar.bz2 libvpx-516db21c2c903a7d9b0b5fc156277e9bb538ade9.zip |
Further enhancements/fixes on dct/dwt hybrid txfm
Fixes some scaling issues. Adds an option to only compute the
dct on the low-low subband for 32x32 and 64x64 blocks using
only a single 16x16 dct after 1 and 2 wavelet decomposition
levels respectively. Also adds an option to use an 8x8 dct
as the building block.
Currently, with the 2/6 filter and a single 16x16 dct on
the low-low band, the results compared to the full 32x32 dct are
as follows:
derf: -0.15%
yt: -0.29%
std-hd: -0.18%
hd: -0.6%
These are my current recommended settings, since the 2/6 filter
is very simple.
Results with 8x8 dct are about 0.3% worse.
Change-Id: I00100cdc96e32deced591985785ef0d06f325e44
Diffstat (limited to 'vp9/common')
-rw-r--r-- | vp9/common/vp9_blockd.h | 12 | ||||
-rw-r--r-- | vp9/common/vp9_entropy.c | 412 | ||||
-rw-r--r-- | vp9/common/vp9_idctllm.c | 218 |
3 files changed, 613 insertions, 29 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b5f9bd2c4..18c2ae0a8 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -47,6 +47,18 @@ void vpx_log(const char *format, ...); #define MAX_MV_REFS 9 #define MAX_MV_REF_CANDIDATES 4 +#if CONFIG_DWTDCTHYBRID +#define DWT_MAX_LENGTH 64 +#define DWT_TYPE 26 // 26/53/97 +#define DWT_PRECISION_BITS 2 +#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) + +#define DWTDCT16X16 0 +#define DWTDCT16X16_LEAN 1 +#define DWTDCT8X8 2 +#define DWTDCT_TYPE DWTDCT16X16_LEAN +#endif + typedef struct { int r, c; } POS; diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 2200cc3f9..352e17c0c 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -70,7 +70,8 @@ DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { 12, 13, 14, 15 }; -DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, +DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = { + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, @@ -143,6 +144,214 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { }; #if CONFIG_DWTDCTHYBRID + +#if DWTDCT_TYPE == DWTDCT16X16_LEAN +DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { + 0, 1, 32, 64, 33, 2, 3, 34, + 65, 96, 128, 97, 66, 35, 4, 5, + 36, 67, 98, 129, 160, 192, 161, 130, + 99, 68, 37, 6, 7, 38, 69, 100, + 131, 162, 193, 224, 256, 225, 194, 163, + 132, 101, 70, 39, 8, 9, 40, 71, + 102, 133, 164, 195, 226, 257, 288, 320, + 289, 258, 227, 196, 165, 134, 103, 72, + 41, 10, 11, 42, 73, 104, 135, 166, + 197, 228, 259, 290, 321, 352, 384, 353, + 322, 291, 260, 229, 198, 167, 136, 105, + 74, 43, 12, 13, 44, 75, 106, 137, + 168, 199, 230, 261, 292, 323, 354, 385, + 416, 448, 417, 386, 355, 324, 293, 262, + 231, 200, 169, 138, 107, 76, 45, 14, + 15, 46, 77, 108, 139, 170, 201, 232, + 263, 294, 325, 356, 387, 418, 449, 480, + 481, 450, 419, 388, 357, 326, 295, 264, + 233, 202, 171, 140, 109, 78, 47, 79, + 110, 141, 172, 203, 234, 265, 296, 327, + 358, 389, 420, 451, 482, 483, 452, 421, + 390, 359, 328, 297, 266, 235, 204, 173, + 142, 111, 143, 174, 205, 236, 267, 298, + 329, 360, 391, 422, 453, 484, 485, 454, + 423, 392, 361, 330, 299, 268, 237, 206, + 175, 207, 238, 269, 300, 331, 362, 393, + 424, 455, 486, 487, 456, 425, 394, 363, + 332, 301, 270, 239, 271, 302, 333, 364, + 395, 426, 457, 488, 489, 458, 427, 396, + 365, 334, 303, 335, 366, 397, 428, 459, + 490, 491, 460, 429, 398, 367, 399, 430, + 461, 492, 493, 462, 431, 463, 494, 495, + + 16, 512, 528, 17, 513, 529, 48, 544, + 560, 80, 576, 592, 49, 545, 561, 18, + 514, 530, 19, 515, 531, 50, 546, 
562, + 81, 577, 593, 112, 608, 624, 144, 640, + 656, 113, 609, 625, 82, 578, 594, 51, + 547, 563, 20, 516, 532, 21, 517, 533, + 52, 548, 564, 83, 579, 595, 114, 610, + 626, 145, 641, 657, 176, 672, 688, 208, + 704, 720, 177, 673, 689, 146, 642, 658, + 115, 611, 627, 84, 580, 596, 53, 549, + 565, 22, 518, 534, 23, 519, 535, 54, + 550, 566, 85, 581, 597, 116, 612, 628, + 147, 643, 659, 178, 674, 690, 209, 705, + 721, 240, 736, 752, 272, 768, 784, 241, + 737, 753, 210, 706, 722, 179, 675, 691, + 148, 644, 660, 117, 613, 629, 86, 582, + 598, 55, 551, 567, 24, 520, 536, 25, + 521, 537, 56, 552, 568, 87, 583, 599, + 118, 614, 630, 149, 645, 661, 180, 676, + 692, 211, 707, 723, 242, 738, 754, 273, + 769, 785, 304, 800, 816, 336, 832, 848, + 305, 801, 817, 274, 770, 786, 243, 739, + 755, 212, 708, 724, 181, 677, 693, 150, + 646, 662, 119, 615, 631, 88, 584, 600, + 57, 553, 569, 26, 522, 538, 27, 523, + 539, 58, 554, 570, 89, 585, 601, 120, + 616, 632, 151, 647, 663, 182, 678, 694, + 213, 709, 725, 244, 740, 756, 275, 771, + 787, 306, 802, 818, 337, 833, 849, 368, + 864, 880, 400, 896, 912, 369, 865, 881, + 338, 834, 850, 307, 803, 819, 276, 772, + 788, 245, 741, 757, 214, 710, 726, 183, + + 679, 695, 152, 648, 664, 121, 617, 633, + 90, 586, 602, 59, 555, 571, 28, 524, + 540, 29, 525, 541, 60, 556, 572, 91, + 587, 603, 122, 618, 634, 153, 649, 665, + 184, 680, 696, 215, 711, 727, 246, 742, + 758, 277, 773, 789, 308, 804, 820, 339, + 835, 851, 370, 866, 882, 401, 897, 913, + 432, 928, 944, 464, 960, 976, 433, 929, + 945, 402, 898, 914, 371, 867, 883, 340, + 836, 852, 309, 805, 821, 278, 774, 790, + 247, 743, 759, 216, 712, 728, 185, 681, + 697, 154, 650, 666, 123, 619, 635, 92, + 588, 604, 61, 557, 573, 30, 526, 542, + 31, 527, 543, 62, 558, 574, 93, 589, + 605, 124, 620, 636, 155, 651, 667, 186, + 682, 698, 217, 713, 729, 248, 744, 760, + 279, 775, 791, 310, 806, 822, 341, 837, + 853, 372, 868, 884, 403, 899, 915, 434, + 930, 946, 465, 961, 977, 496, 992, 1008, + 497, 993, 
1009, 466, 962, 978, 435, 931, + 947, 404, 900, 916, 373, 869, 885, 342, + 838, 854, 311, 807, 823, 280, 776, 792, + 249, 745, 761, 218, 714, 730, 187, 683, + 699, 156, 652, 668, 125, 621, 637, 94, + 590, 606, 63, 559, 575, 95, 591, 607, + 126, 622, 638, 157, 653, 669, 188, 684, + 700, 219, 715, 731, 250, 746, 762, 281, + 777, 793, 312, 808, 824, 343, 839, 855, + 374, 870, 886, 405, 901, 917, 436, 932, + 948, 467, 963, 979, 498, 994, 1010, 499, + 995, 1011, 468, 964, 980, 437, 933, 949, + 406, 902, 918, 375, 871, 887, 344, 840, + + 856, 313, 809, 825, 282, 778, 794, 251, + 747, 763, 220, 716, 732, 189, 685, 701, + 158, 654, 670, 127, 623, 639, 159, 655, + 671, 190, 686, 702, 221, 717, 733, 252, + 748, 764, 283, 779, 795, 314, 810, 826, + 345, 841, 857, 376, 872, 888, 407, 903, + 919, 438, 934, 950, 469, 965, 981, 500, + 996, 1012, 501, 997, 1013, 470, 966, 982, + 439, 935, 951, 408, 904, 920, 377, 873, + 889, 346, 842, 858, 315, 811, 827, 284, + 780, 796, 253, 749, 765, 222, 718, 734, + 191, 687, 703, 223, 719, 735, 254, 750, + 766, 285, 781, 797, 316, 812, 828, 347, + 843, 859, 378, 874, 890, 409, 905, 921, + 440, 936, 952, 471, 967, 983, 502, 998, + 1014, 503, 999, 1015, 472, 968, 984, 441, + 937, 953, 410, 906, 922, 379, 875, 891, + 348, 844, 860, 317, 813, 829, 286, 782, + 798, 255, 751, 767, 287, 783, 799, 318, + 814, 830, 349, 845, 861, 380, 876, 892, + 411, 907, 923, 442, 938, 954, 473, 969, + 985, 504, 1000, 1016, 505, 1001, 1017, 474, + 970, 986, 443, 939, 955, 412, 908, 924, + 381, 877, 893, 350, 846, 862, 319, 815, + 831, 351, 847, 863, 382, 878, 894, 413, + 909, 925, 444, 940, 956, 475, 971, 987, + 506, 1002, 1018, 507, 1003, 1019, 476, 972, + 988, 445, 941, 957, 414, 910, 926, 383, + 879, 895, 415, 911, 927, 446, 942, 958, + 477, 973, 989, 508, 1004, 1020, 509, 1005, + 1021, 478, 974, 990, 447, 943, 959, 479, + 975, 991, 510, 1006, 1022, 511, 1007, 1023, +}; + +#elif DWTDCT_TYPE == DWTDCT16X16 + DECLARE_ALIGNED(16, const int, 
vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, 6, 6, @@ -351,7 +560,206 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 975, 991, 510, 1006, 1022, 511, 1007, 1023, }; -#else // CONFIG_DWTDCTHYBRID +#elif DWTDCT_TYPE == DWTDCT8X8 + +DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { + 0, 1, 2, 3, 5, 4, 4, 5, + 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + + 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { + 0, 1, 32, 64, 33, 2, 3, 34, + 65, 96, 128, 97, 66, 35, 4, 5, + 36, 67, 98, 129, 160, 192, 161, 130, + 99, 68, 37, 6, 7, 38, 69, 100, + 131, 162, 193, 224, 225, 194, 163, 132, + 101, 70, 39, 71, 102, 133, 164, 195, + 226, 227, 196, 165, 134, 103, 135, 166, + 197, 228, 229, 198, 167, 199, 230, 231, + + 8, 256, 264, 9, 257, 265, 40, 288, 296, 72, 320, 328, + 
41, 289, 297, 10, 258, 266, 11, 259, 267, 42, 290, 298, + 73, 321, 329, 104, 352, 360, 136, 384, 392, 105, 353, 361, + 74, 322, 330, 43, 291, 299, 12, 260, 268, 13, 261, 269, + 44, 292, 300, 75, 323, 331, 106, 354, 362, 137, 385, 393, + 168, 416, 424, 200, 448, 456, 169, 417, 425, 138, 386, 394, + 107, 355, 363, 76, 324, 332, 45, 293, 301, 14, 262, 270, + 15, 263, 271, 46, 294, 302, 77, 325, 333, 108, 356, 364, + 139, 387, 395, 170, 418, 426, 201, 449, 457, 232, 480, 488, + 233, 481, 489, 202, 450, 458, 171, 419, 427, 140, 388, 396, + 109, 357, 365, 78, 326, 334, 47, 295, 303, 79, 327, 335, + 110, 358, 366, 141, 389, 397, 172, 420, 428, 203, 451, 459, + 234, 482, 490, 235, 483, 491, 204, 452, 460, 173, 421, 429, + 142, 390, 398, 111, 359, 367, 143, 391, 399, 174, 422, 430, + 205, 453, 461, 236, 484, 492, 237, 485, 493, 206, 454, 462, + 175, 423, 431, 207, 455, 463, 238, 486, 494, 239, 487, 495, + + 16, 512, 528, 17, 513, 529, 18, 514, + 530, 19, 515, 531, 20, 516, 532, 21, + 517, 533, 22, 518, 534, 23, 519, 535, + 24, 520, 536, 25, 521, 537, 26, 522, + 538, 27, 523, 539, 28, 524, 540, 29, + 525, 541, 30, 526, 542, 31, 527, 543, + 48, 544, 560, 49, 545, 561, 50, 546, + 562, 51, 547, 563, 52, 548, 564, 53, + 549, 565, 54, 550, 566, 55, 551, 567, + 56, 552, 568, 57, 553, 569, 58, 554, + 570, 59, 555, 571, 60, 556, 572, 61, + 557, 573, 62, 558, 574, 63, 559, 575, + 80, 576, 592, 81, 577, 593, 82, 578, + 594, 83, 579, 595, 84, 580, 596, 85, + 581, 597, 86, 582, 598, 87, 583, 599, + 88, 584, 600, 89, 585, 601, 90, 586, + 602, 91, 587, 603, 92, 588, 604, 93, + 589, 605, 94, 590, 606, 95, 591, 607, + 112, 608, 624, 113, 609, 625, 114, 610, + 626, 115, 611, 627, 116, 612, 628, 117, + 613, 629, 118, 614, 630, 119, 615, 631, + 120, 616, 632, 121, 617, 633, 122, 618, + 634, 123, 619, 635, 124, 620, 636, 125, + 621, 637, 126, 622, 638, 127, 623, 639, + 144, 640, 656, 145, 641, 657, 146, 642, + 658, 147, 643, 659, 148, 644, 660, 149, + 645, 661, 150, 646, 662, 151, 647, 663, + 
152, 648, 664, 153, 649, 665, 154, 650, + 666, 155, 651, 667, 156, 652, 668, 157, + 653, 669, 158, 654, 670, 159, 655, 671, + 176, 672, 688, 177, 673, 689, 178, 674, + 690, 179, 675, 691, 180, 676, 692, 181, + 677, 693, 182, 678, 694, 183, 679, 695, + 184, 680, 696, 185, 681, 697, 186, 682, + 698, 187, 683, 699, 188, 684, 700, 189, + 685, 701, 190, 686, 702, 191, 687, 703, + 208, 704, 720, 209, 705, 721, 210, 706, + 722, 211, 707, 723, 212, 708, 724, 213, + 709, 725, 214, 710, 726, 215, 711, 727, + 216, 712, 728, 217, 713, 729, 218, 714, + 730, 219, 715, 731, 220, 716, 732, 221, + 717, 733, 222, 718, 734, 223, 719, 735, + 240, 736, 752, 241, 737, 753, 242, 738, + 754, 243, 739, 755, 244, 740, 756, 245, + 741, 757, 246, 742, 758, 247, 743, 759, + 248, 744, 760, 249, 745, 761, 250, 746, + 762, 251, 747, 763, 252, 748, 764, 253, + 749, 765, 254, 750, 766, 255, 751, 767, + 272, 768, 784, 273, 769, 785, 274, 770, + 786, 275, 771, 787, 276, 772, 788, 277, + 773, 789, 278, 774, 790, 279, 775, 791, + 280, 776, 792, 281, 777, 793, 282, 778, + 794, 283, 779, 795, 284, 780, 796, 285, + 781, 797, 286, 782, 798, 287, 783, 799, + 304, 800, 816, 305, 801, 817, 306, 802, + 818, 307, 803, 819, 308, 804, 820, 309, + 805, 821, 310, 806, 822, 311, 807, 823, + 312, 808, 824, 313, 809, 825, 314, 810, + 826, 315, 811, 827, 316, 812, 828, 317, + 813, 829, 318, 814, 830, 319, 815, 831, + 336, 832, 848, 337, 833, 849, 338, 834, + 850, 339, 835, 851, 340, 836, 852, 341, + 837, 853, 342, 838, 854, 343, 839, 855, + 344, 840, 856, 345, 841, 857, 346, 842, + 858, 347, 843, 859, 348, 844, 860, 349, + 845, 861, 350, 846, 862, 351, 847, 863, + 368, 864, 880, 369, 865, 881, 370, 866, + 882, 371, 867, 883, 372, 868, 884, 373, + 869, 885, 374, 870, 886, 375, 871, 887, + 376, 872, 888, 377, 873, 889, 378, 874, + 890, 379, 875, 891, 380, 876, 892, 381, + 877, 893, 382, 878, 894, 383, 879, 895, + 400, 896, 912, 401, 897, 913, 402, 898, + 914, 403, 899, 915, 404, 900, 916, 405, + 901, 917, 406, 902, 918, 
407, 903, 919, + 408, 904, 920, 409, 905, 921, 410, 906, + 922, 411, 907, 923, 412, 908, 924, 413, + 909, 925, 414, 910, 926, 415, 911, 927, + 432, 928, 944, 433, 929, 945, 434, 930, + 946, 435, 931, 947, 436, 932, 948, 437, + 933, 949, 438, 934, 950, 439, 935, 951, + 440, 936, 952, 441, 937, 953, 442, 938, + 954, 443, 939, 955, 444, 940, 956, 445, + 941, 957, 446, 942, 958, 447, 943, 959, + 464, 960, 976, 465, 961, 977, 466, 962, + 978, 467, 963, 979, 468, 964, 980, 469, + 965, 981, 470, 966, 982, 471, 967, 983, + 472, 968, 984, 473, 969, 985, 474, 970, + 986, 475, 971, 987, 476, 972, 988, 477, + 973, 989, 478, 974, 990, 479, 975, 991, + 496, 992, 1008, 497, 993, 1009, 498, 994, + 1010, 499, 995, 1011, 500, 996, 1012, 501, + 997, 1013, 502, 998, 1014, 503, 999, 1015, + 504, 1000, 1016, 505, 1001, 1017, 506, 1002, + 1018, 507, 1003, 1019, 508, 1004, 1020, 509, + 1005, 1021, 510, 1006, 1022, 511, 1007, 1023, +}; +#endif + +#else DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 0e6a6447c..106ef9c19 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -1536,6 +1536,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { #if !CONFIG_DWTDCTHYBRID #define DownshiftMultiplyBy2(x) x * 2 #define DownshiftMultiply(x) x + static void idct16(double *input, double *output, int stride) { static const double C1 = 0.995184726672197; static const double C2 = 0.98078528040323; @@ -1738,6 +1739,7 @@ static void idct16(double *input, double *output, int stride) { output[stride*9] = step[6] - step[ 9]; output[stride*8] = step[7] - step[ 8]; } + static void butterfly_32_idct_1d(double *input, double *output, int stride) { static const double C1 = 0.998795456205; // cos(pi * 1 / 64) static const double C3 = 0.989176509965; // cos(pi * 3 / 64) @@ -1878,12 +1880,7 @@ void vp9_short_idct32x32_c(int16_t *input, 
int16_t *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } -#else // CONFIG_DWTDCTHYBRID - -#define DWT_MAX_LENGTH 32 -#define DWT_TYPE 26 // 26/53/97 -#define DWT_PRECISION_BITS 2 -#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) +#else // !CONFIG_DWTDCTHYBRID #if DWT_TYPE == 53 @@ -2388,6 +2385,72 @@ static void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch, vp9_clear_system_state(); // Make it simd safe : __asm emms; } +static void idct8_1d(double *x) { + int i, j; + double t[8]; + static const double idctmat[64] = { + 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127, + 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064, + 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064, + -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098, + 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162, + -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127, + 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098, + 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162, + 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098, + 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162, + 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161, + -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127, + 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065, + -0.35355339059327, 0.49039264020162, -0.46193976625564, 0.2777851165098, + 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127, + 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064 + }; + for (i = 0; i < 8; ++i) { + t[i] = 0; + for (j = 0; j < 8; ++j) + t[i] += idctmat[i * 8 + j] * x[j]; + } + for (i = 0; i < 8; ++i) { + x[i] = t[i]; + } +} + +static void 
vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch, + int scale) { + double X[8 * 8], Y[8]; + int i, j; + int shortpitch = pitch >> 1; + + vp9_clear_system_state(); // Make it simd safe : __asm emms; + { + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + X[i * 8 + j] = (double)coefs[i * shortpitch + j]; + } + } + for (i = 0; i < 8; i++) + idct8_1d(X + 8 * i); + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; ++j) + Y[j] = X[i + 8 * j]; + idct8_1d(Y); + for (j = 0; j < 8; ++j) + X[i + 8 * j] = Y[j]; + } + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale)); + } + } + } + vp9_clear_system_state(); // Make it simd safe : __asm emms; +} + +#define multiply_bits(d, n) ((n) < 0 ? (d) >> (n) : (d) << (n)) + +#if DWTDCT_TYPE == DWTDCT16X16_LEAN + void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { // assume output is a 32x32 buffer // Temporary buffer to hold a 16x16 block for 16x16 inverse dct @@ -2396,7 +2459,47 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { int16_t buffer2[32 * 32]; // Note: pitch is in bytes, short_pitch is in short units const int short_pitch = pitch >> 1; - int i; + int i, j; + + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the idct16x16 function + vp9_short_idct16x16_c_f(input, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); + } + for (i = 0; i < 16; ++i) { + for (j = 16; j < 32; ++j) { + buffer2[i * 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } + for (i = 16; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + buffer2[i * 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } +#if DWT_TYPE == 26 + dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 97 + dyadic_synthesize_97(1, 32, 32, 
buffer2, 32, output, 32); +#elif DWT_TYPE == 53 + dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); +#endif +} + +#elif DWTDCT_TYPE == DWTDCT16X16 + +void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { + // assume output is a 32x32 buffer + // Temporary buffer to hold a 16x16 block for 16x16 inverse dct + int16_t buffer[16 * 16]; + // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt + int16_t buffer2[32 * 32]; + // Note: pitch is in bytes, short_pitch is in short units + const int short_pitch = pitch >> 1; + int i, j; // TODO(debargha): Implement more efficiently by adding output pitch // argument to the idct16x16 function @@ -2431,6 +2534,66 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { #endif } +#elif DWTDCT_TYPE == DWTDCT8X8 + +void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { + // assume output is a 32x32 buffer + // Temporary buffer to hold a 16x16 block for 16x16 inverse dct + int16_t buffer[8 * 8]; + // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt + int16_t buffer2[32 * 32]; + // Note: pitch is in bytes, short_pitch is in short units + const int short_pitch = pitch >> 1; + int i, j; + + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the idct16x16 function + vp9_short_idct8x8_c_f(input, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8); + } + vp9_short_idct8x8_c_f(input + 8, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8); + } + vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8, + sizeof(*buffer2) * 8); + } + vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 
0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8, + sizeof(*buffer2) * 8); + } + for (i = 0; i < 16; ++i) { + for (j = 16; j < 32; ++j) { + buffer2[i * 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } + for (i = 16; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + buffer2[i * 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } +#if DWT_TYPE == 26 + dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 97 + dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 53 + dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32); +#endif +} + +#endif + +#if CONFIG_TX64X64 void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { // assume output is a 64x64 buffer // Temporary buffer to hold a 16x16 block for 16x16 inverse dct @@ -2448,6 +2611,20 @@ void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { for (i = 0; i < 16; ++i) { vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16); } +#if DWTDCT_TYPE == DWTDCT16X16_LEAN + for (i = 0; i < 16; ++i) { + for (j = 16; j < 64; ++j) { + buffer2[i * 64 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); + } + } + for (i = 16; i < 64; ++i) { + for (j = 0; j < 64; ++j) { + buffer2[i * 64 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); + } + } +#elif DWTDCT_TYPE == DWTDCT16X16 vp9_short_idct16x16_c_f(input + 16, buffer, pitch, 2 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) { @@ -2467,33 +2644,19 @@ void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { } // Copying and scaling highest bands into buffer2 -#if DWT_PRECISION_BITS < 1 for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 64 + 32 + j] = - input[i * short_pitch + 32 + j] >> (1 - DWT_PRECISION_BITS); - } - } - for (i = 0; i < 32; ++i) { - for (j = 0; j < 64; ++j) { + for (j = 32; j < 64; ++j) { 
buffer2[i * 64 + j] = - input[(i + 32) * short_pitch + j] >> (1 - DWT_PRECISION_BITS); + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); } } -#else - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 64 + 32 + j] = - input[i * short_pitch + 32 + j] << (DWT_PRECISION_BITS - 1); - } - } - for (i = 0; i < 32; ++i) { + for (i = 32; i < 64; ++i) { for (j = 0; j < 64; ++j) { buffer2[i * 64 + j] = - input[(i + 32) * short_pitch + j] << (DWT_PRECISION_BITS - 1); + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); } } -#endif +#endif // DWTDCT_TYPE #if DWT_TYPE == 26 dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64); @@ -2503,4 +2666,5 @@ void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64); #endif } -#endif // CONFIG_DWTDCTHYBRID +#endif // CONFIG_TX64X64 +#endif // !CONFIG_DWTDCTHYBRID |