diff options
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/alloccommon.c | 18 | ||||
-rw-r--r-- | vp8/common/blockd.h | 20 | ||||
-rw-r--r-- | vp8/common/coefupdateprobs.h | 177 | ||||
-rw-r--r-- | vp8/common/common.h | 3 | ||||
-rw-r--r-- | vp8/common/defaultcoefcounts.c | 179 | ||||
-rw-r--r-- | vp8/common/defaultcoefcounts.h | 7 | ||||
-rw-r--r-- | vp8/common/entropy.c | 67 | ||||
-rw-r--r-- | vp8/common/entropy.h | 21 | ||||
-rw-r--r-- | vp8/common/generic/systemdependent.c | 18 | ||||
-rw-r--r-- | vp8/common/idct.h | 36 | ||||
-rw-r--r-- | vp8/common/idctllm.c | 346 | ||||
-rw-r--r-- | vp8/common/invtrans.c | 91 | ||||
-rw-r--r-- | vp8/common/invtrans.h | 7 | ||||
-rw-r--r-- | vp8/common/maskingmv.c | 855 | ||||
-rw-r--r-- | vp8/common/onyxc_int.h | 4 | ||||
-rw-r--r-- | vp8/common/quant_common.c | 45 | ||||
-rw-r--r-- | vp8/common/reconintra4x4.c | 16 | ||||
-rw-r--r-- | vp8/common/x86/mask_sse3.asm | 484 |
18 files changed, 2350 insertions, 44 deletions
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index 376707ec6..869f6e20f 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -148,7 +148,16 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) } void vp8_setup_version(VP8_COMMON *cm) { - switch (cm->version) + if (cm->version & 0x4) + { + if (!CONFIG_EXPERIMENTAL) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Bitstream was created by an experimental " + "encoder"); + cm->experimental = 1; + } + + switch (cm->version & 0x3) { case 0: cm->no_lpf = 0; @@ -174,13 +183,6 @@ void vp8_setup_version(VP8_COMMON *cm) cm->use_bilinear_mc_filter = 1; cm->full_pixel = 1; break; - default: - /*4,5,6,7 are reserved for future use*/ - cm->no_lpf = 0; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 0; - cm->full_pixel = 0; - break; } } void vp8_create_common(VP8_COMMON *oci) diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 84c52773c..d2ef84256 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -20,6 +20,7 @@ void vpx_log(const char *format, ...); #include "treecoder.h" #include "subpixel.h" #include "vpx_ports/mem.h" +#include "common.h" #define TRUE 1 #define FALSE 0 @@ -29,6 +30,7 @@ void vpx_log(const char *format, ...); #define DCPREDCNTTHRESH 3 #define MB_FEATURE_TREE_PROBS 3 + #define MAX_MB_SEGMENTS 4 #define MAX_REF_LF_DELTAS 4 @@ -64,6 +66,10 @@ extern const unsigned char vp8_block2above[25]; #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ Dest = ((A)!=0) + ((B)!=0); +#if CONFIG_T8X8 +#define VP8_COMBINEENTROPYCONTEXTS_8x8( Dest, A1, B1, A2, B2) \ + Dest = ((A1)!=0 || (A2)!=0) + ((B1)!=0 || (B2)!=0); +#endif typedef enum { @@ -157,7 +163,9 @@ typedef struct MB_PREDICTION_MODE mode, uv_mode; MV_REFERENCE_FRAME ref_frame; int_mv mv; - +#if CONFIG_SEGMENTATION + unsigned char segment_flag; +#endif unsigned char partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no 
coefficients, 0=need decode tokens */ unsigned char need_to_clamp_mvs; @@ -232,9 +240,13 @@ typedef struct /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */ /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */ - vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */ - - signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */ +#if CONFIG_SEGMENTATION + vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS + 3]; // Probability Tree used to code Segment number + unsigned char temporal_update; +#else + vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; +#endif + signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters /* mode_based Loop filter adjustment */ unsigned char mode_ref_lf_delta_enabled; diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index 9e194dc9a..6fe5fcc6f 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -183,3 +183,180 @@ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTE }, }, }; +#if CONFIG_T8X8 +const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = +{ + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {219, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {239, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 209, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {239, 219, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 209, 234, 
255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 209, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 198, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 204, 204, 255, 255, 255, 255, 255, 255, 255, 255, }, + {219, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 198, 204, 255, 255, 255, 255, 255, 255, 255, 255, }, + {209, 193, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 214, 214, 255, 255, 255, 255, 255, 255, 255, 255, }, + {173, 193, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 224, 219, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 239, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 234, 224, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 234, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 255, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 
255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {234, 183, 214, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 229, 255, 249, 255, 255, 255, 255, 255, 255, }, + {229, 214, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 249, 255, 255, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 219, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 249, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 224, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 204, 234, 249, 249, 255, 255, 255, 255, 255, 255, }, + {255, 249, 249, 255, 244, 249, 255, 255, 255, 255, 255, }, + }, + { + {255, 178, 224, 255, 249, 255, 255, 255, 255, 255, 255, }, + {234, 224, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 183, 229, 255, 249, 255, 255, 255, 255, 255, 255, }, + {234, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 249, 249, 255, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 224, 249, 255, 244, 255, 255, 255, 255, 255, }, + {219, 224, 229, 255, 255, 249, 255, 255, 255, 255, 255, }, + {255, 255, 255, 249, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 224, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 244, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 239, 234, 244, 239, 244, 249, 255, 255, 255, 255, }, + }, + { + {255, 249, 239, 239, 244, 255, 255, 255, 255, 255, 255, }, + {255, 249, 244, 255, 249, 255, 255, 255, 255, 255, 255, }, + {255, 255, 239, 255, 255, 249, 255, 255, 255, 255, 255, }, + }, + { + {255, 244, 239, 239, 244, 255, 255, 255, 255, 255, 255, }, + {255, 234, 239, 234, 249, 255, 255, 255, 255, 255, 255, }, + {255, 255, 229, 239, 234, 249, 244, 255, 255, 255, 255, }, + }, + { + {255, 239, 229, 239, 234, 234, 255, 255, 255, 255, 255, }, + {255, 239, 234, 229, 244, 239, 255, 234, 255, 255, 255, }, + {255, 229, 209, 229, 239, 234, 244, 229, 255, 249, 255, }, + }, + { + {255, 239, 234, 229, 244, 249, 255, 249, 255, 255, 255, }, + {255, 234, 229, 244, 234, 249, 255, 249, 255, 255, 255, }, + {255, 229, 239, 229, 249, 255, 255, 244, 255, 255, 255, }, + }, + { + {255, 239, 234, 239, 234, 239, 255, 249, 255, 255, 255, }, + {255, 229, 234, 239, 239, 239, 255, 244, 255, 255, 255, }, + {255, 229, 234, 239, 239, 244, 255, 255, 255, 255, 255, }, + }, + { + {255, 219, 224, 229, 229, 234, 239, 224, 255, 255, 255, }, + {255, 229, 229, 224, 234, 229, 239, 239, 255, 255, 255, }, + {255, 229, 224, 239, 234, 239, 224, 224, 255, 249, 255, }, + }, + { + {255, 234, 229, 244, 229, 229, 255, 214, 255, 255, 255, }, + {255, 239, 234, 239, 214, 239, 255, 209, 255, 255, 255, }, + {249, 239, 219, 209, 219, 224, 239, 204, 255, 255, 255, }, + }, + }, + +}; +#endif
\ No newline at end of file diff --git a/vp8/common/common.h b/vp8/common/common.h index 9a93da991..999f79f2f 100644 --- a/vp8/common/common.h +++ b/vp8/common/common.h @@ -13,7 +13,7 @@ #define common_h 1 #include <assert.h> - +#include "vpx_config.h" /* Interface header for common constant data structures and lookup tables */ #include "vpx_mem/vpx_mem.h" @@ -38,5 +38,4 @@ #define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest)); - #endif /* common_h */ diff --git a/vp8/common/defaultcoefcounts.c b/vp8/common/defaultcoefcounts.c index b0e2e702a..34d1fb1d5 100644 --- a/vp8/common/defaultcoefcounts.c +++ b/vp8/common/defaultcoefcounts.c @@ -223,3 +223,182 @@ const unsigned int vp8_default_coef_counts[BLOCK_TYPES] }, }, }; + + +#if CONFIG_T8X8 +const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS] = +{ + + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 1 */ + { 21041, 13314, 3420, 592, 117, 0, 0, 0, 0, 0, 0, 11783}, + { 48236, 6918, 586, 153, 0, 0, 0, 0, 0, 0, 0, 23137}, + { 676112, 106685, 24701, 6003, 1426, 429, 165, 0, 0, 0, 0, 28910} + }, + { /* Coeff Band 2 */ + { 660107, 75227, 8451, 1345, 259, 0, 0, 0, 0, 0, 0, 0}, + { 79164, 36835, 6865, 1185, 246, 47, 0, 0, 0, 0, 0, 2575}, + { 19469, 14330, 3070, 579, 94, 6, 0, 0, 0, 0, 0, 44} + }, + { /* Coeff Band 3 */ + { 1978004, 235343, 28485, 3242, 271, 0, 0, 0, 0, 0, 0, 0}, + { 228684, 106736, 21431, 2842, 272, 46, 0, 0, 0, 0, 0, 9266}, + { 32470, 27496, 6852, 1386, 45, 93, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 4 */ + { 1911212, 224613, 49653, 13748, 2541, 568, 48, 0, 0, 0, 0, 0}, + { 196670, 103472, 44473, 11490, 2432, 977, 72, 0, 0, 0, 0, 9447}, + { 37876, 40417, 19142, 6069, 1799, 727, 51, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 3813399, 437714, 64387, 11312, 695, 219, 0, 0, 0, 0, 0, 
0}, + { 438288, 215917, 61905, 10194, 674, 107, 0, 0, 0, 0, 0, 17808}, + { 99139, 93643, 30054, 5758, 802, 171, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 12259383, 1625505, 234927, 46306, 8417, 1456, 151, 0, 0, 0, 0, 0}, + { 1518161, 734287, 204240, 44228, 9462, 2240, 65, 0, 0, 0, 0, 107630}, + { 292470, 258894, 94925, 25864, 6662, 2055, 170, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 7 */ + { 9791308, 2118949, 169439, 16735, 1122, 0, 0, 0, 0, 0, 0, 0}, + { 1500281, 752410, 123259, 13065, 1168, 47, 0, 0, 0, 0, 0, 707182}, + { 193067, 142638, 31018, 4719, 516, 138, 0, 0, 0, 0, 0, 12439} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 16925, 10553, 852, 16, 63, 87, 47, 0, 0, 0, 0, 31232}, + { 39777, 26839, 6822, 1908, 678, 456, 227, 168, 35, 0, 0, 46825}, + { 17300, 16666, 4168, 1209, 492, 154, 118, 207, 0, 0, 0, 19608} + }, + { /* Coeff Band 1 */ + { 35882, 31722, 4625, 1270, 266, 237, 0, 0, 0, 0, 0, 0}, + { 15426, 13894, 4482, 1305, 281, 43, 0, 0, 0, 0, 0, 18627}, + { 3900, 6552, 3472, 1723, 746, 366, 115, 35, 0, 0, 0, 798} + }, + { /* Coeff Band 2 */ + { 21998, 29132, 3353, 679, 46, 0, 0, 0, 0, 0, 0, 0}, + { 9098, 15767, 3794, 792, 268, 47, 0, 0, 0, 0, 0, 22402}, + { 4007, 8472, 2844, 687, 217, 0, 0, 0, 0, 0, 0, 2739} + }, + { /* Coeff Band 3 */ + { 0, 31414, 2911, 682, 96, 0, 0, 0, 0, 0, 0, 0}, + { 0, 16515, 4425, 938, 124, 0, 0, 0, 0, 0, 0, 31369}, + { 0, 4833, 2787, 1213, 150, 0, 0, 0, 0, 0, 0, 3744} + }, + { /* Coeff Band 4 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52762}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13326} + }, + { /* Coeff Band 7 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 4444, 1614, 120, 48, 0, 48, 0, 0, 0, 0, 0, 278}, + { 192436, 103730, 24494, 9845, 4122, 1193, 102, 0, 0, 0, 0, 2577}, + { 3473446, 2308716, 815510, 370374, 167797, 92152, 12073, 86, 0, 0, 0, 6801} + }, + { /* Coeff Band 1 */ + { 2150616, 1136388, 250011, 86888, 31434, 13746, 1243, 0, 0, 0, 0, 0}, + { 1179945, 799802, 266012, 106787, 40809, 16486, 1546, 0, 0, 0, 0, 2673}, + { 465128, 504130, 286989, 146259, 62380, 30192, 2866, 20, 0, 0, 0, 0} + }, + { /* Coeff Band 2 */ + { 2157762, 1177519, 282665, 108499, 43389, 23224, 2597, 34, 0, 0, 0, 0}, + { 1135685, 813705, 278079, 123255, 53935, 29492, 3152, 39, 0, 0, 0, 2978}, + { 391894, 428037, 264216, 144306, 69326, 40281, 5541, 29, 0, 0, 0, 38} + }, + { /* Coeff Band 3 */ + { 6669109, 3468471, 782161, 288484, 115500, 51083, 4943, 41, 0, 0, 0, 0}, + { 3454493, 2361636, 809524, 337663, 141343, 65036, 6361, 0, 0, 0, 0, 8730}, + { 1231825, 1359522, 824686, 420784, 185517, 98731, 10973, 72, 0, 0, 0, 20} + }, + { /* Coeff Band 4 */ + { 7606203, 3452846, 659856, 191703, 49335, 14336, 450, 0, 0, 0, 0, 0}, + { 3806506, 2379332, 691697, 224938, 61966, 18324, 766, 0, 0, 0, 0, 8193}, + { 1270110, 1283728, 628775, 243378, 72617, 24897, 1087, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 15314169, 7436809, 1579928, 515790, 167453, 58305, 3502, 19, 0, 0, 0, 0}, + { 7021286, 4667922, 1545706, 574463, 191793, 68748, 4048, 1, 0, 0, 0, 17222}, + { 2011989, 2145878, 1185336, 534879, 195719, 79103, 5343, 4, 0, 0, 0, 37} + }, + { /* Coeff Band 6 */ + { 63458382, 25384462, 4208045, 1091050, 299011, 95242, 5238, 33, 0, 0, 0, 0}, + { 25638401, 14694085, 3945978, 1195420, 344813, 117355, 6703, 0, 0, 0, 0, 216811}, + { 5988177, 5824044, 2754413, 1077350, 370739, 139710, 9693, 38, 0, 0, 0, 1835} + }, + { /* Coeff Band 7 */ + { 74998348, 29342158, 2955001, 452912, 69631, 9516, 37, 0, 0, 0, 0, 0}, + { 24762356, 13281085, 2409883, 436787, 
68948, 10658, 36, 0, 0, 0, 0, 6614989}, + { 3882867, 3224489, 1052289, 252890, 46967, 8548, 154, 0, 0, 0, 0, 194354} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ + { 10583, 12059, 3155, 1041, 248, 175, 24, 2, 0, 0, 0, 5717}, + { 42461, 41782, 13553, 4966, 1352, 855, 89, 0, 0, 0, 0, 15000}, + { 4691125, 5045589, 2673566, 1089317, 378161, 160268, 18252, 813, 69, 13, 0, 49} + }, + { /* Coeff Band 1 */ + { 1535203, 1685686, 924565, 390329, 141709, 60523, 5983, 171, 0, 0, 0, 0}, + { 1594021, 1793276, 1016078, 441332, 164159, 70843, 8098, 311, 0, 0, 0, 11312}, + { 1225223, 1430184, 888492, 460713, 203286, 115149, 22061, 804, 7, 0, 0, 0} + }, + { /* Coeff Band 2 */ + { 1522386, 1590366, 799910, 303691, 96625, 37608, 3637, 180, 33, 11, 0, 0}, + { 1682184, 1793869, 913649, 353520, 113674, 46309, 4736, 221, 18, 3, 0, 963}, + { 1574580, 1740474, 954392, 417994, 151400, 67091, 8000, 536, 73, 10, 0, 63} + }, + { /* Coeff Band 3 */ + { 4963672, 5197790, 2585383, 982161, 313333, 118498, 16014, 536, 62, 0, 0, 0}, + { 5223913, 5569803, 2845858, 1107384, 364949, 147841, 18296, 658, 11, 11, 0, 1866}, + { 4042207, 4548894, 2608767, 1154993, 446290, 221295, 41054, 2438, 124, 20, 0, 0} + }, + { /* Coeff Band 4 */ + { 3857216, 4431325, 2670447, 1330169, 553301, 286825, 46763, 1917, 0, 0, 0, 0}, + { 4226215, 4963701, 3046198, 1523923, 644670, 355519, 58792, 2525, 0, 0, 0, 1298}, + { 3831873, 4580350, 3018580, 1660048, 797298, 502983, 123906, 7172, 16, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 8524543, 9285149, 4979435, 2039330, 683458, 266032, 22628, 270, 0, 0, 0, 0}, + { 9432163, 10428088, 5715661, 2385738, 838389, 326264, 29981, 361, 0, 0, 0, 884}, + { 9039066, 10368964, 6136765, 2862030, 1098269, 511668, 63105, 945, 14, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 33222872, 34748297, 17701695, 7214933, 2602336, 1191859, 187873, 12667, 390, 3, 0, 0}, + { 34765051, 37140719, 19525578, 8268934, 3085012, 1473864, 246743, 15258, 736, 3, 0, 8403}, + { 28591289, 32252393, 19037068, 
9213729, 4020653, 2372354, 586420, 67428, 3920, 92, 7, 3} + }, + { /* Coeff Band 7 */ + { 68604786, 60777665, 19712887, 5656955, 1520443, 507166, 51829, 2466, 10, 0, 0, 0}, + { 55447403, 51682540, 19008774, 5928582, 1706884, 595531, 65998, 3661, 101, 0, 0, 8468343}, + { 28321970, 29149398, 13565882, 5258675, 1868588, 898041, 192023, 21497, 672, 17, 0, 1884921} + } + } + }; +#endif
\ No newline at end of file diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h index 7a1e28b7b..293e74269 100644 --- a/vp8/common/defaultcoefcounts.h +++ b/vp8/common/defaultcoefcounts.h @@ -18,4 +18,9 @@ extern const unsigned int vp8_default_coef_counts[BLOCK_TYPES] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif //__DEFAULTCOEFCOUNTS_H +extern const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS]; + +#endif
\ No newline at end of file diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index 0eee60ec8..ca37aab22 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -59,6 +59,24 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = 9, 12, 13, 10, 7, 11, 14, 15, }; +#if CONFIG_T8X8 +DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, + 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 +}; +DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = +{ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, +}; +#endif DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = { @@ -69,6 +87,9 @@ DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = }; DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]); +#if CONFIG_T8X8 +DECLARE_ALIGNED(64, short, vp8_default_zig_zag_mask_8x8[64]);//int64_t +#endif const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; @@ -99,9 +120,15 @@ static const Prob Pcat2[] = { 165, 145}; static const Prob Pcat3[] = { 173, 148, 140}; static const Prob Pcat4[] = { 176, 155, 140, 135}; static const Prob Pcat5[] = { 180, 157, 141, 134, 130}; +#if CONFIG_EXTEND_QRANGE +static const Prob Pcat6[] = +{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129}; +#else static const Prob Pcat6[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129}; +#endif + -static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[22]; +static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[26]; /* two entries per extra bit; cat6 sized for the 13-bit CONFIG_EXTEND_QRANGE case (11 bits otherwise) */ void vp8_init_scan_order_mask() @@ -112,7 +139,12 @@ void vp8_init_scan_order_mask() { vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; } - +#if CONFIG_T8X8 + for (i = 0; i < 64; i++) + { + 
vp8_default_zig_zag_mask_8x8[vp8_default_zig_zag1d_8x8[i]] = (short)(i < 16 ? 1 << i : 0); /* 1 << i is undefined once i reaches the width of int, and a short keeps only bits 0..15. NOTE(review): a real 64-position mask needs a 64-bit element type - confirm with the users of this table */ + } +#endif } static void init_bit_tree(vp8_tree_index *p, int n) @@ -135,7 +167,11 @@ static void init_bit_trees() init_bit_tree(cat3, 3); init_bit_tree(cat4, 4); init_bit_tree(cat5, 5); +#if CONFIG_EXTEND_QRANGE + init_bit_tree(cat6, 13); +#else init_bit_tree(cat6, 11); +#endif } vp8_extra_bit_struct vp8_extra_bits[12] = @@ -150,7 +186,11 @@ vp8_extra_bit_struct vp8_extra_bits[12] = { cat3, Pcat3, 3, 11}, { cat4, Pcat4, 4, 19}, { cat5, Pcat5, 5, 35}, +#if CONFIG_EXTEND_QRANGE + { cat6, Pcat6, 13, 67}, +#else { cat6, Pcat6, 11, 67}, +#endif { 0, 0, 0, 0} }; #include "defaultcoefcounts.h" @@ -183,6 +223,31 @@ void vp8_default_coef_probs(VP8_COMMON *pc) while (++i < COEF_BANDS); } while (++h < BLOCK_TYPES); +#if CONFIG_T8X8 + h = 0; + do + { + int i = 0; + + do + { + int k = 0; + + do + { + unsigned int branch_ct [ENTROPY_NODES] [2]; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + pc->fc.coef_probs_8x8 [h][i][k], branch_ct, vp8_default_coef_counts_8x8 [h][i][k], + 256, 1); + + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++i < COEF_BANDS); + } + while (++h < BLOCK_TYPES); +#endif } diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index 66d282b61..d3e841c3e 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -14,7 +14,7 @@ #include "treecoder.h" #include "blockd.h" - +#include "common.h" /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ @@ -27,7 +27,7 @@ #define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ #define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ #define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */ +#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 (13+1 with CONFIG_EXTEND_QRANGE) */ #define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ #define MAX_ENTROPY_TOKENS 12 @@ -50,8 +50,11 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ 
#define PROB_UPDATE_BASELINE_COST 7 #define MAX_PROB 255 +#if CONFIG_EXTEND_QRANGE +#define DCT_MAX_VALUE 8192 +#else #define DCT_MAX_VALUE 2048 - +#endif /* Coefficients are predicted via a 3-dimensional probability table. */ @@ -64,6 +67,9 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define COEF_BANDS 8 extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); +#if CONFIG_T8X8 +extern DECLARE_ALIGNED(64, const unsigned char, vp8_coef_bands_8x8[64]); +#endif /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. For the first @@ -87,14 +93,19 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - +#if CONFIG_T8X8 +extern const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif struct VP8Common; void vp8_default_coef_probs(struct VP8Common *); - extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); extern short vp8_default_zig_zag_mask[16]; +#if CONFIG_T8X8 +extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]); +extern short vp8_default_zig_zag_mask_8x8[64];//int64_t +#endif extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; void vp8_coef_tree_initialize(void); diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index c61629407..1acc0157b 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -75,7 +75,13 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c; rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; rtcd->idct.iwalsh16 = 
vp8_short_inv_walsh4x4_c; - +#if CONFIG_T8X8 + rtcd->idct.idct8 = vp8_short_idct8x8_c; + rtcd->idct.idct8_1 = vp8_short_idct8x8_1_c; + rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c; + rtcd->idct.ihaar2 = vp8_short_ihaar2x2_c; + rtcd->idct.ihaar2_1 = vp8_short_ihaar2x2_1_c; +#endif rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; rtcd->recon.copy8x4 = vp8_copy_mem8x4_c; @@ -129,9 +135,19 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) vp8_arch_x86_common_init(ctx); #endif + #if ARCH_ARM vp8_arch_arm_common_init(ctx); #endif +#if CONFIG_EXTEND_QRANGE + rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c; + rtcd->idct.idct16 = vp8_short_idct4x4llm_c; + rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c; + rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; + rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c; + +#endif + #if CONFIG_MULTITHREAD ctx->processor_core_count = get_cpu_count(); diff --git a/vp8/common/idct.h b/vp8/common/idct.h index f5fd94dfd..d1890b9e5 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -31,6 +31,34 @@ #include "arm/idct_arm.h" #endif +#if CONFIG_T8X8 +#ifndef vp8_idct_idct8 +#define vp8_idct_idct8 vp8_short_idct8x8_c +#endif +extern prototype_idct(vp8_idct_idct8); + +#ifndef vp8_idct_idct8_1 +#define vp8_idct_idct8_1 vp8_short_idct8x8_1_c +#endif +extern prototype_idct(vp8_idct_idct8_1); + +#ifndef vp8_idct_ihaar2 +#define vp8_idct_ihaar2 vp8_short_ihaar2x2_c +#endif +extern prototype_idct(vp8_idct_ihaar2); + +#ifndef vp8_idct_ihaar2_1 +#define vp8_idct_ihaar2_1 vp8_short_ihaar2x2_1_c +#endif +extern prototype_idct(vp8_idct_ihaar2_1); + +#ifndef vp8_idct_idct1_scalar_add_8x8 +#define vp8_idct_idct1_scalar_add_8x8 vp8_dc_only_idct_add_8x8_c +#endif +extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add_8x8); + +#endif + #ifndef vp8_idct_idct1 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_c #endif @@ -69,6 +97,14 @@ typedef struct vp8_second_order_fn_t iwalsh1; vp8_second_order_fn_t 
iwalsh16; + +#if CONFIG_T8X8 + vp8_idct_fn_t idct8; + vp8_idct_fn_t idct8_1; + vp8_idct_scalar_add_fn_t idct1_scalar_add_8x8; + vp8_idct_fn_t ihaar2; + vp8_idct_fn_t ihaar2_1; +#endif } vp8_idct_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index 196062df6..4f3a01b1b 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -22,9 +22,15 @@ * so * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). **************************************************************************/ +#include "vpx_ports/config.h" + + +#include <math.h> + static const int cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; static const int rounding = 0; + void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) { int i; @@ -75,11 +81,19 @@ void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) d1 = temp1 + temp2; +#if !CONFIG_EXTEND_QRANGE op[0] = (a1 + d1 + 4) >> 3; op[3] = (a1 - d1 + 4) >> 3; op[1] = (b1 + c1 + 4) >> 3; op[2] = (b1 - c1 + 4) >> 3; +#else + op[0] = (a1 + d1 + 16) >> 5; + op[3] = (a1 - d1 + 16) >> 5; + + op[1] = (b1 + c1 + 16) >> 5; + op[2] = (b1 - c1 + 16) >> 5; +#endif ip += shortpitch; op += shortpitch; @@ -92,8 +106,11 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch) int a1; short *op = output; int shortpitch = pitch >> 1; +#if !CONFIG_EXTEND_QRANGE a1 = ((input[0] + 4) >> 3); - +#else + a1 = ((input[0] + 16) >> 5); +#endif for (i = 0; i < 4; i++) { op[0] = a1; @@ -106,7 +123,11 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch) void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride) { +#if !CONFIG_EXTEND_QRANGE int a1 = ((input_dc + 4) >> 3); +#else + int a1 = ((input_dc + 16) >> 5); +#endif int r, c; for (r = 0; r < 4; r++) @@ -168,11 +189,17 @@ void vp8_short_inv_walsh4x4_c(short *input, short *output) c2 = a1 - b1; d2 = d1 - c1; +#if !CONFIG_EXTEND_QRANGE op[0] 
= (a2 + 3) >> 3; op[1] = (b2 + 3) >> 3; op[2] = (c2 + 3) >> 3; op[3] = (d2 + 3) >> 3; - +#else + op[0] = (a2 + 1) >> 2; + op[1] = (b2 + 1) >> 2; + op[2] = (c2 + 1) >> 2; + op[3] = (d2 + 1) >> 2; +#endif ip += 4; op += 4; } @@ -184,7 +211,11 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output) int a1; short *op = output; - a1 = ((input[0] + 3) >> 3); +#if !CONFIG_EXTEND_QRANGE + a1 = (input[0] + 3 )>> 3; +#else + a1 = (input[0] + 1 )>> 2; +#endif for (i = 0; i < 4; i++) { @@ -195,3 +226,312 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output) op += 4; } } + +#if CONFIG_T8X8 + +#define FAST_IDCT_8X8 + +void vp8_short_idct8x8_1_c(short *input, short *output, int pitch) +{ + int i, b; + int a1; + short *op = output; + short *orig_op = output; + int shortpitch = pitch >> 1; + a1 = ((input[0] + 4) >> 3); + for (b = 0; b < 4; b++) + { + for (i = 0; i < 4; i++) + { + op[0] = a1; + op[1] = a1; + op[2] = a1; + op[3] = a1; + op += shortpitch; + } + op = orig_op + (b+1)%2*4 +(b+1)/2*4*shortpitch; + } +} + +void vp8_dc_only_idct_add_8x8_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride) +{ + int a1 = ((input_dc + 4) >> 3); + int r, c, b; + unsigned char *orig_pred = pred_ptr; + unsigned char *orig_dst = dst_ptr; + for (b = 0; b < 4; b++) + { + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = a1 + pred_ptr[c] ; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dst_ptr[c] = (unsigned char) a ; + } + + dst_ptr += stride; + pred_ptr += pitch; + } + dst_ptr = orig_dst + (b+1)%2*4 + (b+1)/2*4*stride; + pred_ptr = orig_pred + (b+1)%2*4 + (b+1)/2*4*pitch; + } +} + +#ifdef FAST_IDCT_8X8 + +#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ +#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */ +#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */ +#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */ +#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */ +#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */ + +/* row (horizontal) 
IDCT + * + * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- * + * ( k + - ) * l ) l=0 8 2 + * + * where: c[0] = 128 c[1..7] = 128*sqrt(2) */ + +static void idctrow (int *blk) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) | + (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) + { + blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3; + return; + } + x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */ + + /* first stage */ + x8 = W7 * (x4 + x5); + x4 = x8 + (W1 - W7) * x4; + x5 = x8 - (W1 + W7) * x5; + x8 = W3 * (x6 + x7); + x6 = x8 - (W3 - W5) * x6; + x7 = x8 - (W3 + W5) * x7; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2); + x2 = x1 - (W2 + W6) * x2; + x3 = x1 + (W2 - W6) * x3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[0] = (x7 + x1) >> 8; + blk[1] = (x3 + x2) >> 8; + blk[2] = (x0 + x4) >> 8; + blk[3] = (x8 + x6) >> 8; + blk[4] = (x8 - x6) >> 8; + blk[5] = (x0 - x4) >> 8; + blk[6] = (x3 - x2) >> 8; + blk[7] = (x7 - x1) >> 8; +} + +/* column (vertical) IDCT + * + * 7 pi 1 dst[8*k] = sum c[l] * src[8*l] * + * cos( -- * ( k + - ) * l ) l=0 8 2 + * + * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */ +static void idctcol (int *blk) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) | + (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) | (x7 = blk[8 * 3]))) + { + blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] = blk[8 * 7] = + ((blk[8 * 0] + 32) >> 6); + return; + } + x0 = (blk[8 * 0] << 8) + 8192; + + /* first stage */ + x8 = W7 * (x4 + x5) + 4; + x4 = (x8 + (W1 - W7) * x4) >> 3; 
+ x5 = (x8 - (W1 + W7) * x5) >> 3; + x8 = W3 * (x6 + x7) + 4; + x6 = (x8 - (W3 - W5) * x6) >> 3; + x7 = (x8 - (W3 + W5) * x7) >> 3; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2) + 4; + x2 = (x1 - (W2 + W6) * x2) >> 3; + x3 = (x1 + (W2 - W6) * x3) >> 3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[8 * 0] = (x7 + x1) >> 14; + blk[8 * 1] = (x3 + x2) >> 14; + blk[8 * 2] = (x0 + x4) >> 14; + blk[8 * 3] = (x8 + x6) >> 14; + blk[8 * 4] = (x8 - x6) >> 14; + blk[8 * 5] = (x0 - x4) >> 14; + blk[8 * 6] = (x3 - x2) >> 14; + blk[8 * 7] = (x7 - x1) >> 14; +} + +#define TX_DIM 8 +void vp8_short_idct8x8_c(short *coefs, short *block, int pitch) +// an approximate 8x8 dct implementation, but not used +{ + int X[TX_DIM*TX_DIM]; + int i,j; + int shortpitch = pitch >> 1; + + for (i = 0; i < TX_DIM; i++) + { + for (j = 0; j < TX_DIM; j++) + { + X[i * TX_DIM + j] = (int)coefs[i * TX_DIM + j]; + } + } + for (i = 0; i < 8; i++) + idctrow (X + 8 * i); + + for (i = 0; i < 8; i++) + idctcol (X + i); + + for (i = 0; i < TX_DIM; i++) + { + for (j = 0; j < TX_DIM; j++) + { + block[i*shortpitch+j] = X[i * TX_DIM + j]>>1; + } + } +} + +#else + +/* This is really for testing */ +void vp8_short_idct8x8_c(short *input, short *output, int pitch) +{ + int X[8][8]; + double C[8][8]={{0.0}}, Ct[8][8]={{0.0}}, temp[8][8]={{0.0}}; + int i,j,k; + double temp1=0.0; + double pi = atan( 1.0 ) * 4.0; + //static int count=0; + + int shortpitch = pitch >> 1; + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + X[i][j] = input[i * 8 + j]; + } + } + + // TODO: DCT matrix should be calculated once for all + for ( j = 0 ; j < 8 ; j++ ) { + C[ 0 ][ j ] = 1.0 / sqrt( (double) 8 ); + Ct[ j ][ 0 ] = C[ 0 ][ j ]; + } + for ( i = 1 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + C[ i ][ j ] = 
sqrt( 2.0 / 8 ) * + cos( pi * ( 2 * j + 1 ) * i / ( 2.0 * 8 ) ); + Ct[ j ][ i ] = C[ i ][ j ]; + } + } + /* MatrixMultiply( temp, input, C ); */ + for ( i = 0 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + temp[ i ][ j ] = 0.0; + for ( k = 0 ; k < 8 ; k++ ) + temp[ i ][ j ] += X[ i ][ k ] * C[ k ][ j ]; + } + } + + /* MatrixMultiply( output, Ct, temp ); */ + for ( i = 0 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + temp1 = 0.0; + for ( k = 0 ; k < 8 ; k++ ) + temp1 += Ct[ i ][ k ] * temp[ k ][ j ]; + X[ i ][ j ] = floor( temp1/ 2.0 + 0.5); + } + } + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + output[i*shortpitch+j] = X[i][j]; + } + } +} +#endif + +void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) +{ + int i, x; + short *ip = input; //0,1, 4, 8 + short *op = output; + for (i = 0; i < 16; i++) + { + op[i] = 0; + } + + x = (ip[0] + ip[1] + ip[4] + ip[8]); + op[0] = (x>=0?x+1:x-1)>>2; + x = (ip[0] - ip[1] + ip[4] - ip[8]); + op[1] = (x>=0?x+1:x-1)>>2; + x = (ip[0] + ip[1] - ip[4] - ip[8]); + op[4] = (x>=0?x+1:x-1)>>2; + x = (ip[0] - ip[1] - ip[4] + ip[8]); + op[8] = (x>=0?x+1:x-1)>>2; +} + +void vp8_short_ihaar2x2_1_c(short *input, short *output, int pitch) +{ + int a1; + short *ip = input; + short *op = output; + a1 = ((ip[0]>=0?ip[0]+1:ip[0]-1) >> 2); + op[0] = a1; + op[2] = a1; + op[8] = a1; + op[10] = a1; + +} +#endif diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index 81a3f2d89..d361b654a 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -24,13 +24,24 @@ static void recon_dcblock(MACROBLOCKD *x) } } +#if CONFIG_T8X8 +static void recon_dcblock_8x8(MACROBLOCKD *x) +{ + BLOCKD *b = &x->block[24]; //for coeff 0, 2, 8, 10 + x->block[0].dqcoeff[0] = b->diff[0]; + x->block[4].dqcoeff[0] = b->diff[1]; + x->block[8].dqcoeff[0] = b->diff[4]; + x->block[12].dqcoeff[0] = b->diff[8]; + +} +#endif void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) { - if (b->eob > 1) - 
IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch); - else + if (b->eob <= 1) IDCT_INVOKE(rtcd, idct1)(b->dqcoeff, b->diff, pitch); + else + IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch); } @@ -86,3 +97,77 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x } } + +#if CONFIG_T8X8 +void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch)//pay attention to use when 8x8 +{ + // int b,i; + //if (b->eob > 1) + IDCT_INVOKE(rtcd, idct8)(input_dqcoeff, output_coeff, pitch); + //else + //IDCT_INVOKE(rtcd, idct8_1)(b->dqcoeff, b->diff, pitch);//pitch + +} + + +void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + // do 2nd order transform on the dc block + IDCT_INVOKE(rtcd, ihaar2)(x->block[24].dqcoeff, x->block[24].diff, 8); + + recon_dcblock_8x8(x); //need to change for 8x8 + for (i = 0; i < 9; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32); + } + +} +void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + for (i = 16; i < 24; i += 4) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); + } + +} + + +void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + if (x->mode_info_context->mbmi.mode != B_PRED && + x->mode_info_context->mbmi.mode != SPLITMV) + { + // do 2nd order transform on the dc block + + IDCT_INVOKE(rtcd, ihaar2)(&x->block[24].dqcoeff[0], x->block[24].diff, 8);//dqcoeff[0] + recon_dcblock_8x8(x); //need to change for 8x8 + + } + + for (i = 0; i < 9; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + 
vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32); + } + + + for (i = 16; i < 24; i += 4) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); + } + +} +#endif diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index b3ffb7073..1466a5844 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -20,4 +20,11 @@ extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBL extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#if CONFIG_T8X8 +extern void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch); +extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#endif + #endif diff --git a/vp8/common/maskingmv.c b/vp8/common/maskingmv.c new file mode 100644 index 000000000..d01a18fc8 --- /dev/null +++ b/vp8/common/maskingmv.c @@ -0,0 +1,855 @@ +/* + ============================================================================ + Name : maskingmv.c + Author : jimbankoski + Version : + Copyright : Your copyright notice + Description : Hello World in C, Ansi-style + ============================================================================ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +extern unsigned int vp8_sad16x16_sse3( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride, + int max_err); + +extern void vp8_sad16x16x3_sse3( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride, + int *results); + +extern int vp8_growmaskmb_sse3( + unsigned char 
*om, + unsigned char *nm); + +extern void vp8_makemask_sse3( + unsigned char *y, + unsigned char *u, + unsigned char *v, + unsigned char *ym, + int yp, + int uvp, + int ys, + int us, + int vs, + int yt, + int ut, + int vt); + +unsigned int vp8_sad16x16_unmasked_wmt( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride, + unsigned char *mask); + +unsigned int vp8_sad16x16_masked_wmt( + unsigned char *src_ptr, + int src_stride, + unsigned char *ref_ptr, + int ref_stride, + unsigned char *mask); + +unsigned int vp8_masked_predictor_wmt( + unsigned char *masked, + unsigned char *unmasked, + int src_stride, + unsigned char *dst_ptr, + int dst_stride, + unsigned char *mask); +unsigned int vp8_masked_predictor_uv_wmt( + unsigned char *masked, + unsigned char *unmasked, + int src_stride, + unsigned char *dst_ptr, + int dst_stride, + unsigned char *mask); +unsigned int vp8_uv_from_y_mask( + unsigned char *ymask, + unsigned char *uvmask); +int yp=16; +unsigned char sxy[]= +{ +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, 
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90 +}; + +unsigned char sts[]= +{ +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +}; +unsigned char str[]= +{ +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +}; + +unsigned char y[]= +{ +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, 
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40, +60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40, +40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, +40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40, +40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40 +}; +int uvp=8; +unsigned char u[]= +{ +90,80,70,70,90,90,90,17, +90,80,70,70,90,90,90,17, +84,70,70,90,90,90,17,17, +84,70,70,90,90,90,17,17, +80,70,70,90,90,90,17,17, +90,80,70,70,90,90,90,17, +90,80,70,70,90,90,90,17, +90,80,70,70,90,90,90,17 +}; + +unsigned char v[]= +{ +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80, +80,80,80,80,80,80,80,80 +}; + +unsigned char ym[256]; +unsigned char uvm[64]; +typedef struct +{ + unsigned char y; + unsigned char yt; + unsigned char u; + unsigned char ut; + unsigned char v; + unsigned char vt; + unsigned char use; +} COLOR_SEG_ELEMENT; + +/* +COLOR_SEG_ELEMENT segmentation[]= +{ + { 60,4,80,17,80,10, 1}, + { 40,4,15,10,80,10, 1}, +}; +*/ + +COLOR_SEG_ELEMENT segmentation[]= +{ + { 79,44,92,44, 237,60, 1}, +}; + +unsigned char pixel_mask(unsigned char y,unsigned char u,unsigned char v, + COLOR_SEG_ELEMENT sgm[], + int c) +{ + COLOR_SEG_ELEMENT *s=sgm; + unsigned char m =0; + int i; + for(i=0;i<c;i++,s++) + m |= ( abs(y-s->y)< s->yt && + abs(u-s->u)< s->ut && + abs(v-s->v)< s->vt ? 
255 : 0 ); + + return m; +} +int neighbors[256][8]; +int makeneighbors(void) +{ + int i,j; + for(i=0;i<256;i++) + { + int r=(i>>4),c=(i&15); + int ni=0; + for(j=0;j<8;j++) + neighbors[i][j]=i; + for(j=0;j<256;j++) + { + int nr=(j>>4),nc=(j&15); + if(abs(nr-r)<2&&abs(nc-c)<2) + neighbors[i][ni++]=j; + } + } + return 0; +} +void grow_ymask(unsigned char *ym) +{ + unsigned char nym[256]; + int i,j; + + for(i=0;i<256;i++) + { + nym[i]=ym[i]; + for(j=0;j<8;j++) + { + nym[i]|=ym[neighbors[i][j]]; + } + } + for(i=0;i<256;i++) + ym[i]=nym[i]; +} +void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v, + unsigned char *ym, unsigned char *uvm, + int yp, int uvp, + COLOR_SEG_ELEMENT sgm[], + int count) +{ + int r,c; + unsigned char *oym = ym; + + memset(ym,20,256); + for(r=0;r<8;r++,uvm+=8,u+=uvp,v+=uvp,y+=(yp+yp),ym+=32) + for(c=0;c<8;c++) + { + int y1=y[c<<1]; + int u1=u[c]; + int v1=v[c]; + int m = pixel_mask(y1,u1,v1,sgm,count); + uvm[c] = m; + ym[c<<1] = uvm[c];// = pixel_mask(y[c<<1],u[c],v[c],sgm,count); + ym[(c<<1)+1] = pixel_mask(y[1+(c<<1)],u[c],v[c],sgm,count); + ym[(c<<1)+16] = pixel_mask(y[yp+(c<<1)],u[c],v[c],sgm,count); + ym[(c<<1)+17] = pixel_mask(y[1+yp+(c<<1)],u[c],v[c],sgm,count); + } + grow_ymask(oym); +} + +int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp, + unsigned char *ym ) +{ + int i,j; + unsigned sad = 0; + for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16) + for(j=0;j<16;j++) + if(ym[j]) + sad+= abs(src[j]-dst[j]); + + return sad; +} + +int compare_masks(unsigned char *sym, unsigned char *ym) +{ + int i,j; + unsigned sad = 0; + for(i=0;i<16;i++,sym += 16,ym+=16) + for(j=0;j<16;j++) + sad+= (sym[j]!=ym[j]?1:0); + + return sad; +} +int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp, + unsigned char *ym) +{ + int i,j; + unsigned sad = 0; + for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16) + for(j=0;j<16;j++) + if(!ym[j]) + sad+= abs(src[j]-dst[j]); + + return sad; +} +int masked_motion_search( unsigned char *y, 
unsigned char *u, unsigned char *v, + int yp, int uvp, + unsigned char *dy, unsigned char *du, unsigned char *dv, + int dyp, int duvp, + COLOR_SEG_ELEMENT sgm[], + int count, + int *mi, + int *mj, + int *ui, + int *uj, + int *wm) +{ + int i,j; + + unsigned char ym[256]; + unsigned char uvm[64]; + unsigned char dym[256]; + unsigned char duvm[64]; + unsigned int e = 0 ; + int beste=256; + int bmi=-32,bmj=-32; + int bui=-32,buj=-32; + int beste1=256; + int bmi1=-32,bmj1=-32; + int bui1=-32,buj1=-32; + int obeste; + + // first try finding best mask and then unmasked + beste = 0xffffffff; + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count); + + e = unmasked_sad(y, yp, dyz+j, dyp, dym ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + //bui=0;buj=0; + // best mv masked destination + make_mb_mask(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2, + dym, duvm, dyp, duvp,sgm,count); + + obeste = beste; + beste = 0xffffffff; + + // find best masked + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = masked_sad(y, yp, dyz+j, dyp, dym ); + + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + beste1=beste+obeste; + bmi1=bmi;bmj1=bmj; + bui1=bui;buj1=buj; + + beste = 0xffffffff; + // source mask + make_mb_mask(y,u, v, ym, uvm, yp, uvp,sgm,count); + + // find best mask + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count); + + e = compare_masks(ym, dym); + + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + + + // best mv masked destination + 
make_mb_mask(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2, + dym, duvm, dyp, duvp,sgm,count); + + obeste = masked_sad(y, yp, dy+bmi*dyp+bmj, dyp, dym ); + + beste = 0xffffffff; + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = unmasked_sad(y, yp, dyz+j, dyp, dym ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + beste += obeste; + + + if(beste<beste1) + { + *mi = bmi; + *mj = bmj; + *ui = bui; + *uj = buj; + *wm = 1; + } + else + { + *mi = bmi1; + *mj = bmj1; + *ui = bui1; + *uj = buj1; + *wm = 0; + + } + return 0; +} + +int predict(unsigned char *src, int p, unsigned char *dst, int dp, + unsigned char *ym, unsigned char *prd ) +{ + int i,j; + for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16, prd+=16) + for(j=0;j<16;j++) + prd[j]=(ym[j] ? src[j]:dst[j]); + return 0; +} + +int fast_masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v, + int yp, int uvp, + unsigned char *dy, unsigned char *du, unsigned char *dv, + int dyp, int duvp, + COLOR_SEG_ELEMENT sgm[], + int count, + int *mi, + int *mj, + int *ui, + int *uj, + int *wm) +{ + int i,j; + + unsigned char ym[256]; + unsigned char ym2[256]; + unsigned char uvm[64]; + unsigned char dym2[256]; + unsigned char dym[256]; + unsigned char duvm[64]; + unsigned int e = 0 ; + int beste=256; + int bmi=-32,bmj=-32; + int bui=-32,buj=-32; + int beste1=256; + int bmi1=-32,bmj1=-32; + int bui1=-32,buj1=-32; + int obeste; + + // first try finding best mask and then unmasked + beste = 0xffffffff; + +#if 0 + for(i=0;i<16;i++) + { + unsigned char *dy = i*yp + y; + for(j=0;j<16;j++) + printf("%2x",dy[j]); + printf("\n"); + } + printf("\n"); + + for(i=-32;i<48;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<48;j++) + printf("%2x",dyz[j]); + printf("\n"); + } +#endif + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = 
i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + //bui=0;buj=0; + // best mv masked destination + + vp8_makemask_sse3(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2, + dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + obeste = beste; + beste = 0xffffffff; + + // find best masked + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = vp8_sad16x16_masked_wmt(y, yp, dyz+j, dyp, dym2 ); + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + beste1=beste+obeste; + bmi1=bmi;bmj1=bmj; + bui1=bui;buj1=buj; + + // source mask + vp8_makemask_sse3(y,u, v, + ym, yp, uvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(ym,ym2); + + // find best mask + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + unsigned char *duz = i/2*duvp + du; + unsigned char *dvz = i/2*duvp + dv; + for(j=-32;j<32;j++) + { + // 0,0 masked destination + vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + e = compare_masks(ym2, dym2); + + if(e<beste) + { + bmi=i; + bmj=j; + beste=e; + } + } + } + + vp8_makemask_sse3(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2, + dym, dyp, duvp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + + obeste = vp8_sad16x16_masked_wmt(y, yp, dy+bmi*dyp+bmj, dyp, dym2 ); + + beste = 0xffffffff; + + // find best unmasked mv + for(i=-32;i<32;i++) + { + unsigned char *dyz = i*dyp + dy; + for(j=-32;j<32;j++) + { + e = vp8_sad16x16_unmasked_wmt(y, 
yp, dyz+j, dyp, dym2 ); + + if(e<beste) + { + bui=i; + buj=j; + beste=e; + } + } + } + beste += obeste; + + if(beste<beste1) + { + *mi = bmi; + *mj = bmj; + *ui = bui; + *uj = buj; + *wm = 1; + } + else + { + *mi = bmi1; + *mj = bmj1; + *ui = bui1; + *uj = buj1; + *wm = 0; + beste=beste1; + + } + return beste; +} + +int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm, + int ymp, int uvmp, + unsigned char *yp, unsigned char *up, unsigned char *vp, + int ypp, int uvpp, + COLOR_SEG_ELEMENT sgm[], + int count, + int mi, + int mj, + int ui, + int uj, + int wm) +{ + int i,j; + unsigned char dym[256]; + unsigned char dym2[256]; + unsigned char duvm[64]; + unsigned char *yu=ym,*uu=um, *vu=vm; + + unsigned char *dym3=dym2; + + ym+=mi*ymp+mj; + um+=mi/2*uvmp+mj/2; + vm+=mi/2*uvmp+mj/2; + + yu+=ui*ymp+uj; + uu+=ui/2*uvmp+uj/2; + vu+=ui/2*uvmp+uj/2; + + // best mv masked destination + if(wm) + vp8_makemask_sse3(ym,um, vm, dym, ymp, uvmp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + else + vp8_makemask_sse3(yu,uu, vu, dym, ymp, uvmp, + sgm[0].y,sgm[0].u,sgm[0].v, + sgm[0].yt,sgm[0].ut,sgm[0].vt); + + vp8_growmaskmb_sse3(dym,dym2); + vp8_masked_predictor_wmt(ym,yu,ymp,yp,ypp,dym3); + vp8_uv_from_y_mask(dym3,duvm); + vp8_masked_predictor_uv_wmt(um,uu,uvmp,up,uvpp,duvm); + vp8_masked_predictor_uv_wmt(vm,vu,uvmp,vp,uvpp,duvm); + + return 0; +} + +unsigned char f0p[1280*720*3/2]; +unsigned char f1p[1280*720*3/2]; +unsigned char prd[1280*720*3/2]; +unsigned char msk[1280*720*3/2]; + + +int mainz(int argc, char *argv[]) { + + FILE *f=fopen(argv[1],"rb"); + FILE *g=fopen(argv[2],"wb"); + int w=atoi(argv[3]),h=atoi(argv[4]); + int y_stride=w,uv_stride=w/2; + int r,c; + unsigned char *f0=f0p,*f1=f1p,*t; + unsigned char ym[256],uvm[64]; + unsigned char ym2[256],uvm2[64]; + unsigned char ym3[256],uvm3[64]; + int a,b; + + COLOR_SEG_ELEMENT last={ 20,20,20,20, 230,20, 1},best; +#if 0 + makeneighbors(); + COLOR_SEG_ELEMENT segmentation[]= + { + { 
60,4,80,17,80,10, 1}, + { 40,4,15,10,80,10, 1}, + }; + make_mb_mask(y, u, v,ym2,uvm2,16,8,segmentation,1); + + vp8_makemask_sse3(y,u,v,ym, (int) 16,(int) 8, + (int) segmentation[0].y,(int) segmentation[0].u,(int) segmentation[0].v, + segmentation[0].yt,segmentation[0].ut,segmentation[0].vt); + + vp8_growmaskmb_sse3(ym,ym3); + + a = vp8_sad16x16_masked_wmt(str,16,sts,16,ym3); + b = vp8_sad16x16_unmasked_wmt(str,16,sts,16,ym3); + + vp8_masked_predictor_wmt(str,sts,16,ym,16,ym3); + + vp8_uv_from_y_mask(ym3,uvm3); + + return 4; +#endif + makeneighbors(); + + + memset(prd,128,w*h*3/2); + + fread(f0,w*h*3/2,1,f); + + while(!feof(f)) + { + unsigned char *ys=f1,*yd=f0,*yp=prd; + unsigned char *us=f1+w*h,*ud=f0+w*h,*up=prd+w*h; + unsigned char *vs=f1+w*h*5/4,*vd=f0+w*h*5/4,*vp=prd+w*h*5/4; + fread(f1,w*h*3/2,1,f); + + ys+=32*y_stride;yd+=32*y_stride;yp+=32*y_stride; + us+=16*uv_stride;ud+=16*uv_stride;up+=16*uv_stride; + vs+=16*uv_stride;vd+=16*uv_stride;vp+=16*uv_stride; + for(r=32;r<h-32;r+=16, + ys+=16*w,yd+=16*w,yp+=16*w, + us+=8*uv_stride,ud+=8*uv_stride,up+=8*uv_stride, + vs+=8*uv_stride,vd+=8*uv_stride,vp+=8*uv_stride) + { + for(c=32;c<w-32;c+=16) + { + int mi,mj,ui,uj,wm; + int bmi,bmj,bui,buj,bwm; + unsigned char ym[256]; + + if(vp8_sad16x16_sse3( ys+c,y_stride, yd+c,y_stride,0xffff) == 0) + bmi=bmj=bui=buj=bwm=0; + else + { + COLOR_SEG_ELEMENT cs[5]; + int j; + unsigned int beste=0xfffffff; + unsigned int bestj=0; + + // try color from last mb segmentation + cs[0] = last; + + // try color segs from 4 pixels in mb recon as segmentation + cs[1].y = yd[c + y_stride + 1];cs[1].u = ud[c/2 + uv_stride]; + cs[1].v = vd[c/2 + uv_stride]; + cs[1].yt = cs[1].ut = cs[1].vt = 20; + cs[2].y = yd[c + w + 14]; + cs[2].u = ud[c/2 + uv_stride+7]; + cs[2].v = vd[c/2 + uv_stride+7]; + cs[2].yt = cs[2].ut = cs[2].vt = 20; + cs[3].y = yd[c + w*14 + 1]; + cs[3].u = ud[c/2 + uv_stride*7]; + cs[3].v = vd[c/2 + uv_stride*7]; + cs[3].yt = cs[3].ut = cs[3].vt = 20; + cs[4].y = yd[c + w*14 + 
14]; + cs[4].u = ud[c/2 + uv_stride*7+7]; + cs[4].v = vd[c/2 + uv_stride*7+7]; + cs[4].yt = cs[4].ut = cs[4].vt = 20; + + for(j=0;j<5;j++) + { + int e; + + e = fast_masked_motion_search( + ys+c, us+c/2, vs+c/2, y_stride, uv_stride, + yd+c, ud+c/2, vd+c/2, y_stride, uv_stride, + &cs[j], 1, &mi,&mj,&ui,&uj,&wm); + + if(e<beste) + { + bmi=mi;bmj=mj;bui=ui;buj=uj,bwm=wm; + bestj=j; + beste=e; + } + } + best = cs[bestj]; + //best = segmentation[0]; + last = best; + } + predict_all(yd+c, ud+c/2, vd+c/2, w, uv_stride, + yp+c, up+c/2, vp+c/2, w, uv_stride, + &best, 1, bmi,bmj,bui,buj,bwm); + + } + } + fwrite(prd,w*h*3/2,1,g); + t=f0; + f0=f1; + f1=t; + + } + fclose(f); + fclose(g); + return; +} diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 4356b5133..246fa116d 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -44,6 +44,9 @@ typedef struct frame_contexts vp8_prob uv_mode_prob [VP8_UV_MODES-1]; vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_T8X8 + vp8_prob coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif MV_CONTEXT mvc[2]; MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. 
*/ } FRAME_CONTEXT; @@ -121,6 +124,7 @@ typedef struct VP8Common int mode_info_stride; /* profile settings */ + int experimental; int mb_no_coeff_skip; int no_lpf; int use_bilinear_mc_filter; diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c index e9833fe33..b8e6e2972 100644 --- a/vp8/common/quant_common.c +++ b/vp8/common/quant_common.c @@ -11,6 +11,8 @@ #include "quant_common.h" + +#if !CONFIG_EXTEND_QRANGE static const int dc_qlookup[QINDEX_RANGE] = { 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, @@ -34,7 +36,32 @@ static const int ac_qlookup[QINDEX_RANGE] = 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, }; +#else + +static const int dc_qlookup[QINDEX_RANGE] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 32, 34, 36, 38, 40, 42, + 44, 46, 49, 52, 55, 58, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, + 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, + 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 205, 210, 215, 220, + 225, 230, 235, 240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, + 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460, + 472, 484, 496, 508, 520, 532, 544, 556, 572, 588, 608, 628, 648, 668, 692, 720, +}; +static const int ac_qlookup[QINDEX_RANGE] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 51, + 54, 57, 60, 63, 66, 69, 72, 76, 80, 84, 88, 92, 96, 100, 105, 110, + 115, 120, 125, 130, 135, 140, 146, 152, 158, 164, 170, 176, 182, 188, 194, 200, + 206, 212, 218, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, + 330, 340, 350, 360, 370, 380, 392, 404, 416, 428, 440, 454, 468, 482, 496, 510, + 524, 540, 556, 572, 588, 604, 622, 640, 658, 676, 696, 716, 736, 756, 776, 796, + 820, 
844, 868, 892, 916, 944, 972, 1000, 1032, 1064, 1096, 1128, 1168, 1208, 1252, 1300 +}; +#endif int vp8_dc_quant(int QIndex, int Delta) { @@ -62,7 +89,11 @@ int vp8_dc2quant(int QIndex, int Delta) else if (QIndex < 0) QIndex = 0; +#if !CONFIG_EXTEND_QRANGE retval = dc_qlookup[ QIndex ] * 2; +#else + retval = dc_qlookup[ QIndex ]; +#endif return retval; } @@ -72,16 +103,13 @@ int vp8_dc_uv_quant(int QIndex, int Delta) QIndex = QIndex + Delta; - if (QIndex > 127) - QIndex = 127; + if (QIndex > 117) + QIndex = 117; else if (QIndex < 0) QIndex = 0; retval = dc_qlookup[ QIndex ]; - if (retval > 132) - retval = 132; - return retval; } @@ -108,12 +136,13 @@ int vp8_ac2quant(int QIndex, int Delta) QIndex = 127; else if (QIndex < 0) QIndex = 0; - +#if !CONFIG_EXTEND_QRANGE retval = (ac_qlookup[ QIndex ] * 155) / 100; - if (retval < 8) retval = 8; - +#else + retval = ac_qlookup[ QIndex ]; +#endif return retval; } int vp8_ac_uv_quant(int QIndex, int Delta) diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c index 18c514541..12e2e60c7 100644 --- a/vp8/common/reconintra4x4.c +++ b/vp8/common/reconintra4x4.c @@ -81,10 +81,10 @@ void vp8_intra4x4_predict(BLOCKD *x, { unsigned int ap[4]; - ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2; - ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2; - ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2; - ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2; + ap[0] = Above[0]; + ap[1] = Above[1]; + ap[2] = Above[2]; + ap[3] = Above[3]; for (r = 0; r < 4; r++) { @@ -105,10 +105,10 @@ void vp8_intra4x4_predict(BLOCKD *x, { unsigned int lp[4]; - lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2; - lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2; - lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2; - lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2; + lp[0] = Left[0]; + lp[1] = Left[1]; + lp[2] = Left[2]; + lp[3] = Left[3]; for (r = 0; r < 4; r++) { diff --git a/vp8/common/x86/mask_sse3.asm 
b/vp8/common/x86/mask_sse3.asm new file mode 100644 index 000000000..0d90cfa86 --- /dev/null +++ b/vp8/common/x86/mask_sse3.asm @@ -0,0 +1,484 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void int vp8_makemask_sse3( +; unsigned char *y, +; unsigned char *u, +; unsigned char *v, +; unsigned char *ym, +; unsigned char *uvm, +; int yp, +; int uvp, +; int ys, +; int us, +; int vs, +; int yt, +; int ut, +; int vt) +global sym(vp8_makemask_sse3) +sym(vp8_makemask_sse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 14 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;y + mov rdi, arg(1) ;u + mov rcx, arg(2) ;v + mov rax, arg(3) ;ym + movsxd rbx, dword arg(4) ;yp + movsxd rdx, dword arg(5) ;uvp + + pxor xmm0,xmm0 + + ;make 16 copies of the center y value + movd xmm1, arg(6) + pshufb xmm1, xmm0 + + ; make 16 copies of the center u value + movd xmm2, arg(7) + pshufb xmm2, xmm0 + + ; make 16 copies of the center v value + movd xmm3, arg(8) + pshufb xmm3, xmm0 + unpcklpd xmm2, xmm3 + + ;make 16 copies of the y tolerance + movd xmm3, arg(9) + pshufb xmm3, xmm0 + + ;make 16 copies of the u tolerance + movd xmm4, arg(10) + pshufb xmm4, xmm0 + + ;make 16 copies of the v tolerance + movd xmm5, arg(11) + pshufb xmm5, xmm0 + unpckhpd xmm4, xmm5 + + mov r8,8 + +NextPairOfRows: + + ;grab the y source values + movdqu xmm0, [rsi] + + ;compute abs difference between source and y target + movdqa xmm6, xmm1 + movdqa xmm7, xmm0 + psubusb xmm0, xmm1 + psubusb xmm6, xmm7 + por xmm0, xmm6 + + ;compute abs difference between + movdqa xmm6, xmm3 + pcmpgtb xmm6, xmm0 + + ;grab the y 
source values + add rsi, rbx + movdqu xmm0, [rsi] + + ;compute abs difference between source and y target + movdqa xmm11, xmm1 + movdqa xmm7, xmm0 + psubusb xmm0, xmm1 + psubusb xmm11, xmm7 + por xmm0, xmm11 + + ;compute abs difference between + movdqa xmm11, xmm3 + pcmpgtb xmm11, xmm0 + + + ;grab the u and v source values + movdqu xmm7, [rdi] + movdqu xmm8, [rcx] + unpcklpd xmm7, xmm8 + + ;compute abs difference between source and uv targets + movdqa xmm9, xmm2 + movdqa xmm10, xmm7 + psubusb xmm7, xmm2 + psubusb xmm9, xmm10 + por xmm7, xmm9 + + ;check whether the number is < tolerance + movdqa xmm0, xmm4 + pcmpgtb xmm0, xmm7 + + ;double u and v masks + movdqa xmm8, xmm0 + punpckhbw xmm0, xmm0 + punpcklbw xmm8, xmm8 + + ;mask row 0 and output + pand xmm6, xmm8 + pand xmm6, xmm0 + movdqa [rax],xmm6 + + ;mask row 1 and output + pand xmm11, xmm8 + pand xmm11, xmm0 + movdqa [rax+16],xmm11 + + + ; to the next row or set of rows + add rsi, rbx + add rdi, rdx + add rcx, rdx + add rax,32 + dec r8 + jnz NextPairOfRows + + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;GROW_HORIZ (register for result, source register or mem local) +; takes source and shifts left and ors with source +; then shifts right and ors with source +%macro GROW_HORIZ 2 + movdqa %1, %2 + movdqa xmm14, %1 + movdqa xmm15, %1 + pslldq xmm14, 1 + psrldq xmm15, 1 + por %1,xmm14 + por %1,xmm15 +%endmacro +;GROW_VERT (result, center row, above row, below row) +%macro GROW_VERT 4 + movdqa %1,%2 + por %1,%3 + por %1,%4 +%endmacro + +;GROW_NEXTLINE (new line to grow, new source, line to write) +%macro GROW_NEXTLINE 3 + GROW_HORIZ %1, %2 + GROW_VERT xmm3, xmm0, xmm1, xmm2 + movdqa %3,xmm3 +%endmacro + + +;void int vp8_growmaskmb_sse3( +; unsigned char *om, +; unsigned char *nm, +global sym(vp8_growmaskmb_sse3) +sym(vp8_growmaskmb_sse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 2 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src + mov rdi, arg(1) ;rst + + GROW_HORIZ 
xmm0, [rsi] + GROW_HORIZ xmm1, [rsi+16] + GROW_HORIZ xmm2, [rsi+32] + + GROW_VERT xmm3, xmm0, xmm1, xmm2 + por xmm0,xmm1 + movdqa [rdi], xmm0 + movdqa [rdi+16],xmm3 + + GROW_NEXTLINE xmm0,[rsi+48],[rdi+32] + GROW_NEXTLINE xmm1,[rsi+64],[rdi+48] + GROW_NEXTLINE xmm2,[rsi+80],[rdi+64] + GROW_NEXTLINE xmm0,[rsi+96],[rdi+80] + GROW_NEXTLINE xmm1,[rsi+112],[rdi+96] + GROW_NEXTLINE xmm2,[rsi+128],[rdi+112] + GROW_NEXTLINE xmm0,[rsi+144],[rdi+128] + GROW_NEXTLINE xmm1,[rsi+160],[rdi+144] + GROW_NEXTLINE xmm2,[rsi+176],[rdi+160] + GROW_NEXTLINE xmm0,[rsi+192],[rdi+176] + GROW_NEXTLINE xmm1,[rsi+208],[rdi+192] + GROW_NEXTLINE xmm2,[rsi+224],[rdi+208] + GROW_NEXTLINE xmm0,[rsi+240],[rdi+224] + + por xmm0,xmm2 + movdqa [rdi+240], xmm0 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + + +;unsigned int vp8_sad16x16_masked_wmt( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *ref_ptr, +; int ref_stride, +; unsigned char *mask) +global sym(vp8_sad16x16_masked_wmt) +sym(vp8_sad16x16_masked_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + mov rbx, arg(4) ;mask + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + mov rcx, 16 + + pxor xmm3, xmm3 + +NextSadRow: + movdqu xmm0, [rsi] + movdqu xmm1, [rdi] + movdqu xmm2, [rbx] + pand xmm0, xmm2 + pand xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm3, xmm0 + + add rsi, rax + add rdi, rdx + add rbx, 16 + + dec rcx + jnz NextSadRow + + movdqa xmm4 , xmm3 + psrldq xmm4, 8 + paddw xmm3, xmm4 + movq rax, xmm3 + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;unsigned int vp8_sad16x16_unmasked_wmt( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *ref_ptr, +; int ref_stride, +; unsigned char *mask) +global sym(vp8_sad16x16_unmasked_wmt) +sym(vp8_sad16x16_unmasked_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + 
push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + mov rbx, arg(4) ;mask + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + mov rcx, 16 + + pxor xmm3, xmm3 + +next_vp8_sad16x16_unmasked_wmt: + movdqu xmm0, [rsi] + movdqu xmm1, [rdi] + movdqu xmm2, [rbx] + por xmm0, xmm2 + por xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm3, xmm0 + + add rsi, rax + add rdi, rdx + add rbx, 16 + + dec rcx + jnz next_vp8_sad16x16_unmasked_wmt + + movdqa xmm4 , xmm3 + psrldq xmm4, 8 + paddw xmm3, xmm4 + movq rax, xmm3 + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;unsigned int vp8_masked_predictor_wmt( +; unsigned char *masked, +; unsigned char *unmasked, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; unsigned char *mask) +global sym(vp8_masked_predictor_wmt) +sym(vp8_masked_predictor_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;ref_ptr + + mov rbx, arg(5) ;mask + movsxd rax, dword ptr arg(2) ;src_stride + mov r11, arg(3) ; destination + movsxd rdx, dword ptr arg(4) ;dst_stride + + mov rcx, 16 + + pxor xmm3, xmm3 + +next_vp8_masked_predictor_wmt: + movdqu xmm0, [rsi] + movdqu xmm1, [rdi] + movdqu xmm2, [rbx] + + pand xmm0, xmm2 + pandn xmm2, xmm1 + por xmm0, xmm2 + movdqu [r11], xmm0 + + add r11, rdx + add rsi, rax + add rdi, rdx + add rbx, 16 + + dec rcx + jnz next_vp8_masked_predictor_wmt + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;unsigned int vp8_masked_predictor_uv_wmt( +; unsigned char *masked, +; unsigned char *unmasked, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; unsigned char *mask) +global sym(vp8_masked_predictor_uv_wmt) +sym(vp8_masked_predictor_uv_wmt): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;ref_ptr + + mov rbx, arg(5) 
;mask + movsxd rax, dword ptr arg(2) ;src_stride + mov r11, arg(3) ; destination + movsxd rdx, dword ptr arg(4) ;dst_stride + + mov rcx, 8 + + pxor xmm3, xmm3 + +next_vp8_masked_predictor_uv_wmt: + movq xmm0, [rsi] + movq xmm1, [rdi] + movq xmm2, [rbx] + + pand xmm0, xmm2 + pandn xmm2, xmm1 + por xmm0, xmm2 + movq [r11], xmm0 + + add r11, rdx + add rsi, rax + add rdi, rax + add rbx, 8 + + dec rcx + jnz next_vp8_masked_predictor_uv_wmt + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;unsigned int vp8_uv_from_y_mask( +; unsigned char *ymask, +; unsigned char *uvmask) +global sym(vp8_uv_from_y_mask) +sym(vp8_uv_from_y_mask): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;dst_ptr + + + mov rcx, 8 + + pxor xmm3, xmm3 + +next_p8_uv_from_y_mask: + movdqu xmm0, [rsi] + pshufb xmm0, [shuf1b] ;[GLOBAL(shuf1b)] + movq [rdi],xmm0 + add rdi, 8 + add rsi,32 + + dec rcx + jnz next_p8_uv_from_y_mask + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +shuf1b: + db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0 + |