summaryrefslogtreecommitdiff
path: root/vp8/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common')
-rw-r--r--vp8/common/alloccommon.c18
-rw-r--r--vp8/common/blockd.h20
-rw-r--r--vp8/common/coefupdateprobs.h177
-rw-r--r--vp8/common/common.h3
-rw-r--r--vp8/common/defaultcoefcounts.c179
-rw-r--r--vp8/common/defaultcoefcounts.h7
-rw-r--r--vp8/common/entropy.c67
-rw-r--r--vp8/common/entropy.h21
-rw-r--r--vp8/common/generic/systemdependent.c18
-rw-r--r--vp8/common/idct.h36
-rw-r--r--vp8/common/idctllm.c346
-rw-r--r--vp8/common/invtrans.c91
-rw-r--r--vp8/common/invtrans.h7
-rw-r--r--vp8/common/maskingmv.c855
-rw-r--r--vp8/common/onyxc_int.h4
-rw-r--r--vp8/common/quant_common.c45
-rw-r--r--vp8/common/reconintra4x4.c16
-rw-r--r--vp8/common/x86/mask_sse3.asm484
18 files changed, 2350 insertions, 44 deletions
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 376707ec6..869f6e20f 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -148,7 +148,16 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
}
void vp8_setup_version(VP8_COMMON *cm)
{
- switch (cm->version)
+ if (cm->version & 0x4)
+ {
+ if (!CONFIG_EXPERIMENTAL)
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Bitstream was created by an experimental "
+ "encoder");
+ cm->experimental = 1;
+ }
+
+ switch (cm->version & 0x3)
{
case 0:
cm->no_lpf = 0;
@@ -174,13 +183,6 @@ void vp8_setup_version(VP8_COMMON *cm)
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 1;
break;
- default:
- /*4,5,6,7 are reserved for future use*/
- cm->no_lpf = 0;
- cm->filter_type = NORMAL_LOOPFILTER;
- cm->use_bilinear_mc_filter = 0;
- cm->full_pixel = 0;
- break;
}
}
void vp8_create_common(VP8_COMMON *oci)
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 84c52773c..d2ef84256 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -20,6 +20,7 @@ void vpx_log(const char *format, ...);
#include "treecoder.h"
#include "subpixel.h"
#include "vpx_ports/mem.h"
+#include "common.h"
#define TRUE 1
#define FALSE 0
@@ -29,6 +30,7 @@ void vpx_log(const char *format, ...);
#define DCPREDCNTTHRESH 3
#define MB_FEATURE_TREE_PROBS 3
+
#define MAX_MB_SEGMENTS 4
#define MAX_REF_LF_DELTAS 4
@@ -64,6 +66,10 @@ extern const unsigned char vp8_block2above[25];
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
Dest = ((A)!=0) + ((B)!=0);
+#if CONFIG_T8X8
+#define VP8_COMBINEENTROPYCONTEXTS_8x8( Dest, A1, B1, A2, B2) \
+ Dest = ((A1)!=0 || (A2)!=0) + ((B1)!=0 || (B2)!=0);
+#endif
typedef enum
{
@@ -157,7 +163,9 @@ typedef struct
MB_PREDICTION_MODE mode, uv_mode;
MV_REFERENCE_FRAME ref_frame;
int_mv mv;
-
+#if CONFIG_SEGMENTATION
+ unsigned char segment_flag;
+#endif
unsigned char partitioning;
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
unsigned char need_to_clamp_mvs;
@@ -232,9 +240,13 @@ typedef struct
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
/* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
- vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
-
- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
+#if CONFIG_SEGMENTATION
+ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS + 3]; // Probability Tree used to code Segment number
+ unsigned char temporal_update;
+#else
+ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
+#endif
+ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 9e194dc9a..6fe5fcc6f 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -183,3 +183,180 @@ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTE
},
},
};
+#if CONFIG_T8X8
+const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] =
+{
+ {
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {219, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {239, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 209, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {239, 219, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {229, 209, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 193, 209, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {229, 198, 239, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 204, 204, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {219, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 198, 204, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {209, 193, 234, 249, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 249, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 214, 214, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {173, 193, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ },
+ {
+ {
+ {255, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {224, 224, 219, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {229, 239, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 234, 224, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {224, 234, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {229, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {224, 255, 239, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ },
+ {
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {224, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {234, 183, 214, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 193, 229, 255, 249, 255, 255, 255, 255, 255, 255, },
+ {229, 214, 234, 249, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 249, 255, 255, 249, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {229, 219, 249, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 249, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 193, 224, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {229, 204, 234, 249, 249, 255, 255, 255, 255, 255, 255, },
+ {255, 249, 249, 255, 244, 249, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 178, 224, 255, 249, 255, 255, 255, 255, 255, 255, },
+ {234, 224, 234, 249, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 183, 229, 255, 249, 255, 255, 255, 255, 255, 255, },
+ {234, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 249, 249, 255, 249, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 193, 224, 249, 255, 244, 255, 255, 255, 255, 255, },
+ {219, 224, 229, 255, 255, 249, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 249, 249, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 193, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {224, 224, 239, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {249, 244, 249, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ },
+ {
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 239, 234, 244, 239, 244, 249, 255, 255, 255, 255, },
+ },
+ {
+ {255, 249, 239, 239, 244, 255, 255, 255, 255, 255, 255, },
+ {255, 249, 244, 255, 249, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 239, 255, 255, 249, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 244, 239, 239, 244, 255, 255, 255, 255, 255, 255, },
+ {255, 234, 239, 234, 249, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 229, 239, 234, 249, 244, 255, 255, 255, 255, },
+ },
+ {
+ {255, 239, 229, 239, 234, 234, 255, 255, 255, 255, 255, },
+ {255, 239, 234, 229, 244, 239, 255, 234, 255, 255, 255, },
+ {255, 229, 209, 229, 239, 234, 244, 229, 255, 249, 255, },
+ },
+ {
+ {255, 239, 234, 229, 244, 249, 255, 249, 255, 255, 255, },
+ {255, 234, 229, 244, 234, 249, 255, 249, 255, 255, 255, },
+ {255, 229, 239, 229, 249, 255, 255, 244, 255, 255, 255, },
+ },
+ {
+ {255, 239, 234, 239, 234, 239, 255, 249, 255, 255, 255, },
+ {255, 229, 234, 239, 239, 239, 255, 244, 255, 255, 255, },
+ {255, 229, 234, 239, 239, 244, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 219, 224, 229, 229, 234, 239, 224, 255, 255, 255, },
+ {255, 229, 229, 224, 234, 229, 239, 239, 255, 255, 255, },
+ {255, 229, 224, 239, 234, 239, 224, 224, 255, 249, 255, },
+ },
+ {
+ {255, 234, 229, 244, 229, 229, 255, 214, 255, 255, 255, },
+ {255, 239, 234, 239, 214, 239, 255, 209, 255, 255, 255, },
+ {249, 239, 219, 209, 219, 224, 239, 204, 255, 255, 255, },
+ },
+ },
+
+};
+#endif \ No newline at end of file
diff --git a/vp8/common/common.h b/vp8/common/common.h
index 9a93da991..999f79f2f 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -13,7 +13,7 @@
#define common_h 1
#include <assert.h>
-
+#include "vpx_config.h"
/* Interface header for common constant data structures and lookup tables */
#include "vpx_mem/vpx_mem.h"
@@ -38,5 +38,4 @@
#define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest));
-
#endif /* common_h */
diff --git a/vp8/common/defaultcoefcounts.c b/vp8/common/defaultcoefcounts.c
index b0e2e702a..34d1fb1d5 100644
--- a/vp8/common/defaultcoefcounts.c
+++ b/vp8/common/defaultcoefcounts.c
@@ -223,3 +223,182 @@ const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
},
},
};
+
+
+#if CONFIG_T8X8
+const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [MAX_ENTROPY_TOKENS] =
+{
+
+ { /* block Type 0 */
+ { /* Coeff Band 0 */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 1 */
+ { 21041, 13314, 3420, 592, 117, 0, 0, 0, 0, 0, 0, 11783},
+ { 48236, 6918, 586, 153, 0, 0, 0, 0, 0, 0, 0, 23137},
+ { 676112, 106685, 24701, 6003, 1426, 429, 165, 0, 0, 0, 0, 28910}
+ },
+ { /* Coeff Band 2 */
+ { 660107, 75227, 8451, 1345, 259, 0, 0, 0, 0, 0, 0, 0},
+ { 79164, 36835, 6865, 1185, 246, 47, 0, 0, 0, 0, 0, 2575},
+ { 19469, 14330, 3070, 579, 94, 6, 0, 0, 0, 0, 0, 44}
+ },
+ { /* Coeff Band 3 */
+ { 1978004, 235343, 28485, 3242, 271, 0, 0, 0, 0, 0, 0, 0},
+ { 228684, 106736, 21431, 2842, 272, 46, 0, 0, 0, 0, 0, 9266},
+ { 32470, 27496, 6852, 1386, 45, 93, 0, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 4 */
+ { 1911212, 224613, 49653, 13748, 2541, 568, 48, 0, 0, 0, 0, 0},
+ { 196670, 103472, 44473, 11490, 2432, 977, 72, 0, 0, 0, 0, 9447},
+ { 37876, 40417, 19142, 6069, 1799, 727, 51, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 5 */
+ { 3813399, 437714, 64387, 11312, 695, 219, 0, 0, 0, 0, 0, 0},
+ { 438288, 215917, 61905, 10194, 674, 107, 0, 0, 0, 0, 0, 17808},
+ { 99139, 93643, 30054, 5758, 802, 171, 0, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 6 */
+ { 12259383, 1625505, 234927, 46306, 8417, 1456, 151, 0, 0, 0, 0, 0},
+ { 1518161, 734287, 204240, 44228, 9462, 2240, 65, 0, 0, 0, 0, 107630},
+ { 292470, 258894, 94925, 25864, 6662, 2055, 170, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 7 */
+ { 9791308, 2118949, 169439, 16735, 1122, 0, 0, 0, 0, 0, 0, 0},
+ { 1500281, 752410, 123259, 13065, 1168, 47, 0, 0, 0, 0, 0, 707182},
+ { 193067, 142638, 31018, 4719, 516, 138, 0, 0, 0, 0, 0, 12439}
+ }
+ },
+ { /* block Type 1 */
+ { /* Coeff Band 0 */
+ { 16925, 10553, 852, 16, 63, 87, 47, 0, 0, 0, 0, 31232},
+ { 39777, 26839, 6822, 1908, 678, 456, 227, 168, 35, 0, 0, 46825},
+ { 17300, 16666, 4168, 1209, 492, 154, 118, 207, 0, 0, 0, 19608}
+ },
+ { /* Coeff Band 1 */
+ { 35882, 31722, 4625, 1270, 266, 237, 0, 0, 0, 0, 0, 0},
+ { 15426, 13894, 4482, 1305, 281, 43, 0, 0, 0, 0, 0, 18627},
+ { 3900, 6552, 3472, 1723, 746, 366, 115, 35, 0, 0, 0, 798}
+ },
+ { /* Coeff Band 2 */
+ { 21998, 29132, 3353, 679, 46, 0, 0, 0, 0, 0, 0, 0},
+ { 9098, 15767, 3794, 792, 268, 47, 0, 0, 0, 0, 0, 22402},
+ { 4007, 8472, 2844, 687, 217, 0, 0, 0, 0, 0, 0, 2739}
+ },
+ { /* Coeff Band 3 */
+ { 0, 31414, 2911, 682, 96, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 16515, 4425, 938, 124, 0, 0, 0, 0, 0, 0, 31369},
+ { 0, 4833, 2787, 1213, 150, 0, 0, 0, 0, 0, 0, 3744}
+ },
+ { /* Coeff Band 4 */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 5 */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 6 */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52762},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13326}
+ },
+ { /* Coeff Band 7 */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+ }
+ },
+ { /* block Type 2 */
+ { /* Coeff Band 0 */
+ { 4444, 1614, 120, 48, 0, 48, 0, 0, 0, 0, 0, 278},
+ { 192436, 103730, 24494, 9845, 4122, 1193, 102, 0, 0, 0, 0, 2577},
+ { 3473446, 2308716, 815510, 370374, 167797, 92152, 12073, 86, 0, 0, 0, 6801}
+ },
+ { /* Coeff Band 1 */
+ { 2150616, 1136388, 250011, 86888, 31434, 13746, 1243, 0, 0, 0, 0, 0},
+ { 1179945, 799802, 266012, 106787, 40809, 16486, 1546, 0, 0, 0, 0, 2673},
+ { 465128, 504130, 286989, 146259, 62380, 30192, 2866, 20, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 2 */
+ { 2157762, 1177519, 282665, 108499, 43389, 23224, 2597, 34, 0, 0, 0, 0},
+ { 1135685, 813705, 278079, 123255, 53935, 29492, 3152, 39, 0, 0, 0, 2978},
+ { 391894, 428037, 264216, 144306, 69326, 40281, 5541, 29, 0, 0, 0, 38}
+ },
+ { /* Coeff Band 3 */
+ { 6669109, 3468471, 782161, 288484, 115500, 51083, 4943, 41, 0, 0, 0, 0},
+ { 3454493, 2361636, 809524, 337663, 141343, 65036, 6361, 0, 0, 0, 0, 8730},
+ { 1231825, 1359522, 824686, 420784, 185517, 98731, 10973, 72, 0, 0, 0, 20}
+ },
+ { /* Coeff Band 4 */
+ { 7606203, 3452846, 659856, 191703, 49335, 14336, 450, 0, 0, 0, 0, 0},
+ { 3806506, 2379332, 691697, 224938, 61966, 18324, 766, 0, 0, 0, 0, 8193},
+ { 1270110, 1283728, 628775, 243378, 72617, 24897, 1087, 0, 0, 0, 0, 0}
+ },
+ { /* Coeff Band 5 */
+ { 15314169, 7436809, 1579928, 515790, 167453, 58305, 3502, 19, 0, 0, 0, 0},
+ { 7021286, 4667922, 1545706, 574463, 191793, 68748, 4048, 1, 0, 0, 0, 17222},
+ { 2011989, 2145878, 1185336, 534879, 195719, 79103, 5343, 4, 0, 0, 0, 37}
+ },
+ { /* Coeff Band 6 */
+ { 63458382, 25384462, 4208045, 1091050, 299011, 95242, 5238, 33, 0, 0, 0, 0},
+ { 25638401, 14694085, 3945978, 1195420, 344813, 117355, 6703, 0, 0, 0, 0, 216811},
+ { 5988177, 5824044, 2754413, 1077350, 370739, 139710, 9693, 38, 0, 0, 0, 1835}
+ },
+ { /* Coeff Band 7 */
+ { 74998348, 29342158, 2955001, 452912, 69631, 9516, 37, 0, 0, 0, 0, 0},
+ { 24762356, 13281085, 2409883, 436787, 68948, 10658, 36, 0, 0, 0, 0, 6614989},
+ { 3882867, 3224489, 1052289, 252890, 46967, 8548, 154, 0, 0, 0, 0, 194354}
+ }
+ },
+ { /* block Type 3 */
+ { /* Coeff Band 0 */
+ { 10583, 12059, 3155, 1041, 248, 175, 24, 2, 0, 0, 0, 5717},
+ { 42461, 41782, 13553, 4966, 1352, 855, 89, 0, 0, 0, 0, 15000},
+ { 4691125, 5045589, 2673566, 1089317, 378161, 160268, 18252, 813, 69, 13, 0, 49}
+ },
+ { /* Coeff Band 1 */
+ { 1535203, 1685686, 924565, 390329, 141709, 60523, 5983, 171, 0, 0, 0, 0},
+ { 1594021, 1793276, 1016078, 441332, 164159, 70843, 8098, 311, 0, 0, 0, 11312},
+ { 1225223, 1430184, 888492, 460713, 203286, 115149, 22061, 804, 7, 0, 0, 0}
+ },
+ { /* Coeff Band 2 */
+ { 1522386, 1590366, 799910, 303691, 96625, 37608, 3637, 180, 33, 11, 0, 0},
+ { 1682184, 1793869, 913649, 353520, 113674, 46309, 4736, 221, 18, 3, 0, 963},
+ { 1574580, 1740474, 954392, 417994, 151400, 67091, 8000, 536, 73, 10, 0, 63}
+ },
+ { /* Coeff Band 3 */
+ { 4963672, 5197790, 2585383, 982161, 313333, 118498, 16014, 536, 62, 0, 0, 0},
+ { 5223913, 5569803, 2845858, 1107384, 364949, 147841, 18296, 658, 11, 11, 0, 1866},
+ { 4042207, 4548894, 2608767, 1154993, 446290, 221295, 41054, 2438, 124, 20, 0, 0}
+ },
+ { /* Coeff Band 4 */
+ { 3857216, 4431325, 2670447, 1330169, 553301, 286825, 46763, 1917, 0, 0, 0, 0},
+ { 4226215, 4963701, 3046198, 1523923, 644670, 355519, 58792, 2525, 0, 0, 0, 1298},
+ { 3831873, 4580350, 3018580, 1660048, 797298, 502983, 123906, 7172, 16, 0, 0, 0}
+ },
+ { /* Coeff Band 5 */
+ { 8524543, 9285149, 4979435, 2039330, 683458, 266032, 22628, 270, 0, 0, 0, 0},
+ { 9432163, 10428088, 5715661, 2385738, 838389, 326264, 29981, 361, 0, 0, 0, 884},
+ { 9039066, 10368964, 6136765, 2862030, 1098269, 511668, 63105, 945, 14, 0, 0, 0}
+ },
+ { /* Coeff Band 6 */
+ { 33222872, 34748297, 17701695, 7214933, 2602336, 1191859, 187873, 12667, 390, 3, 0, 0},
+ { 34765051, 37140719, 19525578, 8268934, 3085012, 1473864, 246743, 15258, 736, 3, 0, 8403},
+ { 28591289, 32252393, 19037068, 9213729, 4020653, 2372354, 586420, 67428, 3920, 92, 7, 3}
+ },
+ { /* Coeff Band 7 */
+ { 68604786, 60777665, 19712887, 5656955, 1520443, 507166, 51829, 2466, 10, 0, 0, 0},
+ { 55447403, 51682540, 19008774, 5928582, 1706884, 595531, 65998, 3661, 101, 0, 0, 8468343},
+ { 28321970, 29149398, 13565882, 5258675, 1868588, 898041, 192023, 21497, 672, 17, 0, 1884921}
+ }
+ }
+ };
+#endif \ No newline at end of file
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index 7a1e28b7b..293e74269 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -18,4 +18,9 @@ extern const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
[PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS];
-#endif //__DEFAULTCOEFCOUNTS_H
+extern const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [MAX_ENTROPY_TOKENS];
+
+#endif \ No newline at end of file
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index 0eee60ec8..ca37aab22 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -59,6 +59,24 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
9, 12, 13, 10,
7, 11, 14, 15,
};
+#if CONFIG_T8X8
+DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5,
+ 5, 3, 6, 3, 5, 4, 6, 6,
+ 6, 5, 5, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7
+};
+DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) =
+{
+ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};
+#endif
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
{
@@ -69,6 +87,9 @@ DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
};
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
+#if CONFIG_T8X8
+DECLARE_ALIGNED(64, short, vp8_default_zig_zag_mask_8x8[64]);//int64_t
+#endif
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
@@ -99,9 +120,15 @@ static const Prob Pcat2[] = { 165, 145};
static const Prob Pcat3[] = { 173, 148, 140};
static const Prob Pcat4[] = { 176, 155, 140, 135};
static const Prob Pcat5[] = { 180, 157, 141, 134, 130};
+#if CONFIG_EXTEND_QRANGE
+static const Prob Pcat6[] =
+{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129};
+#else
static const Prob Pcat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
+#endif
+
static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[22];
void vp8_init_scan_order_mask()
@@ -112,7 +139,12 @@ void vp8_init_scan_order_mask()
{
vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i;
}
-
+#if CONFIG_T8X8
+ for (i = 0; i < 64; i++)
+ {
+ vp8_default_zig_zag_mask_8x8[vp8_default_zig_zag1d_8x8[i]] = 1 << i;
+ }
+#endif
}
static void init_bit_tree(vp8_tree_index *p, int n)
@@ -135,7 +167,11 @@ static void init_bit_trees()
init_bit_tree(cat3, 3);
init_bit_tree(cat4, 4);
init_bit_tree(cat5, 5);
+#if CONFIG_EXTEND_QRANGE
+ init_bit_tree(cat6, 13);
+#else
init_bit_tree(cat6, 11);
+#endif
}
vp8_extra_bit_struct vp8_extra_bits[12] =
@@ -150,7 +186,11 @@ vp8_extra_bit_struct vp8_extra_bits[12] =
{ cat3, Pcat3, 3, 11},
{ cat4, Pcat4, 4, 19},
{ cat5, Pcat5, 5, 35},
+#if CONFIG_EXTEND_QRANGE
+ { cat6, Pcat6, 13, 67},
+#else
{ cat6, Pcat6, 11, 67},
+#endif
{ 0, 0, 0, 0}
};
#include "defaultcoefcounts.h"
@@ -183,6 +223,31 @@ void vp8_default_coef_probs(VP8_COMMON *pc)
while (++i < COEF_BANDS);
}
while (++h < BLOCK_TYPES);
+#if CONFIG_T8X8
+ h = 0;
+ do
+ {
+ int i = 0;
+
+ do
+ {
+ int k = 0;
+
+ do
+ {
+ unsigned int branch_ct [ENTROPY_NODES] [2];
+ vp8_tree_probs_from_distribution(
+ MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
+ pc->fc.coef_probs_8x8 [h][i][k], branch_ct, vp8_default_coef_counts_8x8 [h][i][k],
+ 256, 1);
+
+ }
+ while (++k < PREV_COEF_CONTEXTS);
+ }
+ while (++i < COEF_BANDS);
+ }
+ while (++h < BLOCK_TYPES);
+#endif
}
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 66d282b61..d3e841c3e 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -14,7 +14,7 @@
#include "treecoder.h"
#include "blockd.h"
-
+#include "common.h"
/* Coefficient token alphabet */
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
@@ -27,7 +27,7 @@
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
-#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
+#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 13+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define MAX_ENTROPY_TOKENS 12
@@ -50,8 +50,11 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
#define PROB_UPDATE_BASELINE_COST 7
#define MAX_PROB 255
+#if CONFIG_EXTEND_QRANGE
+#define DCT_MAX_VALUE 8192
+#else
#define DCT_MAX_VALUE 2048
-
+#endif
/* Coefficients are predicted via a 3-dimensional probability table. */
@@ -64,6 +67,9 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
#define COEF_BANDS 8
extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
+#if CONFIG_T8X8
+extern DECLARE_ALIGNED(64, const unsigned char, vp8_coef_bands_8x8[64]);
+#endif
/* Inside dimension is 3-valued measure of nearby complexity, that is,
the extent to which nearby coefficients are nonzero. For the first
@@ -87,14 +93,19 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-
+#if CONFIG_T8X8
+extern const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+#endif
struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);
-
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
extern short vp8_default_zig_zag_mask[16];
+#if CONFIG_T8X8
+extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
+extern short vp8_default_zig_zag_mask_8x8[64];//int64_t
+#endif
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
void vp8_coef_tree_initialize(void);
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c61629407..1acc0157b 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -75,7 +75,13 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
-
+#if CONFIG_T8X8
+ rtcd->idct.idct8 = vp8_short_idct8x8_c;
+ rtcd->idct.idct8_1 = vp8_short_idct8x8_1_c;
+ rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c;
+ rtcd->idct.ihaar2 = vp8_short_ihaar2x2_c;
+ rtcd->idct.ihaar2_1 = vp8_short_ihaar2x2_1_c;
+#endif
rtcd->recon.copy16x16 = vp8_copy_mem16x16_c;
rtcd->recon.copy8x8 = vp8_copy_mem8x8_c;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_c;
@@ -129,9 +135,19 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_arch_x86_common_init(ctx);
#endif
+
#if ARCH_ARM
vp8_arch_arm_common_init(ctx);
#endif
+#if CONFIG_EXTEND_QRANGE
+ rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
+ rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
+ rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
+ rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
+ rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
+
+#endif
+
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index f5fd94dfd..d1890b9e5 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -31,6 +31,34 @@
#include "arm/idct_arm.h"
#endif
+#if CONFIG_T8X8
+#ifndef vp8_idct_idct8
+#define vp8_idct_idct8 vp8_short_idct8x8_c
+#endif
+extern prototype_idct(vp8_idct_idct8);
+
+#ifndef vp8_idct_idct8_1
+#define vp8_idct_idct8_1 vp8_short_idct8x8_1_c
+#endif
+extern prototype_idct(vp8_idct_idct8_1);
+
+#ifndef vp8_idct_ihaar2
+#define vp8_idct_ihaar2 vp8_short_ihaar2x2_c
+#endif
+extern prototype_idct(vp8_idct_ihaar2);
+
+#ifndef vp8_idct_ihaar2_1
+#define vp8_idct_ihaar2_1 vp8_short_ihaar2x2_1_c
+#endif
+extern prototype_idct(vp8_idct_ihaar2_1);
+
+#ifndef vp8_idct_idct1_scalar_add_8x8
+#define vp8_idct_idct1_scalar_add_8x8 vp8_dc_only_idct_add_8x8_c
+#endif
+extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add_8x8);
+
+#endif
+
#ifndef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_c
#endif
@@ -69,6 +97,14 @@ typedef struct
vp8_second_order_fn_t iwalsh1;
vp8_second_order_fn_t iwalsh16;
+
+#if CONFIG_T8X8
+ vp8_idct_fn_t idct8;
+ vp8_idct_fn_t idct8_1;
+ vp8_idct_scalar_add_fn_t idct1_scalar_add_8x8;
+ vp8_idct_fn_t ihaar2;
+ vp8_idct_fn_t ihaar2_1;
+#endif
} vp8_idct_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 196062df6..4f3a01b1b 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -22,9 +22,15 @@
* so
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
**************************************************************************/
+#include "vpx_ports/config.h"
+
+
+#include <math.h>
+
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
static const int rounding = 0;
+
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
{
int i;
@@ -75,11 +81,19 @@ void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
d1 = temp1 + temp2;
+#if !CONFIG_EXTEND_QRANGE
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
+#else
+ op[0] = (a1 + d1 + 16) >> 5;
+ op[3] = (a1 - d1 + 16) >> 5;
+
+ op[1] = (b1 + c1 + 16) >> 5;
+ op[2] = (b1 - c1 + 16) >> 5;
+#endif
ip += shortpitch;
op += shortpitch;
@@ -92,8 +106,11 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
int a1;
short *op = output;
int shortpitch = pitch >> 1;
+#if !CONFIG_EXTEND_QRANGE
a1 = ((input[0] + 4) >> 3);
-
+#else
+ a1 = ((input[0] + 16) >> 5);
+#endif
for (i = 0; i < 4; i++)
{
op[0] = a1;
@@ -106,7 +123,11 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
{
+#if !CONFIG_EXTEND_QRANGE
int a1 = ((input_dc + 4) >> 3);
+#else
+ int a1 = ((input_dc + 16) >> 5);
+#endif
int r, c;
for (r = 0; r < 4; r++)
@@ -168,11 +189,17 @@ void vp8_short_inv_walsh4x4_c(short *input, short *output)
c2 = a1 - b1;
d2 = d1 - c1;
+#if !CONFIG_EXTEND_QRANGE
op[0] = (a2 + 3) >> 3;
op[1] = (b2 + 3) >> 3;
op[2] = (c2 + 3) >> 3;
op[3] = (d2 + 3) >> 3;
-
+#else
+ op[0] = (a2 + 1) >> 2;
+ op[1] = (b2 + 1) >> 2;
+ op[2] = (c2 + 1) >> 2;
+ op[3] = (d2 + 1) >> 2;
+#endif
ip += 4;
op += 4;
}
@@ -184,7 +211,11 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
int a1;
short *op = output;
- a1 = ((input[0] + 3) >> 3);
+#if !CONFIG_EXTEND_QRANGE
+ a1 = (input[0] + 3 )>> 3;
+#else
+ a1 = (input[0] + 1 )>> 2;
+#endif
for (i = 0; i < 4; i++)
{
@@ -195,3 +226,312 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
op += 4;
}
}
+
+#if CONFIG_T8X8
+
+#define FAST_IDCT_8X8
+
+void vp8_short_idct8x8_1_c(short *input, short *output, int pitch)
+{
+ int i, b;
+ int a1;
+ short *op = output;
+ short *orig_op = output;
+ int shortpitch = pitch >> 1;
+ a1 = ((input[0] + 4) >> 3);
+ for (b = 0; b < 4; b++)
+ {
+ for (i = 0; i < 4; i++)
+ {
+ op[0] = a1;
+ op[1] = a1;
+ op[2] = a1;
+ op[3] = a1;
+ op += shortpitch;
+ }
+ op = orig_op + (b+1)%2*4 +(b+1)/2*4*shortpitch;
+ }
+}
+
+void vp8_dc_only_idct_add_8x8_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
+{
+ int a1 = ((input_dc + 4) >> 3);
+ int r, c, b;
+ unsigned char *orig_pred = pred_ptr;
+ unsigned char *orig_dst = dst_ptr;
+ for (b = 0; b < 4; b++)
+ {
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ int a = a1 + pred_ptr[c] ;
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dst_ptr[c] = (unsigned char) a ;
+ }
+
+ dst_ptr += stride;
+ pred_ptr += pitch;
+ }
+ dst_ptr = orig_dst + (b+1)%2*4 + (b+1)/2*4*stride;
+ pred_ptr = orig_pred + (b+1)%2*4 + (b+1)/2*4*pitch;
+ }
+}
+
+#ifdef FAST_IDCT_8X8
+
+#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
+#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
+#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
+#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
+#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
+#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */
+
+/* row (horizontal) IDCT
+ *
+ * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- *
+ * ( k + - ) * l ) l=0 8 2
+ *
+ * where: c[0] = 128 c[1..7] = 128*sqrt(2) */
+
+static void idctrow (int *blk)
+{
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* shortcut */
+ if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) |
+ (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3])))
+ {
+ blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
+ return;
+ }
+ x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */
+
+ /* first stage */
+ x8 = W7 * (x4 + x5);
+ x4 = x8 + (W1 - W7) * x4;
+ x5 = x8 - (W1 + W7) * x5;
+ x8 = W3 * (x6 + x7);
+ x6 = x8 - (W3 - W5) * x6;
+ x7 = x8 - (W3 + W5) * x7;
+
+ /* second stage */
+ x8 = x0 + x1;
+ x0 -= x1;
+ x1 = W6 * (x3 + x2);
+ x2 = x1 - (W2 + W6) * x2;
+ x3 = x1 + (W2 - W6) * x3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage */
+ x7 = x8 + x3;
+ x8 -= x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage */
+ blk[0] = (x7 + x1) >> 8;
+ blk[1] = (x3 + x2) >> 8;
+ blk[2] = (x0 + x4) >> 8;
+ blk[3] = (x8 + x6) >> 8;
+ blk[4] = (x8 - x6) >> 8;
+ blk[5] = (x0 - x4) >> 8;
+ blk[6] = (x3 - x2) >> 8;
+ blk[7] = (x7 - x1) >> 8;
+}
+
+/* column (vertical) IDCT
+ *
+ * 7 pi 1 dst[8*k] = sum c[l] * src[8*l] *
+ * cos( -- * ( k + - ) * l ) l=0 8 2
+ *
+ * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
+static void idctcol (int *blk)
+{
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* shortcut */
+ if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |
+ (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) | (x7 = blk[8 * 3])))
+ {
+ blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] = blk[8 * 7] =
+ ((blk[8 * 0] + 32) >> 6);
+ return;
+ }
+ x0 = (blk[8 * 0] << 8) + 8192;
+
+ /* first stage */
+ x8 = W7 * (x4 + x5) + 4;
+ x4 = (x8 + (W1 - W7) * x4) >> 3;
+ x5 = (x8 - (W1 + W7) * x5) >> 3;
+ x8 = W3 * (x6 + x7) + 4;
+ x6 = (x8 - (W3 - W5) * x6) >> 3;
+ x7 = (x8 - (W3 + W5) * x7) >> 3;
+
+ /* second stage */
+ x8 = x0 + x1;
+ x0 -= x1;
+ x1 = W6 * (x3 + x2) + 4;
+ x2 = (x1 - (W2 + W6) * x2) >> 3;
+ x3 = (x1 + (W2 - W6) * x3) >> 3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage */
+ x7 = x8 + x3;
+ x8 -= x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage */
+ blk[8 * 0] = (x7 + x1) >> 14;
+ blk[8 * 1] = (x3 + x2) >> 14;
+ blk[8 * 2] = (x0 + x4) >> 14;
+ blk[8 * 3] = (x8 + x6) >> 14;
+ blk[8 * 4] = (x8 - x6) >> 14;
+ blk[8 * 5] = (x0 - x4) >> 14;
+ blk[8 * 6] = (x3 - x2) >> 14;
+ blk[8 * 7] = (x7 - x1) >> 14;
+}
+
+#define TX_DIM 8
+void vp8_short_idct8x8_c(short *coefs, short *block, int pitch)
+// an approximate 8x8 dct implementation, but not used
+{
+ int X[TX_DIM*TX_DIM];
+ int i,j;
+ int shortpitch = pitch >> 1;
+
+ for (i = 0; i < TX_DIM; i++)
+ {
+ for (j = 0; j < TX_DIM; j++)
+ {
+ X[i * TX_DIM + j] = (int)coefs[i * TX_DIM + j];
+ }
+ }
+ for (i = 0; i < 8; i++)
+ idctrow (X + 8 * i);
+
+ for (i = 0; i < 8; i++)
+ idctcol (X + i);
+
+ for (i = 0; i < TX_DIM; i++)
+ {
+ for (j = 0; j < TX_DIM; j++)
+ {
+ block[i*shortpitch+j] = X[i * TX_DIM + j]>>1;
+ }
+ }
+}
+
+#else
+
+/* This is really for testing */
+void vp8_short_idct8x8_c(short *input, short *output, int pitch)
+{
+ int X[8][8];
+ double C[8][8]={{0.0}}, Ct[8][8]={{0.0}}, temp[8][8]={{0.0}};
+ int i,j,k;
+ double temp1=0.0;
+ double pi = atan( 1.0 ) * 4.0;
+ //static int count=0;
+
+ int shortpitch = pitch >> 1;
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ X[i][j] = input[i * 8 + j];
+ }
+ }
+
+ // TODO: DCT matrix should be calculated once for all
+ for ( j = 0 ; j < 8 ; j++ ) {
+ C[ 0 ][ j ] = 1.0 / sqrt( (double) 8 );
+ Ct[ j ][ 0 ] = C[ 0 ][ j ];
+ }
+ for ( i = 1 ; i < 8 ; i++ ) {
+ for ( j = 0 ; j < 8 ; j++ ) {
+ C[ i ][ j ] = sqrt( 2.0 / 8 ) *
+ cos( pi * ( 2 * j + 1 ) * i / ( 2.0 * 8 ) );
+ Ct[ j ][ i ] = C[ i ][ j ];
+ }
+ }
+ /* MatrixMultiply( temp, input, C ); */
+ for ( i = 0 ; i < 8 ; i++ ) {
+ for ( j = 0 ; j < 8 ; j++ ) {
+ temp[ i ][ j ] = 0.0;
+ for ( k = 0 ; k < 8 ; k++ )
+ temp[ i ][ j ] += X[ i ][ k ] * C[ k ][ j ];
+ }
+ }
+
+ /* MatrixMultiply( output, Ct, temp ); */
+ for ( i = 0 ; i < 8 ; i++ ) {
+ for ( j = 0 ; j < 8 ; j++ ) {
+ temp1 = 0.0;
+ for ( k = 0 ; k < 8 ; k++ )
+ temp1 += Ct[ i ][ k ] * temp[ k ][ j ];
+ X[ i ][ j ] = floor( temp1/ 2.0 + 0.5);
+ }
+ }
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ output[i*shortpitch+j] = X[i][j];
+ }
+ }
+}
+#endif
+
+void vp8_short_ihaar2x2_c(short *input, short *output, int pitch)
+{
+ int i, x;
+ short *ip = input; //0,1, 4, 8
+ short *op = output;
+ for (i = 0; i < 16; i++)
+ {
+ op[i] = 0;
+ }
+
+ x = (ip[0] + ip[1] + ip[4] + ip[8]);
+ op[0] = (x>=0?x+1:x-1)>>2;
+ x = (ip[0] - ip[1] + ip[4] - ip[8]);
+ op[1] = (x>=0?x+1:x-1)>>2;
+ x = (ip[0] + ip[1] - ip[4] - ip[8]);
+ op[4] = (x>=0?x+1:x-1)>>2;
+ x = (ip[0] - ip[1] - ip[4] + ip[8]);
+ op[8] = (x>=0?x+1:x-1)>>2;
+}
+
+void vp8_short_ihaar2x2_1_c(short *input, short *output, int pitch)
+{
+ int a1;
+ short *ip = input;
+ short *op = output;
+ a1 = ((ip[0]>=0?ip[0]+1:ip[0]-1) >> 2);
+ op[0] = a1;
+ op[2] = a1;
+ op[8] = a1;
+ op[10] = a1;
+
+}
+#endif
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 81a3f2d89..d361b654a 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -24,13 +24,24 @@ static void recon_dcblock(MACROBLOCKD *x)
}
}
+#if CONFIG_T8X8
+static void recon_dcblock_8x8(MACROBLOCKD *x)
+{
+ BLOCKD *b = &x->block[24]; //for coeff 0, 2, 8, 10
+ x->block[0].dqcoeff[0] = b->diff[0];
+ x->block[4].dqcoeff[0] = b->diff[1];
+ x->block[8].dqcoeff[0] = b->diff[4];
+ x->block[12].dqcoeff[0] = b->diff[8];
+
+}
+#endif
void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch)
{
- if (b->eob > 1)
- IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch);
- else
+ if (b->eob <= 1)
IDCT_INVOKE(rtcd, idct1)(b->dqcoeff, b->diff, pitch);
+ else
+ IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch);
}
@@ -86,3 +97,77 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
}
}
+
+#if CONFIG_T8X8
+void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch)//pay attention to use when 8x8
+{
+ // int b,i;
+ //if (b->eob > 1)
+ IDCT_INVOKE(rtcd, idct8)(input_dqcoeff, output_coeff, pitch);
+ //else
+ //IDCT_INVOKE(rtcd, idct8_1)(b->dqcoeff, b->diff, pitch);//pitch
+
+}
+
+
+void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+{
+ int i;
+
+ // do 2nd order transform on the dc block
+ IDCT_INVOKE(rtcd, ihaar2)(x->block[24].dqcoeff, x->block[24].diff, 8);
+
+ recon_dcblock_8x8(x); //need to change for 8x8
+ for (i = 0; i < 9; i += 8)
+ {
+ vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32);
+ }
+ for (i = 2; i < 11; i += 8)
+ {
+ vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32);
+ }
+
+}
+void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+{
+ int i;
+
+ for (i = 16; i < 24; i += 4)
+ {
+ vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16);
+ }
+
+}
+
+
+void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+{
+ int i;
+
+ if (x->mode_info_context->mbmi.mode != B_PRED &&
+ x->mode_info_context->mbmi.mode != SPLITMV)
+ {
+ // do 2nd order transform on the dc block
+
+ IDCT_INVOKE(rtcd, ihaar2)(&x->block[24].dqcoeff[0], x->block[24].diff, 8);//dqcoeff[0]
+ recon_dcblock_8x8(x); //need to change for 8x8
+
+ }
+
+ for (i = 0; i < 9; i += 8)
+ {
+ vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32);
+ }
+ for (i = 2; i < 11; i += 8)
+ {
+ vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32);
+ }
+
+
+ for (i = 16; i < 24; i += 4)
+ {
+ vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16);
+ }
+
+}
+#endif
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index b3ffb7073..1466a5844 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -20,4 +20,11 @@ extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBL
extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+#if CONFIG_T8X8
+extern void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch);
+extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+#endif
+
#endif
diff --git a/vp8/common/maskingmv.c b/vp8/common/maskingmv.c
new file mode 100644
index 000000000..d01a18fc8
--- /dev/null
+++ b/vp8/common/maskingmv.c
@@ -0,0 +1,855 @@
+/*
+ ============================================================================
+ Name : maskingmv.c
+ Author : jimbankoski
+ Version :
+ Copyright : Your copyright notice
+ Description : Hello World in C, Ansi-style
+ ============================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+extern unsigned int vp8_sad16x16_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int max_err);
+
+extern void vp8_sad16x16x3_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int *results);
+
+extern int vp8_growmaskmb_sse3(
+ unsigned char *om,
+ unsigned char *nm);
+
+extern void vp8_makemask_sse3(
+ unsigned char *y,
+ unsigned char *u,
+ unsigned char *v,
+ unsigned char *ym,
+ int yp,
+ int uvp,
+ int ys,
+ int us,
+ int vs,
+ int yt,
+ int ut,
+ int vt);
+
+unsigned int vp8_sad16x16_unmasked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp8_sad16x16_masked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp8_masked_predictor_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_masked_predictor_uv_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_uv_from_y_mask(
+ unsigned char *ymask,
+ unsigned char *uvmask);
+int yp=16;
+unsigned char sxy[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90
+};
+
+unsigned char sts[]=
+{
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+};
+unsigned char str[]=
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+unsigned char y[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40
+};
+int uvp=8;
+unsigned char u[]=
+{
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+84,70,70,90,90,90,17,17,
+84,70,70,90,90,90,17,17,
+80,70,70,90,90,90,17,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17
+};
+
+unsigned char v[]=
+{
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80
+};
+
+unsigned char ym[256];
+unsigned char uvm[64];
+typedef struct
+{
+ unsigned char y;
+ unsigned char yt;
+ unsigned char u;
+ unsigned char ut;
+ unsigned char v;
+ unsigned char vt;
+ unsigned char use;
+} COLOR_SEG_ELEMENT;
+
+/*
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 60,4,80,17,80,10, 1},
+ { 40,4,15,10,80,10, 1},
+};
+*/
+
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 79,44,92,44, 237,60, 1},
+};
+
+unsigned char pixel_mask(unsigned char y,unsigned char u,unsigned char v,
+ COLOR_SEG_ELEMENT sgm[],
+ int c)
+{
+ COLOR_SEG_ELEMENT *s=sgm;
+ unsigned char m =0;
+ int i;
+ for(i=0;i<c;i++,s++)
+ m |= ( abs(y-s->y)< s->yt &&
+ abs(u-s->u)< s->ut &&
+ abs(v-s->v)< s->vt ? 255 : 0 );
+
+ return m;
+}
+int neighbors[256][8];
+int makeneighbors(void)
+{
+ int i,j;
+ for(i=0;i<256;i++)
+ {
+ int r=(i>>4),c=(i&15);
+ int ni=0;
+ for(j=0;j<8;j++)
+ neighbors[i][j]=i;
+ for(j=0;j<256;j++)
+ {
+ int nr=(j>>4),nc=(j&15);
+ if(abs(nr-r)<2&&abs(nc-c)<2)
+ neighbors[i][ni++]=j;
+ }
+ }
+ return 0;
+}
+void grow_ymask(unsigned char *ym)
+{
+ unsigned char nym[256];
+ int i,j;
+
+ for(i=0;i<256;i++)
+ {
+ nym[i]=ym[i];
+ for(j=0;j<8;j++)
+ {
+ nym[i]|=ym[neighbors[i][j]];
+ }
+ }
+ for(i=0;i<256;i++)
+ ym[i]=nym[i];
+}
+void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
+ unsigned char *ym, unsigned char *uvm,
+ int yp, int uvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count)
+{
+ int r,c;
+ unsigned char *oym = ym;
+
+ memset(ym,20,256);
+ for(r=0;r<8;r++,uvm+=8,u+=uvp,v+=uvp,y+=(yp+yp),ym+=32)
+ for(c=0;c<8;c++)
+ {
+ int y1=y[c<<1];
+ int u1=u[c];
+ int v1=v[c];
+ int m = pixel_mask(y1,u1,v1,sgm,count);
+ uvm[c] = m;
+ ym[c<<1] = uvm[c];// = pixel_mask(y[c<<1],u[c],v[c],sgm,count);
+ ym[(c<<1)+1] = pixel_mask(y[1+(c<<1)],u[c],v[c],sgm,count);
+ ym[(c<<1)+16] = pixel_mask(y[yp+(c<<1)],u[c],v[c],sgm,count);
+ ym[(c<<1)+17] = pixel_mask(y[1+yp+(c<<1)],u[c],v[c],sgm,count);
+ }
+ grow_ymask(oym);
+}
+
+int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+ unsigned char *ym )
+{
+ int i,j;
+ unsigned sad = 0;
+ for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16)
+ for(j=0;j<16;j++)
+ if(ym[j])
+ sad+= abs(src[j]-dst[j]);
+
+ return sad;
+}
+
+int compare_masks(unsigned char *sym, unsigned char *ym)
+{
+ int i,j;
+ unsigned sad = 0;
+ for(i=0;i<16;i++,sym += 16,ym+=16)
+ for(j=0;j<16;j++)
+ sad+= (sym[j]!=ym[j]?1:0);
+
+ return sad;
+}
+int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+ unsigned char *ym)
+{
+ int i,j;
+ unsigned sad = 0;
+ for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16)
+ for(j=0;j<16;j++)
+ if(!ym[j])
+ sad+= abs(src[j]-dst[j]);
+
+ return sad;
+}
+int masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v,
+ int yp, int uvp,
+ unsigned char *dy, unsigned char *du, unsigned char *dv,
+ int dyp, int duvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int *mi,
+ int *mj,
+ int *ui,
+ int *uj,
+ int *wm)
+{
+ int i,j;
+
+ unsigned char ym[256];
+ unsigned char uvm[64];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0 ;
+ int beste=256;
+ int bmi=-32,bmj=-32;
+ int bui=-32,buj=-32;
+ int beste1=256;
+ int bmi1=-32,bmj1=-32;
+ int bui1=-32,buj1=-32;
+ int obeste;
+
+ // first try finding best mask and then unmasked
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+ e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ //bui=0;buj=0;
+ // best mv masked destination
+ make_mb_mask(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+ dym, duvm, dyp, duvp,sgm,count);
+
+ obeste = beste;
+ beste = 0xffffffff;
+
+ // find best masked
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = masked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+ beste1=beste+obeste;
+ bmi1=bmi;bmj1=bmj;
+ bui1=bui;buj1=buj;
+
+ beste = 0xffffffff;
+ // source mask
+ make_mb_mask(y,u, v, ym, uvm, yp, uvp,sgm,count);
+
+ // find best mask
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+ e = compare_masks(ym, dym);
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+
+
+ // best mv masked destination
+ make_mb_mask(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+ dym, duvm, dyp, duvp,sgm,count);
+
+ obeste = masked_sad(y, yp, dy+bmi*dyp+bmj, dyp, dym );
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ beste += obeste;
+
+
+ if(beste<beste1)
+ {
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ }
+ else
+ {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+
+ }
+ return 0;
+}
+
+int predict(unsigned char *src, int p, unsigned char *dst, int dp,
+ unsigned char *ym, unsigned char *prd )
+{
+ int i,j;
+ for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16, prd+=16)
+ for(j=0;j<16;j++)
+ prd[j]=(ym[j] ? src[j]:dst[j]);
+ return 0;
+}
+
+int fast_masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v,
+ int yp, int uvp,
+ unsigned char *dy, unsigned char *du, unsigned char *dv,
+ int dyp, int duvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int *mi,
+ int *mj,
+ int *ui,
+ int *uj,
+ int *wm)
+{
+ int i,j;
+
+ unsigned char ym[256];
+ unsigned char ym2[256];
+ unsigned char uvm[64];
+ unsigned char dym2[256];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0 ;
+ int beste=256;
+ int bmi=-32,bmj=-32;
+ int bui=-32,buj=-32;
+ int beste1=256;
+ int bmi1=-32,bmj1=-32;
+ int bui1=-32,buj1=-32;
+ int obeste;
+
+ // first try finding best mask and then unmasked
+ beste = 0xffffffff;
+
+#if 0
+ for(i=0;i<16;i++)
+ {
+ unsigned char *dy = i*yp + y;
+ for(j=0;j<16;j++)
+ printf("%2x",dy[j]);
+ printf("\n");
+ }
+ printf("\n");
+
+ for(i=-32;i<48;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<48;j++)
+ printf("%2x",dyz[j]);
+ printf("\n");
+ }
+#endif
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ //bui=0;buj=0;
+ // best mv masked destination
+
+ vp8_makemask_sse3(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+ dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ obeste = beste;
+ beste = 0xffffffff;
+
+ // find best masked
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = vp8_sad16x16_masked_wmt(y, yp, dyz+j, dyp, dym2 );
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+ beste1=beste+obeste;
+ bmi1=bmi;bmj1=bmj;
+ bui1=bui;buj1=buj;
+
+ // source mask
+ vp8_makemask_sse3(y,u, v,
+ ym, yp, uvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(ym,ym2);
+
+ // find best mask
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ e = compare_masks(ym2, dym2);
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+
+ vp8_makemask_sse3(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+ dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ obeste = vp8_sad16x16_masked_wmt(y, yp, dy+bmi*dyp+bmj, dyp, dym2 );
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ beste += obeste;
+
+ if(beste<beste1)
+ {
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ }
+ else
+ {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+ beste=beste1;
+
+ }
+ return beste;
+}
+
+int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm,
+ int ymp, int uvmp,
+ unsigned char *yp, unsigned char *up, unsigned char *vp,
+ int ypp, int uvpp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int mi,
+ int mj,
+ int ui,
+ int uj,
+ int wm)
+{
+ int i,j;
+ unsigned char dym[256];
+ unsigned char dym2[256];
+ unsigned char duvm[64];
+ unsigned char *yu=ym,*uu=um, *vu=vm;
+
+ unsigned char *dym3=dym2;
+
+ ym+=mi*ymp+mj;
+ um+=mi/2*uvmp+mj/2;
+ vm+=mi/2*uvmp+mj/2;
+
+ yu+=ui*ymp+uj;
+ uu+=ui/2*uvmp+uj/2;
+ vu+=ui/2*uvmp+uj/2;
+
+ // best mv masked destination
+ if(wm)
+ vp8_makemask_sse3(ym,um, vm, dym, ymp, uvmp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+ else
+ vp8_makemask_sse3(yu,uu, vu, dym, ymp, uvmp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+ vp8_masked_predictor_wmt(ym,yu,ymp,yp,ypp,dym3);
+ vp8_uv_from_y_mask(dym3,duvm);
+ vp8_masked_predictor_uv_wmt(um,uu,uvmp,up,uvpp,duvm);
+ vp8_masked_predictor_uv_wmt(vm,vu,uvmp,vp,uvpp,duvm);
+
+ return 0;
+}
+
+unsigned char f0p[1280*720*3/2];
+unsigned char f1p[1280*720*3/2];
+unsigned char prd[1280*720*3/2];
+unsigned char msk[1280*720*3/2];
+
+
+int mainz(int argc, char *argv[]) {
+
+ FILE *f=fopen(argv[1],"rb");
+ FILE *g=fopen(argv[2],"wb");
+ int w=atoi(argv[3]),h=atoi(argv[4]);
+ int y_stride=w,uv_stride=w/2;
+ int r,c;
+ unsigned char *f0=f0p,*f1=f1p,*t;
+ unsigned char ym[256],uvm[64];
+ unsigned char ym2[256],uvm2[64];
+ unsigned char ym3[256],uvm3[64];
+ int a,b;
+
+ COLOR_SEG_ELEMENT last={ 20,20,20,20, 230,20, 1},best;
+#if 0
+ makeneighbors();
+ COLOR_SEG_ELEMENT segmentation[]=
+ {
+ { 60,4,80,17,80,10, 1},
+ { 40,4,15,10,80,10, 1},
+ };
+ make_mb_mask(y, u, v,ym2,uvm2,16,8,segmentation,1);
+
+ vp8_makemask_sse3(y,u,v,ym, (int) 16,(int) 8,
+ (int) segmentation[0].y,(int) segmentation[0].u,(int) segmentation[0].v,
+ segmentation[0].yt,segmentation[0].ut,segmentation[0].vt);
+
+ vp8_growmaskmb_sse3(ym,ym3);
+
+ a = vp8_sad16x16_masked_wmt(str,16,sts,16,ym3);
+ b = vp8_sad16x16_unmasked_wmt(str,16,sts,16,ym3);
+
+ vp8_masked_predictor_wmt(str,sts,16,ym,16,ym3);
+
+ vp8_uv_from_y_mask(ym3,uvm3);
+
+ return 4;
+#endif
+ makeneighbors();
+
+
+ memset(prd,128,w*h*3/2);
+
+ fread(f0,w*h*3/2,1,f);
+
+ while(!feof(f))
+ {
+ unsigned char *ys=f1,*yd=f0,*yp=prd;
+ unsigned char *us=f1+w*h,*ud=f0+w*h,*up=prd+w*h;
+ unsigned char *vs=f1+w*h*5/4,*vd=f0+w*h*5/4,*vp=prd+w*h*5/4;
+ fread(f1,w*h*3/2,1,f);
+
+ ys+=32*y_stride;yd+=32*y_stride;yp+=32*y_stride;
+ us+=16*uv_stride;ud+=16*uv_stride;up+=16*uv_stride;
+ vs+=16*uv_stride;vd+=16*uv_stride;vp+=16*uv_stride;
+ for(r=32;r<h-32;r+=16,
+ ys+=16*w,yd+=16*w,yp+=16*w,
+ us+=8*uv_stride,ud+=8*uv_stride,up+=8*uv_stride,
+ vs+=8*uv_stride,vd+=8*uv_stride,vp+=8*uv_stride)
+ {
+ for(c=32;c<w-32;c+=16)
+ {
+ int mi,mj,ui,uj,wm;
+ int bmi,bmj,bui,buj,bwm;
+ unsigned char ym[256];
+
+ if(vp8_sad16x16_sse3( ys+c,y_stride, yd+c,y_stride,0xffff) == 0)
+ bmi=bmj=bui=buj=bwm=0;
+ else
+ {
+ COLOR_SEG_ELEMENT cs[5];
+ int j;
+ unsigned int beste=0xfffffff;
+ unsigned int bestj=0;
+
+ // try color from last mb segmentation
+ cs[0] = last;
+
+ // try color segs from 4 pixels in mb recon as segmentation
+ cs[1].y = yd[c + y_stride + 1];cs[1].u = ud[c/2 + uv_stride];
+ cs[1].v = vd[c/2 + uv_stride];
+ cs[1].yt = cs[1].ut = cs[1].vt = 20;
+ cs[2].y = yd[c + w + 14];
+ cs[2].u = ud[c/2 + uv_stride+7];
+ cs[2].v = vd[c/2 + uv_stride+7];
+ cs[2].yt = cs[2].ut = cs[2].vt = 20;
+ cs[3].y = yd[c + w*14 + 1];
+ cs[3].u = ud[c/2 + uv_stride*7];
+ cs[3].v = vd[c/2 + uv_stride*7];
+ cs[3].yt = cs[3].ut = cs[3].vt = 20;
+ cs[4].y = yd[c + w*14 + 14];
+ cs[4].u = ud[c/2 + uv_stride*7+7];
+ cs[4].v = vd[c/2 + uv_stride*7+7];
+ cs[4].yt = cs[4].ut = cs[4].vt = 20;
+
+ for(j=0;j<5;j++)
+ {
+ int e;
+
+ e = fast_masked_motion_search(
+ ys+c, us+c/2, vs+c/2, y_stride, uv_stride,
+ yd+c, ud+c/2, vd+c/2, y_stride, uv_stride,
+ &cs[j], 1, &mi,&mj,&ui,&uj,&wm);
+
+ if(e<beste)
+ {
+ bmi=mi;bmj=mj;bui=ui;buj=uj,bwm=wm;
+ bestj=j;
+ beste=e;
+ }
+ }
+ best = cs[bestj];
+ //best = segmentation[0];
+ last = best;
+ }
+ predict_all(yd+c, ud+c/2, vd+c/2, w, uv_stride,
+ yp+c, up+c/2, vp+c/2, w, uv_stride,
+ &best, 1, bmi,bmj,bui,buj,bwm);
+
+ }
+ }
+ fwrite(prd,w*h*3/2,1,g);
+ t=f0;
+ f0=f1;
+ f1=t;
+
+ }
+ fclose(f);
+ fclose(g);
+ return;
+}
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 4356b5133..246fa116d 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -44,6 +44,9 @@ typedef struct frame_contexts
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+#if CONFIG_T8X8
+ vp8_prob coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+#endif
MV_CONTEXT mvc[2];
MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */
} FRAME_CONTEXT;
@@ -121,6 +124,7 @@ typedef struct VP8Common
int mode_info_stride;
/* profile settings */
+ int experimental;
int mb_no_coeff_skip;
int no_lpf;
int use_bilinear_mc_filter;
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index e9833fe33..b8e6e2972 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -11,6 +11,8 @@
#include "quant_common.h"
+
+#if !CONFIG_EXTEND_QRANGE
static const int dc_qlookup[QINDEX_RANGE] =
{
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
@@ -34,7 +36,32 @@ static const int ac_qlookup[QINDEX_RANGE] =
155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
+#else
+
+static const int dc_qlookup[QINDEX_RANGE] =
+{
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 32, 34, 36, 38, 40, 42,
+ 44, 46, 49, 52, 55, 58, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88,
+ 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
+ 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 205, 210, 215, 220,
+ 225, 230, 235, 240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300,
+ 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460,
+ 472, 484, 496, 508, 520, 532, 544, 556, 572, 588, 608, 628, 648, 668, 692, 720,
+};
+static const int ac_qlookup[QINDEX_RANGE] =
+{
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 51,
+ 54, 57, 60, 63, 66, 69, 72, 76, 80, 84, 88, 92, 96, 100, 105, 110,
+ 115, 120, 125, 130, 135, 140, 146, 152, 158, 164, 170, 176, 182, 188, 194, 200,
+ 206, 212, 218, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320,
+ 330, 340, 350, 360, 370, 380, 392, 404, 416, 428, 440, 454, 468, 482, 496, 510,
+ 524, 540, 556, 572, 588, 604, 622, 640, 658, 676, 696, 716, 736, 756, 776, 796,
+ 820, 844, 868, 892, 916, 944, 972, 1000, 1032, 1064, 1096, 1128, 1168, 1208, 1252, 1300
+};
+#endif
int vp8_dc_quant(int QIndex, int Delta)
{
@@ -62,7 +89,11 @@ int vp8_dc2quant(int QIndex, int Delta)
else if (QIndex < 0)
QIndex = 0;
+#if !CONFIG_EXTEND_QRANGE
retval = dc_qlookup[ QIndex ] * 2;
+#else
+ retval = dc_qlookup[ QIndex ];
+#endif
return retval;
}
@@ -72,16 +103,13 @@ int vp8_dc_uv_quant(int QIndex, int Delta)
QIndex = QIndex + Delta;
- if (QIndex > 127)
- QIndex = 127;
+ if (QIndex > 117)
+ QIndex = 117;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ];
- if (retval > 132)
- retval = 132;
-
return retval;
}
@@ -108,12 +136,13 @@ int vp8_ac2quant(int QIndex, int Delta)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
-
+#if !CONFIG_EXTEND_QRANGE
retval = (ac_qlookup[ QIndex ] * 155) / 100;
-
if (retval < 8)
retval = 8;
-
+#else
+ retval = ac_qlookup[ QIndex ];
+#endif
return retval;
}
int vp8_ac_uv_quant(int QIndex, int Delta)
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 18c514541..12e2e60c7 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -81,10 +81,10 @@ void vp8_intra4x4_predict(BLOCKD *x,
{
unsigned int ap[4];
- ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2;
- ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
- ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
- ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
+ ap[0] = Above[0];
+ ap[1] = Above[1];
+ ap[2] = Above[2];
+ ap[3] = Above[3];
for (r = 0; r < 4; r++)
{
@@ -105,10 +105,10 @@ void vp8_intra4x4_predict(BLOCKD *x,
{
unsigned int lp[4];
- lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
- lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
- lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
- lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
+ lp[0] = Left[0];
+ lp[1] = Left[1];
+ lp[2] = Left[2];
+ lp[3] = Left[3];
for (r = 0; r < 4; r++)
{
diff --git a/vp8/common/x86/mask_sse3.asm b/vp8/common/x86/mask_sse3.asm
new file mode 100644
index 000000000..0d90cfa86
--- /dev/null
+++ b/vp8/common/x86/mask_sse3.asm
@@ -0,0 +1,484 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void int vp8_makemask_sse3(
+; unsigned char *y,
+; unsigned char *u,
+; unsigned char *v,
+; unsigned char *ym,
+; unsigned char *uvm,
+; int yp,
+; int uvp,
+; int ys,
+; int us,
+; int vs,
+; int yt,
+; int ut,
+; int vt)
+global sym(vp8_makemask_sse3)
+sym(vp8_makemask_sse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 14
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;y
+ mov rdi, arg(1) ;u
+ mov rcx, arg(2) ;v
+ mov rax, arg(3) ;ym
+ movsxd rbx, dword arg(4) ;yp
+ movsxd rdx, dword arg(5) ;uvp
+
+ pxor xmm0,xmm0
+
+ ;make 16 copies of the center y value
+ movd xmm1, arg(6)
+ pshufb xmm1, xmm0
+
+ ; make 16 copies of the center u value
+ movd xmm2, arg(7)
+ pshufb xmm2, xmm0
+
+ ; make 16 copies of the center v value
+ movd xmm3, arg(8)
+ pshufb xmm3, xmm0
+ unpcklpd xmm2, xmm3
+
+ ;make 16 copies of the y tolerance
+ movd xmm3, arg(9)
+ pshufb xmm3, xmm0
+
+ ;make 16 copies of the u tolerance
+ movd xmm4, arg(10)
+ pshufb xmm4, xmm0
+
+ ;make 16 copies of the v tolerance
+ movd xmm5, arg(11)
+ pshufb xmm5, xmm0
+ unpckhpd xmm4, xmm5
+
+ mov r8,8
+
+NextPairOfRows:
+
+ ;grab the y source values
+ movdqu xmm0, [rsi]
+
+ ;compute abs difference between source and y target
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm0
+ psubusb xmm0, xmm1
+ psubusb xmm6, xmm7
+ por xmm0, xmm6
+
+ ;compute abs difference between
+ movdqa xmm6, xmm3
+ pcmpgtb xmm6, xmm0
+
+ ;grab the y source values
+ add rsi, rbx
+ movdqu xmm0, [rsi]
+
+ ;compute abs difference between source and y target
+ movdqa xmm11, xmm1
+ movdqa xmm7, xmm0
+ psubusb xmm0, xmm1
+ psubusb xmm11, xmm7
+ por xmm0, xmm11
+
+ ;compute abs difference between
+ movdqa xmm11, xmm3
+ pcmpgtb xmm11, xmm0
+
+
+ ;grab the u and v source values
+ movdqu xmm7, [rdi]
+ movdqu xmm8, [rcx]
+ unpcklpd xmm7, xmm8
+
+ ;compute abs difference between source and uv targets
+ movdqa xmm9, xmm2
+ movdqa xmm10, xmm7
+ psubusb xmm7, xmm2
+ psubusb xmm9, xmm10
+ por xmm7, xmm9
+
+ ;check whether the number is < tolerance
+ movdqa xmm0, xmm4
+ pcmpgtb xmm0, xmm7
+
+ ;double u and v masks
+ movdqa xmm8, xmm0
+ punpckhbw xmm0, xmm0
+ punpcklbw xmm8, xmm8
+
+ ;mask row 0 and output
+ pand xmm6, xmm8
+ pand xmm6, xmm0
+ movdqa [rax],xmm6
+
+ ;mask row 1 and output
+ pand xmm11, xmm8
+ pand xmm11, xmm0
+ movdqa [rax+16],xmm11
+
+
+ ; to the next row or set of rows
+ add rsi, rbx
+ add rdi, rdx
+ add rcx, rdx
+ add rax,32
+ dec r8
+ jnz NextPairOfRows
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;GROW_HORIZ (register for result, source register or mem local)
+; takes source and shifts left and ors with source
+; then shifts right and ors with source
+%macro GROW_HORIZ 2
+ movdqa %1, %2
+ movdqa xmm14, %1
+ movdqa xmm15, %1
+ pslldq xmm14, 1
+ psrldq xmm15, 1
+ por %1,xmm14
+ por %1,xmm15
+%endmacro
+;GROW_VERT (result, center row, above row, below row)
+%macro GROW_VERT 4
+ movdqa %1,%2
+ por %1,%3
+ por %1,%4
+%endmacro
+
+;GROW_NEXTLINE (new line to grow, new source, line to write)
+%macro GROW_NEXTLINE 3
+ GROW_HORIZ %1, %2
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ movdqa %3,xmm3
+%endmacro
+
+
+;void int vp8_growmaskmb_sse3(
+; unsigned char *om,
+; unsigned char *nm,
+global sym(vp8_growmaskmb_sse3)
+sym(vp8_growmaskmb_sse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src
+ mov rdi, arg(1) ;rst
+
+ GROW_HORIZ xmm0, [rsi]
+ GROW_HORIZ xmm1, [rsi+16]
+ GROW_HORIZ xmm2, [rsi+32]
+
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ por xmm0,xmm1
+ movdqa [rdi], xmm0
+ movdqa [rdi+16],xmm3
+
+ GROW_NEXTLINE xmm0,[rsi+48],[rdi+32]
+ GROW_NEXTLINE xmm1,[rsi+64],[rdi+48]
+ GROW_NEXTLINE xmm2,[rsi+80],[rdi+64]
+ GROW_NEXTLINE xmm0,[rsi+96],[rdi+80]
+ GROW_NEXTLINE xmm1,[rsi+112],[rdi+96]
+ GROW_NEXTLINE xmm2,[rsi+128],[rdi+112]
+ GROW_NEXTLINE xmm0,[rsi+144],[rdi+128]
+ GROW_NEXTLINE xmm1,[rsi+160],[rdi+144]
+ GROW_NEXTLINE xmm2,[rsi+176],[rdi+160]
+ GROW_NEXTLINE xmm0,[rsi+192],[rdi+176]
+ GROW_NEXTLINE xmm1,[rsi+208],[rdi+192]
+ GROW_NEXTLINE xmm2,[rsi+224],[rdi+208]
+ GROW_NEXTLINE xmm0,[rsi+240],[rdi+224]
+
+ por xmm0,xmm2
+ movdqa [rdi+240], xmm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;unsigned int vp8_sad16x16_masked_wmt(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned char *mask)
+global sym(vp8_sad16x16_masked_wmt)
+sym(vp8_sad16x16_masked_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov rbx, arg(4) ;mask
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ mov rcx, 16
+
+ pxor xmm3, xmm3
+
+NextSadRow:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [rbx]
+ pand xmm0, xmm2
+ pand xmm1, xmm2
+
+ psadbw xmm0, xmm1
+ paddw xmm3, xmm0
+
+ add rsi, rax
+ add rdi, rdx
+ add rbx, 16
+
+ dec rcx
+ jnz NextSadRow
+
+ movdqa xmm4 , xmm3
+ psrldq xmm4, 8
+ paddw xmm3, xmm4
+ movq rax, xmm3
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_sad16x16_unmasked_wmt(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned char *mask)
+global sym(vp8_sad16x16_unmasked_wmt)
+sym(vp8_sad16x16_unmasked_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov rbx, arg(4) ;mask
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ mov rcx, 16
+
+ pxor xmm3, xmm3
+
+next_vp8_sad16x16_unmasked_wmt:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [rbx]
+ por xmm0, xmm2
+ por xmm1, xmm2
+
+ psadbw xmm0, xmm1
+ paddw xmm3, xmm0
+
+ add rsi, rax
+ add rdi, rdx
+ add rbx, 16
+
+ dec rcx
+ jnz next_vp8_sad16x16_unmasked_wmt
+
+ movdqa xmm4 , xmm3
+ psrldq xmm4, 8
+ paddw xmm3, xmm4
+ movq rax, xmm3
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_masked_predictor_wmt(
+; unsigned char *masked,
+; unsigned char *unmasked,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; unsigned char *mask)
+global sym(vp8_masked_predictor_wmt)
+sym(vp8_masked_predictor_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;ref_ptr
+
+ mov rbx, arg(5) ;mask
+ movsxd rax, dword ptr arg(2) ;src_stride
+ mov r11, arg(3) ; destination
+ movsxd rdx, dword ptr arg(4) ;dst_stride
+
+ mov rcx, 16
+
+ pxor xmm3, xmm3
+
+next_vp8_masked_predictor_wmt:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [rbx]
+
+ pand xmm0, xmm2
+ pandn xmm2, xmm1
+ por xmm0, xmm2
+ movdqu [r11], xmm0
+
+ add r11, rdx
+ add rsi, rax
+ add rdi, rdx
+ add rbx, 16
+
+ dec rcx
+ jnz next_vp8_masked_predictor_wmt
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;unsigned int vp8_masked_predictor_uv_wmt(
+; unsigned char *masked,
+; unsigned char *unmasked,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; unsigned char *mask)
+global sym(vp8_masked_predictor_uv_wmt)
+sym(vp8_masked_predictor_uv_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;ref_ptr
+
+ mov rbx, arg(5) ;mask
+ movsxd rax, dword ptr arg(2) ;src_stride
+ mov r11, arg(3) ; destination
+ movsxd rdx, dword ptr arg(4) ;dst_stride
+
+ mov rcx, 8
+
+ pxor xmm3, xmm3
+
+next_vp8_masked_predictor_uv_wmt:
+ movq xmm0, [rsi]
+ movq xmm1, [rdi]
+ movq xmm2, [rbx]
+
+ pand xmm0, xmm2
+ pandn xmm2, xmm1
+ por xmm0, xmm2
+ movq [r11], xmm0
+
+ add r11, rdx
+ add rsi, rax
+ add rdi, rax
+ add rbx, 8
+
+ dec rcx
+ jnz next_vp8_masked_predictor_uv_wmt
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_uv_from_y_mask(
+; unsigned char *ymask,
+; unsigned char *uvmask)
+global sym(vp8_uv_from_y_mask)
+sym(vp8_uv_from_y_mask):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;dst_ptr
+
+
+ mov rcx, 8
+
+ pxor xmm3, xmm3
+
+next_p8_uv_from_y_mask:
+ movdqu xmm0, [rsi]
+ pshufb xmm0, [shuf1b] ;[GLOBAL(shuf1b)]
+ movq [rdi],xmm0
+ add rdi, 8
+ add rsi,32
+
+ dec rcx
+ jnz next_p8_uv_from_y_mask
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+shuf1b:
+ db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+