diff options
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/arm/neon/dc_only_idct_add_neon.asm | 2 | ||||
-rw-r--r-- | vp8/common/blockd.h | 3 | ||||
-rw-r--r-- | vp8/common/entropy.c | 2 | ||||
-rw-r--r-- | vp8/common/entropymode.h | 4 | ||||
-rw-r--r-- | vp8/common/extend.c | 7 | ||||
-rw-r--r-- | vp8/common/filter.h | 2 | ||||
-rw-r--r-- | vp8/common/generic/systemdependent.c | 53 | ||||
-rw-r--r-- | vp8/common/onyx.h | 140 | ||||
-rw-r--r-- | vp8/common/postproc.c | 29 | ||||
-rw-r--r-- | vp8/common/rtcd.c | 57 | ||||
-rw-r--r-- | vp8/common/variance_c.c | 24 | ||||
-rw-r--r--[-rwxr-xr-x] | vp8/common/vp8_entropymodedata.h | 0 | ||||
-rw-r--r-- | vp8/common/x86/postproc_x86.c | 3 | ||||
-rw-r--r-- | vp8/common/x86/variance_sse2.c | 5 | ||||
-rw-r--r-- | vp8/common/x86/variance_ssse3.c | 5 | ||||
-rw-r--r-- | vp8/common/x86/vp8_asm_stubs.c | 117 |
16 files changed, 278 insertions, 175 deletions
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm index 65a4680c1..79ff02c69 100644 --- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm +++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm @@ -46,7 +46,7 @@ vst1.32 {d2[1]}, [r3], r12 vst1.32 {d4[0]}, [r3], r12 vst1.32 {d4[1]}, [r3] - + bx lr ENDP diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index c7206b23c..c715f6547 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -174,8 +174,7 @@ typedef struct MB_PREDICTION_MODE mode; MV_REFERENCE_FRAME ref_frame; int_mv mv; - //union b_mode_info bmi[16]; - int dissim; // dissimilarity level of the macroblock + int dissim; /* dissimilarity level of the macroblock */ } LOWER_RES_MB_INFO; /* The frame-level information needed to be stored for higher-resolution diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index a95a923e8..8c046a4f5 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -101,7 +101,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ /* vp8_coef_encodings generated with: vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); */ -const vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = +vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = { {2, 2}, {6, 3}, diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h index 3a2fa84cc..1df0f641e 100644 --- a/vp8/common/entropymode.h +++ b/vp8/common/entropymode.h @@ -24,11 +24,11 @@ typedef enum SUBMVREF_LEFT_ABOVE_ZED } sumvfref_t; -typedef const int vp8_mbsplit[16]; +typedef int vp8_mbsplit[16]; #define VP8_NUMMBSPLITS 4 -extern vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; +extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ diff --git a/vp8/common/extend.c b/vp8/common/extend.c index 9089e1629..c9bdd2189 100644 --- a/vp8/common/extend.c +++ b/vp8/common/extend.c @@ -116,7 +116,7 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - // If the side is not touching the bounder then don't extend. + /* If the side is not touching the bounder then don't extend. */ if (srcy) et = 0; if (srcx) @@ -157,7 +157,10 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, /* note the extension is only for the last row, for intra prediction purpose */ -void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr) +void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, + unsigned char *YPtr, + unsigned char *UPtr, + unsigned char *VPtr) { int i; diff --git a/vp8/common/filter.h b/vp8/common/filter.h index 0f225c25a..b7591f268 100644 --- a/vp8/common/filter.h +++ b/vp8/common/filter.h @@ -19,4 +19,4 @@ extern const short vp8_bilinear_filters[8][2]; extern const short vp8_sub_pel_filters[8][6]; -#endif //FILTER_H +#endif diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 2a3016618..5a6ac7b0e 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -83,57 +83,6 @@ static int get_cpu_count() #endif -#if HAVE_PTHREAD_H -#include <pthread.h> -static void once(void (*func)(void)) -{ - static pthread_once_t lock = PTHREAD_ONCE_INIT; - pthread_once(&lock, func); -} - - -#elif defined(_WIN32) -static void once(void (*func)(void)) -{ - /* Using a static initializer here rather than InitializeCriticalSection() - * since there's no race-free context in which to execute it. Protecting - * it with an atomic op like InterlockedCompareExchangePointer introduces - * an x86 dependency, and InitOnceExecuteOnce requires Vista. - */ - static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0}; - static int done; - - EnterCriticalSection(&lock); - - if (!done) - { - func(); - done = 1; - } - - LeaveCriticalSection(&lock); -} - - -#else -/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, - * so as long as your platform provides atomic loads/stores of pointers - * no synchronization is strictly necessary. - */ - -static void once(void (*func)(void)) -{ - static int done; - - if(!done) - { - func(); - done = 1; - } -} -#endif - - void vp8_machine_specific_config(VP8_COMMON *ctx) { #if CONFIG_MULTITHREAD @@ -145,6 +94,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) #elif ARCH_X86 || ARCH_X86_64 ctx->cpu_caps = x86_simd_caps(); #endif - - once(vpx_rtcd); } diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 67cb77c51..766b4ea1e 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -94,83 +94,101 @@ extern "C" typedef struct { - int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode - int Width; // width of data passed to the compressor - int Height; // height of data passed to the compressor + /* 4 versions of bitstream defined: + * 0 best quality/slowest decode, 3 lowest quality/fastest decode + */ + int Version; + int Width; + int Height; struct vpx_rational timebase; - unsigned int target_bandwidth; // bandwidth to be used in kilobits per second + unsigned int target_bandwidth; /* kilobits per second */ - int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0 - int Sharpness; // parameter used for sharpening output: recommendation 0: + /* parameter used for applying pre processing blur: recommendation 0 */ + int noise_sensitivity; + + /* parameter used for sharpening output: recommendation 0: */ + int Sharpness; int cpu_used; unsigned int rc_max_intra_bitrate_pct; - // mode -> - //(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing - // a television signal or feed from a live camera). ( speed setting controls how fast ) - //(1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to - // encode the output. ( speed setting controls how fast ) - //(2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding - // speed. The output is compressed at the highest possible quality. This option takes the longest - // amount of time to encode. ( speed setting ignored ) - //(3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding - // pass. ( speed setting controls how fast ) - //(4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding - // pass to create the compressed output. ( speed setting controls how fast ) - //(5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first - // encoding pass to create the compressed output using the highest possible quality, and taking a - // longer amount of time to encode.. ( speed setting ignored ) - int Mode; // - - // Key Framing Operations - int auto_key; // automatically detect cut scenes and set the keyframes - int key_freq; // maximum distance to key frame. - - int allow_lag; // allow lagged compression (if 0 lagin frames is ignored) - int lag_in_frames; // how many frames lag before we start encoding - - //---------------------------------------------------------------- - // DATARATE CONTROL OPTIONS - - int end_usage; // vbr or cbr - - // buffer targeting aggressiveness + /* mode -> + *(0)=Realtime/Live Encoding. This mode is optimized for realtim + * encoding (for example, capturing a television signal or feed + * from a live camera). ( speed setting controls how fast ) + *(1)=Good Quality Fast Encoding. The encoder balances quality with + * the amount of time it takes to encode the output. ( speed + * setting controls how fast ) + *(2)=One Pass - Best Quality. The encoder places priority on the + * quality of the output over encoding speed. The output is + * compressed at the highest possible quality. This option takes + * the longest amount of time to encode. ( speed setting ignored + * ) + *(3)=Two Pass - First Pass. The encoder generates a file of + * statistics for use in the second encoding pass. ( speed + * setting controls how fast ) + *(4)=Two Pass - Second Pass. The encoder uses the statistics that + * were generated in the first encoding pass to create the + * compressed output. ( speed setting controls how fast ) + *(5)=Two Pass - Second Pass Best. The encoder uses the statistics + * that were generated in the first encoding pass to create the + * compressed output using the highest possible quality, and + * taking a longer amount of time to encode.. ( speed setting + * ignored ) + */ + int Mode; + + /* Key Framing Operations */ + int auto_key; /* automatically detect cut scenes */ + int key_freq; /* maximum distance to key frame. */ + + /* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */ + int allow_lag; + int lag_in_frames; /* how many frames lag before we start encoding */ + + /* + * DATARATE CONTROL OPTIONS + */ + + int end_usage; /* vbr or cbr */ + + /* buffer targeting aggressiveness */ int under_shoot_pct; int over_shoot_pct; - // buffering parameters - int64_t starting_buffer_level; // in bytes + /* buffering parameters */ + int64_t starting_buffer_level; int64_t optimal_buffer_level; int64_t maximum_buffer_size; - int64_t starting_buffer_level_in_ms; // in milli-seconds + int64_t starting_buffer_level_in_ms; int64_t optimal_buffer_level_in_ms; int64_t maximum_buffer_size_in_ms; - // controlling quality + /* controlling quality */ int fixed_q; int worst_allowed_q; int best_allowed_q; int cq_level; - // allow internal resizing ( currently disabled in the build !!!!!) + /* allow internal resizing */ int allow_spatial_resampling; int resample_down_water_mark; int resample_up_water_mark; - // allow internal frame rate alterations + /* allow internal frame rate alterations */ int allow_df; int drop_frames_water_mark; - // two pass datarate control - int two_pass_vbrbias; // two pass datarate control tweaks + /* two pass datarate control */ + int two_pass_vbrbias; int two_pass_vbrmin_section; int two_pass_vbrmax_section; - // END DATARATE CONTROL OPTIONS - //---------------------------------------------------------------- + /* + * END DATARATE CONTROL OPTIONS + */ - // these parameters aren't to be used in final build don't use!!! + /* these parameters aren't to be used in final build don't use!!! */ int play_alternate; int alt_freq; int alt_q; @@ -178,26 +196,28 @@ extern "C" int gold_q; - int multi_threaded; // how many threads to run the encoder on - int token_partitions; // how many token partitions to create for multi core decoding - int encode_breakout; // early breakout encode threshold : for video conf recommend 800 + int multi_threaded; /* how many threads to run the encoder on */ + int token_partitions; /* how many token partitions to create */ + + /* early breakout threshold: for video conf recommend 800 */ + int encode_breakout; - unsigned int error_resilient_mode; // Bitfield defining the error - // resiliency features to enable. Can provide - // decodable frames after losses in previous - // frames and decodable partitions after - // losses in the same frame. + /* Bitfield defining the error resiliency features to enable. + * Can provide decodable frames after losses in previous + * frames and decodable partitions after losses in the same frame. + */ + unsigned int error_resilient_mode; int arnr_max_frames; - int arnr_strength ; - int arnr_type ; + int arnr_strength; + int arnr_type; - struct vpx_fixed_buf two_pass_stats_in; + struct vpx_fixed_buf two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; vp8e_tuning tuning; - // Temporal scaling parameters + /* Temporal scaling parameters */ unsigned int number_of_layers; unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY]; unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY]; @@ -228,8 +248,6 @@ extern "C" void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); -// receive a frames worth of data caller can assume that a copy of this frame is made -// and not just a copy of the pointer.. int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp); int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index a94ae0006..a7509ff4e 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -143,9 +143,7 @@ void vp8_post_proc_down_and_across_c int col; int i; int v; - int pitch = src_pixels_per_line; unsigned char d[8]; - (void)dst_pixels_per_line; for (row = 0; row < rows; row++) { @@ -161,10 +159,10 @@ void vp8_post_proc_down_and_across_c for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col+i*pitch]) > flimit) + if (abs(v - p_src[col+i*src_pixels_per_line]) > flimit) goto down_skip_convolve; - kernel += kernel5[2+i] * p_src[col+i*pitch]; + kernel += kernel5[2+i] * p_src[col+i*src_pixels_per_line]; } v = (kernel >> 3); @@ -211,10 +209,9 @@ void vp8_post_proc_down_and_across_c p_dst[col-2] = d[(col-2)&7]; p_dst[col-1] = d[(col-1)&7]; - /* next row */ - src_ptr += pitch; - dst_ptr += pitch; + src_ptr += src_pixels_per_line; + dst_ptr += dst_pixels_per_line; } } @@ -240,8 +237,9 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co for (i = -8; i<0; i++) s[i]=s[0]; - // 17 avoids valgrind warning - we buffer values in c in d - // and only write them when we've read 8 ahead... + /* 17 avoids valgrind warning - we buffer values in c in d + * and only write them when we've read 8 ahead... + */ for (i = cols; i<cols+17; i++) s[i]=s[cols-1]; @@ -275,9 +273,6 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co } - - - void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, int flimit) { int r, c, i; @@ -294,8 +289,9 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i for (i = -8; i < 0; i++) s[i*pitch]=s[0]; - // 17 avoids valgrind warning - we buffer values in c in d - // and only write them when we've read 8 ahead... + /* 17 avoids valgrind warning - we buffer values in c in d + * and only write them when we've read 8 ahead... + */ for (i = rows; i < rows+17; i++) s[i*pitch]=s[(rows-1)*pitch]; @@ -731,8 +727,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t oci->post_proc_buffer_int_used = 1; - // insure that postproc is set to all 0's so that post proc - // doesn't pull random data in from edge + /* insure that postproc is set to all 0's so that post proc + * doesn't pull random data in from edge + */ vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); } diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c index 232640dc8..4980f48ad 100644 --- a/vp8/common/rtcd.c +++ b/vp8/common/rtcd.c @@ -10,3 +10,60 @@ #include "vpx_config.h" #define RTCD_C #include "vpx_rtcd.h" + +#if CONFIG_MULTITHREAD && HAVE_PTHREAD_H +#include <pthread.h> +static void once(void (*func)(void)) +{ + static pthread_once_t lock = PTHREAD_ONCE_INIT; + pthread_once(&lock, func); +} + + +#elif CONFIG_MULTITHREAD && defined(_WIN32) +#include <windows.h> +static void once(void (*func)(void)) +{ + /* Using a static initializer here rather than InitializeCriticalSection() + * since there's no race-free context in which to execute it. Protecting + * it with an atomic op like InterlockedCompareExchangePointer introduces + * an x86 dependency, and InitOnceExecuteOnce requires Vista. + */ + static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0}; + static int done; + + EnterCriticalSection(&lock); + + if (!done) + { + func(); + done = 1; + } + + LeaveCriticalSection(&lock); +} + + +#else +/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, + * so as long as your platform provides atomic loads/stores of pointers + * no synchronization is strictly necessary. + */ + +static void once(void (*func)(void)) +{ + static int done; + + if(!done) + { + func(); + done = 1; + } +} +#endif + + +void vpx_rtcd() +{ + once(setup_rtcd_internal); +} diff --git a/vp8/common/variance_c.c b/vp8/common/variance_c.c index 996404dd6..da08affb8 100644 --- a/vp8/common/variance_c.c +++ b/vp8/common/variance_c.c @@ -205,14 +205,14 @@ static void var_filter_block2d_bil_first_pass { for (j = 0; j < output_width; j++) { - // Apply bilinear filter + /* Apply bilinear filter */ output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + ((int)src_ptr[pixel_step] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; src_ptr++; } - // Next row... + /* Next row... */ src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } @@ -264,15 +264,15 @@ static void var_filter_block2d_bil_second_pass { for (j = 0; j < output_width; j++) { - // Apply filter - Temp = ((int)src_ptr[0] * vp8_filter[0]) + + /* Apply filter */ + Temp = ((int)src_ptr[0] * vp8_filter[0]) + ((int)src_ptr[pixel_step] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2); output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); src_ptr++; } - // Next row... + /* Next row... */ src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } @@ -292,15 +292,15 @@ unsigned int vp8_sub_pixel_variance4x4_c { unsigned char temp2[20*16]; const short *HFilter, *VFilter; - unsigned short FData3[5*4]; // Temp data bufffer used in filtering + unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */ HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; - // First filter 1d Horizontal + /* First filter 1d Horizontal */ var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); - // Now filter Verticaly + /* Now filter Verticaly */ var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); @@ -318,7 +318,7 @@ unsigned int vp8_sub_pixel_variance8x8_c unsigned int *sse ) { - unsigned short FData3[9*8]; // Temp data bufffer used in filtering + unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; @@ -342,7 +342,7 @@ unsigned int vp8_sub_pixel_variance16x16_c unsigned int *sse ) { - unsigned short FData3[17*16]; // Temp data bufffer used in filtering + unsigned short FData3[17*16]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; @@ -418,7 +418,7 @@ unsigned int vp8_sub_pixel_variance16x8_c unsigned int *sse ) { - unsigned short FData3[16*9]; // Temp data bufffer used in filtering + unsigned short FData3[16*9]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; @@ -442,7 +442,7 @@ unsigned int vp8_sub_pixel_variance8x16_c unsigned int *sse ) { - unsigned short FData3[9*16]; // Temp data bufffer used in filtering + unsigned short FData3[9*16]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; diff --git a/vp8/common/vp8_entropymodedata.h b/vp8/common/vp8_entropymodedata.h index 13e9a92fc..13e9a92fc 100755..100644 --- a/vp8/common/vp8_entropymodedata.h +++ b/vp8/common/vp8_entropymodedata.h diff --git a/vp8/common/x86/postproc_x86.c b/vp8/common/x86/postproc_x86.c index a25921bee..3ec0106a8 100644 --- a/vp8/common/x86/postproc_x86.c +++ b/vp8/common/x86/postproc_x86.c @@ -18,4 +18,7 @@ extern int rand(void) { return __rand(); } +#else +/* ISO C forbids an empty translation unit. */ +int vp8_unused; #endif diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c index 2769a302b..afd642915 100644 --- a/vp8/common/x86/variance_sse2.c +++ b/vp8/common/x86/variance_sse2.c @@ -332,8 +332,9 @@ unsigned int vp8_sub_pixel_variance16x16_wmt unsigned int xxsum0, xxsum1; - // note we could avoid these if statements if the calling function - // just called the appropriate functions inside. + /* note we could avoid these if statements if the calling function + * just called the appropriate functions inside. + */ if (xoffset == 4 && yoffset == 0) { vp8_half_horiz_variance16x_h_sse2( diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c index 1be0d929d..ba2055cc9 100644 --- a/vp8/common/x86/variance_ssse3.c +++ b/vp8/common/x86/variance_ssse3.c @@ -79,8 +79,9 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3 int xsum0; unsigned int xxsum0; - // note we could avoid these if statements if the calling function - // just called the appropriate functions inside. + /* note we could avoid these if statements if the calling function + * just called the appropriate functions inside. + */ if (xoffset == 4 && yoffset == 0) { vp8_half_horiz_variance16x_h_sse2( diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c index 23a7fdcd2..3437a2367 100644 --- a/vp8/common/x86/vp8_asm_stubs.c +++ b/vp8/common/x86/vp8_asm_stubs.c @@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3 { if (yoffset) { - vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset); - vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset); + vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 16, 21, xoffset); + vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, + 16, yoffset); } else { /* First-pass only */ - vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset); + vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 16, xoffset); } } else { - /* Second-pass only */ - vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset); + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 16, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } } } @@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3 { if (yoffset) { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 8, 13, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, + 8, yoffset); } else { - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 8, xoffset); } } else { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset); + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 8, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } } } @@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3 { if (yoffset) { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 8, 9, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, + 4, yoffset); } else { /* First-pass only */ - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 4, xoffset); } } else { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 4, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } } } @@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3 { if (yoffset) { - vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset); - vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset); + vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + FData2, 4, 9, xoffset); + vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, + 4, yoffset); } else { - vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); + vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 4, xoffset); } } else { - vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); + if (yoffset) + { + vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 4, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + int r; + + for (r = 0; r < 4; r++) + { + #if !(CONFIG_FAST_UNALIGNED) + dst_ptr[0] = src_ptr[0]; + dst_ptr[1] = src_ptr[1]; + dst_ptr[2] = src_ptr[2]; + dst_ptr[3] = src_ptr[3]; + #else + *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ; + #endif + dst_ptr += dst_pitch; + src_ptr += src_pixels_per_line; + } + } } - } #endif |