summaryrefslogtreecommitdiff
path: root/vpx_scale/win32/scaleopt.c
diff options
context:
space:
mode:
author John Koleszar <jkoleszar@google.com> 2012-07-13 15:21:29 -0700
committer John Koleszar <jkoleszar@google.com> 2012-07-17 11:46:03 -0700
commit c6b9039fd94aede59ac1086a379955137fc8e1b8 (patch)
tree f9b20b2ca2114fe9303c8226bb3b368568fd5509 /vpx_scale/win32/scaleopt.c
parent 8697c6e454e02c6cf644daa9d29fabd07e846f18 (diff)
downloadlibvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.tar
libvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.tar.gz
libvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.tar.bz2
libvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.zip
Restyle code
Approximate the Google style guide[1] so that there's a written document to follow and tools to check compliance[2]. [1]: http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml [2]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py Change-Id: Idf40e3d8dddcc72150f6af127b13e5dab838685f
Diffstat (limited to 'vpx_scale/win32/scaleopt.c')
-rw-r--r-- vpx_scale/win32/scaleopt.c 1922
1 files changed, 945 insertions, 977 deletions
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
index 3711fe5eb..2d96cc7c1 100644
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
@@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51,
static
void horizontal_line_3_5_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void) dest_width;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void) dest_width;
- __asm
- {
+ __asm {
- push ebx
+ push ebx
- mov esi, source
- mov edi, dest
+ mov esi, source
+ mov edi, dest
- mov ecx, source_width
- lea edx, [esi+ecx-3];
+ mov ecx, source_width
+ lea edx, [esi+ecx-3];
- movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx
- movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx
+ movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx
+ movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx
- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
+ pxor mm7, mm7 // clear mm7
- horiz_line_3_5_loop:
+ horiz_line_3_5_loop:
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03
- mov ebx, eax
+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03
+ mov ebx, eax
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
+ and ebx, 0xffff00 // ebx = xx 01 02 xx
+ mov ecx, eax // ecx = 00 01 02 03
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
+ and eax, 0xffff0000 // eax = xx xx 02 03
+ xor ecx, eax // ecx = 00 01 xx xx
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
+ shr ebx, 8 // ebx = 01 02 xx xx
+ or eax, ebx // eax = 01 02 02 03
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx
+ shl ebx, 16 // ebx = xx xx 01 02
+ movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx
+ or ebx, ecx // ebx = 00 01 01 02
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 //
+ movd mm0, ebx // mm0 = 00 01 01 02
+ pmullw mm1, mm6 //
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 //
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
+ pmullw mm0, mm5 //
- mov [edi], ebx // writeoutput 00 xx xx xx
- add esi, 3
+ mov [edi], ebx // writeoutput 00 xx xx xx
+ add esi, 3
- add edi, 5
- paddw mm0, mm1
+ add edi, 5
+ paddw mm0, mm1
- paddw mm0, mm4
- psrlw mm0, 8
+ paddw mm0, mm4
+ psrlw mm0, 8
- cmp esi, edx
- packuswb mm0, mm7
+ cmp esi, edx
+ packuswb mm0, mm7
- movd DWORD Ptr [edi-4], mm0
- jl horiz_line_3_5_loop
+ movd DWORD Ptr [edi-4], mm0
+ jl horiz_line_3_5_loop
-//Exit:
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03
- mov ebx, eax
+// Exit:
+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03
+ mov ebx, eax
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
+ and ebx, 0xffff00 // ebx = xx 01 02 xx
+ mov ecx, eax // ecx = 00 01 02 03
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
+ and eax, 0xffff0000 // eax = xx xx 02 03
+ xor ecx, eax // ecx = 00 01 xx xx
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
+ shr ebx, 8 // ebx = 01 02 xx xx
+ or eax, ebx // eax = 01 02 02 03
- shl eax, 8 // eax = xx 01 02 02
- and eax, 0xffff0000 // eax = xx xx 02 02
+ shl eax, 8 // eax = xx 01 02 02
+ and eax, 0xffff0000 // eax = xx xx 02 02
- or eax, ebx // eax = 01 02 02 02
+ or eax, ebx // eax = 01 02 02 02
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx
+ shl ebx, 16 // ebx = xx xx 01 02
+ movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx
+ or ebx, ecx // ebx = 00 01 01 02
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 //
+ movd mm0, ebx // mm0 = 00 01 01 02
+ pmullw mm1, mm6 //
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 //
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
+ pmullw mm0, mm5 //
- mov [edi], ebx // writeoutput 00 xx xx xx
- paddw mm0, mm1
+ mov [edi], ebx // writeoutput 00 xx xx xx
+ paddw mm0, mm1
- paddw mm0, mm4
- psrlw mm0, 8
+ paddw mm0, mm4
+ psrlw mm0, 8
- packuswb mm0, mm7
- movd DWORD Ptr [edi+1], mm0
+ packuswb mm0, mm7
+ movd DWORD Ptr [edi+1], mm0
- pop ebx
+ pop ebx
- }
+ }
}
@@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx
static
void horizontal_line_4_5_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void)dest_width;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void)dest_width;
- __asm
- {
+ __asm {
- mov esi, source
- mov edi, dest
+ mov esi, source
+ mov edi, dest
- mov ecx, source_width
- lea edx, [esi+ecx-8];
+ mov ecx, source_width
+ lea edx, [esi+ecx-8];
- movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx
- movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx
+ movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx
+ movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx
- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
+ pxor mm7, mm7 // clear mm7
- horiz_line_4_5_loop:
+ horiz_line_4_5_loop:
- movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08
+ movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07
+ movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08
+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
+ movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
+ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51
+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
+ pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51
- paddw mm0, mm1 // added round values
- paddw mm0, mm4
+ paddw mm0, mm1 // added round values
+ paddw mm0, mm4
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7
+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
+ packuswb mm0, mm7
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- add edi, 10
+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
+ add edi, 10
- add esi, 8
- paddw mm2, mm3 //
+ add esi, 8
+ paddw mm2, mm3 //
- paddw mm2, mm4 // added round values
- cmp esi, edx
+ paddw mm2, mm4 // added round values
+ cmp esi, edx
- psrlw mm2, 8
- packuswb mm2, mm7
+ psrlw mm2, 8
+ packuswb mm2, mm7
- movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09
- jl horiz_line_4_5_loop
+ movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09
+ jl horiz_line_4_5_loop
-//Exit:
- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07
+// Exit:
+ movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
+ movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
+ psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
- movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00
- pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00
+ movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00
+ pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00
- psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07
- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
+ psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07
+ por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
- movq mm3, mm1
+ movq mm3, mm1
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
+ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51
+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
+ pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51
- paddw mm0, mm1 // added round values
- paddw mm0, mm4
+ paddw mm0, mm1 // added round values
+ paddw mm0, mm4
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx
+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
+ packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- paddw mm2, mm3 //
+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
+ paddw mm2, mm3 //
- paddw mm2, mm4 // added round values
- psrlw mm2, 8
+ paddw mm2, mm4 // added round values
+ psrlw mm2, 8
- packuswb mm2, mm7
- movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09
+ packuswb mm2, mm7
+ movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09
- }
+ }
}
/****************************************************************************
@@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx
static
void vertical_band_4_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+ lea edi, [esi+ecx*2] // tow lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- vs_4_5_loop:
+ vs_4_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, one_fifth
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, one_fifth
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 1/5
+ pmullw mm0, mm5 // a * 1/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 1/5
- movq mm6, four_fifths // constan
+ pmullw mm2, mm5 // a * 1/5
+ movq mm6, four_fifths // constan
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 4/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
+ pmullw mm5, mm6 // b * 4/5
+ paddw mm0, mm4 // a * 1/5 + b * 4/5
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 1/5 + b * 4/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm5, two_fifths
- movq mm2, mm0 // make a copy
+ movq mm5, two_fifths
+ movq mm2, mm0 // make a copy
- pmullw mm1, mm5 // b * 2/5
- movq mm6, three_fifths
+ pmullw mm1, mm5 // b * 2/5
+ movq mm6, three_fifths
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
+ punpcklbw mm0, mm7 // unpack low to word
+ pmullw mm3, mm5 // b * 2/5
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm4, mm0 // make copy of c
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
+ pmullw mm4, mm6 // c * 3/5
+ movq mm5, mm2
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
+ pmullw mm5, mm6 // c * 3/5
+ paddw mm1, mm4 // b * 2/5 + c * 3/5
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, round_values // + 128
+ paddw mm3, mm5 // b * 2/5 + c * 3/5
+ paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
- psrlw mm1, 8
+ paddw mm3, round_values // + 128
+ psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ movq mm1, [edi] // mm1=Src[3];
- // mm0, mm2 --- Src[2]
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
+ // mm0, mm2 --- Src[2]
+ // mm1 --- Src[3]
+ // mm6 --- 3/5
+ // mm7 for unpacking
- pmullw mm0, mm6 // c * 3/5
- movq mm5, two_fifths // mm5 = 2/5
+ pmullw mm0, mm6 // c * 3/5
+ movq mm5, two_fifths // mm5 = 2/5
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
+ movq mm3, mm1 // make a copy
+ pmullw mm2, mm6 // c * 3/5
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low
+ movq mm4, mm1 // make a copy
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
+ punpckhbw mm3, mm7 // unpack high
+ pmullw mm4, mm5 // d * 2/5
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
+ movq mm6, mm3 // make a copy
+ pmullw mm6, mm5 // d * 2/5
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
+ paddw mm0, mm4 // c * 3/5 + d * 2/5
+ paddw mm2, mm6 // c * 3/5 + d * 2/5
- paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
+ paddw mm0, round_values // + 128
+ paddw mm2, round_values // + 128
- psrlw mm0, 8
- psrlw mm2, 8
+ psrlw mm0, 8
+ psrlw mm2, 8
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
+ packuswb mm0, mm2 // des[3]
+ movq QWORD ptr [edi], mm0 // write des[3]
- // mm1, mm3 --- Src[3]
- // mm7 -- cleared for unpacking
+ // mm1, mm3 --- Src[3]
+ // mm7 -- cleared for unpacking
- movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group
+ movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group
- movq mm5, four_fifths // mm5 = 4/5
- pmullw mm1, mm5 // d * 4/5
+ movq mm5, four_fifths // mm5 = 4/5
+ pmullw mm1, mm5 // d * 4/5
- movq mm6, one_fifth // mm6 = 1/5
- movq mm2, mm0 // make a copy
+ movq mm6, one_fifth // mm6 = 1/5
+ movq mm2, mm0 // make a copy
- pmullw mm3, mm5 // d * 4/5
- punpcklbw mm0, mm7 // unpack low
+ pmullw mm3, mm5 // d * 4/5
+ punpcklbw mm0, mm7 // unpack low
- pmullw mm0, mm6 // an * 1/5
- punpckhbw mm2, mm7 // unpack high
+ pmullw mm0, mm6 // an * 1/5
+ punpckhbw mm2, mm7 // unpack high
- paddw mm1, mm0 // d * 4/5 + an * 1/5
- pmullw mm2, mm6 // an * 1/5
+ paddw mm1, mm0 // d * 4/5 + an * 1/5
+ pmullw mm2, mm6 // an * 1/5
- paddw mm3, mm2 // d * 4/5 + an * 1/5
- paddw mm1, round_values // + 128
+ paddw mm3, mm2 // d * 4/5 + an * 1/5
+ paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
- psrlw mm1, 8
+ paddw mm3, round_values // + 128
+ psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[4]
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[4]
- movq QWORD ptr [edi+ecx], mm1 // write des[4]
+ movq QWORD ptr [edi+ecx], mm1 // write des[4]
- add edi, 8
- add esi, 8
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg vs_4_5_loop
- }
+ sub edx, 8
+ jg vs_4_5_loop
+ }
}
/****************************************************************************
@@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx
static
void last_vertical_band_4_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+ lea edi, [esi+ecx*2] // tow lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- last_vs_4_5_loop:
+ last_vs_4_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, one_fifth
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, one_fifth
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 1/5
+ pmullw mm0, mm5 // a * 1/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 1/5
- movq mm6, four_fifths // constan
+ pmullw mm2, mm5 // a * 1/5
+ movq mm6, four_fifths // constan
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 4/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
+ pmullw mm5, mm6 // b * 4/5
+ paddw mm0, mm4 // a * 1/5 + b * 4/5
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 1/5 + b * 4/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm5, two_fifths
- movq mm2, mm0 // make a copy
+ movq mm5, two_fifths
+ movq mm2, mm0 // make a copy
- pmullw mm1, mm5 // b * 2/5
- movq mm6, three_fifths
+ pmullw mm1, mm5 // b * 2/5
+ movq mm6, three_fifths
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
+ punpcklbw mm0, mm7 // unpack low to word
+ pmullw mm3, mm5 // b * 2/5
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm4, mm0 // make copy of c
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
+ pmullw mm4, mm6 // c * 3/5
+ movq mm5, mm2
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
+ pmullw mm5, mm6 // c * 3/5
+ paddw mm1, mm4 // b * 2/5 + c * 3/5
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, round_values // + 128
+ paddw mm3, mm5 // b * 2/5 + c * 3/5
+ paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
- psrlw mm1, 8
+ paddw mm3, round_values // + 128
+ psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ movq mm1, [edi] // mm1=Src[3];
- movq QWORD ptr [edi+ecx], mm1 // write des[4];
+ movq QWORD ptr [edi+ecx], mm1 // write des[4];
- // mm0, mm2 --- Src[2]
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
+ // mm0, mm2 --- Src[2]
+ // mm1 --- Src[3]
+ // mm6 --- 3/5
+ // mm7 for unpacking
- pmullw mm0, mm6 // c * 3/5
- movq mm5, two_fifths // mm5 = 2/5
+ pmullw mm0, mm6 // c * 3/5
+ movq mm5, two_fifths // mm5 = 2/5
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
+ movq mm3, mm1 // make a copy
+ pmullw mm2, mm6 // c * 3/5
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low
+ movq mm4, mm1 // make a copy
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
+ punpckhbw mm3, mm7 // unpack high
+ pmullw mm4, mm5 // d * 2/5
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
+ movq mm6, mm3 // make a copy
+ pmullw mm6, mm5 // d * 2/5
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
+ paddw mm0, mm4 // c * 3/5 + d * 2/5
+ paddw mm2, mm6 // c * 3/5 + d * 2/5
- paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
+ paddw mm0, round_values // + 128
+ paddw mm2, round_values // + 128
- psrlw mm0, 8
- psrlw mm2, 8
+ psrlw mm0, 8
+ psrlw mm2, 8
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
+ packuswb mm0, mm2 // des[3]
+ movq QWORD ptr [edi], mm0 // write des[3]
- // mm1, mm3 --- Src[3]
- // mm7 -- cleared for unpacking
- add edi, 8
- add esi, 8
+ // mm1, mm3 --- Src[3]
+ // mm7 -- cleared for unpacking
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg last_vs_4_5_loop
- }
+ sub edx, 8
+ jg last_vs_4_5_loop
+ }
}
/****************************************************************************
@@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx
static
void vertical_band_3_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+ lea edi, [esi+ecx*2] // tow lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- vs_3_5_loop:
+ vs_3_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, two_fifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, two_fifths // mm5 = 2/5
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 2/5
+ pmullw mm0, mm5 // a * 2/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 2/5
- movq mm6, three_fifths // mm6 = 3/5
+ pmullw mm2, mm5 // a * 2/5
+ movq mm6, three_fifths // mm6 = 3/5
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 3/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
+ pmullw mm5, mm6 // b * 3/5
+ paddw mm0, mm4 // a * 2/5 + b * 3/5
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 2/5 + b * 3/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm4, mm1 // b low
- pmullw mm1, four_fifths // b * 4/5 low
+ movq mm4, mm1 // b low
+ pmullw mm1, four_fifths // b * 4/5 low
- movq mm5, mm3 // b high
- pmullw mm3, four_fifths // b * 4/5 high
+ movq mm5, mm3 // b high
+ pmullw mm3, four_fifths // b * 4/5 high
- movq mm2, mm0 // c
- pmullw mm4, one_fifth // b * 1/5
+ movq mm2, mm0 // c
+ pmullw mm4, one_fifth // b * 1/5
- punpcklbw mm0, mm7 // c low
- pmullw mm5, one_fifth // b * 1/5
+ punpcklbw mm0, mm7 // c low
+ pmullw mm5, one_fifth // b * 1/5
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
+ movq mm6, mm0 // make copy of c low
+ punpckhbw mm2, mm7 // c high
- pmullw mm6, one_fifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, one_fifth // c * 1/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, one_fifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
+ pmullw mm7, one_fifth // c * 1/5 high
+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high
+ movq mm6, mm0 // make copy of c low
- pmullw mm6, four_fifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, four_fifths // c * 4/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, four_fifths // c * 4/5 high
+ pmullw mm7, four_fifths // c * 4/5 high
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low
+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high
- paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
+ paddw mm1, round_values // + 128
+ paddw mm3, round_values // + 128
- psrlw mm1, 8
- psrlw mm3, 8
+ psrlw mm1, 8
+ psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ packuswb mm1, mm3 // des[2]
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- paddw mm4, round_values // + 128
- paddw mm5, round_values // + 128
+ paddw mm4, round_values // + 128
+ paddw mm5, round_values // + 128
- psrlw mm4, 8
- psrlw mm5, 8
+ psrlw mm4, 8
+ psrlw mm5, 8
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
+ packuswb mm4, mm5 // des[3]
+ movq QWORD ptr [edi], mm4 // write des[3]
- // mm0, mm2 --- Src[3]
+ // mm0, mm2 --- Src[3]
- pxor mm7, mm7 // clear mm7 for unpacking
- movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group
+ pxor mm7, mm7 // clear mm7 for unpacking
+ movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group
- movq mm5, three_fifths // mm5 = 3/5
- pmullw mm0, mm5 // d * 3/5
+ movq mm5, three_fifths // mm5 = 3/5
+ pmullw mm0, mm5 // d * 3/5
- movq mm6, two_fifths // mm6 = 2/5
- movq mm3, mm1 // make a copy
+ movq mm6, two_fifths // mm6 = 2/5
+ movq mm3, mm1 // make a copy
- pmullw mm2, mm5 // d * 3/5
- punpcklbw mm1, mm7 // unpack low
+ pmullw mm2, mm5 // d * 3/5
+ punpcklbw mm1, mm7 // unpack low
- pmullw mm1, mm6 // an * 2/5
- punpckhbw mm3, mm7 // unpack high
+ pmullw mm1, mm6 // an * 2/5
+ punpckhbw mm3, mm7 // unpack high
- paddw mm0, mm1 // d * 3/5 + an * 2/5
- pmullw mm3, mm6 // an * 2/5
+ paddw mm0, mm1 // d * 3/5 + an * 2/5
+ pmullw mm3, mm6 // an * 2/5
- paddw mm2, mm3 // d * 3/5 + an * 2/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm3 // d * 3/5 + an * 2/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des[4]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des[4]
- movq QWORD ptr [edi+ecx], mm0 // write des[4]
+ movq QWORD ptr [edi+ecx], mm0 // write des[4]
- add edi, 8
- add esi, 8
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg vs_3_5_loop
- }
+ sub edx, 8
+ jg vs_3_5_loop
+ }
}
/****************************************************************************
@@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx
static
void last_vertical_band_3_5_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
+ lea edi, [esi+ecx*2] // two lines below
+ add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- last_vs_3_5_loop:
+ last_vs_3_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
- movq mm5, two_fifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
+ movq mm5, two_fifths // mm5 = 2/5
+ punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 2/5
+ pmullw mm0, mm5 // a * 2/5
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 2/5
- movq mm6, three_fifths // mm6 = 3/5
+ pmullw mm2, mm5 // a * 2/5
+ movq mm6, three_fifths // mm6 = 3/5
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 3/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
+ pmullw mm5, mm6 // b * 3/5
+ paddw mm0, mm4 // a * 2/5 + b * 3/5
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, round_values // + 128
+ paddw mm2, mm5 // a * 2/5 + b * 3/5
+ paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
- psrlw mm0, 8
+ paddw mm2, round_values // + 128
+ psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
- movq mm4, mm1 // b low
- pmullw mm1, four_fifths // b * 4/5 low
+ movq mm4, mm1 // b low
+ pmullw mm1, four_fifths // b * 4/5 low
- movq QWORD ptr [edi+ecx], mm0 // write des[4]
+ movq QWORD ptr [edi+ecx], mm0 // write des[4]
- movq mm5, mm3 // b high
- pmullw mm3, four_fifths // b * 4/5 high
+ movq mm5, mm3 // b high
+ pmullw mm3, four_fifths // b * 4/5 high
- movq mm2, mm0 // c
- pmullw mm4, one_fifth // b * 1/5
+ movq mm2, mm0 // c
+ pmullw mm4, one_fifth // b * 1/5
- punpcklbw mm0, mm7 // c low
- pmullw mm5, one_fifth // b * 1/5
+ punpcklbw mm0, mm7 // c low
+ pmullw mm5, one_fifth // b * 1/5
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
+ movq mm6, mm0 // make copy of c low
+ punpckhbw mm2, mm7 // c high
- pmullw mm6, one_fifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, one_fifth // c * 1/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, one_fifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
+ pmullw mm7, one_fifth // c * 1/5 high
+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high
+ movq mm6, mm0 // make copy of c low
- pmullw mm6, four_fifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
+ pmullw mm6, four_fifths // c * 4/5 low
+ movq mm7, mm2 // make copy of c high
- pmullw mm7, four_fifths // c * 4/5 high
+ pmullw mm7, four_fifths // c * 4/5 high
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low
+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high
- paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
+ paddw mm1, round_values // + 128
+ paddw mm3, round_values // + 128
- psrlw mm1, 8
- psrlw mm3, 8
+ psrlw mm1, 8
+ psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ packuswb mm1, mm3 // des[2]
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- paddw mm4, round_values // + 128
- paddw mm5, round_values // + 128
+ paddw mm4, round_values // + 128
+ paddw mm5, round_values // + 128
- psrlw mm4, 8
- psrlw mm5, 8
+ psrlw mm4, 8
+ psrlw mm5, 8
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
+ packuswb mm4, mm5 // des[3]
+ movq QWORD ptr [edi], mm4 // write des[3]
- // mm0, mm2 --- Src[3]
+ // mm0, mm2 --- Src[3]
- add edi, 8
- add esi, 8
+ add edi, 8
+ add esi, 8
- sub edx, 8
- jg last_vs_3_5_loop
- }
+ sub edx, 8
+ jg last_vs_3_5_loop
+ }
}
/****************************************************************************
@@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx
static
void vertical_band_1_2_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
+ pxor mm7, mm7 // clear out mm7
+ mov edx, dest_width // Loop counter
- vs_1_2_loop:
+ vs_1_2_loop:
- movq mm0, [esi] // get Src[0]
- movq mm1, [esi + ecx * 2] // get Src[1]
+ movq mm0, [esi] // get Src[0]
+ movq mm1, [esi + ecx * 2] // get Src[1]
- movq mm2, mm0 // make copy before unpack
- movq mm3, mm1 // make copy before unpack
+ movq mm2, mm0 // make copy before unpack
+ movq mm3, mm1 // make copy before unpack
- punpcklbw mm0, mm7 // low Src[0]
- movq mm6, four_ones // mm6= 1, 1, 1, 1
+ punpcklbw mm0, mm7 // low Src[0]
+ movq mm6, four_ones // mm6= 1, 1, 1, 1
- punpcklbw mm1, mm7 // low Src[1]
- paddw mm0, mm1 // low (a + b)
+ punpcklbw mm1, mm7 // low Src[1]
+ paddw mm0, mm1 // low (a + b)
- punpckhbw mm2, mm7 // high Src[0]
- paddw mm0, mm6 // low (a + b + 1)
+ punpckhbw mm2, mm7 // high Src[0]
+ paddw mm0, mm6 // low (a + b + 1)
- punpckhbw mm3, mm7
- paddw mm2, mm3 // high (a + b )
+ punpckhbw mm3, mm7
+ paddw mm2, mm3 // high (a + b )
- psraw mm0, 1 // low (a + b +1 )/2
- paddw mm2, mm6 // high (a + b + 1)
+ psraw mm0, 1 // low (a + b +1 )/2
+ paddw mm2, mm6 // high (a + b + 1)
- psraw mm2, 1 // high (a + b + 1)/2
- packuswb mm0, mm2 // pack results
+ psraw mm2, 1 // high (a + b + 1)/2
+ packuswb mm0, mm2 // pack results
- movq [esi+ecx], mm0 // write out eight bytes
- add esi, 8
+ movq [esi+ecx], mm0 // write out eight bytes
+ add esi, 8
- sub edx, 8
- jg vs_1_2_loop
- }
+ sub edx, 8
+ jg vs_1_2_loop
+ }
}
@@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx
static
void last_vertical_band_1_2_scale_mmx
(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-)
-{
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
+ unsigned char *dest,
+ unsigned int dest_pitch,
+ unsigned int dest_width
+) {
+ __asm {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, dest_pitch // Get the pitch size
- mov edx, dest_width // Loop counter
+ mov edx, dest_width // Loop counter
- last_vs_1_2_loop:
+ last_vs_1_2_loop:
- movq mm0, [esi] // get Src[0]
- movq [esi+ecx], mm0 // write out eight bytes
+ movq mm0, [esi] // get Src[0]
+ movq [esi+ecx], mm0 // write out eight bytes
- add esi, 8
- sub edx, 8
+ add esi, 8
+ sub edx, 8
- jg last_vs_1_2_loop
- }
+ jg last_vs_1_2_loop
+ }
}
/****************************************************************************
@@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx
static
void horizontal_line_1_2_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void) dest_width;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void) dest_width;
- __asm
- {
- mov esi, source
- mov edi, dest
+ __asm {
+ mov esi, source
+ mov edi, dest
- pxor mm7, mm7
- movq mm6, four_ones
+ pxor mm7, mm7
+ movq mm6, four_ones
- mov ecx, source_width
+ mov ecx, source_width
- hs_1_2_loop:
+ hs_1_2_loop:
- movq mm0, [esi]
- movq mm1, [esi+1]
+ movq mm0, [esi]
+ movq mm1, [esi+1]
- movq mm2, mm0
- movq mm3, mm1
+ movq mm2, mm0
+ movq mm3, mm1
- movq mm4, mm0
- punpcklbw mm0, mm7
+ movq mm4, mm0
+ punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- paddw mm0, mm1
+ punpcklbw mm1, mm7
+ paddw mm0, mm1
- paddw mm0, mm6
- punpckhbw mm2, mm7
+ paddw mm0, mm6
+ punpckhbw mm2, mm7
- punpckhbw mm3, mm7
- paddw mm2, mm3
+ punpckhbw mm3, mm7
+ paddw mm2, mm3
- paddw mm2, mm6
- psraw mm0, 1
+ paddw mm2, mm6
+ psraw mm0, 1
- psraw mm2, 1
- packuswb mm0, mm2
+ psraw mm2, 1
+ packuswb mm0, mm2
- movq mm2, mm4
- punpcklbw mm2, mm0
+ movq mm2, mm4
+ punpcklbw mm2, mm0
- movq [edi], mm2
- punpckhbw mm4, mm0
+ movq [edi], mm2
+ punpckhbw mm4, mm0
- movq [edi+8], mm4
- add esi, 8
+ movq [edi+8], mm4
+ add esi, 8
- add edi, 16
- sub ecx, 8
+ add edi, 16
+ sub ecx, 8
- cmp ecx, 8
- jg hs_1_2_loop
+ cmp ecx, 8
+ jg hs_1_2_loop
// last eight pixel
- movq mm0, [esi]
- movq mm1, mm0
+ movq mm0, [esi]
+ movq mm1, mm0
- movq mm2, mm0
- movq mm3, mm1
+ movq mm2, mm0
+ movq mm3, mm1
- psrlq mm1, 8
- psrlq mm3, 56
+ psrlq mm1, 8
+ psrlq mm3, 56
- psllq mm3, 56
- por mm1, mm3
+ psllq mm3, 56
+ por mm1, mm3
- movq mm3, mm1
- movq mm4, mm0
+ movq mm3, mm1
+ movq mm4, mm0
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
- paddw mm0, mm1
- paddw mm0, mm6
+ paddw mm0, mm1
+ paddw mm0, mm6
- punpckhbw mm2, mm7
- punpckhbw mm3, mm7
+ punpckhbw mm2, mm7
+ punpckhbw mm3, mm7
- paddw mm2, mm3
- paddw mm2, mm6
+ paddw mm2, mm3
+ paddw mm2, mm6
- psraw mm0, 1
- psraw mm2, 1
+ psraw mm0, 1
+ psraw mm2, 1
- packuswb mm0, mm2
- movq mm2, mm4
+ packuswb mm0, mm2
+ movq mm2, mm4
- punpcklbw mm2, mm0
- movq [edi], mm2
+ punpcklbw mm2, mm0
+ movq [edi], mm2
- punpckhbw mm4, mm0
- movq [edi+8], mm4
- }
+ punpckhbw mm4, mm0
+ movq [edi+8], mm4
+ }
}
@@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128,
static
void horizontal_line_5_4_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- /*
- unsigned i;
- unsigned int a, b, c, d, e;
- unsigned char *des = dest;
- const unsigned char *src = source;
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ /*
+ unsigned i;
+ unsigned int a, b, c, d, e;
+ unsigned char *des = dest;
+ const unsigned char *src = source;
- (void) dest_width;
+ (void) dest_width;
- for ( i=0; i<source_width; i+=5 )
- {
- a = src[0];
- b = src[1];
- c = src[2];
- d = src[3];
- e = src[4];
+ for ( i=0; i<source_width; i+=5 )
+ {
+ a = src[0];
+ b = src[1];
+ c = src[2];
+ d = src[3];
+ e = src[4];
- des[0] = a;
- des[1] = ((b*192 + c* 64 + 128)>>8);
- des[2] = ((c*128 + d*128 + 128)>>8);
- des[3] = ((d* 64 + e*192 + 128)>>8);
+ des[0] = a;
+ des[1] = ((b*192 + c* 64 + 128)>>8);
+ des[2] = ((c*128 + d*128 + 128)>>8);
+ des[3] = ((d* 64 + e*192 + 128)>>8);
- src += 5;
- des += 4;
- }
- */
- (void) dest_width;
+ src += 5;
+ des += 4;
+ }
+ */
+ (void) dest_width;
- __asm
- {
+ __asm {
- mov esi, source ;
- mov edi, dest ;
+ mov esi, source;
+ mov edi, dest;
- mov ecx, source_width ;
- movq mm5, const54_1 ;
+ mov ecx, source_width;
+ movq mm5, const54_1;
- pxor mm7, mm7 ;
- movq mm6, const54_2 ;
+ pxor mm7, mm7;
+ movq mm6, const54_2;
- movq mm4, round_values ;
- lea edx, [esi+ecx] ;
- horizontal_line_5_4_loop:
+ movq mm4, round_values;
+ lea edx, [esi+ecx];
+ horizontal_line_5_4_loop:
- movq mm0, QWORD PTR [esi] ;
- 00 01 02 03 04 05 06 07
- movq mm1, mm0 ;
- 00 01 02 03 04 05 06 07
+ movq mm0, QWORD PTR [esi];
+ // 00 01 02 03 04 05 06 07
+ movq mm1, mm0;
+ // 00 01 02 03 04 05 06 07
- psrlq mm0, 8 ;
- 01 02 03 04 05 06 07 xx
- punpcklbw mm1, mm7 ;
- xx 00 xx 01 xx 02 xx 03
+ psrlq mm0, 8;
+ // 01 02 03 04 05 06 07 xx
+ punpcklbw mm1, mm7;
+ // xx 00 xx 01 xx 02 xx 03
- punpcklbw mm0, mm7 ;
- xx 01 xx 02 xx 03 xx 04
- pmullw mm1, mm5
+ punpcklbw mm0, mm7;
+ // xx 01 xx 02 xx 03 xx 04
+ pmullw mm1, mm5
- pmullw mm0, mm6
- add esi, 5
+ pmullw mm0, mm6
+ add esi, 5
- add edi, 4
- paddw mm1, mm0
+ add edi, 4
+ paddw mm1, mm0
- paddw mm1, mm4
- psrlw mm1, 8
+ paddw mm1, mm4
+ psrlw mm1, 8
- cmp esi, edx
- packuswb mm1, mm7
+ cmp esi, edx
+ packuswb mm1, mm7
- movd DWORD PTR [edi-4], mm1
+ movd DWORD PTR [edi-4], mm1
- jl horizontal_line_5_4_loop
+ jl horizontal_line_5_4_loop
- }
+ }
}
__declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 };
@@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128,
__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 };
static
-void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
+void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
- __asm
- {
- push ebx
+ __asm {
+ push ebx
- mov esi, source // Get the source and destination pointer
- mov ecx, src_pitch // Get the pitch size
+ mov esi, source // Get the source pointer
+ mov ecx, src_pitch // Get the pitch size
- mov edi, dest // tow lines below
- pxor mm7, mm7 // clear out mm7
+ mov edi, dest // Get the destination pointer
+ pxor mm7, mm7 // clear out mm7
- mov edx, dest_pitch // Loop counter
- mov ebx, dest_width
+ mov edx, dest_pitch // Get the destination pitch (loop counter is ebx)
+ mov ebx, dest_width
- vs_5_4_loop:
+ vs_5_4_loop:
- movd mm0, DWORD ptr [esi] // src[0];
- movd mm1, DWORD ptr [esi+ecx] // src[1];
+ movd mm0, DWORD ptr [esi] // src[0];
+ movd mm1, DWORD ptr [esi+ecx] // src[1];
- movd mm2, DWORD ptr [esi+ecx*2]
- lea eax, [esi+ecx*2] //
+ movd mm2, DWORD ptr [esi+ecx*2]
+ lea eax, [esi+ecx*2] //
- punpcklbw mm1, mm7
- punpcklbw mm2, mm7
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
- movq mm3, mm2
- pmullw mm1, three_fourths
+ movq mm3, mm2
+ pmullw mm1, three_fourths
- pmullw mm2, one_fourths
- movd mm4, [eax+ecx]
+ pmullw mm2, one_fourths
+ movd mm4, [eax+ecx]
- pmullw mm3, two_fourths
- punpcklbw mm4, mm7
+ pmullw mm3, two_fourths
+ punpcklbw mm4, mm7
- movq mm5, mm4
- pmullw mm4, two_fourths
+ movq mm5, mm4
+ pmullw mm4, two_fourths
- paddw mm1, mm2
- movd mm6, [eax+ecx*2]
+ paddw mm1, mm2
+ movd mm6, [eax+ecx*2]
- pmullw mm5, one_fourths
- paddw mm1, round_values;
+ pmullw mm5, one_fourths
+ paddw mm1, round_values;
- paddw mm3, mm4
- psrlw mm1, 8
+ paddw mm3, mm4
+ psrlw mm1, 8
- punpcklbw mm6, mm7
- paddw mm3, round_values
+ punpcklbw mm6, mm7
+ paddw mm3, round_values
- pmullw mm6, three_fourths
- psrlw mm3, 8
+ pmullw mm6, three_fourths
+ psrlw mm3, 8
- packuswb mm1, mm7
- packuswb mm3, mm7
+ packuswb mm1, mm7
+ packuswb mm3, mm7
- movd DWORD PTR [edi], mm0
- movd DWORD PTR [edi+edx], mm1
+ movd DWORD PTR [edi], mm0
+ movd DWORD PTR [edi+edx], mm1
- paddw mm5, mm6
- movd DWORD PTR [edi+edx*2], mm3
+ paddw mm5, mm6
+ movd DWORD PTR [edi+edx*2], mm3
- lea eax, [edi+edx*2]
- paddw mm5, round_values
+ lea eax, [edi+edx*2]
+ paddw mm5, round_values
- psrlw mm5, 8
- add edi, 4
+ psrlw mm5, 8
+ add edi, 4
- packuswb mm5, mm7
- movd DWORD PTR [eax+edx], mm5
+ packuswb mm5, mm7
+ movd DWORD PTR [eax+edx], mm5
- add esi, 4
- sub ebx, 4
+ add esi, 4
+ sub ebx, 4
- jg vs_5_4_loop
+ jg vs_5_4_loop
- pop ebx
- }
+ pop ebx
+ }
}
@@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85,
static
void horizontal_line_5_3_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
- (void) dest_width;
- __asm
- {
+ (void) dest_width;
+ __asm {
- mov esi, source ;
- mov edi, dest ;
+ mov esi, source;
+ mov edi, dest;
- mov ecx, source_width ;
- movq mm5, const53_1 ;
+ mov ecx, source_width;
+ movq mm5, const53_1;
- pxor mm7, mm7 ;
- movq mm6, const53_2 ;
+ pxor mm7, mm7;
+ movq mm6, const53_2;
- movq mm4, round_values ;
- lea edx, [esi+ecx-5] ;
- horizontal_line_5_3_loop:
+ movq mm4, round_values;
+ lea edx, [esi+ecx-5];
+ horizontal_line_5_3_loop:
- movq mm0, QWORD PTR [esi] ;
- 00 01 02 03 04 05 06 07
- movq mm1, mm0 ;
- 00 01 02 03 04 05 06 07
+ movq mm0, QWORD PTR [esi];
+ // 00 01 02 03 04 05 06 07
+ movq mm1, mm0;
+ // 00 01 02 03 04 05 06 07
- psllw mm0, 8 ;
- xx 00 xx 02 xx 04 xx 06
- psrlw mm1, 8 ;
- 01 xx 03 xx 05 xx 07 xx
+ psllw mm0, 8;
+ // xx 00 xx 02 xx 04 xx 06
+ psrlw mm1, 8;
+ // 01 xx 03 xx 05 xx 07 xx
- psrlw mm0, 8 ;
- 00 xx 02 xx 04 xx 06 xx
- psllq mm1, 16 ;
- xx xx 01 xx 03 xx 05 xx
+ psrlw mm0, 8;
+ // 00 xx 02 xx 04 xx 06 xx
+ psllq mm1, 16;
+ // xx xx 01 xx 03 xx 05 xx
- pmullw mm0, mm6
+ pmullw mm0, mm6
- pmullw mm1, mm5
- add esi, 5
+ pmullw mm1, mm5
+ add esi, 5
- add edi, 3
- paddw mm1, mm0
+ add edi, 3
+ paddw mm1, mm0
- paddw mm1, mm4
- psrlw mm1, 8
+ paddw mm1, mm4
+ psrlw mm1, 8
- cmp esi, edx
- packuswb mm1, mm7
+ cmp esi, edx
+ packuswb mm1, mm7
- movd DWORD PTR [edi-3], mm1
- jl horizontal_line_5_3_loop
+ movd DWORD PTR [edi-3], mm1
+ jl horizontal_line_5_3_loop
-//exit condition
- movq mm0, QWORD PTR [esi] ;
- 00 01 02 03 04 05 06 07
- movq mm1, mm0 ;
- 00 01 02 03 04 05 06 07
+// exit condition
+ movq mm0, QWORD PTR [esi];
+ // 00 01 02 03 04 05 06 07
+ movq mm1, mm0;
+ // 00 01 02 03 04 05 06 07
- psllw mm0, 8 ;
- xx 00 xx 02 xx 04 xx 06
- psrlw mm1, 8 ;
- 01 xx 03 xx 05 xx 07 xx
+ psllw mm0, 8;
+ // xx 00 xx 02 xx 04 xx 06
+ psrlw mm1, 8;
+ // 01 xx 03 xx 05 xx 07 xx
- psrlw mm0, 8 ;
- 00 xx 02 xx 04 xx 06 xx
- psllq mm1, 16 ;
- xx xx 01 xx 03 xx 05 xx
+ psrlw mm0, 8;
+ // 00 xx 02 xx 04 xx 06 xx
+ psllq mm1, 16;
+ // xx xx 01 xx 03 xx 05 xx
- pmullw mm0, mm6
+ pmullw mm0, mm6
- pmullw mm1, mm5
- paddw mm1, mm0
+ pmullw mm1, mm5
+ paddw mm1, mm0
- paddw mm1, mm4
- psrlw mm1, 8
+ paddw mm1, mm4
+ psrlw mm1, 8
- packuswb mm1, mm7
- movd eax, mm1
+ packuswb mm1, mm7
+ movd eax, mm1
- mov edx, eax
- shr edx, 16
+ mov edx, eax
+ shr edx, 16
- mov WORD PTR[edi], ax
- mov BYTE PTR[edi+2], dl
+ mov WORD PTR[edi], ax
+ mov BYTE PTR[edi+2], dl
- }
+ }
}
@@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85
__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 };
static
-void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
+void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
- __asm
- {
- push ebx
+ __asm {
+ push ebx
- mov esi, source // Get the source and destination pointer
- mov ecx, src_pitch // Get the pitch size
+ mov esi, source // Get the source pointer
+ mov ecx, src_pitch // Get the pitch size
- mov edi, dest // tow lines below
- pxor mm7, mm7 // clear out mm7
+ mov edi, dest // Get the destination pointer
+ pxor mm7, mm7 // clear out mm7
- mov edx, dest_pitch // Loop counter
- movq mm5, one_thirds
+ mov edx, dest_pitch // Get the destination pitch (loop counter is ebx)
+ movq mm5, one_thirds
- movq mm6, two_thirds
- mov ebx, dest_width;
+ movq mm6, two_thirds
+ mov ebx, dest_width;
- vs_5_3_loop:
+ vs_5_3_loop:
- movd mm0, DWORD ptr [esi] // src[0];
- movd mm1, DWORD ptr [esi+ecx] // src[1];
+ movd mm0, DWORD ptr [esi] // src[0];
+ movd mm1, DWORD ptr [esi+ecx] // src[1];
- movd mm2, DWORD ptr [esi+ecx*2]
- lea eax, [esi+ecx*2] //
+ movd mm2, DWORD ptr [esi+ecx*2]
+ lea eax, [esi+ecx*2] //
- punpcklbw mm1, mm7
- punpcklbw mm2, mm7
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
- pmullw mm1, mm5
- pmullw mm2, mm6
+ pmullw mm1, mm5
+ pmullw mm2, mm6
- movd mm3, DWORD ptr [eax+ecx]
- movd mm4, DWORD ptr [eax+ecx*2]
+ movd mm3, DWORD ptr [eax+ecx]
+ movd mm4, DWORD ptr [eax+ecx*2]
- punpcklbw mm3, mm7
- punpcklbw mm4, mm7
+ punpcklbw mm3, mm7
+ punpcklbw mm4, mm7
- pmullw mm3, mm6
- pmullw mm4, mm5
+ pmullw mm3, mm6
+ pmullw mm4, mm5
- movd DWORD PTR [edi], mm0
- paddw mm1, mm2
+ movd DWORD PTR [edi], mm0
+ paddw mm1, mm2
- paddw mm1, round_values
- psrlw mm1, 8
+ paddw mm1, round_values
+ psrlw mm1, 8
- packuswb mm1, mm7
- paddw mm3, mm4
+ packuswb mm1, mm7
+ paddw mm3, mm4
- paddw mm3, round_values
- movd DWORD PTR [edi+edx], mm1
+ paddw mm3, round_values
+ movd DWORD PTR [edi+edx], mm1
- psrlw mm3, 8
- packuswb mm3, mm7
+ psrlw mm3, 8
+ packuswb mm3, mm7
- movd DWORD PTR [edi+edx*2], mm3
+ movd DWORD PTR [edi+edx*2], mm3
- add edi, 4
- add esi, 4
+ add edi, 4
+ add esi, 4
- sub ebx, 4
- jg vs_5_3_loop
+ sub ebx, 4
+ jg vs_5_3_loop
- pop ebx
- }
+ pop ebx
+ }
}
@@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch,
static
void horizontal_line_2_1_scale_mmx
(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-)
-{
- (void) dest_width;
- (void) source_width;
- __asm
- {
- mov esi, source
- mov edi, dest
-
- pxor mm7, mm7
- mov ecx, dest_width
-
- xor edx, edx
- hs_2_1_loop:
-
- movq mm0, [esi+edx*2]
- psllw mm0, 8
-
- psrlw mm0, 8
- packuswb mm0, mm7
-
- movd DWORD Ptr [edi+edx], mm0;
- add edx, 4
-
- cmp edx, ecx
- jl hs_2_1_loop
-
- }
+ const unsigned char *source,
+ unsigned int source_width,
+ unsigned char *dest,
+ unsigned int dest_width
+) {
+ (void) dest_width;
+ (void) source_width;
+ __asm {
+ mov esi, source
+ mov edi, dest
+
+ pxor mm7, mm7
+ mov ecx, dest_width
+
+ xor edx, edx
+ hs_2_1_loop:
+
+ movq mm0, [esi+edx*2]
+ psllw mm0, 8
+
+ psrlw mm0, 8
+ packuswb mm0, mm7
+
+ movd DWORD Ptr [edi+edx], mm0;
+ add edx, 4
+
+ cmp edx, ecx
+ jl hs_2_1_loop
+
+ }
}
static
-void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
- (void) dest_pitch;
- (void) src_pitch;
- vpx_memcpy(dest, source, dest_width);
+void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
+ (void) dest_pitch;
+ (void) src_pitch;
+ vpx_memcpy(dest, source, dest_width);
}
@@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4
__declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 };
static
-void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
-{
+void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
- (void) dest_pitch;
- __asm
- {
- mov esi, source
- mov edi, dest
+ (void) dest_pitch;
+ __asm {
+ mov esi, source
+ mov edi, dest
- mov eax, src_pitch
- mov edx, dest_width
+ mov eax, src_pitch
+ mov edx, dest_width
- pxor mm7, mm7
- sub esi, eax //back one line
+ pxor mm7, mm7
+ sub esi, eax // back one line
- lea ecx, [esi+edx];
- movq mm6, round_values;
+ lea ecx, [esi+edx];
+ movq mm6, round_values;
- movq mm5, three_sixteenths;
- movq mm4, ten_sixteenths;
+ movq mm5, three_sixteenths;
+ movq mm4, ten_sixteenths;
- vs_2_1_i_loop:
- movd mm0, [esi] //
- movd mm1, [esi+eax] //
+ vs_2_1_i_loop:
+ movd mm0, [esi] //
+ movd mm1, [esi+eax] //
- movd mm2, [esi+eax*2] //
- punpcklbw mm0, mm7
+ movd mm2, [esi+eax*2] //
+ punpcklbw mm0, mm7
- pmullw mm0, mm5
- punpcklbw mm1, mm7
+ pmullw mm0, mm5
+ punpcklbw mm1, mm7
- pmullw mm1, mm4
- punpcklbw mm2, mm7
+ pmullw mm1, mm4
+ punpcklbw mm2, mm7
- pmullw mm2, mm5
- paddw mm0, round_values
+ pmullw mm2, mm5
+ paddw mm0, round_values
- paddw mm1, mm2
- paddw mm0, mm1
+ paddw mm1, mm2
+ paddw mm0, mm1
- psrlw mm0, 8
- packuswb mm0, mm7
+ psrlw mm0, 8
+ packuswb mm0, mm7
- movd DWORD PTR [edi], mm0
- add esi, 4
+ movd DWORD PTR [edi], mm0
+ add esi, 4
- add edi, 4;
- cmp esi, ecx
- jl vs_2_1_i_loop
+ add edi, 4;
+ cmp esi, ecx
+ jl vs_2_1_i_loop
- }
+ }
}
void
-register_mmxscalers(void)
-{
- vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx;
- vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx;
- vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx;
- vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx;
- vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx;
- vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx;
- vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx;
- vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx;
- vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx;
-
- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
-
-
-
- vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx;
- vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx;
- vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx;
- vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx;
- vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx;
- vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx;
- vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx;
+register_mmxscalers(void) {
+ vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx;
+ vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx;
+ vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx;
+ vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx;
+ vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx;
+ vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx;
+ vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx;
+ vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx;
+ vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx;
+
+ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
+ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
+ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
+ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
+ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
+ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
+
+
+
+ vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx;
+ vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx;
+ vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx;
+ vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx;
+ vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx;
+ vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx;
+ vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx;