diff options
author | John Koleszar <jkoleszar@google.com> | 2012-07-13 15:21:29 -0700 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2012-07-17 11:46:03 -0700 |
commit | c6b9039fd94aede59ac1086a379955137fc8e1b8 (patch) | |
tree | f9b20b2ca2114fe9303c8226bb3b368568fd5509 /vpx_scale/win32/scaleopt.c | |
parent | 8697c6e454e02c6cf644daa9d29fabd07e846f18 (diff) | |
download | libvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.tar libvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.tar.gz libvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.tar.bz2 libvpx-c6b9039fd94aede59ac1086a379955137fc8e1b8.zip |
Restyle code
Approximate the Google style guide[1] so that there's a written
document to follow and tools to check compliance[2].
[1]: http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
[2]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
Change-Id: Idf40e3d8dddcc72150f6af127b13e5dab838685f
Diffstat (limited to 'vpx_scale/win32/scaleopt.c')
-rw-r--r-- | vpx_scale/win32/scaleopt.c | 1922 |
1 files changed, 945 insertions, 977 deletions
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c index 3711fe5eb..2d96cc7c1 100644 --- a/vpx_scale/win32/scaleopt.c +++ b/vpx_scale/win32/scaleopt.c @@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, static void horizontal_line_3_5_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; - __asm - { + __asm { - push ebx + push ebx - mov esi, source - mov edi, dest + mov esi, source + mov edi, dest - mov ecx, source_width - lea edx, [esi+ecx-3]; + mov ecx, source_width + lea edx, [esi+ecx-3]; - movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx - movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx + movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx + movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 + movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx + pxor mm7, mm7 // clear mm7 - horiz_line_3_5_loop: + horiz_line_3_5_loop: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax + mov eax, DWORD PTR [esi] // eax = 00 01 02 03 + mov ebx, eax - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 + and ebx, 0xffff00 // ebx = xx 01 02 xx + mov ecx, eax // ecx = 00 01 02 03 - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx + and eax, 0xffff0000 // eax = xx xx 02 03 + xor ecx, eax // ecx = 00 01 xx xx - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 + shr ebx, 8 // ebx = 01 02 xx xx + or eax, ebx // eax = 01 02 02 03 - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx + shl ebx, 16 // ebx = xx xx 01 02 + movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - or ebx, ecx // ebx 
= 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx + or ebx, ecx // ebx = 00 01 01 02 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // + movd mm0, ebx // mm0 = 00 01 01 02 + pmullw mm1, mm6 // - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx + pmullw mm0, mm5 // - mov [edi], ebx // writeoutput 00 xx xx xx - add esi, 3 + mov [edi], ebx // writeoutput 00 xx xx xx + add esi, 3 - add edi, 5 - paddw mm0, mm1 + add edi, 5 + paddw mm0, mm1 - paddw mm0, mm4 - psrlw mm0, 8 + paddw mm0, mm4 + psrlw mm0, 8 - cmp esi, edx - packuswb mm0, mm7 + cmp esi, edx + packuswb mm0, mm7 - movd DWORD Ptr [edi-4], mm0 - jl horiz_line_3_5_loop + movd DWORD Ptr [edi-4], mm0 + jl horiz_line_3_5_loop -//Exit: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax +// Exit: + mov eax, DWORD PTR [esi] // eax = 00 01 02 03 + mov ebx, eax - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 + and ebx, 0xffff00 // ebx = xx 01 02 xx + mov ecx, eax // ecx = 00 01 02 03 - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx + and eax, 0xffff0000 // eax = xx xx 02 03 + xor ecx, eax // ecx = 00 01 xx xx - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 + shr ebx, 8 // ebx = 01 02 xx xx + or eax, ebx // eax = 01 02 02 03 - shl eax, 8 // eax = xx 01 02 02 - and eax, 0xffff0000 // eax = xx xx 02 02 + shl eax, 8 // eax = xx 01 02 02 + and eax, 0xffff0000 // eax = xx xx 02 02 - or eax, ebx // eax = 01 02 02 02 + or eax, ebx // eax = 01 02 02 02 - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx + shl ebx, 16 // ebx = xx xx 01 02 + movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx + or ebx, ecx // ebx = 00 01 01 02 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 
02 xx - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // + movd mm0, ebx // mm0 = 00 01 01 02 + pmullw mm1, mm6 // - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx + pmullw mm0, mm5 // - mov [edi], ebx // writeoutput 00 xx xx xx - paddw mm0, mm1 + mov [edi], ebx // writeoutput 00 xx xx xx + paddw mm0, mm1 - paddw mm0, mm4 - psrlw mm0, 8 + paddw mm0, mm4 + psrlw mm0, 8 - packuswb mm0, mm7 - movd DWORD Ptr [edi+1], mm0 + packuswb mm0, mm7 + movd DWORD Ptr [edi+1], mm0 - pop ebx + pop ebx - } + } } @@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx static void horizontal_line_4_5_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void)dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void)dest_width; - __asm - { + __asm { - mov esi, source - mov edi, dest + mov esi, source + mov edi, dest - mov ecx, source_width - lea edx, [esi+ecx-8]; + mov ecx, source_width + lea edx, [esi+ecx-8]; - movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx - movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx + movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx + movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 + movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx + pxor mm7, mm7 // clear mm7 - horiz_line_4_5_loop: + horiz_line_4_5_loop: - movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 + movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 + movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 + movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 + movq mm3, mm1 
// mm3 = 01 02 03 04 05 06 07 08 - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + movd DWORD PTR [edi], mm0 // write output 00 xx xx xx + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx + pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 + punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx + pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 + punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx + pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - paddw mm0, mm1 // added round values - paddw mm0, mm4 + paddw mm0, mm1 // added round values + paddw mm0, mm4 - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 + psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx + packuswb mm0, mm7 - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - add edi, 10 + movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 + add edi, 10 - add esi, 8 - paddw mm2, mm3 // + add esi, 8 + paddw mm2, mm3 // - paddw mm2, mm4 // added round values - cmp esi, edx + paddw mm2, mm4 // added round values + cmp esi, edx - psrlw mm2, 8 - packuswb mm2, mm7 + psrlw mm2, 8 + packuswb mm2, mm7 - movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 - jl horiz_line_4_5_loop + movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 + jl horiz_line_4_5_loop -//Exit: - movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 +// 
Exit: + movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 + movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 + movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 + psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 - pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 + movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 + pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 - por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 + psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 + por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - movq mm3, mm1 + movq mm3, mm1 - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + movd DWORD PTR [edi], mm0 // write output 00 xx xx xx + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx + pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 + punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx + pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 + punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx + pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - paddw mm0, mm1 // added round values - paddw mm0, mm4 + paddw mm0, mm1 // added round values + paddw mm0, mm4 - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx + psrlw mm0, 8 
// output: 01 xx 02 xx 03 xx 04 xx + packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - paddw mm2, mm3 // + movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 + paddw mm2, mm3 // - paddw mm2, mm4 // added round values - psrlw mm2, 8 + paddw mm2, mm4 // added round values + psrlw mm2, 8 - packuswb mm2, mm7 - movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 + packuswb mm2, mm7 + movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 - } + } } /**************************************************************************** @@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx static void vertical_band_4_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_4_5_loop: + vs_4_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word + movq mm5, one_fifth + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 1/5 + pmullw mm0, mm5 // a * 1/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 
// make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan + pmullw mm2, mm5 // a * 1/5 + movq mm6, four_fifths // constan - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 4/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 + pmullw mm5, mm6 // b * 4/5 + paddw mm0, mm4 // a * 1/5 + b * 4/5 - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 1/5 + b * 4/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm5, two_fifths - movq mm2, mm0 // make a copy + movq mm5, two_fifths + movq mm2, mm0 // make a copy - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths + pmullw mm1, mm5 // b * 2/5 + movq mm6, three_fifths - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 + punpcklbw mm0, mm7 // unpack low to word + pmullw mm3, mm5 // b * 2/5 - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word + movq mm4, mm0 // make copy of c + punpckhbw mm2, mm7 // unpack high to word - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 + pmullw mm4, mm6 // c * 3/5 + movq mm5, mm2 - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 + pmullw mm5, mm6 // c * 3/5 + paddw mm1, mm4 // b * 2/5 + c * 3/5 - paddw 
mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 + paddw mm3, mm5 // b * 2/5 + c * 3/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] + psrlw mm3, 8 + packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + movq mm1, [edi] // mm1=Src[3]; - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking + // mm0, mm2 --- Src[2] + // mm1 --- Src[3] + // mm6 --- 3/5 + // mm7 for unpacking - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 + pmullw mm0, mm6 // c * 3/5 + movq mm5, two_fifths // mm5 = 2/5 - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 + movq mm3, mm1 // make a copy + pmullw mm2, mm6 // c * 3/5 - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low + movq mm4, mm1 // make a copy - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 + punpckhbw mm3, mm7 // unpack high + pmullw mm4, mm5 // d * 2/5 - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 + movq mm6, mm3 // make a copy + pmullw mm6, mm5 // d * 2/5 - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 + paddw mm0, mm4 // c * 3/5 + d * 2/5 + paddw mm2, mm6 // c * 3/5 + d * 2/5 - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 + paddw mm0, round_values // + 128 + paddw mm2, round_values // + 128 - psrlw mm0, 8 - psrlw mm2, 8 + psrlw mm0, 8 + psrlw mm2, 8 - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] + packuswb mm0, mm2 // des[3] + movq QWORD ptr [edi], mm0 // write des[3] - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking + // mm1, mm3 --- Src[3] + // mm7 -- cleared for unpacking - movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group + movq mm0, [edi+ecx*2] // 
mm0, Src[0] of the next group - movq mm5, four_fifths // mm5 = 4/5 - pmullw mm1, mm5 // d * 4/5 + movq mm5, four_fifths // mm5 = 4/5 + pmullw mm1, mm5 // d * 4/5 - movq mm6, one_fifth // mm6 = 1/5 - movq mm2, mm0 // make a copy + movq mm6, one_fifth // mm6 = 1/5 + movq mm2, mm0 // make a copy - pmullw mm3, mm5 // d * 4/5 - punpcklbw mm0, mm7 // unpack low + pmullw mm3, mm5 // d * 4/5 + punpcklbw mm0, mm7 // unpack low - pmullw mm0, mm6 // an * 1/5 - punpckhbw mm2, mm7 // unpack high + pmullw mm0, mm6 // an * 1/5 + punpckhbw mm2, mm7 // unpack high - paddw mm1, mm0 // d * 4/5 + an * 1/5 - pmullw mm2, mm6 // an * 1/5 + paddw mm1, mm0 // d * 4/5 + an * 1/5 + pmullw mm2, mm6 // an * 1/5 - paddw mm3, mm2 // d * 4/5 + an * 1/5 - paddw mm1, round_values // + 128 + paddw mm3, mm2 // d * 4/5 + an * 1/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[4] + psrlw mm3, 8 + packuswb mm1, mm3 // des[4] - movq QWORD ptr [edi+ecx], mm1 // write des[4] + movq QWORD ptr [edi+ecx], mm1 // write des[4] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg vs_4_5_loop - } + sub edx, 8 + jg vs_4_5_loop + } } /**************************************************************************** @@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx static void last_vertical_band_4_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear 
out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - last_vs_4_5_loop: + last_vs_4_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word + movq mm5, one_fifth + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 1/5 + pmullw mm0, mm5 // a * 1/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan + pmullw mm2, mm5 // a * 1/5 + movq mm6, four_fifths // constan - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 4/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 + pmullw mm5, mm6 // b * 4/5 + paddw mm0, mm4 // a * 1/5 + b * 4/5 - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 1/5 + b * 4/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking 
- movq mm5, two_fifths - movq mm2, mm0 // make a copy + movq mm5, two_fifths + movq mm2, mm0 // make a copy - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths + pmullw mm1, mm5 // b * 2/5 + movq mm6, three_fifths - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 + punpcklbw mm0, mm7 // unpack low to word + pmullw mm3, mm5 // b * 2/5 - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word + movq mm4, mm0 // make copy of c + punpckhbw mm2, mm7 // unpack high to word - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 + pmullw mm4, mm6 // c * 3/5 + movq mm5, mm2 - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 + pmullw mm5, mm6 // c * 3/5 + paddw mm1, mm4 // b * 2/5 + c * 3/5 - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 + paddw mm3, mm5 // b * 2/5 + c * 3/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] + psrlw mm3, 8 + packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + movq mm1, [edi] // mm1=Src[3]; - movq QWORD ptr [edi+ecx], mm1 // write des[4]; + movq QWORD ptr [edi+ecx], mm1 // write des[4]; - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking + // mm0, mm2 --- Src[2] + // mm1 --- Src[3] + // mm6 --- 3/5 + // mm7 for unpacking - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 + pmullw mm0, mm6 // c * 3/5 + movq mm5, two_fifths // mm5 = 2/5 - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 + movq mm3, mm1 // make a copy + pmullw mm2, mm6 // c * 3/5 - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low + movq mm4, mm1 // make a copy - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 + punpckhbw mm3, mm7 // unpack high + 
pmullw mm4, mm5 // d * 2/5 - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 + movq mm6, mm3 // make a copy + pmullw mm6, mm5 // d * 2/5 - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 + paddw mm0, mm4 // c * 3/5 + d * 2/5 + paddw mm2, mm6 // c * 3/5 + d * 2/5 - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 + paddw mm0, round_values // + 128 + paddw mm2, round_values // + 128 - psrlw mm0, 8 - psrlw mm2, 8 + psrlw mm0, 8 + psrlw mm2, 8 - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] + packuswb mm0, mm2 // des[3] + movq QWORD ptr [edi], mm0 // write des[3] - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - add edi, 8 - add esi, 8 + // mm1, mm3 --- Src[3] + // mm7 -- cleared for unpacking + add edi, 8 + add esi, 8 - sub edx, 8 - jg last_vs_4_5_loop - } + sub edx, 8 + jg last_vs_4_5_loop + } } /**************************************************************************** @@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx static void vertical_band_3_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_3_5_loop: + vs_3_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - 
punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word + movq mm5, two_fifths // mm5 = 2/5 + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 2/5 + pmullw mm0, mm5 // a * 2/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 + pmullw mm2, mm5 // a * 2/5 + movq mm6, three_fifths // mm6 = 3/5 - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 3/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 + pmullw mm5, mm6 // b * 3/5 + paddw mm0, mm4 // a * 2/5 + b * 3/5 - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 2/5 + b * 3/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low + movq mm4, mm1 // b low + pmullw mm1, four_fifths // b * 4/5 low - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high + movq mm5, mm3 // b high + pmullw mm3, four_fifths // b * 4/5 high - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 
1/5 + movq mm2, mm0 // c + pmullw mm4, one_fifth // b * 1/5 - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 + punpcklbw mm0, mm7 // c low + pmullw mm5, one_fifth // b * 1/5 - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high + movq mm6, mm0 // make copy of c low + punpckhbw mm2, mm7 // c high - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, one_fifth // c * 1/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low + pmullw mm7, one_fifth // c * 1/5 high + paddw mm1, mm6 // b * 4/5 + c * 1/5 low - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low + paddw mm3, mm7 // b * 4/5 + c * 1/5 high + movq mm6, mm0 // make copy of c low - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, four_fifths // c * 4/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, four_fifths // c * 4/5 high + pmullw mm7, four_fifths // c * 4/5 high - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high + paddw mm4, mm6 // b * 1/5 + c * 4/5 low + paddw mm5, mm7 // b * 1/5 + c * 4/5 high - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 + paddw mm1, round_values // + 128 + paddw mm3, round_values // + 128 - psrlw mm1, 8 - psrlw mm3, 8 + psrlw mm1, 8 + psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + packuswb mm1, mm3 // des[2] + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 + paddw mm4, round_values // + 128 + paddw mm5, round_values // + 128 - psrlw mm4, 8 - psrlw mm5, 8 + psrlw mm4, 8 + psrlw mm5, 8 - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] + packuswb mm4, mm5 // des[3] + movq QWORD ptr [edi], mm4 // write des[3] - // mm0, mm2 --- Src[3] + // mm0, mm2 --- Src[3] - pxor mm7, mm7 
// clear mm7 for unpacking - movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group + pxor mm7, mm7 // clear mm7 for unpacking + movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group - movq mm5, three_fifths // mm5 = 3/5 - pmullw mm0, mm5 // d * 3/5 + movq mm5, three_fifths // mm5 = 3/5 + pmullw mm0, mm5 // d * 3/5 - movq mm6, two_fifths // mm6 = 2/5 - movq mm3, mm1 // make a copy + movq mm6, two_fifths // mm6 = 2/5 + movq mm3, mm1 // make a copy - pmullw mm2, mm5 // d * 3/5 - punpcklbw mm1, mm7 // unpack low + pmullw mm2, mm5 // d * 3/5 + punpcklbw mm1, mm7 // unpack low - pmullw mm1, mm6 // an * 2/5 - punpckhbw mm3, mm7 // unpack high + pmullw mm1, mm6 // an * 2/5 + punpckhbw mm3, mm7 // unpack high - paddw mm0, mm1 // d * 3/5 + an * 2/5 - pmullw mm3, mm6 // an * 2/5 + paddw mm0, mm1 // d * 3/5 + an * 2/5 + pmullw mm3, mm6 // an * 2/5 - paddw mm2, mm3 // d * 3/5 + an * 2/5 - paddw mm0, round_values // + 128 + paddw mm2, mm3 // d * 3/5 + an * 2/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des[4] + psrlw mm2, 8 + packuswb mm0, mm2 // des[4] - movq QWORD ptr [edi+ecx], mm0 // write des[4] + movq QWORD ptr [edi+ecx], mm0 // write des[4] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg vs_3_5_loop - } + sub edx, 8 + jg vs_3_5_loop + } } /**************************************************************************** @@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx static void last_vertical_band_3_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, 
[esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - last_vs_3_5_loop: + last_vs_3_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word + movq mm5, two_fifths // mm5 = 2/5 + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 2/5 + pmullw mm0, mm5 // a * 2/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 + pmullw mm2, mm5 // a * 2/5 + movq mm6, three_fifths // mm6 = 3/5 - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 3/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 + pmullw mm5, mm6 // b * 3/5 + paddw mm0, mm4 // a * 2/5 + b * 3/5 - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 2/5 + b * 3/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr 
[esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low + movq mm4, mm1 // b low + pmullw mm1, four_fifths // b * 4/5 low - movq QWORD ptr [edi+ecx], mm0 // write des[4] + movq QWORD ptr [edi+ecx], mm0 // write des[4] - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high + movq mm5, mm3 // b high + pmullw mm3, four_fifths // b * 4/5 high - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 + movq mm2, mm0 // c + pmullw mm4, one_fifth // b * 1/5 - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 + punpcklbw mm0, mm7 // c low + pmullw mm5, one_fifth // b * 1/5 - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high + movq mm6, mm0 // make copy of c low + punpckhbw mm2, mm7 // c high - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, one_fifth // c * 1/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low + pmullw mm7, one_fifth // c * 1/5 high + paddw mm1, mm6 // b * 4/5 + c * 1/5 low - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low + paddw mm3, mm7 // b * 4/5 + c * 1/5 high + movq mm6, mm0 // make copy of c low - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, four_fifths // c * 4/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, four_fifths // c * 4/5 high + pmullw mm7, four_fifths // c * 4/5 high - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high + paddw mm4, mm6 // b * 1/5 + c * 4/5 low + paddw mm5, mm7 // b * 1/5 + c * 4/5 high - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 + paddw mm1, round_values // + 128 + paddw mm3, round_values // + 128 - psrlw mm1, 
8 - psrlw mm3, 8 + psrlw mm1, 8 + psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + packuswb mm1, mm3 // des[2] + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 + paddw mm4, round_values // + 128 + paddw mm5, round_values // + 128 - psrlw mm4, 8 - psrlw mm5, 8 + psrlw mm4, 8 + psrlw mm5, 8 - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] + packuswb mm4, mm5 // des[3] + movq QWORD ptr [edi], mm4 // write des[3] - // mm0, mm2 --- Src[3] + // mm0, mm2 --- Src[3] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg last_vs_3_5_loop - } + sub edx, 8 + jg last_vs_3_5_loop + } } /**************************************************************************** @@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx static void vertical_band_1_2_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_1_2_loop: + vs_1_2_loop: - movq mm0, [esi] // get Src[0] - movq mm1, [esi + ecx * 2] // get Src[1] + movq mm0, [esi] // get Src[0] + movq mm1, [esi + ecx * 2] // get Src[1] - movq mm2, mm0 // make copy before unpack - movq mm3, mm1 // make copy before unpack + movq mm2, mm0 // make copy before unpack + movq mm3, mm1 // make copy before unpack - punpcklbw mm0, mm7 // low Src[0] - movq mm6, four_ones // mm6= 1, 1, 1, 1 + punpcklbw mm0, mm7 // low Src[0] + movq mm6, four_ones // mm6= 1, 1, 1, 1 - punpcklbw mm1, mm7 // low Src[1] - paddw mm0, 
mm1 // low (a + b) + punpcklbw mm1, mm7 // low Src[1] + paddw mm0, mm1 // low (a + b) - punpckhbw mm2, mm7 // high Src[0] - paddw mm0, mm6 // low (a + b + 1) + punpckhbw mm2, mm7 // high Src[0] + paddw mm0, mm6 // low (a + b + 1) - punpckhbw mm3, mm7 - paddw mm2, mm3 // high (a + b ) + punpckhbw mm3, mm7 + paddw mm2, mm3 // high (a + b ) - psraw mm0, 1 // low (a + b +1 )/2 - paddw mm2, mm6 // high (a + b + 1) + psraw mm0, 1 // low (a + b +1 )/2 + paddw mm2, mm6 // high (a + b + 1) - psraw mm2, 1 // high (a + b + 1)/2 - packuswb mm0, mm2 // pack results + psraw mm2, 1 // high (a + b + 1)/2 + packuswb mm0, mm2 // pack results - movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 + movq [esi+ecx], mm0 // write out eight bytes + add esi, 8 - sub edx, 8 - jg vs_1_2_loop - } + sub edx, 8 + jg vs_1_2_loop + } } @@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx static void last_vertical_band_1_2_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - mov edx, dest_width // Loop counter + mov edx, dest_width // Loop counter - last_vs_1_2_loop: + last_vs_1_2_loop: - movq mm0, [esi] // get Src[0] - movq [esi+ecx], mm0 // write out eight bytes + movq mm0, [esi] // get Src[0] + movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 - sub edx, 8 + add esi, 8 + sub edx, 8 - jg last_vs_1_2_loop - } + jg last_vs_1_2_loop + } } /**************************************************************************** @@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx static void horizontal_line_1_2_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - 
(void) dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; - __asm - { - mov esi, source - mov edi, dest + __asm { + mov esi, source + mov edi, dest - pxor mm7, mm7 - movq mm6, four_ones + pxor mm7, mm7 + movq mm6, four_ones - mov ecx, source_width + mov ecx, source_width - hs_1_2_loop: + hs_1_2_loop: - movq mm0, [esi] - movq mm1, [esi+1] + movq mm0, [esi] + movq mm1, [esi+1] - movq mm2, mm0 - movq mm3, mm1 + movq mm2, mm0 + movq mm3, mm1 - movq mm4, mm0 - punpcklbw mm0, mm7 + movq mm4, mm0 + punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - paddw mm0, mm1 + punpcklbw mm1, mm7 + paddw mm0, mm1 - paddw mm0, mm6 - punpckhbw mm2, mm7 + paddw mm0, mm6 + punpckhbw mm2, mm7 - punpckhbw mm3, mm7 - paddw mm2, mm3 + punpckhbw mm3, mm7 + paddw mm2, mm3 - paddw mm2, mm6 - psraw mm0, 1 + paddw mm2, mm6 + psraw mm0, 1 - psraw mm2, 1 - packuswb mm0, mm2 + psraw mm2, 1 + packuswb mm0, mm2 - movq mm2, mm4 - punpcklbw mm2, mm0 + movq mm2, mm4 + punpcklbw mm2, mm0 - movq [edi], mm2 - punpckhbw mm4, mm0 + movq [edi], mm2 + punpckhbw mm4, mm0 - movq [edi+8], mm4 - add esi, 8 + movq [edi+8], mm4 + add esi, 8 - add edi, 16 - sub ecx, 8 + add edi, 16 + sub ecx, 8 - cmp ecx, 8 - jg hs_1_2_loop + cmp ecx, 8 + jg hs_1_2_loop // last eight pixel - movq mm0, [esi] - movq mm1, mm0 + movq mm0, [esi] + movq mm1, mm0 - movq mm2, mm0 - movq mm3, mm1 + movq mm2, mm0 + movq mm3, mm1 - psrlq mm1, 8 - psrlq mm3, 56 + psrlq mm1, 8 + psrlq mm3, 56 - psllq mm3, 56 - por mm1, mm3 + psllq mm3, 56 + por mm1, mm3 - movq mm3, mm1 - movq mm4, mm0 + movq mm3, mm1 + movq mm4, mm0 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 - paddw mm0, mm1 - paddw mm0, mm6 + paddw mm0, mm1 + paddw mm0, mm6 - punpckhbw mm2, mm7 - punpckhbw mm3, mm7 + punpckhbw mm2, mm7 + punpckhbw mm3, mm7 - paddw mm2, mm3 - paddw mm2, mm6 + paddw mm2, mm3 + paddw mm2, mm6 - psraw mm0, 1 - psraw mm2, 1 + psraw mm0, 1 + 
psraw mm2, 1 - packuswb mm0, mm2 - movq mm2, mm4 + packuswb mm0, mm2 + movq mm2, mm4 - punpcklbw mm2, mm0 - movq [edi], mm2 + punpcklbw mm2, mm0 + movq [edi], mm2 - punpckhbw mm4, mm0 - movq [edi+8], mm4 - } + punpckhbw mm4, mm0 + movq [edi+8], mm4 + } } @@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, static void horizontal_line_5_4_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - /* - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + /* + unsigned i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + const unsigned char *src = source; - (void) dest_width; + (void) dest_width; - for ( i=0; i<source_width; i+=5 ) - { - a = src[0]; - b = src[1]; - c = src[2]; - d = src[3]; - e = src[4]; + for ( i=0; i<source_width; i+=5 ) + { + a = src[0]; + b = src[1]; + c = src[2]; + d = src[3]; + e = src[4]; - des[0] = a; - des[1] = ((b*192 + c* 64 + 128)>>8); - des[2] = ((c*128 + d*128 + 128)>>8); - des[3] = ((d* 64 + e*192 + 128)>>8); + des[0] = a; + des[1] = ((b*192 + c* 64 + 128)>>8); + des[2] = ((c*128 + d*128 + 128)>>8); + des[3] = ((d* 64 + e*192 + 128)>>8); - src += 5; - des += 4; - } - */ - (void) dest_width; + src += 5; + des += 4; + } + */ + (void) dest_width; - __asm - { + __asm { - mov esi, source ; - mov edi, dest ; + mov esi, source; + mov edi, dest; - mov ecx, source_width ; - movq mm5, const54_1 ; + mov ecx, source_width; + movq mm5, const54_1; - pxor mm7, mm7 ; - movq mm6, const54_2 ; + pxor mm7, mm7; + movq mm6, const54_2; - movq mm4, round_values ; - lea edx, [esi+ecx] ; - horizontal_line_5_4_loop: + movq mm4, round_values; + lea edx, [esi+ecx]; + horizontal_line_5_4_loop: - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 
00 01 02 03 04 05 06 07 + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psrlq mm0, 8 ; - 01 02 03 04 05 06 07 xx - punpcklbw mm1, mm7 ; - xx 00 xx 01 xx 02 xx 03 + psrlq mm0, 8; + 01 02 03 04 05 06 07 xx + punpcklbw mm1, mm7; + xx 00 xx 01 xx 02 xx 03 - punpcklbw mm0, mm7 ; - xx 01 xx 02 xx 03 xx 04 - pmullw mm1, mm5 + punpcklbw mm0, mm7; + xx 01 xx 02 xx 03 xx 04 + pmullw mm1, mm5 - pmullw mm0, mm6 - add esi, 5 + pmullw mm0, mm6 + add esi, 5 - add edi, 4 - paddw mm1, mm0 + add edi, 4 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - cmp esi, edx - packuswb mm1, mm7 + cmp esi, edx + packuswb mm1, mm7 - movd DWORD PTR [edi-4], mm1 + movd DWORD PTR [edi-4], mm1 - jl horizontal_line_5_4_loop + jl horizontal_line_5_4_loop - } + } } __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; @@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; static -void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - __asm - { - push ebx + __asm { + push ebx - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size + mov esi, source // Get the source and destination pointer + mov ecx, src_pitch // Get the pitch size - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 + mov edi, dest // tow lines below + pxor mm7, mm7 // clear out mm7 - mov edx, dest_pitch // Loop counter - mov ebx, dest_width + mov edx, dest_pitch // Loop counter + mov ebx, dest_width - vs_5_4_loop: + vs_5_4_loop: - movd mm0, DWORD ptr [esi] // src[0]; - 
movd mm1, DWORD ptr [esi+ecx] // src[1]; + movd mm0, DWORD ptr [esi] // src[0]; + movd mm1, DWORD ptr [esi+ecx] // src[1]; - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // + movd mm2, DWORD ptr [esi+ecx*2] + lea eax, [esi+ecx*2] // - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 - movq mm3, mm2 - pmullw mm1, three_fourths + movq mm3, mm2 + pmullw mm1, three_fourths - pmullw mm2, one_fourths - movd mm4, [eax+ecx] + pmullw mm2, one_fourths + movd mm4, [eax+ecx] - pmullw mm3, two_fourths - punpcklbw mm4, mm7 + pmullw mm3, two_fourths + punpcklbw mm4, mm7 - movq mm5, mm4 - pmullw mm4, two_fourths + movq mm5, mm4 + pmullw mm4, two_fourths - paddw mm1, mm2 - movd mm6, [eax+ecx*2] + paddw mm1, mm2 + movd mm6, [eax+ecx*2] - pmullw mm5, one_fourths - paddw mm1, round_values; + pmullw mm5, one_fourths + paddw mm1, round_values; - paddw mm3, mm4 - psrlw mm1, 8 + paddw mm3, mm4 + psrlw mm1, 8 - punpcklbw mm6, mm7 - paddw mm3, round_values + punpcklbw mm6, mm7 + paddw mm3, round_values - pmullw mm6, three_fourths - psrlw mm3, 8 + pmullw mm6, three_fourths + psrlw mm3, 8 - packuswb mm1, mm7 - packuswb mm3, mm7 + packuswb mm1, mm7 + packuswb mm3, mm7 - movd DWORD PTR [edi], mm0 - movd DWORD PTR [edi+edx], mm1 + movd DWORD PTR [edi], mm0 + movd DWORD PTR [edi+edx], mm1 - paddw mm5, mm6 - movd DWORD PTR [edi+edx*2], mm3 + paddw mm5, mm6 + movd DWORD PTR [edi+edx*2], mm3 - lea eax, [edi+edx*2] - paddw mm5, round_values + lea eax, [edi+edx*2] + paddw mm5, round_values - psrlw mm5, 8 - add edi, 4 + psrlw mm5, 8 + add edi, 4 - packuswb mm5, mm7 - movd DWORD PTR [eax+edx], mm5 + packuswb mm5, mm7 + movd DWORD PTR [eax+edx], mm5 - add esi, 4 - sub ebx, 4 + add esi, 4 + sub ebx, 4 - jg vs_5_4_loop + jg vs_5_4_loop - pop ebx - } + pop ebx + } } @@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, static void horizontal_line_5_3_scale_mmx ( - const unsigned char *source, - unsigned int 
source_width, - unsigned char *dest, - unsigned int dest_width -) -{ + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { - (void) dest_width; - __asm - { + (void) dest_width; + __asm { - mov esi, source ; - mov edi, dest ; + mov esi, source; + mov edi, dest; - mov ecx, source_width ; - movq mm5, const53_1 ; + mov ecx, source_width; + movq mm5, const53_1; - pxor mm7, mm7 ; - movq mm6, const53_2 ; + pxor mm7, mm7; + movq mm6, const53_2; - movq mm4, round_values ; - lea edx, [esi+ecx-5] ; - horizontal_line_5_3_loop: + movq mm4, round_values; + lea edx, [esi+ecx-5]; + horizontal_line_5_3_loop: - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx + psllw mm0, 8; + xx 00 xx 02 xx 04 xx 06 + psrlw mm1, 8; + 01 xx 03 xx 05 xx 07 xx - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx + psrlw mm0, 8; + 00 xx 02 xx 04 xx 06 xx + psllq mm1, 16; + xx xx 01 xx 03 xx 05 xx - pmullw mm0, mm6 + pmullw mm0, mm6 - pmullw mm1, mm5 - add esi, 5 + pmullw mm1, mm5 + add esi, 5 - add edi, 3 - paddw mm1, mm0 + add edi, 3 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - cmp esi, edx - packuswb mm1, mm7 + cmp esi, edx + packuswb mm1, mm7 - movd DWORD PTR [edi-3], mm1 - jl horizontal_line_5_3_loop + movd DWORD PTR [edi-3], mm1 + jl horizontal_line_5_3_loop -//exit condition - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 +// exit condition + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx + psllw mm0, 8; + xx 00 xx 02 xx 04 xx 06 + psrlw mm1, 8; + 01 xx 
03 xx 05 xx 07 xx - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx + psrlw mm0, 8; + 00 xx 02 xx 04 xx 06 xx + psllq mm1, 16; + xx xx 01 xx 03 xx 05 xx - pmullw mm0, mm6 + pmullw mm0, mm6 - pmullw mm1, mm5 - paddw mm1, mm0 + pmullw mm1, mm5 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - packuswb mm1, mm7 - movd eax, mm1 + packuswb mm1, mm7 + movd eax, mm1 - mov edx, eax - shr edx, 16 + mov edx, eax + shr edx, 16 - mov WORD PTR[edi], ax - mov BYTE PTR[edi+2], dl + mov WORD PTR[edi], ax + mov BYTE PTR[edi+2], dl - } + } } @@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85 __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; static -void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - __asm - { - push ebx + __asm { + push ebx - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size + mov esi, source // Get the source and destination pointer + mov ecx, src_pitch // Get the pitch size - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 + mov edi, dest // tow lines below + pxor mm7, mm7 // clear out mm7 - mov edx, dest_pitch // Loop counter - movq mm5, one_thirds + mov edx, dest_pitch // Loop counter + movq mm5, one_thirds - movq mm6, two_thirds - mov ebx, dest_width; + movq mm6, two_thirds + mov ebx, dest_width; - vs_5_3_loop: + vs_5_3_loop: - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; + movd mm0, DWORD ptr [esi] // src[0]; + movd mm1, DWORD ptr [esi+ecx] // src[1]; - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // + movd mm2, DWORD ptr [esi+ecx*2] + 
lea eax, [esi+ecx*2] // - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 - pmullw mm1, mm5 - pmullw mm2, mm6 + pmullw mm1, mm5 + pmullw mm2, mm6 - movd mm3, DWORD ptr [eax+ecx] - movd mm4, DWORD ptr [eax+ecx*2] + movd mm3, DWORD ptr [eax+ecx] + movd mm4, DWORD ptr [eax+ecx*2] - punpcklbw mm3, mm7 - punpcklbw mm4, mm7 + punpcklbw mm3, mm7 + punpcklbw mm4, mm7 - pmullw mm3, mm6 - pmullw mm4, mm5 + pmullw mm3, mm6 + pmullw mm4, mm5 - movd DWORD PTR [edi], mm0 - paddw mm1, mm2 + movd DWORD PTR [edi], mm0 + paddw mm1, mm2 - paddw mm1, round_values - psrlw mm1, 8 + paddw mm1, round_values + psrlw mm1, 8 - packuswb mm1, mm7 - paddw mm3, mm4 + packuswb mm1, mm7 + paddw mm3, mm4 - paddw mm3, round_values - movd DWORD PTR [edi+edx], mm1 + paddw mm3, round_values + movd DWORD PTR [edi+edx], mm1 - psrlw mm3, 8 - packuswb mm3, mm7 + psrlw mm3, 8 + packuswb mm3, mm7 - movd DWORD PTR [edi+edx*2], mm3 + movd DWORD PTR [edi+edx*2], mm3 - add edi, 4 - add esi, 4 + add edi, 4 + add esi, 4 - sub ebx, 4 - jg vs_5_3_loop + sub ebx, 4 + jg vs_5_3_loop - pop ebx - } + pop ebx + } } @@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, static void horizontal_line_2_1_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - (void) source_width; - __asm - { - mov esi, source - mov edi, dest - - pxor mm7, mm7 - mov ecx, dest_width - - xor edx, edx - hs_2_1_loop: - - movq mm0, [esi+edx*2] - psllw mm0, 8 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD Ptr [edi+edx], mm0; - add edx, 4 - - cmp edx, ecx - jl hs_2_1_loop - - } + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; + (void) source_width; + __asm { + mov esi, source + mov edi, dest + + pxor mm7, mm7 + mov ecx, dest_width + + xor edx, edx + hs_2_1_loop: + + movq mm0, 
[esi+edx*2] + psllw mm0, 8 + + psrlw mm0, 8 + packuswb mm0, mm7 + + movd DWORD Ptr [edi+edx], mm0; + add edx, 4 + + cmp edx, ecx + jl hs_2_1_loop + + } } static -void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - (void) dest_pitch; - (void) src_pitch; - vpx_memcpy(dest, source, dest_width); +void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + (void) dest_pitch; + (void) src_pitch; + vpx_memcpy(dest, source, dest_width); } @@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4 __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; static -void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - (void) dest_pitch; - __asm - { - mov esi, source - mov edi, dest + (void) dest_pitch; + __asm { + mov esi, source + mov edi, dest - mov eax, src_pitch - mov edx, dest_width + mov eax, src_pitch + mov edx, dest_width - pxor mm7, mm7 - sub esi, eax //back one line + pxor mm7, mm7 + sub esi, eax // back one line - lea ecx, [esi+edx]; - movq mm6, round_values; + lea ecx, [esi+edx]; + movq mm6, round_values; - movq mm5, three_sixteenths; - movq mm4, ten_sixteenths; + movq mm5, three_sixteenths; + movq mm4, ten_sixteenths; - vs_2_1_i_loop: - movd mm0, [esi] // - movd mm1, [esi+eax] // + vs_2_1_i_loop: + movd mm0, [esi] // + movd mm1, [esi+eax] // - movd mm2, [esi+eax*2] // - punpcklbw mm0, mm7 + movd mm2, [esi+eax*2] // + punpcklbw mm0, mm7 - pmullw mm0, mm5 - punpcklbw mm1, mm7 + pmullw mm0, mm5 + punpcklbw mm1, mm7 - pmullw mm1, mm4 - 
punpcklbw mm2, mm7 + pmullw mm1, mm4 + punpcklbw mm2, mm7 - pmullw mm2, mm5 - paddw mm0, round_values + pmullw mm2, mm5 + paddw mm0, round_values - paddw mm1, mm2 - paddw mm0, mm1 + paddw mm1, mm2 + paddw mm0, mm1 - psrlw mm0, 8 - packuswb mm0, mm7 + psrlw mm0, 8 + packuswb mm0, mm7 - movd DWORD PTR [edi], mm0 - add esi, 4 + movd DWORD PTR [edi], mm0 + add esi, 4 - add edi, 4; - cmp esi, ecx - jl vs_2_1_i_loop + add edi, 4; + cmp esi, ecx + jl vs_2_1_i_loop - } + } } void -register_mmxscalers(void) -{ - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; - vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; - vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; - vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; - - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - - - - vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; - vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; - vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; - vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; - vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; - vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; - vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; +register_mmxscalers(void) { + 
vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; + vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; + vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; + vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; + vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; + vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; + vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; + vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; + vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; + + vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; + vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; + vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; + vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; + vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; + vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; + + + + vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; + vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; + vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; + vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; + vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; + vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; + vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; |