ref: a50d54f8e8cf80a5c0313847ef488e2eb42ce409
parent: bbe51935c780fe2747cb757e8f81635be119f112
parent: a6463be0cc7503db58da27aa8001b4b8ac26f646
author: Ethan Hugg <[email protected]>
date: Sat Jan 18 03:55:49 EST 2014
Merge pull request #162 from Vproject/yasm Allow yasm to be used instead of nasm.
--- a/codec/common/mb_copy.asm
+++ b/codec/common/mb_copy.asm
@@ -392,7 +392,7 @@
;mov eax, [esp+4] ; mv_buffer
;movd xmm0, [esp+8] ; _mv
movd xmm0, r1d ; _mv
- pshufd xmm1, xmm0, $0
+ pshufd xmm1, xmm0, $00
movdqa [r0 ], xmm1
movdqa [r0+0x10], xmm1
movdqa [r0+0x20], xmm1
--- a/codec/common/vaa.asm
+++ b/codec/common/vaa.asm
@@ -101,7 +101,7 @@
punpcklwd %1, %2
punpckhwd %3, %4
punpcklwd %1, %3
- psraw %1, $4
+ psraw %1, $04
%endmacro
%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
@@ -129,7 +129,7 @@
paddw %2, %4 ; block 2, 3
phaddw %1, %2 ; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; ..
phaddw %1, xmm7 ; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; ....
- psraw %1, $4
+ psraw %1, $04
%endmacro
@@ -178,12 +178,12 @@
mov r2,r1
- sal r2,$1 ;r2 = 2*iLineSize
+ sal r2,$01 ;r2 = 2*iLineSize
mov r3,r2
add r3,r1 ;r3 = 3*iLineSize
mov r4,r2
- sal r4,$1 ;r4 = 4*iLineSize
+ sal r4,$01 ;r4 = 4*iLineSize
pxor xmm7, xmm7
@@ -231,7 +231,7 @@
and r2, 0ffffh ; effective low work truncated
mov r3, r2
imul r2, r3
- sar r2, $4
+ sar r2, $04
movd retrd, xmm1
sub retrd, r2d
@@ -273,12 +273,12 @@
mov r2,r1
- sal r2,$1 ;r2 = 2*iLineSize
+ sal r2,$01 ;r2 = 2*iLineSize
mov r3,r2
add r3,r1 ;r3 = 3*iLineSize
mov r4,r2
- sal r4,$1 ;r4 = 4*iLineSize
+ sal r4,$01 ;r4 = 4*iLineSize
pxor xmm7, xmm7
@@ -327,7 +327,7 @@
and r2, 0ffffh ; effective low work truncated
mov r3, r2
imul r2, r3
- sar r2, $4
+ sar r2, $04
movd retrd, xmm1
sub retrd, r2d
--- a/codec/encoder/core/asm/dct.asm
+++ b/codec/encoder/core/asm/dct.asm
@@ -86,17 +86,17 @@
%macro MMX_SumSubMul2 3
movq %3, %1
- psllw %1, $1
+ psllw %1, $01
paddw %1, %2
- psllw %2, $1
+ psllw %2, $01
psubw %3, %2
%endmacro
%macro MMX_SumSubDiv2 3
movq %3, %2
- psraw %3, $1
+ psraw %3, $01
paddw %3, %1
- psraw %1, $1
+ psraw %1, $01
psubw %1, %2
%endmacro
@@ -124,7 +124,7 @@
movd %2, %6
punpcklbw %2, %4
paddw %1, %3
- psraw %1, $6
+ psraw %1, $06
paddsw %1, %2
packuswb %1, %2
movd %5, %1
@@ -255,8 +255,8 @@
%macro SSE2_SumSubDiv2 4
movdqa %4, %1
movdqa %3, %2
- psraw %2, $1
- psraw %4, $1
+ psraw %2, $01
+ psraw %4, $01
paddw %1, %2
psubw %4, %3
%endmacro
@@ -263,7 +263,7 @@
%macro SSE2_StoreDiff8p 6
paddw %1, %3
- psraw %1, $6
+ psraw %1, $06
movq %2, %6
punpcklbw %2, %4
paddsw %2, %1
@@ -282,7 +282,7 @@
%macro SSE2_Load8DC 6
movdqa %1, %6 ; %1 = dc0 dc1
paddw %1, %5
- psraw %1, $6 ; (dc + 32) >> 6
+ psraw %1, $06 ; (dc + 32) >> 6
movdqa %2, %1
psrldq %2, 4
--- a/codec/processing/src/asm/downsample_bilinear.asm
+++ b/codec/processing/src/asm/downsample_bilinear.asm
@@ -94,13 +94,13 @@
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
- sar ebp, $1 ; iSrcHeight >> 1
+ sar ebp, $01 ; iSrcHeight >> 1
.yloops:
mov eax, [esp+40] ; iSrcWidth
- sar eax, $1 ; iSrcWidth >> 1
+ sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
- sar eax, $4 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
+ sar eax, $04 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 32 bytes
.xloops:
@@ -247,13 +247,13 @@
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
- sar ebp, $1 ; iSrcHeight >> 1
+ sar ebp, $01 ; iSrcHeight >> 1
.yloops:
mov eax, [esp+40] ; iSrcWidth
- sar eax, $1 ; iSrcWidth >> 1
+ sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
- sar eax, $3 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
+ sar eax, $03 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 16 bytes
.xloops:
@@ -351,13 +351,13 @@
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
- sar ebp, $1 ; iSrcHeight >> 1
+ sar ebp, $01 ; iSrcHeight >> 1
.yloops:
mov eax, [esp+40] ; iSrcWidth
- sar eax, $1 ; iSrcWidth >> 1
+ sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
- sar eax, $2 ; (iSrcWidth >> 1) / 4 ; loop count = num_of_mb
+ sar eax, $02 ; (iSrcWidth >> 1) / 4 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 8 bytes
.xloops:
@@ -442,7 +442,7 @@
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
- sar ebp, $1 ; iSrcHeight >> 1
+ sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
@@ -449,9 +449,9 @@
.yloops:
mov eax, [esp+40] ; iSrcWidth
- sar eax, $1 ; iSrcWidth >> 1
+ sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
- sar eax, $4 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
+ sar eax, $04 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 32 bytes
.xloops:
@@ -553,15 +553,15 @@
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
- sar ebp, $1 ; iSrcHeight >> 1
+ sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
.yloops:
mov eax, [esp+40] ; iSrcWidth
- sar eax, $1 ; iSrcWidth >> 1
+ sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
- sar eax, $3 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
+ sar eax, $03 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 16 bytes
.xloops:
@@ -643,7 +643,7 @@
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
- sar ebp, $1 ; iSrcHeight >> 1
+ sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
@@ -650,9 +650,9 @@
.yloops:
mov eax, [esp+40] ; iSrcWidth
- sar eax, $1 ; iSrcWidth >> 1
+ sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
- sar eax, $4 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
+ sar eax, $04 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 32 bytes
.xloops:
@@ -753,15 +753,15 @@
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
- sar ebp, $1 ; iSrcHeight >> 1
+ sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
.yloops:
mov eax, [esp+40] ; iSrcWidth
- sar eax, $1 ; iSrcWidth >> 1
+ sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
- sar eax, $3 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
+ sar eax, $03 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 16 bytes
.xloops:
--- a/codec/processing/src/asm/vaa.asm
+++ b/codec/processing/src/asm/vaa.asm
@@ -121,7 +121,7 @@
punpcklwd %1, %2
punpckhwd %3, %4
punpcklwd %1, %3
- psraw %1, $4
+ psraw %1, $04
%endmacro
%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
@@ -149,7 +149,7 @@
paddw %2, %4 ; block 2, 3
phaddw %1, %2 ; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; ..
phaddw %1, xmm7 ; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; ....
- psraw %1, $4
+ psraw %1, $04
%endmacro
%macro WELS_SAD_16x2_SSE2 0
@@ -353,7 +353,7 @@
pxor xmm0, xmm0
.hloop:
mov eax, ebx
- mov ebp, $0
+ mov ebp, $00
.wloop:
movdqa xmm1, [esi+ebp]
movdqa xmm2, [edi+ebp]