user manual

124 Use 3DNow! PAVGUSB for MPEG-2 Motion Compensation
AMD Athlon Processor x86 Code Optimization
22007E/0 — November 1999
Example 1 (Avoid):
;-----------------------------------------------------------------------
; MMX-only byte average of a 16-row, 16-byte-wide block:
;     Dst row = average(Src row, Dst row), written back in place.
; The caption marks this listing "Avoid"; the section heading recommends
; the 3DNow! PAVGUSB instruction for this job instead.
;
; For each quadword pair (a from the source row, b from the destination
; row) the loop computes
;     ((a & ConstFEFE) >> 1) + ((b & ConstFEFE) >> 1) + ((a | b) & Const0101)
; NOTE(review): ConstFEFE and Const0101 are defined outside this excerpt;
; presumably 0xFE and 0x01 replicated in all eight bytes, in which case
; the result is (a + b + 1) >> 1 for every byte -- confirm.
;
; In:    Src_MB, Dst_MB        dword variables, loaded as the row pointers
;        SrcStride, DstStride  dword variables, added to the pointers per row
; Out:   16 bytes at [EDI] rewritten on each of the 16 iterations
; Clobb: ESI, EDI, EDX, EBX, ECX (0 on exit), MM0-MM7, flags
; NOTE(review): no EMMS appears in the visible code -- confirm it is
; issued before any later x87 use.
;-----------------------------------------------------------------------
        MOV     ESI, DWORD PTR Src_MB       ;ESI -> current source row
        MOV     EDI, DWORD PTR Dst_MB       ;EDI -> current destination row
        MOV     EDX, DWORD PTR SrcStride    ;EDX = source pointer step per row
        MOV     EBX, DWORD PTR DstStride    ;EBX = destination pointer step per row
        MOVQ    MM7, QWORD PTR [ConstFEFE]  ;MM7 = mask applied before the shift
        MOVQ    MM6, QWORD PTR [Const0101]  ;MM6 = mask selecting the adjustment bits
        MOV     ECX, 16                     ;ECX = rows remaining

L1:
        ;---- bytes 0..7 of the row: QWORD1 (source) with QWORD3 (destination)
        MOVQ    MM0, [ESI]                  ;MM0 = QWORD1
        MOVQ    MM1, [EDI]                  ;MM1 = QWORD3
        MOVQ    MM2, MM0
        MOVQ    MM3, MM1
        PAND    MM2, MM6                    ;MM2 = QWORD1 & Const0101
        PAND    MM3, MM6                    ;MM3 = QWORD3 & Const0101
        PAND    MM0, MM7                    ;MM0 = QWORD1 & ConstFEFE
        PAND    MM1, MM7                    ;MM1 = QWORD3 & ConstFEFE
        POR     MM2, MM3                    ;MM2 = adjustment; both inputs are already
                                            ; masked, so no further PAND is required
        PSRLQ   MM0, 1                      ;64-bit shift: MM0 = (QWORD1 & ConstFEFE) >> 1
        PSRLQ   MM1, 1                      ;64-bit shift: MM1 = (QWORD3 & ConstFEFE) >> 1
        PADDB   MM0, MM1                    ;per-byte sum of the two halves, w/o adjustment
        PADDB   MM0, MM2                    ;add lsb adjustment
        MOVQ    [EDI], MM0                  ;store averaged bytes 0..7

        ;---- bytes 8..15 of the row: QWORD2 (source) with QWORD4 (destination)
        MOVQ    MM4, [ESI+8]                ;MM4 = QWORD2
        MOVQ    MM5, [EDI+8]                ;MM5 = QWORD4
        MOVQ    MM2, MM4
        MOVQ    MM3, MM5
        PAND    MM2, MM6                    ;MM2 = QWORD2 & Const0101
        PAND    MM3, MM6                    ;MM3 = QWORD4 & Const0101
        PAND    MM4, MM7                    ;MM4 = QWORD2 & ConstFEFE
        PAND    MM5, MM7                    ;MM5 = QWORD4 & ConstFEFE
        POR     MM2, MM3                    ;MM2 = adjustment (already masked, see above)
        PSRLQ   MM4, 1                      ;64-bit shift: MM4 = (QWORD2 & ConstFEFE) >> 1
        PSRLQ   MM5, 1                      ;64-bit shift: MM5 = (QWORD4 & ConstFEFE) >> 1
        PADDB   MM4, MM5                    ;per-byte sum of the two halves, w/o adjustment
        PADDB   MM4, MM2                    ;add lsb adjustment
        MOVQ    [EDI+8], MM4                ;store averaged bytes 8..15

        ;---- advance to the next row
        ADD     ESI, EDX
        ADD     EDI, EBX
        DEC     ECX                         ;DEC/JNZ replaces LOOP: same trip count and
        JNZ     L1                          ; final ECX; unlike LOOP, DEC updates the
                                            ; arithmetic flags (CF is left untouched)