user manual

Efficient 64-Bit Integer Arithmetic 87
22007E/0 - November 1999 AMD Athlon Processor x86 Code Optimization
Example 4 (Left shift):
;Shift the 64-bit operand in EDX:EAX left by the count in ECX (count
; applied modulo 64). Vacated low-order bits are filled with zeros.
;
;The shift is done in two steps: SHLD/SHL take their count from CL
; modulo 32, and bit 5 of ECX (value 32) then selects an extra
; whole-register move for the remaining 32 bit positions.
;
;INPUT: EDX:EAX operand (EDX = high half, EAX = low half)
; ECX shift count (not modified)
;
;OUTPUT: EDX:EAX operand << (ECX % 64)
;
;DESTROYS: EFlags
SHLD EDX, EAX, CL ;high half: shift left by count mod 32, taking in the top bits of EAX
SHL EAX, CL ;low half: shift left by count mod 32, zero fill
TEST ECX, 32 ;bit 5 of count set, i.e. need to shift by another 32?
JZ $lshift_done ;no, done
MOV EDX, EAX ;yes: left shift EDX:EAX by 32 bits, i.e.
XOR EAX, EAX ; low half becomes high half, low half becomes 0
$lshift_done:
Example 5 (Right shift):
;Logical (zero-fill) right shift of the 64-bit operand in EDX:EAX by
; the count in ECX (count applied modulo 64).
;
;The shift is done in two steps: SHRD/SHR take their count from CL
; modulo 32, and bit 5 of ECX (value 32) then selects an extra
; whole-register move for the remaining 32 bit positions.
;
;INPUT: EDX:EAX operand (EDX = high half, EAX = low half)
; ECX shift count (not modified)
;
;OUTPUT: EDX:EAX operand >> (ECX % 64), high-order bits zero-filled
;
;DESTROYS: EFlags
SHRD EAX, EDX, CL ;low half: shift right by count mod 32, taking in the low bits of EDX
SHR EDX, CL ;high half: shift right by count mod 32, zero fill
TEST ECX, 32 ;bit 5 of count set, i.e. need to shift by another 32?
JZ $rshift_done ;no, done
MOV EAX, EDX ;yes: right shift EDX:EAX by 32 bits, i.e.
XOR EDX, EDX ; high half becomes low half, high half becomes 0
$rshift_done:
Example 6 (Multiplication):
;_llmul computes the low-order half of the product of its
; arguments, two 64-bit integers
;
;INPUT: [ESP+8]:[ESP+4] multiplicand
; [ESP+16]:[ESP+12] multiplier
; (offsets are relative to ESP on entry; the procedure does not
; move ESP before reading its arguments)
;
;OUTPUT: EDX:EAX (multiplicand * multiplier) % 2^64
;
;DESTROYS: EAX,ECX,EDX,EFlags
;
;Both exits use a plain RET, so the four argument dwords are left
; on the stack for the caller to remove.
;
;Method: with each operand split into 32-bit halves,
; product % 2^64 = p1 + ((p2_lo + p3_lo) << 32)
; where p1 = multiplicand_lo * multiplier_lo (full 64-bit product)
; p2_lo = low 32 bits of multiplier_hi * multiplicand_lo
; p3_lo = low 32 bits of multiplicand_hi * multiplier_lo
; The hi*hi term and the upper halves of p2/p3 only affect bits 64 and
; above, so they are never computed. If both high halves are zero,
; p2 and p3 vanish and a single MUL is enough.
_llmul PROC
MOV EDX, [ESP+8] ;multiplicand_hi
MOV ECX, [ESP+16] ;multiplier_hi
OR EDX, ECX ;one operand >= 2^32? (ZF=1 only if both high halves are 0)
MOV EDX, [ESP+12] ;multiplier_lo (MOV leaves the flags from OR intact)
MOV EAX, [ESP+4] ;multiplicand_lo
JNZ $twomul ;yes, need two multiplies
MUL EDX ;EDX:EAX = multiplicand_lo * multiplier_lo
RET ;done, return to caller
$twomul:
IMUL EDX, [ESP+8] ;p3_lo = multiplicand_hi*multiplier_lo (low 32 bits only)
IMUL ECX, EAX ;p2_lo = multiplier_hi*multiplicand_lo (low 32 bits only)
ADD ECX, EDX ; p2_lo + p3_lo (carry out is beyond bit 63, discarded)
MUL DWORD PTR [ESP+12] ;p1=multiplicand_lo*multiplier_lo, 64-bit result in EDX:EAX
ADD EDX, ECX ;p1+p2lo+p3_lo = result in EDX:EAX (cross terms add into high half)
RET ;done, return to caller
_llmul ENDP