user manual

64 Replace Branches with Computation in 3DNow! Code
AMD Athlon Processor x86 Code Optimization
22007E/0 November 1999
Example 5: C code:
/* Branching reference version: chooses res from r according to the two
 * flags xs (x is negative) and df (|x| < |y|). x, y and r are inputs. */
#define PI 3.14159265358979323
float x, y, r;          /* inputs */
float xa, ya;           /* absolute values of x and y */
float res;              /* result */
int xs, df;             /* flags, each 0 or 1 */

xs = (x < 0);           /* 1 when x is negative */
xa = fabs(x);
ya = fabs(y);
df = (xa < ya);         /* 1 when |x| < |y| */

if (xs) {
    if (df) {
        res = PI/2 + r;     /* xs && df */
    }
    else {
        res = PI - r;       /* xs only */
    }
}
else {
    if (df) {
        res = PI/2 - r;     /* df only */
    }
    else {
        res = r;            /* neither flag set */
    }
}
3DNow! code:
;-----------------------------------------------------------------------
; Branch-free selection of res from r, driven by xs = sign(x) and
; df = (abs(x) < abs(y)); mirrors the four-way if/else of the C code.
;
;in:    MM0 = r   (expected non-negative: the POR below can only SET the
;                  sign bit, so "-r" is really -abs(r))
;       MM1 = y
;       MM2 = x
;out:   MM0 = res
;uses:  MM1, MM2, MM3, MM5, MM6, MM7 are overwritten
;mem:   sgn, mabs, npio2 are constants defined elsewhere
;       (presumably 0x80000000, 0x7fffffff and -pi/2 per dword -- confirm)
;
;NOTE:  xs is taken from the raw sign bit, so x = -0.0 counts as negative
;       here, whereas the C expression "x < 0" would not.
;-----------------------------------------------------------------------
        MOVQ    MM7, sgn        ;mask to extract sign bit
        MOVQ    MM5, mabs       ;mask to clear sign bit
        PAND    MM7, MM2        ;xs = sign(x)
        PAND    MM1, MM5        ;ya = abs(y)
        PAND    MM2, MM5        ;xa = abs(x)
        MOVQ    MM6, MM1        ;ya (copy; the compare overwrites its destination)
                                ;both operands now have the sign bit clear, so a
                                ; signed integer compare orders them like floats
        PCMPGTD MM6, MM2        ;df = (xa < ya) ? 0xffffffff : 0
        PSLLD   MM6, 31         ;df = bit<31>
        MOVQ    MM5, MM7        ;xs
        PXOR    MM7, MM6        ;xs^df ? 0x80000000 : 0
        MOVQ    MM3, npio2      ;-pi/2
        PXOR    MM5, MM3        ;xs ? pi/2 : -pi/2
        PSRAD   MM6, 31         ;df ? 0xffffffff : 0
        PANDN   MM6, MM5        ;xs ? (df ? 0 : pi/2) : (df ? 0 : -pi/2)
        PFSUB   MM6, MM3        ;pr = pi/2 + (xs ? (df ? 0 : pi/2) :
                                ;                  (df ? 0 : -pi/2))
        POR     MM0, MM7        ;ar = xs^df ? -r : r   (for r >= 0)
        PFADD   MM0, MM6        ;res = ar + pr