;;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: System; Base: 10 -*- ;;;; ;;;; evcl - 12 - Number - Kernel rem pi/2 ;;; arch/generic/lisp/math/gen-math-f64-krem-pio2.lisp ;;; ;;; This file is part of Evita Common Lisp. ;;; ;;; Copyright (C) 1996-2007 by Project Vogue. ;;; Written by Yoshifumi "VOGUE" INOUE. (yosi@msn.com) ;;; ;;; @(#)$Id: //proj/evcl3/mainline/arch/generic/lisp/libm/float64/gen-float64-krem-pio2.lisp#1 $ ;;; ;;; Description: ;;; This file contains implementation of following macros: ;;; float64-krem-pio2 ; (in-package #:strict-math) #| /* @(#)k_rem_pio2.c 5.1 93/09/24 /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== /* * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) * double x[],y[]; int e0,nx,prec; int ipio2[]; * * __kernel_rem_pio2 return the last three digits of N with * y = x - N*pi/2 * so that |y| < pi/2. * * The method is to compute the integer (mod 8) and fraction parts of * (2/pi)*x without doing the full multiplication. In general we * skip the part of the product that are known to be a huge integer ( * more accurately, = 0 mod 8 ). Thus the number of operations are * independent of the exponent of the input. * * (2/pi) is represented by an array of 24-bit integers in ipio2[]. * * Input parameters: * x[] The input value (must be positive) is broken into nx * pieces of 24-bit integers in double precision format. * x[i] will be the i-th 24 bit of x. The scaled exponent * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 * match x's up to 24 bits. * * Example of breaking a double positive z into x[0]+x[1]+x[2]: * e0 = ilogb(z)-23 * z = scalbn(z,-e0) * for i = 0,1,2 * x[i] = floor(z) * z = (z-x[i])*2**24 * * * y[] ouput result in an array of double precision numbers. * The dimension of y[] is: * 24-bit precision 1 * 53-bit precision 2 * 64-bit precision 2 * 113-bit precision 3 * The actual value is the sum of them. Thus for 113-bit * precison, one may have to do something like: * * long double t,w,r_head, r_tail; * t = (long double)y[2] + (long double)y[1]; * w = (long double)y[0]; * r_head = t+w; * r_tail = w - (r_head - t); * * e0 The exponent of x[0] * * nx dimension of x[] * * prec an integer indicating the precision: * 0 24 bits (single) * 1 53 bits (double) * 2 64 bits (extended) * 3 113 bits (quad) * * ipio2[] * integer array, contains the (24*i)-th to (24*i+23)-th * bit of 2/pi after binary point. The corresponding * floating value is * * ipio2[i] * 2^(-24(i+1)). * * External function: * double scalbn(), floor(); * * * Here is the description of some local variables: * * jk jk+1 is the initial number of terms of ipio2[] needed * in the computation. The recommended value is 2,3,4, * 6 for single, double, extended,and quad. * * jz local integer variable indicating the number of * terms of ipio2[] used. * * jx nx - 1 * * jv index for pointing to the suitable ipio2[] for the * computation. In general, we want * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 * is an integer. Thus * e0-3-24*jv >= 0 or (e0-3)/24 >= jv * Hence jv = max(0,(e0-3)/24). * * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. * * q[] double array with integral value, representing the * 24-bits chunk of the product of x and 2/pi. * * q0 the corresponding exponent of q[0]. Note that the * exponent for q[i] would be q0-24*i. * * PIo2[] double precision array, obtained by cutting pi/2 * into 24 bits chunks. * * f[] ipio2[] in floating point * * iq[] integer array by breaking up q[] in 24-bits chunk. * * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] * * ih integer. If >0 it indicates q[] is >= 0.5, hence * it also indicates the *sign* of the result. * |# ;;; Constants: ;;; The hexadecimal values are the intended ones for the following ;;; constants. The decimal values may be used, provided that the ;;; compiler will convert from decimal to binary accurately enough ;;; to produce the hexadecimal values shown. ;#ifdef __STDC__ ;static const double PIo2[] = { ;#else ;static double PIo2[] = { ;#endif ;}; (defparameter +pi-over-two+ (make-array 8 :element-type 'double-float :initial-contents #( 1.57079625129699707031d+00 7.54978941586159635335d-08 5.39030252995776476554d-15 3.28200341580791294123d-22 1.27065575308067607349d-29 1.22933308981111328932d-36 2.73370053816464559624d-44 2.16741683877804819444d-51 ) ) ) (defun float64-kernel-rem-pio2 (x e0 nx prec ipio2) ;(declare (values (member -1 0 1) double-float double-float)) (declare (type (simple-array double-float (*)) x)) (declare (type fixnum e0 nx)) (declare (type (integer 0 3) prec)) (prog* ( (zero 0.0d0) (one 1.0d0) (two24 #+nil 1.67772160000000000000e+07 #.(encode-float64 #x41700000 #x00000000) ) (twon24 #+nil 5.96046447753906250000e-08 #.(encode-float64 #x3E700000 #x00000000) ) ;; initialize jk (jk (svref #(2 3 4 6) prec)) (jp jk) ;; determine jx,jv,q0, note that 3>q0 (jx (- nx 1)) (jv (truncate (- e0 3) 24)) ; if(jv<0) jv=0; (q0 (- e0 (* 24 (+ jv 1)))) ;; set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] (f (loop with f = (make-array 20 :element-type 'double-float) for i to (+ jx jk) for j = (- jv jx) then (1+ j) do (setf (elt f i) (if (minusp j) zero (float (elt ipio2 j) 0d0)) ) finally (return f) ) ) ;;compute q[0],q[1],...q[jk] (q (loop with q = (make-array 20 :element-type 'double-float) for i to jk do (loop with fw of-type double-float = 0d0 for j to jx do (incf fw (* (elt x j) (elt f (- (+ jx i) j)))) (setf (elt q i) fw) ) finally (return q) ) ) (jz jk) (iq (make-array 20 :element-type '(signed-byte 32))) (fq (make-array jk :element-type 'double-float)) (z (elt q jz)) (n 0) ) recompute ;; distill q[] into iq[] reversingly (loop for i = 0 then (1+ i) for j = jz then (1- j) while (> j 0) do (let ((fw (float (truncate (* twon24 z)) 0d0))) (setf (elt iq i) (truncate (- z (* two24 fw)))) (setq z (+ (elt q (1- j)) fw)) )) ;; compute n (setq z (scale-float64 z q0)) ; actual value of z (decf z (* 8.0d0 (floor (* z 0.125d0)))) ; trim off integer >= 8 (setq n (truncate z)) (decf z (float n 0d0)) (let ((ih (cond ((> q0 0) ; need iq[jz-1] to determine n (let ((i (ash (elt iq (- jz 1)) (- (- 24 q0))))) (incf n i) (decf (elt iq (- jz 1)) (ash i (- 24 q0))) (ash (elt iq (- jz 1)) (- (- 23 q0))) ) ) ((eql q0 0) (ash (elt iq (- jz 1)) -23) ) ((>= z 0.5d0) 2) (t 0) ) )) (when (> ih 0) ; q > 0.5 (incf n) (let ((carry 0)) ;; compute 1-q (loop for i below jz do (let ((j (elt iq i))) (cond ((not (eql carry 0)) (setf (elt iq i) (- #xffffff j)) ) ((not (eql j 0)) (setq carry 1) (setf (elt iq i) (- #x1000000 j)) )) )) ;; rare case: chance is 1 in 12 (when (> q0 0) (case q0 (1 (setf (elt iq (- jz 1)) (logand (elt iq (- jz 1)) #x7fffff) ) ) (2 (setf (elt iq (- jz 1)) (logand (elt iq (- jz 1)) #x3fffff) ) ))) (when (eql ih 2) (setq z (- one z)) (unless (eql carry 0) (decf z (scale-float64 one (truncate q0))) )) )) ;; check if recomputation is needed (when (= z zero) (let ((j 0)) (loop for i = (- jz 1) then (1- i) while (>= i jk) do (setq j (logior j (elt iq i)))) (when (= j 0) (let ((k 1)) (loop while (= (elt iq (- jk k)) 0) do (incf k)) (loop for i = (1+ jz) then (1+ i) while (zerop (elt iq (- jk k))) do (setf (elt f (+ jx i)) (float (elt ipio2 (+ jv i)) 0d0)) do (setf (elt q i) (loop for j = 0 then (1+ j) with fw of-type double-float = 0d0 while (<= j jx) do (setq fw (+ fw (* (elt x j) (elt f (- (+ jx i) j))))) finally (return fw)))) (decf jz k) (go recompute))))) ; if(z==zero) { ; j = 0; ; for (i=jz-1;i>=jk;i--) j |= iq[i]; ; if(j==0) { /* need recomputation ; for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed ; ; for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] ; f[jx+i] = (double) ipio2[jv+i]; ; for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ; q[i] = fw; ; } ; jz += k; ; goto recompute; ; } ; } ;; chop off zero terms (if (= z 0.0d0) (progn (decf jz) (decf q0 24) (loop while (zerop (elt iq jz)) do (decf jz) do (decf q0 24))) (progn (setq z (scale-float64 z (- (truncate q0)))) (if (>= z two24) (let ((fw (float (truncate (* twon24 z)) 0d0))) (setf (elt iq jz) (truncate (- z (* two24 fw)))) (incf jz) (incf q0 24) (setf (elt iq jz) (truncate fw))) (setf (elt iq jz) (truncate z))))) ; if(z==0.0) { ; jz -= 1; q0 -= 24; ; while(iq[jz]==0) { jz--; q0-=24;} ; } else { /* break z into 24-bit if necessary ; z = scalbn(z,-(int)q0); ; if(z>=two24) { ; fw = (double)((__int32_t)(twon24*z)); ; iq[jz] = (__int32_t)(z-two24*fw); ; jz += 1; q0 += 24; ; iq[jz] = (__int32_t) fw; ; } else iq[jz] = (__int32_t) z ; ; } ; ;; convert integer "bit" chunk to floating-point value (let ((fw (scale-float64 one (truncate q0)))) (loop for i = jz then (1- i) while (>= i 0) do (setf (elt q i) (* fw (float (elt iq i) 0d0))) do (setq fw (* fw twon24)))) ; fw = scalbn(one,(int)q0); ; for(i=jz;i>=0;i--) { ; q[i] = fw*(double)iq[i]; fw*=twon24; ; } ;; compute PIo2[0,...,jp]*q[jz,...,0] (let ((fw 0.0d0)) (loop for i = jz then (1- i) while (>= i 0) do (loop for k = 0 then (1+ k) while (and (<= k jp) (<= k (- jz i))) do (setq fw (+ fw (* (float (elt +pi-over-two+ k) 0d0) (elt q (+ i k)))))) do (setf (elt fq (- jz i)) fw))) ; for(i=jz;i>=0;i--) { ; for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ; fq[jz-i] = fw; ; } ; ;; compress fq[] into y[] (let ((y0 0) (y1 0) (fw 0.0d0)) (case prec ; of course this will be 2 (0 (loop for i = jz then (1- i) while (>= i 0) do (setq fw (+ fw (elt fq i)))) (setq y0 (if (= ih 0) fw (- fw)))) ((1 2) (loop for i = jz then (1- i) while (>= i 0) do (setq fw (+ fw (elt fq i)))) (setq y0 (if (= ih 0) fw (- fw))) (setq fw (- (elt fq 0) fw)) (loop for i = 0 then (1+ i) while (<= i jz) do (setq fw (+ fw (elt fq i)))) (setq y1 (if (= ih 0) fw (- fw)))) (3 ; anytime the source comment says painful, and I don't have to do it.... (error "Don't support 113 bit precision"))) ; switch(prec) { ; case 0: ; fw = 0.0; ; for (i=jz;i>=0;i--) fw += fq[i]; ; y[0] = (ih==0)? fw: -fw; ; break; ; case 1: ; case 2: ; fw = 0.0; ; for (i=jz;i>=0;i--) fw += fq[i]; ; y[0] = (ih==0)? fw: -fw; ; fw = fq[0]-fw; ; for (i=1;i<=jz;i++) fw += fq[i]; ; y[1] = (ih==0)? fw: -fw; ; break; ; case 3: /* painful ; for (i=jz;i>0;i--) { ; fw = fq[i-1]+fq[i]; ; fq[i] += fq[i-1]-fw; ; fq[i-1] = fw; ; } ; for (i=jz;i>1;i--) { ; fw = fq[i-1]+fq[i]; ; fq[i] += fq[i-1]-fw; ; fq[i-1] = fw; ; } ; for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ; if(ih==0) { ; y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ; } else { ; y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ; } ; } (values (logand n 7) y0 y1))))) ; return n&7; ;}