1 ;;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: System; Base: 10 -*-
3 ;;;; evcl - 12 - Number - Kernel rem pi/2
4 ;;; arch/generic/lisp/math/gen-math-f64-krem-pio2.lisp
6 ;;; This file is part of Evita Common Lisp.
8 ;;; Copyright (C) 1996-2007 by Project Vogue.
9 ;;; Written by Yoshifumi "VOGUE" INOUE. (yosi@msn.com)
11 ;;; @(#)$Id: //proj/evcl3/mainline/arch/generic/lisp/libm/float64/gen-float64-krem-pio2.lisp#1 $
14 ;;; This file contains implementation of following macros:
17 (in-package #:strict-math)
20 /* @(#)k_rem_pio2.c 5.1 93/09/24
22 * ====================================================
23 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
25 * Developed at SunPro, a Sun Microsystems, Inc. business.
26 * Permission to use, copy, modify, and distribute this
27 * software is freely granted, provided that this notice
29 * ====================================================
33 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
34 * double x[],y[]; int e0,nx,prec; int ipio2[];
36 * __kernel_rem_pio2 return the last three digits of N with
40 * The method is to compute the integer (mod 8) and fraction parts of
41 * (2/pi)*x without doing the full multiplication. In general we
42 * skip the part of the product that are known to be a huge integer (
43 * more accurately, = 0 mod 8 ). Thus the number of operations are
44 * independent of the exponent of the input.
46 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
49 * x[] The input value (must be positive) is broken into nx
50 * pieces of 24-bit integers in double precision format.
51 * x[i] will be the i-th 24 bit of x. The scaled exponent
52 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
53 * match x's up to 24 bits.
55 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
63 * y[] ouput result in an array of double precision numbers.
64 * The dimension of y[] is:
69 * The actual value is the sum of them. Thus for 113-bit
70 * precison, one may have to do something like:
72 * long double t,w,r_head, r_tail;
73 * t = (long double)y[2] + (long double)y[1];
74 * w = (long double)y[0];
76 * r_tail = w - (r_head - t);
78 * e0 The exponent of x[0]
82 * prec an integer indicating the precision:
85 * 2 64 bits (extended)
89 * integer array, contains the (24*i)-th to (24*i+23)-th
90 * bit of 2/pi after binary point. The corresponding
93 * ipio2[i] * 2^(-24(i+1)).
96 * double scalbn(), floor();
99 * Here is the description of some local variables:
101 * jk jk+1 is the initial number of terms of ipio2[] needed
102 * in the computation. The recommended value is 2,3,4,
103 * 6 for single, double, extended,and quad.
105 * jz local integer variable indicating the number of
106 * terms of ipio2[] used.
110 * jv index for pointing to the suitable ipio2[] for the
111 * computation. In general, we want
112 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
113 * is an integer. Thus
114 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
115 * Hence jv = max(0,(e0-3)/24).
117 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
119 * q[] double array with integral value, representing the
120 * 24-bits chunk of the product of x and 2/pi.
122 * q0 the corresponding exponent of q[0]. Note that the
123 * exponent for q[i] would be q0-24*i.
125 * PIo2[] double precision array, obtained by cutting pi/2
126 * into 24 bits chunks.
128 * f[] ipio2[] in floating point
130 * iq[] integer array by breaking up q[] in 24-bits chunk.
132 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
134 * ih integer. If >0 it indicates q[] is >= 0.5, hence
135 * it also indicates the *sign* of the result.
141 ;;; The hexadecimal values are the intended ones for the following
142 ;;; constants. The decimal values may be used, provided that the
143 ;;; compiler will convert from decimal to binary accurately enough
144 ;;; to produce the hexadecimal values shown.
147 ;static const double PIo2[] = {
149 ;static double PIo2[] = {
152 (defparameter +pi-over-two+ (make-array 8 :element-type 'double-float
154 1.57079625129699707031d+00
155 7.54978941586159635335d-08
156 5.39030252995776476554d-15
157 3.28200341580791294123d-22
158 1.27065575308067607349d-29
159 1.22933308981111328932d-36
160 2.73370053816464559624d-44
161 2.16741683877804819444d-51
164 (defun float64-kernel-rem-pio2 (x e0 nx prec ipio2)
165 ;(declare (values (member -1 0 1) double-float double-float))
166 (declare (type (simple-array double-float (*)) x))
167 (declare (type fixnum e0 nx))
168 (declare (type (integer 0 3) prec))
172 (two24 #+nil 1.67772160000000000000e+07
173 #.(encode-float64 #x41700000 #x00000000) )
174 (twon24 #+nil 5.96046447753906250000e-08
175 #.(encode-float64 #x3E700000 #x00000000) )
177 (jk (svref #(2 3 4 6) prec))
179 ;; determine jx,jv,q0, note that 3>q0
181 (jv (truncate (- e0 3) 24)) ; if(jv<0) jv=0;
182 (q0 (- e0 (* 24 (+ jv 1))))
183 ;; set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk]
186 with f = (make-array 20 :element-type 'double-float)
188 for j = (- jv jx) then (1+ j) do
190 (if (minusp j) zero (float (elt ipio2 j) 0d0)) )
191 finally (return f) ) )
192 ;;compute q[0],q[1],...q[jk]
195 with q = (make-array 20 :element-type 'double-float)
198 with fw of-type double-float = 0d0
200 (incf fw (* (elt x j) (elt f (- (+ jx i) j))))
201 (setf (elt q i) fw) )
202 finally (return q) ) )
204 (iq (make-array 20 :element-type '(signed-byte 32)))
205 (fq (make-array jk :element-type 'double-float))
210 ;; distill q[] into iq[] reversingly
212 for i = 0 then (1+ i)
213 for j = jz then (1- j)
215 (let ((fw (float (truncate (* twon24 z)) 0d0)))
216 (setf (elt iq i) (truncate (- z (* two24 fw))))
217 (setq z (+ (elt q (1- j)) fw)) ))
220 (setq z (scale-float64 z q0)) ; actual value of z
221 (decf z (* 8.0d0 (floor (* z 0.125d0)))) ; trim off integer >= 8
222 (setq n (truncate z))
223 (decf z (float n 0d0))
226 ((> q0 0) ; need iq[jz-1] to determine n
227 (let ((i (ash (elt iq (- jz 1)) (- (- 24 q0)))))
229 (decf (elt iq (- jz 1)) (ash i (- 24 q0)))
230 (ash (elt iq (- jz 1)) (- (- 23 q0))) ) )
231 ((eql q0 0) (ash (elt iq (- jz 1)) -23) )
234 (when (> ih 0) ; q > 0.5
238 (loop for i below jz do
239 (let ((j (elt iq i)))
242 (setf (elt iq i) (- #xffffff j)) )
245 (setf (elt iq i) (- #x1000000 j)) )) ))
247 ;; rare case: chance is 1 in 12
250 (1 (setf (elt iq (- jz 1))
251 (logand (elt iq (- jz 1)) #x7fffff) ) )
252 (2 (setf (elt iq (- jz 1))
253 (logand (elt iq (- jz 1)) #x3fffff) ) )))
257 (unless (eql carry 0)
258 (decf z (scale-float64 one (truncate q0))) )) ))
260 ;; check if recomputation is needed
264 (loop for i = (- jz 1) then (1- i)
266 do (setq j (logior j (elt iq i))))
270 (loop while (= (elt iq (- jk k)) 0) do (incf k))
271 (loop for i = (1+ jz) then (1+ i)
272 while (zerop (elt iq (- jk k)))
273 do (setf (elt f (+ jx i)) (float (elt ipio2 (+ jv i)) 0d0))
275 (loop for j = 0 then (1+ j)
276 with fw of-type double-float = 0d0
278 do (setq fw (+ fw (* (elt x j) (elt f (- (+ jx i) j)))))
279 finally (return fw))))
286 ; for (i=jz-1;i>=jk;i--) j |= iq[i];
287 ; if(j==0) { /* need recomputation
288 ; for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed
290 ; for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k]
291 ; f[jx+i] = (double) ipio2[jv+i];
292 ; for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
300 ;; chop off zero terms
305 (loop while (zerop (elt iq jz))
309 (setq z (scale-float64 z (- (truncate q0))))
312 ((fw (float (truncate (* twon24 z)) 0d0)))
313 (setf (elt iq jz) (truncate (- z (* two24 fw))))
316 (setf (elt iq jz) (truncate fw)))
317 (setf (elt iq jz) (truncate z)))))
321 ; while(iq[jz]==0) { jz--; q0-=24;}
322 ; } else { /* break z into 24-bit if necessary
323 ; z = scalbn(z,-(int)q0);
325 ; fw = (double)((__int32_t)(twon24*z));
326 ; iq[jz] = (__int32_t)(z-two24*fw);
328 ; iq[jz] = (__int32_t) fw;
329 ; } else iq[jz] = (__int32_t) z ;
333 ;; convert integer "bit" chunk to floating-point value
335 ((fw (scale-float64 one (truncate q0))))
336 (loop for i = jz then (1- i)
338 do (setf (elt q i) (* fw (float (elt iq i) 0d0)))
339 do (setq fw (* fw twon24))))
341 ; fw = scalbn(one,(int)q0);
342 ; for(i=jz;i>=0;i--) {
343 ; q[i] = fw*(double)iq[i]; fw*=twon24;
346 ;; compute PIo2[0,...,jp]*q[jz,...,0]
350 (loop for i = jz then (1- i)
352 do (loop for k = 0 then (1+ k)
353 while (and (<= k jp) (<= k (- jz i)))
354 do (setq fw (+ fw (* (float (elt +pi-over-two+ k) 0d0) (elt q (+ i k))))))
355 do (setf (elt fq (- jz i)) fw)))
357 ; for(i=jz;i>=0;i--) {
358 ; for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
363 ;; compress fq[] into y[]
368 (case prec ; of course this will be 2
370 (loop for i = jz then (1- i)
372 do (setq fw (+ fw (elt fq i))))
373 (setq y0 (if (= ih 0) fw (- fw))))
375 (loop for i = jz then (1- i)
377 do (setq fw (+ fw (elt fq i))))
378 (setq y0 (if (= ih 0) fw (- fw)))
379 (setq fw (- (elt fq 0) fw))
380 (loop for i = 0 then (1+ i)
382 do (setq fw (+ fw (elt fq i))))
383 (setq y1 (if (= ih 0) fw (- fw))))
384 (3 ; anytime the source comment says painful, and I don't have to do it....
385 (error "Don't support 113 bit precision")))
390 ; for (i=jz;i>=0;i--) fw += fq[i];
391 ; y[0] = (ih==0)? fw: -fw;
396 ; for (i=jz;i>=0;i--) fw += fq[i];
397 ; y[0] = (ih==0)? fw: -fw;
399 ; for (i=1;i<=jz;i++) fw += fq[i];
400 ; y[1] = (ih==0)? fw: -fw;
403 ; for (i=jz;i>0;i--) {
404 ; fw = fq[i-1]+fq[i];
405 ; fq[i] += fq[i-1]-fw;
408 ; for (i=jz;i>1;i--) {
409 ; fw = fq[i-1]+fq[i];
410 ; fq[i] += fq[i-1]-fw;
413 ; for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
415 ; y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
417 ; y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
420 (values (logand n 7) y0 y1)))))