code.consxy.com Git - clnl/blob - resources/strictmath/otherconversions/gen-float64-krem-pio2.lisp

   1 ;;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: System; Base: 10 -*-
   2 ;;;;
   3 ;;;; evcl - 12 - Number - Kernel rem pi/2
   4 ;;; arch/generic/lisp/math/gen-math-f64-krem-pio2.lisp
   5 ;;;
   6 ;;; This file is part of Evita Common Lisp.
   7 ;;;
   8 ;;; Copyright (C) 1996-2007 by Project Vogue.
   9 ;;; Written by Yoshifumi "VOGUE" INOUE. (yosi@msn.com)
  10 ;;;
  11 ;;; @(#)$Id: //proj/evcl3/mainline/arch/generic/lisp/libm/float64/gen-float64-krem-pio2.lisp#1 $
  12 ;;;
  13 ;;; Description:
  14 ;;;  This file contains implementation of following macros:
  15 ;;;     float64-krem-pio2
  16 ;
  17 (in-package #:strict-math)
  18
  19 #|
  20 /* @(#)k_rem_pio2.c 5.1 93/09/24
  21 /*
  22  * ====================================================
  23  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  24  *
  25  * Developed at SunPro, a Sun Microsystems, Inc. business.
  26  * Permission to use, copy, modify, and distribute this
  27  * software is freely granted, provided that this notice
  28  * is preserved.
  29  * ====================================================
  30
  31
  32 /*
  33  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
  34  * double x[],y[]; int e0,nx,prec; int ipio2[];
  35  *
  36  * __kernel_rem_pio2 return the last three digits of N with
  37  *              y = x - N*pi/2
  38  * so that |y| < pi/2.
  39  *
  40  * The method is to compute the integer (mod 8) and fraction parts of
  41  * (2/pi)*x without doing the full multiplication. In general we
  42  * skip the part of the product that are known to be a huge integer (
  43  * more accurately, = 0 mod 8 ). Thus the number of operations are
  44  * independent of the exponent of the input.
  45  *
  46  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
  47  *
  48  * Input parameters:
  49  *      x[]        The input value (must be positive) is broken into nx
  50  *              pieces of 24-bit integers in double precision format.
  51  *              x[i] will be the i-th 24 bit of x. The scaled exponent
  52  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
  53  *              match x's up to 24 bits.
  54  *
  55  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
  56  *                      e0 = ilogb(z)-23
  57  *                      z  = scalbn(z,-e0)
  58  *              for i = 0,1,2
  59  *                      x[i] = floor(z)
  60  *                      z    = (z-x[i])*2**24
  61  *
  62  *
  63  *      y[]        ouput result in an array of double precision numbers.
  64  *              The dimension of y[] is:
  65  *                      24-bit  precision        1
  66  *                      53-bit  precision        2
  67  *                      64-bit  precision        2
  68  *                      113-bit precision        3
  69  *              The actual value is the sum of them. Thus for 113-bit
  70  *              precison, one may have to do something like:
  71  *
  72  *              long double t,w,r_head, r_tail;
  73  *              t = (long double)y[2] + (long double)y[1];
  74  *              w = (long double)y[0];
  75  *              r_head = t+w;
  76  *              r_tail = w - (r_head - t);
  77  *
  78  *      e0        The exponent of x[0]
  79  *
  80  *      nx        dimension of x[]
  81  *
  82  *      prec        an integer indicating the precision:
  83  *                      0        24  bits (single)
  84  *                      1        53  bits (double)
  85  *                      2        64  bits (extended)
  86  *                      3        113 bits (quad)
  87  *
  88  *      ipio2[]
  89  *              integer array, contains the (24*i)-th to (24*i+23)-th
  90  *              bit of 2/pi after binary point. The corresponding
  91  *              floating value is
  92  *
  93  *                      ipio2[i] * 2^(-24(i+1)).
  94  *
  95  * External function:
  96  *      double scalbn(), floor();
  97  *
  98  *
  99  * Here is the description of some local variables:
 100  *
 101  *      jk        jk+1 is the initial number of terms of ipio2[] needed
 102  *              in the computation. The recommended value is 2,3,4,
 103  *              6 for single, double, extended,and quad.
 104  *
 105  *      jz        local integer variable indicating the number of
 106  *              terms of ipio2[] used.
 107  *
 108  *      jx        nx - 1
 109  *
 110  *      jv        index for pointing to the suitable ipio2[] for the
 111  *              computation. In general, we want
 112  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 113  *              is an integer. Thus
 114  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 115  *              Hence jv = max(0,(e0-3)/24).
 116  *
 117  *      jp        jp+1 is the number of terms in PIo2[] needed, jp = jk.
 118  *
 119  *      q[]        double array with integral value, representing the
 120  *              24-bits chunk of the product of x and 2/pi.
 121  *
 122  *      q0        the corresponding exponent of q[0]. Note that the
 123  *              exponent for q[i] would be q0-24*i.
 124  *
 125  *      PIo2[]        double precision array, obtained by cutting pi/2
 126  *              into 24 bits chunks.
 127  *
 128  *      f[]        ipio2[] in floating point
 129  *
 130  *      iq[]        integer array by breaking up q[] in 24-bits chunk.
 131  *
 132  *      fq[]        final product of x*(2/pi) in fq[0],..,fq[jk]
 133  *
 134  *      ih        integer. If >0 it indicates q[] is >= 0.5, hence
 135  *              it also indicates the *sign* of the result.
 136  *
 137 |#
 138
 139
 140 ;;; Constants:
 141 ;;; The hexadecimal values are the intended ones for the following
 142 ;;; constants. The decimal values may be used, provided that the
 143 ;;; compiler will convert from decimal to binary accurately enough
 144 ;;; to produce the hexadecimal values shown.
 145
 146 ;#ifdef __STDC__
 147 ;static const double PIo2[] = {
 148 ;#else
 149 ;static double PIo2[] = {
 150 ;#endif
 151 ;};
 152 (defparameter +pi-over-two+ (make-array 8 :element-type 'double-float
 153     :initial-contents #(
 154      1.57079625129699707031d+00
 155      7.54978941586159635335d-08
 156      5.39030252995776476554d-15
 157      3.28200341580791294123d-22
 158      1.27065575308067607349d-29
 159      1.22933308981111328932d-36
 160      2.73370053816464559624d-44
 161      2.16741683877804819444d-51
 162  ) ) )
 163
 164 (defun float64-kernel-rem-pio2 (x e0 nx prec ipio2)
 165     ;(declare (values (member -1 0 1) double-float double-float))
 166     (declare (type (simple-array double-float (*)) x))
 167     (declare (type fixnum e0 nx))
 168     (declare (type (integer 0 3) prec))
 169   (prog* (
 170     (zero   0.0d0)
 171     (one    1.0d0)
 172     (two24  #+nil 1.67772160000000000000e+07
 173             #.(encode-float64 #x41700000 #x00000000) )
 174     (twon24 #+nil 5.96046447753906250000e-08
 175             #.(encode-float64 #x3E700000 #x00000000) )
 176     ;; initialize jk
 177     (jk (svref #(2 3 4 6) prec))
 178     (jp jk)
 179     ;; determine jx,jv,q0, note that 3>q0
 180     (jx (- nx 1))
 181     (jv (truncate (- e0 3) 24)) ; if(jv<0) jv=0;
 182     (q0 (- e0 (* 24 (+ jv 1))))
 183     ;; set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk]
 184     (f
 185       (loop
 186         with f = (make-array 20 :element-type 'double-float)
 187         for i to (+ jx jk)
 188         for j = (- jv jx) then (1+ j) do
 189         (setf (elt f i)
 190               (if (minusp j) zero (float (elt ipio2 j) 0d0)) )
 191         finally (return f) ) )
 192     ;;compute q[0],q[1],...q[jk]
 193     (q
 194       (loop
 195         with q = (make-array 20 :element-type 'double-float)
 196         for i to jk do
 197           (loop
 198             with fw of-type double-float = 0d0
 199             for j to jx do
 200               (incf fw (* (elt x j) (elt f (- (+ jx i) j))))
 201               (setf (elt q i) fw) )
 202         finally (return q) ) )
 203     (jz jk)
 204     (iq (make-array 20 :element-type '(signed-byte 32)))
 205     (fq (make-array jk :element-type 'double-float))
 206     (z (elt q jz))
 207     (n 0)
 208     )
 209   recompute
 210     ;; distill q[] into iq[] reversingly
 211     (loop
 212       for i = 0 then (1+ i)
 213       for j = jz then (1- j)
 214       while (> j 0) do
 215         (let ((fw (float (truncate (* twon24 z)) 0d0)))
 216           (setf (elt iq i) (truncate (- z (* two24 fw))))
 217           (setq z (+ (elt q (1- j)) fw)) ))
 218
 219     ;; compute n
 220     (setq z  (scale-float64 z q0))           ; actual value of z
 221     (decf z  (* 8.0d0 (floor (* z 0.125d0))))  ; trim off integer >= 8
 222     (setq n (truncate z))
 223     (decf z (float n 0d0))
 224     (let ((ih
 225             (cond
 226               ((> q0 0)    ; need iq[jz-1] to determine n
 227                 (let ((i  (ash (elt iq (- jz 1)) (- (- 24 q0)))))
 228                   (incf n i)
 229                   (decf (elt iq (- jz 1)) (ash i (- 24 q0)))
 230                   (ash (elt iq (- jz 1)) (- (- 23 q0))) ) )
 231               ((eql q0 0) (ash (elt iq (- jz 1)) -23) )
 232               ((>= z 0.5d0) 2)
 233               (t 0) ) ))
 234       (when (> ih 0)    ; q > 0.5
 235         (incf n)
 236         (let ((carry 0))
 237           ;; compute 1-q
 238           (loop for i below jz do
 239             (let ((j (elt iq i)))
 240               (cond
 241                 ((not (eql carry 0))
 242                   (setf (elt iq i) (- #xffffff j)) )
 243                 ((not (eql j 0))
 244                   (setq carry 1)
 245                   (setf (elt iq i) (- #x1000000 j)) )) ))
 246
 247             ;; rare case: chance is 1 in 12
 248             (when (> q0 0)
 249               (case q0
 250                 (1 (setf (elt iq (- jz 1))
 251                         (logand (elt iq (- jz 1)) #x7fffff) ) )
 252                 (2 (setf (elt iq (- jz 1))
 253                         (logand (elt iq (- jz 1)) #x3fffff) ) )))
 254
 255             (when (eql ih 2)
 256               (setq z (- one z))
 257               (unless (eql carry 0)
 258                 (decf z (scale-float64 one (truncate q0))) )) ))
 259
 260     ;; check if recomputation is needed
 261        (when (= z zero)
 262         (let
 263          ((j 0))
 264          (loop for i = (- jz 1) then (1- i)
 265                while (>= i jk)
 266                do (setq j (logior j (elt iq i))))
 267          (when (= j 0)
 268           (let
 269            ((k 1))
 270            (loop while (= (elt iq (- jk k)) 0) do (incf k))
 271            (loop for i = (1+ jz) then (1+ i)
 272                  while (zerop (elt iq (- jk k)))
 273                  do (setf (elt f (+ jx i)) (float (elt ipio2 (+ jv i)) 0d0))
 274                  do (setf (elt q i)
 275                      (loop for j = 0 then (1+ j)
 276                            with fw of-type double-float = 0d0
 277                            while (<= j jx)
 278                            do (setq fw (+ fw (* (elt x j) (elt f (- (+ jx i) j)))))
 279                            finally (return fw))))
 280            (decf jz k)
 281            (go recompute)))))
 282
 283
 284 ;        if(z==zero) {
 285 ;            j = 0;
 286 ;            for (i=jz-1;i>=jk;i--) j |= iq[i];
 287 ;            if(j==0) { /* need recomputation
 288 ;                for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed
 289 ;
 290 ;                for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k]
 291 ;                    f[jx+i] = (double) ipio2[jv+i];
 292 ;                    for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
 293 ;                    q[i] = fw;
 294 ;                }
 295 ;                jz += k;
 296 ;                goto recompute;
 297 ;            }
 298 ;        }
 299
 300     ;; chop off zero terms
 301     (if (= z 0.0d0)
 302      (progn
 303       (decf jz)
 304       (decf q0 24)
 305       (loop while (zerop (elt iq jz))
 306             do (decf jz)
 307             do (decf q0 24)))
 308      (progn
 309       (setq z (scale-float64 z (- (truncate q0))))
 310       (if (>= z two24)
 311        (let
 312         ((fw (float (truncate (* twon24 z)) 0d0)))
 313         (setf (elt iq jz) (truncate (- z (* two24 fw))))
 314         (incf jz)
 315         (incf q0 24)
 316         (setf (elt iq jz) (truncate fw)))
 317        (setf (elt iq jz) (truncate z)))))
 318
 319 ;        if(z==0.0) {
 320 ;            jz -= 1; q0 -= 24;
 321 ;            while(iq[jz]==0) { jz--; q0-=24;}
 322 ;        } else { /* break z into 24-bit if necessary
 323 ;            z = scalbn(z,-(int)q0);
 324 ;            if(z>=two24) {
 325 ;                fw = (double)((__int32_t)(twon24*z));
 326 ;                iq[jz] = (__int32_t)(z-two24*fw);
 327 ;                jz += 1; q0 += 24;
 328 ;                iq[jz] = (__int32_t) fw;
 329 ;            } else iq[jz] = (__int32_t) z ;
 330 ;        }
 331 ;
 332
 333     ;; convert integer "bit" chunk to floating-point value
 334     (let
 335      ((fw (scale-float64 one (truncate q0))))
 336      (loop for i = jz then (1- i)
 337            while (>= i 0)
 338            do (setf (elt q i) (* fw (float (elt iq i) 0d0)))
 339            do (setq fw (* fw twon24))))
 340
 341 ;        fw = scalbn(one,(int)q0);
 342 ;        for(i=jz;i>=0;i--) {
 343 ;            q[i] = fw*(double)iq[i]; fw*=twon24;
 344 ;        }
 345
 346     ;; compute PIo2[0,...,jp]*q[jz,...,0]
 347
 348     (let
 349      ((fw 0.0d0))
 350      (loop for i = jz then (1- i)
 351            while (>= i 0)
 352            do (loop for k = 0 then (1+ k)
 353                     while (and (<= k jp) (<= k (- jz i)))
 354                     do (setq fw (+ fw (* (float (elt +pi-over-two+ k) 0d0) (elt q (+ i k))))))
 355            do (setf (elt fq (- jz i)) fw)))
 356
 357 ;        for(i=jz;i>=0;i--) {
 358 ;            for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
 359 ;            fq[jz-i] = fw;
 360 ;        }
 361 ;
 362
 363     ;; compress fq[] into y[]
 364     (let
 365      ((y0 0)
 366       (y1 0)
 367       (fw 0.0d0))
 368      (case prec ; of course this will be 2
 369       (0
 370        (loop for i = jz then (1- i)
 371              while (>= i 0)
 372              do (setq fw (+ fw (elt fq i))))
 373        (setq y0 (if (= ih 0) fw (- fw))))
 374       ((1 2)
 375        (loop for i = jz then (1- i)
 376              while (>= i 0)
 377              do (setq fw (+ fw (elt fq i))))
 378        (setq y0 (if (= ih 0) fw (- fw)))
 379        (setq fw (- (elt fq 0) fw))
 380        (loop for i = 0 then (1+ i)
 381              while (<= i jz)
 382              do (setq fw (+ fw (elt fq i))))
 383        (setq y1 (if (= ih 0) fw (- fw))))
 384       (3 ; anytime the source comment says painful, and I don't have to do it....
 385        (error "Don't support 113 bit precision")))
 386
 387 ;        switch(prec) {
 388 ;            case 0:
 389 ;                fw = 0.0;
 390 ;                for (i=jz;i>=0;i--) fw += fq[i];
 391 ;                y[0] = (ih==0)? fw: -fw;
 392 ;                break;
 393 ;            case 1:
 394 ;            case 2:
 395 ;                fw = 0.0;
 396 ;                for (i=jz;i>=0;i--) fw += fq[i];
 397 ;                y[0] = (ih==0)? fw: -fw;
 398 ;                fw = fq[0]-fw;
 399 ;                for (i=1;i<=jz;i++) fw += fq[i];
 400 ;                y[1] = (ih==0)? fw: -fw;
 401 ;                break;
 402 ;            case 3:        /* painful
 403 ;                for (i=jz;i>0;i--) {
 404 ;                    fw      = fq[i-1]+fq[i];
 405 ;                    fq[i]  += fq[i-1]-fw;
 406 ;                    fq[i-1] = fw;
 407 ;                }
 408 ;                for (i=jz;i>1;i--) {
 409 ;                    fw      = fq[i-1]+fq[i];
 410 ;                    fq[i]  += fq[i-1]-fw;
 411 ;                    fq[i-1] = fw;
 412 ;                }
 413 ;                for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
 414 ;                if(ih==0) {
 415 ;                    y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
 416 ;                } else {
 417 ;                    y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
 418 ;                }
 419 ;        }
 420       (values (logand n 7) y0 y1)))))
 421 ;        return n&7;
 422 ;}
 423