code.consxy.com Git - clnl/blob - src/main/strictmath/rem-pio2.lisp

   1 ; Copyright 2022 Frank Duncan (frank@consxy.com) under AGPL3.  See distributed LICENSE.txt.
   2 (in-package #:strictmath)
   3 ; This file is taken from part of Evita Common Lisp.
   4 ;
   5 ; It has been updated to match the rest of the project's documentation and style
   6 ; standards.  But otherwise, the following copyright supersedes the above AGPL copyright.
   7 ;
   8 ; Copyright (C) 1996-2007 by Project Vogue.
   9 ; Written by Yoshifumi "VOGUE" INOUE. (yosi@msn.com)
  10 ;
  11 ; Before that, it was based off of fdlibm
  12 ;
  13 ;  See fdlibm (http://www.netlib.org/fdlibm/)
  14 ;  See http://sources.redhat.com/newlib/
  15 ;
  16 ; *
  17 ; From fdlibm (http://www.netlib.org/fdlibm/)
  18 ; See http://sources.redhat.com/newlib/
  19 ;
  20 ; /* @(#)e_rem_pio2.c 5.1 93/09/24 *
  21 ; /*
  22 ; * ====================================================
  23 ; * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  24 ; *
  25 ; * Developed at SunPro, a Sun Microsystems, Inc. business.
  26 ; * Permission to use, copy, modify, and distribute this
  27 ; * software is freely granted, provided that this notice
  28 ; * is preserved.
  29 ; * ====================================================
  30 ; *
  31 ; *
  32 ;
  33 ; * __ieee754_rem_pio2(x,y)
  34 ; *
  35 ; * return the remainder of x rem pi/2 in y[0]+y[1]
  36 ; * use __kernel_rem_pio2()
  37 ; *
  38
  39 ;;; Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
  40 (defparameter +two-over-pi+
  41  (make-array 66 :element-type '(unsigned-byte 32)
  42   :initial-contents
  43   #(#xA2F983 #x6E4E44 #x1529FC #x2757D1 #xF534DD #xC0DB62
  44     #x95993C #x439041 #xFE5163 #xABDEBB #xC561B7 #x246E3A
  45     #x424DD2 #xE00649 #x2EEA09 #xD1921C #xFE1DEB #x1CB129
  46     #xA73EE8 #x8235F5 #x2EBB44 #x84E99C #x7026B4 #x5F7E41
  47     #x3991D6 #x398353 #x39F49C #x845F8B #xBDF928 #x3B1FF8
  48     #x97FFDE #x05980F #xEF2F11 #x8B5A0A #x6D1F6D #x367ECF
  49     #x27CB09 #xB74F46 #x3F669E #x5FEA2D #x7527BA #xC7EBE5
  50     #xF17B3D #x0739F7 #x8A5292 #xEA6BFB #x5FB11F #x8D5D08
  51     #x560330 #x46FC7B #x6BABF0 #xCFBC20 #x9AF436 #x1DA9E3
  52     #x91615E #xE61B08 #x659985 #x5F14A0 #x68408D #xFFD880
  53     #x4D7327 #x310606 #x1556CA #x73A8C9 #x60E27B #xC08C6B)))
  54
  55 (defparameter +npio2-hw+
  56  (make-array 32 :element-type '(unsigned-byte 32)
  57   :initial-contents
  58   #(#x3FF921FB #x400921FB #x4012D97C #x401921FB #x401F6A7A #x4022D97C
  59     #x4025FDBB #x402921FB #x402C463A #x402F6A7A #x4031475C #x4032D97C
  60     #x40346B9C #x4035FDBB #x40378FDB #x403921FB #x403AB41B #x403C463A
  61     #x403DD85A #x403F6A7A #x40407E4C #x4041475C #x4042106C #x4042D97C
  62     #x4043A28C #x40446B9C #x404534AC #x4045FDBB #x4046C6CB #x40478FDB
  63     #x404858EB #x404921FB)))
  64
  65 ;
  66 ;
  67 ; invpio2:  53 bits of 2/pi
  68 ; pio2_1:   first  33 bit of pi/2
  69 ; pio2_1t:  pi/2 - pio2_1
  70 ; pio2_2:   second 33 bit of pi/2
  71 ; pio2_2t:  pi/2 - (pio2_1+pio2_2)
  72 ; pio2_3:   third  33 bit of pi/2
  73 ; pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
  74 ;
  75 ;
  76
  77 (defun float64-rem-pio2-small (x ix hx)
  78  (prog*
  79   ((pio2_1 #+nil 1.57079632673412561417e+00 #.(encode-float64 #x3FF921FB #x54400000))
  80    (pio2_1t #+nil 6.07710050650619224932e-11 #.(encode-float64 #x3DD0B461 #x1A626331))
  81    (pio2_2 #+nil 6.07710050630396597660e-11 #.(encode-float64 #x3DD0B461 #x1A600000))
  82    (pio2_2t #+nil 2.02226624879595063154e-21 #.(encode-float64 #x3BA3198A #x2E037073)))
  83   (if (> hx 0)
  84    (let ((z (- x pio2_1)))
  85     (if (not (eql ix #x3ff921fb))
  86      ;; 33+53 bit pi is good enough
  87      (let*
  88       ((y0 (- z pio2_1t))
  89        (y1 (- (- z y0) pio2_1t)))
  90       (return (values 1 y0 y1)))
  91      ;; near pi/2, use 33+33+53 bit pi *
  92      (let*
  93       ((z (- z pio2_2))
  94        (y0 (- z pio2_2t))
  95        (y1 (- (- z y0) pio2_2t)))
  96       (return (values 1 y0 y1)))))
  97    ;; negative x
  98    (let
  99     ((z (+ x pio2_1)))
 100     (if (not (eql ix #x3ff921fb))
 101      ;; 33+53 bit pi is good enough
 102      (let*
 103       ((y0 (+ z pio2_1t))
 104        (y1 (+ (- z y0) pio2_1t)))
 105       (return (values -1 y0 y1)))
 106      ;; near pi/2, use 33+33+53 bit pi
 107      (let*
 108       ((z (+ z pio2_2))
 109        (y0 (+ z pio2_2t))
 110        (y1 (+ (- z y0) pio2_2t)))
 111       (return (values -1 y0 y1))))))))
 112
 113 (defun float64-rem-pio2-medium (x ix hx)
 114  (prog*
 115   ((half #+nil 5.00000000000000000000e-01 #.(encode-float64 #x3FE00000 #x00000000))
 116    (invpio2 #+nil 6.36619772367581382433e-01 #.(encode-float64 #x3FE45F30 #x6DC9C883))
 117    (pio2_1 #+nil 1.57079632673412561417e+00 #.(encode-float64 #x3FF921FB #x54400000))
 118    (pio2_1t #+nil 6.07710050650619224932e-11 #.(encode-float64 #x3DD0B461 #x1A626331))
 119    (pio2_2 #+nil 6.07710050630396597660e-11 #.(encode-float64 #x3DD0B461 #x1A600000))
 120    (pio2_2t #+nil 2.02226624879595063154e-21 #.(encode-float64 #x3BA3198A #x2E037073))
 121    (pio2_3 #+nil 2.02226624871116645580e-21 #.(encode-float64 #x3BA3198A #x2E000000))
 122    (pio2_3t #+nil 8.47842766036889956997e-32 #.(encode-float64 #x397B839A #x252049C1)))
 123   (let*
 124    ((tt (float64-abs x))
 125     (n (truncate (+ (* tt invpio2) half)))
 126     (fn (float n 0d0))
 127     (r (- tt (* fn pio2_1)))
 128     (w (* fn pio2_1t))   ; 1st round good to 85 bit *
 129     (y0 (- r w)))
 130    ;; quick check no cancellation
 131    (unless (and (< n 32) (not (eql ix (elt +npio2-hw+ (- n 1)))))
 132     (let*
 133      ((j (ash ix -20))
 134       (high (decode-float64 y0))
 135       (i (- j (logand (ash high -20) #x7ff))))
 136      ;; 2nd iteration needed, good to 118 *
 137      (when (> i 16)
 138       (setq tt r)
 139       (setq w (* fn pio2_2))
 140       (setq r (- tt w))
 141       (setq w (- (* fn pio2_2t) (- (- tt r) w)))
 142       (setq y0 (- r w))
 143       (setq high (decode-float64 y0))
 144       (setq i (- j (logand (ash high -20) #x7ff)))
 145
 146       ;; 3rd iteration need, 151 bits acc *
 147       (when (> i 49)
 148        (setq tt r)    ; will cover all possible cases *
 149        (setq w (* fn pio2_3))
 150        (setq r (- tt w))
 151        (setq w (- (* fn pio2_3t) (- (- tt r) w)))
 152        (setq y0 (- r w))))))
 153    (let ((y1 (- (- r y0) w)))
 154     (if (< hx 0)
 155      (return (values (- n) (- y0) (- y1)))
 156      (return (values n y0 y1)))))))
 157
 158 (defun float64-rem-pio2 (x)
 159  (declare (values fixnum double-float double-float))
 160  (declare (type double-float x))
 161  (prog
 162   ((two24 #+nil 1.67772160000000000000e+07 #.(encode-float64 #x41700000 #x00000000)))
 163   (multiple-value-bind (hx lx) (decode-float64 x)
 164    (let
 165     ((ix (logand hx #x7fffffff)))
 166
 167     (when (<= ix #x3fe921fb) (return (values 0 x 0d0))) ;; |x| ~<= pi/4 , no need for reduction
 168     (when (< ix #x4002d97c) (return (float64-rem-pio2-small x ix hx))) ;; |x| < 3pi/4, special case with n=+-1
 169     (when (<= ix #x413921fb) (return (float64-rem-pio2-medium x ix hx))) ;; |x| ~<= 2^19*(pi/2), medium size
 170
 171     ;; all other (large) arguments
 172     (when (>= ix #x7ff00000)  ; x is inf or NaN *
 173      (let ((nan (- x x))) (return (values 0 nan nan))))
 174
 175     ;; set z = scalbn(|x|,ilogb(x)-23)
 176     (let*
 177      ((e0 (- (ash ix -20) 1046)) ;; e0 = ilogb(z)-23
 178       (z0 (encode-float64 (- ix (ash e0 20)) lx))
 179       (tx0 (float (truncate z0) 0d0))
 180       (z1 (* (- z0 tx0) two24))
 181       (tx1 (float (truncate z1) 0d0))
 182       (z2 (* (- z1 tx1) two24))
 183       (tx2 z2))
 184      (multiple-value-bind (n y0 y1)
 185       (let
 186        ((nx
 187          (cond ;; skip zero term
 188           ((not (zerop tx2)) 3)
 189           ((not (zerop tx1)) 2)
 190           ((not (zerop tx0)) 1)
 191           (t 0))))
 192        (let
 193         ((tx (make-array 3 :element-type 'double-float)))
 194         (setf (elt tx 0) tx0)
 195         (setf (elt tx 1) tx1)
 196         (setf (elt tx 2) tx2)
 197         (float64-kernel-rem-pio2 tx e0 nx 2 +two-over-pi+)))
 198       (if (< hx 0)
 199        (return (values (- n) (- y0) (- y1)))
 200        (return (values n y0 y1)))))))))