/* Sum -- efficiently sum a list of floating-point numbers Copyright 2014-2023 Free Software Foundation, Inc. Contributed by the AriC and Caramba projects, INRIA. This file is part of the GNU MPFR Library. The GNU MPFR Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MPFR Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" /* Note: In the prototypes, one uses * * const mpfr_ptr *x i.e.: __mpfr_struct *const *x * * instead of * * const mpfr_srcptr *x i.e.: const __mpfr_struct *const *x * * because here one has a double indirection and the type matching rules * from the C standard in such a case are stricter and they would yield * annoying errors for the user in practice. See: * * Why can't I pass a char ** to a function which expects a const char **? * * in the comp.lang.c FAQ: * * https://c-faq.com/ansi/constmismatch.html */ /* See the doc/sum.txt file for the algorithm and a part of its proof (this will later go into algorithms.tex). TODO [VL, after a discussion with James Demmel]: Compared to James Demmel and Yozo Hida, Fast and accurate floating-point summation with application to computational geometry, Numerical Algorithms, volume 37, number 1-4, pages 101--112, 2004. sorting is not necessary here. 
It is not done because in the most common cases (where big cancellations
   are rare), it would take time and be useless. However, the lack of sorting
   increases the worst case complexity. For instance, consider many inputs
   that cancel one another (two by two). One would need n/2 iterations, where
   each iteration reads the exponent of each input, therefore n*n/2 read
   operations. Using a worst-case sort in O(n log n) could give a O(n log n)
   worst-case complexity. As we don't want to slow down the most common cases,
   this could be done at the 3rd iteration. But are there practical
   applications which would be used as tests?

   Note: see the following paper and its references:
   http://www.acsel-lab.com/arithmetic/arith21/papers/p54.pdf
   (J. Demmel and H. D. Nguyen, Fast Reproducible Floating-Point Summation)
   VL: This is very different:
          In MPFR             In the paper & references
      arbitrary precision     fixed precision
      correct rounding        just reproducible rounding
      integer operations      floating-point operations
      sequential              parallel (& sequential)
*/

/* Table that is only compiled when MPFR_COV_CHECK is defined; all its
   entries start at 0. It is accessed through the MPFR_COV_SET call of
   sum_aux (TMD case), with the six indices
     [rnd] [tmd-1] [rbit] [cancel2 == 0 ? 1 : sst+1] [neg]
     [sq > MPFR_PREC_MIN]
   which explains the dimensions MPFR_RND_MAX, 2, 2, 3, 2, 2.
   NOTE(review): MPFR_COV_SET is defined outside this file; presumably it
   marks the corresponding case as reached for coverage checking. */
#ifdef MPFR_COV_CHECK
int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 };
#endif

/* Update minexp (V) after detecting a potential integer overflow in
   extreme cases (only a 32-bit ABI may be concerned in practice).
   Instead of an assertion failure below, we could
   1. check that the ulp of each regular input has an exponent >=
      MPFR_EXP_MIN (with an assertion failure if this is not the case);
   2. set minexp to MPFR_EXP_MIN and shift the accumulator accordingly
      (the sum will then be exact).
   However, such cases, which involve huge precisions, will probably
   never occur in practice (at least with a 64-bit ABI) and are not
   easily testable due to these huge precisions. Moreover, switching
   to a 64-bit ABI would be a better solution for such computations.
   So, let's leave this unimplemented.
*/

/* SAFE_SUB (V, E, SH): store E - SH into V, after checking with
   MPFR_ASSERTN that E >= MPFR_EXP_MIN + SH, i.e. that the result is
   not below MPFR_EXP_MIN.
   - SH is evaluated exactly once, into a local variable of type
     mpfr_prec_t named sh (so SH must not itself refer to a caller
     variable named sh).
   - E is expanded twice (in the assertion and in the subtraction),
     so it must be free of side effects.
   - V is expanded once, as the target of the assignment; it may be
     the same variable as E. */
#define SAFE_SUB(V,E,SH)                        \
  do                                            \
    {                                           \
      mpfr_prec_t sh = (SH);                    \
      MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh);  \
      V = (E) - sh;                             \
    }                                           \
  while (0)

/* Function sum_raw
 * ================
 *
 * Accumulate a new [minexp,maxexp[ block into (wp,ws). If e and err denote
 * the exponents of the computed result and of the error bound respectively,
 * while e - err is less than some given bound (due to cancellation), shift
 * the accumulator and reiterate.
 *
 * Inputs:
 *   wp: pointer to the accumulator (least significant limb first).
 *   ws: size of the accumulator (in limbs).
 *   wq: precision of the accumulator (ws * GMP_NUMB_BITS).
 *   x: array of the input numbers.
 *   n: size of this array (number of inputs, regular or not).
 *   minexp: exponent of the least significant bit of the first block.
 *   maxexp: exponent of the first block (exponent of its MSB + 1).
 *   tp: pointer to a temporary area (pre-allocated).
 *   ts: size of this temporary area.
 *   logn: ceil(log2(rn)), where rn is the number of regular inputs.
 *   prec: lower bound for e - err (as described above).
 *   ep: pointer to mpfr_exp_t (see below), or a null pointer.
 *   minexpp: pointer to mpfr_exp_t (see below), or a null pointer.
 *   maxexpp: pointer to mpfr_exp_t (see below), or a null pointer.
 *
 * Preconditions:
 *   prec >= 1
 *   wq >= logn + prec + 2
 *
 * This function returns 0 if the accumulator is 0 (which implies that
 * the exact sum for this sum_raw invocation is 0), otherwise the number
 * of cancelled bits (>= 1), defined as the number of identical bits on
 * the most significant part of the accumulator. In the latter case, it
 * also returns the following data in variables passed by reference, if
 * the pointers are not NULL:
 * - in ep: the exponent e of the computed result;
 * - in minexpp: the last value of minexp;
 * - in maxexpp: the new value of maxexp (for the next iteration after
 *   the first invocation of sum_raw in the main code).
*
 * Notes:
 * - minexp is also the exponent of the least significant bit of the
 *   accumulator;
 * - the temporary area must be large enough to hold a shifted input
 *   block, and the value of ts is used only when the full assertions
 *   are checked (i.e. with the --enable-assert configure option), to
 *   check that a buffer overflow doesn't occur;
 * - contrary to the returned value of minexp (the value in the last
 *   iteration), the returned value of maxexp is the one for the next
 *   iteration (= maxexp2 of the last iteration).
 */

/* Outline of one pass of the main loop below:
   1. For each regular input, add (positive input) or subtract (negative
      input) to the accumulator the part of its significand that lies in
      the window [minexp,maxexp[. Meanwhile, maxexp2 records an exponent
      bounding the input bits that had to be ignored because they lie
      below minexp; it stays at MPFR_EXP_MIN when no bit was ignored.
   2. Count in cancel the number of leading bits of the accumulator that
      are identical to its most significant bit.
   3. If the accumulator is nonzero and accurate enough (or exact), store
      the results and return cancel. If it is zero and nothing was
      ignored, return 0. Otherwise shift the accumulator to the left (or
      choose a lower window when it is zero), lower minexp accordingly,
      and do another pass with maxexp = maxexp2. */
static mpfr_prec_t
sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, const mpfr_ptr *x,
         unsigned long n, mpfr_exp_t minexp, mpfr_exp_t maxexp,
         mp_limb_t *tp, mp_size_t ts, int logn, mpfr_prec_t prec,
         mpfr_exp_t *ep, mpfr_exp_t *minexpp, mpfr_exp_t *maxexpp)
{
  MPFR_LOG_FUNC
    (("ws=%Pd ts=%Pd prec=%Pd", (mpfr_prec_t) ws, (mpfr_prec_t) ts, prec),
     ("", 0));

  /* The C code below requires prec >= 0 due to the use of unsigned
     integer arithmetic on it. Actually the computation makes sense
     only with prec >= 1 (otherwise one can't even know the sign of
     the result), hence the following assertion. */
  MPFR_ASSERTD (prec >= 1);

  /* Consistency check. */
  MPFR_ASSERTD (wq == (mpfr_prec_t) ws * GMP_NUMB_BITS);

  /* The following precondition together with prec >= 1 will imply:
     minexp - shiftq < maxexp2, as required by the algorithm. */
  MPFR_ASSERTD (wq >= logn + prec + 2);

  while (1)
    {
      /* MPFR_EXP_MIN is used as the marker meaning that no input bit
         below minexp has been met so far in this pass. */
      mpfr_exp_t maxexp2 = MPFR_EXP_MIN;
      unsigned long i;

      MPFR_LOG_MSG (("sum_raw loop: "
                     "maxexp=%" MPFR_EXP_FSPEC "d "
                     "minexp=%" MPFR_EXP_FSPEC "d\n",
                     (mpfr_eexp_t) maxexp, (mpfr_eexp_t) minexp));

      MPFR_ASSERTD (maxexp > minexp);

      for (i = 0; i < n; i++)
        if (! MPFR_IS_SINGULAR (x[i]))  /* Step 1 (see sum_raw in sum.txt) */
          {
            /* (dp,ds): part of the accumulator that receives the input.
               (vp,vs): limbs of the input that are added or subtracted.
               xe, xq: exponent and precision of x[i].
               tr: number of most significant bits of the top input limb
                   that must be discarded (bits at or above maxexp). */
            mp_limb_t *dp, *vp;
            mp_size_t ds, vs, vds;
            mpfr_exp_t xe, vd;
            mpfr_prec_t xq;
            int tr;

            xe = MPFR_GET_EXP (x[i]);
            xq = MPFR_GET_PREC (x[i]);

            vp = MPFR_MANT (x[i]);
            vs = MPFR_PREC2LIMBS (xq);
            vd = xe - vs * GMP_NUMB_BITS - minexp;
            /* vd is the exponent of the least significant represented
               bit of x[i] (including the trailing bits, whose value
               is 0) minus the exponent of the least significant bit
               of the accumulator. To make the code simpler, we won't
               try to filter out the trailing bits of x[i]. */

            /* Steps 2, 3, 4 (see sum_raw in sum.txt) */

            if (vd < 0)
              {
                /* This covers the following cases:
                 *     [-+- accumulator ---]
                 *   [---|----- x[i] ------|--]
                 *       |   [----- x[i] --|--]
                 *       |                 |[----- x[i] -----]
                 *       |                 |    [----- x[i] -----]
                 *     maxexp           minexp
                 */

                /* Step 2 for subcase vd < 0 */

                if (xe <= minexp)
                  {
                    /* x[i] is entirely after the LSB of the accumulator,
                       so that it will be ignored at this iteration. */
                    if (xe > maxexp2)
                      {
                        maxexp2 = xe;
                        /* And since the exponent of x[i] is valid... */
                        MPFR_ASSERTD (maxexp2 >= MPFR_EMIN_MIN);
                      }
                    continue;
                  }

                /* Step 3 for subcase vd < 0 */

                /* If some significant bits of x[i] are after the LSB
                   of the accumulator, then maxexp2 will necessarily
                   be minexp. */
                if (MPFR_LIKELY (xe - xq < minexp))
                  maxexp2 = minexp;

                /* Step 4 for subcase vd < 0 */

                /* We need to ignore the least |vd| significant bits of x[i].
                   First, let's ignore the least
                   vds = |vd| / GMP_NUMB_BITS limbs. */
                vd = - vd;
                vds = vd / GMP_NUMB_BITS;
                vs -= vds;
                MPFR_ASSERTD (vs > 0);  /* see xe <= minexp test above */
                vp += vds;
                vd -= vds * GMP_NUMB_BITS;
                MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);

                /* If x[i] extends above maxexp, drop the limbs that are
                   entirely above maxexp; tr is then the number of bits
                   still to be discarded in the top remaining limb. */
                if (xe > maxexp)
                  {
                    vs -= (xe - maxexp) / GMP_NUMB_BITS;
                    MPFR_ASSERTD (vs > 0);
                    tr = (xe - maxexp) % GMP_NUMB_BITS;
                  }
                else
                  tr = 0;

                if (vd != 0)
                  {
                    /* Align the input on the accumulator: the vs limbs
                       are written to the temporary area, shifted by vd
                       bits to the right. After this shift, the bits to
                       discard are the tr + vd most significant bits of
                       the top limb, hence the update of tr; if this
                       covers a whole limb, that limb is dropped. */
                    MPFR_ASSERTD (vs <= ts);
                    mpn_rshift (tp, vp, vs, vd);
                    vp = tp;
                    tr += vd;
                    if (tr >= GMP_NUMB_BITS)
                      {
                        vs--;
                        tr -= GMP_NUMB_BITS;
                      }
                    MPFR_ASSERTD (vs >= 1);
                    MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
                    if (tr != 0)
                      {
                        /* The data are now in the temporary area, so
                           the top limb can be masked in place. */
                        tp[vs-1] &= MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
                        tr = 0;
                      }
                    /* Truncation has now been taken into account. */
                    MPFR_ASSERTD (tr == 0);
                  }

                /* The input is aligned on the LSB of the accumulator:
                   the whole accumulator is the destination. */
                dp = wp;
                ds = ws;
              }
            else  /* vd >= 0 */
              {
                /* This covers the following cases:
                 *               [-+- accumulator ---]
                 *   [- x[i] -]    |                 |
                 *             [---|-- x[i] ------]  |
                 *          [------|-- x[i] ---------]
                 *                 |   [- x[i] -]    |
                 *               maxexp           minexp
                 */

                /* Steps 2 and 3 for subcase vd >= 0 */

                MPFR_ASSERTD (xe - xq >= minexp);  /* see definition of vd */

                /* Step 4 for subcase vd >= 0 */

                /* We need to ignore the least vd significant bits
                   of the accumulator. First, let's ignore the least
                   vds = vd / GMP_NUMB_BITS limbs. -> (dp,ds) */
                vds = vd / GMP_NUMB_BITS;
                ds = ws - vds;
                if (ds <= 0)
                  continue;  /* the input starts above the accumulator */
                dp = wp + vds;
                vd -= vds * GMP_NUMB_BITS;
                MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);

                /* The low part of x[i] (to be determined) will have to
                   be shifted vd bits to the left if vd != 0. */

                if (xe > maxexp)
                  {
                    vs -= (xe - maxexp) / GMP_NUMB_BITS;
                    if (vs <= 0)
                      continue;  /* no limb of x[i] reaches below maxexp */
                    tr = (xe - maxexp) % GMP_NUMB_BITS;
                  }
                else
                  tr = 0;

                MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS && vs > 0);

                /* We need to consider the least significant vs limbs of x[i]
                   except the most significant tr bits. */

                if (vd != 0)
                  {
                    mp_limb_t carry;

                    /* Align the input on the accumulator limbs: the vs
                       limbs are written to the temporary area, shifted
                       by vd bits to the left. The bits shifted out are
                       returned in carry; they are kept as an additional
                       limb only when they are not all to be discarded
                       (tr < vd before the update). */
                    MPFR_ASSERTD (vs <= ts);
                    carry = mpn_lshift (tp, vp, vs, vd);
                    tr -= vd;
                    if (tr < 0)
                      {
                        tr += GMP_NUMB_BITS;
                        MPFR_ASSERTD (vs + 1 <= ts);
                        tp[vs++] = carry;
                      }
                    MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
                    vp = tp;
                  }
              }  /* vd >= 0 */

            MPFR_ASSERTD (vs > 0 && vs <= ds);

            /* We can't truncate the most significant limb of the input
               (in case it hasn't been shifted to the temporary area).
               So, let's ignore it now. It will be taken into account
               via carry propagation after the addition. */
            if (tr != 0)
              vs--;

            /* Step 5 (see sum_raw in sum.txt) */

            /* The vs full limbs are added (or subtracted). Then, when
               tr != 0, the kept low bits of the partially discarded
               limb vp[vs] are merged into the carry (or borrow), which
               is finally propagated through the ds - vs upper limbs of
               the destination. */
            if (MPFR_IS_POS (x[i]))
              {
                mp_limb_t carry;

                carry = vs > 0 ? mpn_add_n (dp, dp, vp, vs) : 0;
                MPFR_ASSERTD (carry <= 1);
                if (tr != 0)
                  carry += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
                if (ds > vs)
                  mpn_add_1 (dp + vs, dp + vs, ds - vs, carry);
              }
            else
              {
                mp_limb_t borrow;

                borrow = vs > 0 ? mpn_sub_n (dp, dp, vp, vs) : 0;
                MPFR_ASSERTD (borrow <= 1);
                if (tr != 0)
                  borrow += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
                if (ds > vs)
                  mpn_sub_1 (dp + vs, dp + vs, ds - vs, borrow);
              }
          }

      {
        mpfr_prec_t cancel;  /* number of cancelled bits */
        mp_size_t wi;        /* index in the accumulator */
        mp_limb_t a, b;
        int cnt;

        /* a is a limb whose bits are all equal to the most significant
           bit of the accumulator. Starting from the most significant
           limb, count the limbs equal to a, then the leading bits of
           the first different limb that agree with a. If the loop ends
           with wi < 0, all the limbs are equal to a. */
        cancel = 0;
        wi = ws - 1;
        MPFR_ASSERTD (wi >= 0);
        a = wp[wi] >> (GMP_NUMB_BITS - 1) ? MPFR_LIMB_MAX : MPFR_LIMB_ZERO;

        while (wi >= 0)
          if ((b = wp[wi]) == a)
            {
              cancel += GMP_NUMB_BITS;
              wi--;
            }
          else
            {
              b ^= a;
              MPFR_ASSERTD (b != 0);
              count_leading_zeros (cnt, b);
              cancel += cnt;
              break;
            }

        if (wi >= 0 || a != MPFR_LIMB_ZERO)  /* accumulator != 0 */
          {
            mpfr_exp_t e;        /* exponent of the computed result */
            mpfr_exp_t err;      /* exponent of the error bound */

            MPFR_LOG_MSG (("accumulator %s 0, cancel=%Pd\n",
                           a != MPFR_LIMB_ZERO ? "<" : ">", cancel));

            MPFR_ASSERTD (cancel > 0);
            e = minexp + wq - cancel;
            MPFR_ASSERTD (e >= minexp);

            err = maxexp2 + logn;  /* OK even if maxexp2 == MPFR_EXP_MIN */

            /* The absolute value of the truncated sum is in the binade
               [2^(e-1),2^e] (closed on both ends due to two's complement).
               The error is strictly less than 2^err (and is 0 if
               maxexp2 == MPFR_EXP_MIN). */

            /* This basically tests whether err <= e - prec without
               potential integer overflow (since prec >= 0)...
               Note that the maxexp2 == MPFR_EXP_MIN test is there just for
               the potential corner case e - prec < MPFR_EXP_MIN + logn.
               Such corner cases, involving specific huge-precision numbers,
               are probably not supported in many places in MPFR, but this
               test doesn't hurt... */
            if (maxexp2 == MPFR_EXP_MIN ||
                (err <= e && SAFE_DIFF (mpfr_uexp_t, e, err) >= prec))
              {
                MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%"
                               MPFR_EXP_FSPEC "d) - (prec=%Pd)\n",
                               (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec));
                /* To avoid tests or copies, we consider the only two cases
                   that will occur in sum_aux. */
                MPFR_ASSERTD ((ep != NULL &&
                               minexpp != NULL &&
                               maxexpp != NULL) ||
                              (ep == NULL &&
                               minexpp == NULL &&
                               maxexpp == NULL));
                if (ep != NULL)
                  {
                    *ep = e;
                    *minexpp = minexp;
                    *maxexpp = maxexp2;
                  }
                MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC
                               "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
                               (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2,
                               maxexp2 == MPFR_EXP_MIN ?
                               " (MPFR_EXP_MIN)" : ""));
                return cancel;
              }
            else
              {
                /* Not accurate enough: shift the accumulator by shiftq
                   bits to the left, i.e. by shifts limbs and shiftc
                   bits, zero the low limbs that become free, and lower
                   minexp by shiftq for the next pass. */
                mpfr_exp_t diffexp;
                mpfr_prec_t shiftq;
                /* NOTE(review): the other limb counts of this function
                   have the type mp_size_t, and the definition of
                   mpfr_size_t is not visible here; confirm that this
                   type is the intended one. */
                mpfr_size_t shifts;
                int shiftc;

                MPFR_LOG_MSG (("e=%" MPFR_EXP_FSPEC "d err=%" MPFR_EXP_FSPEC
                               "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
                               (mpfr_eexp_t) e, (mpfr_eexp_t) err,
                               (mpfr_eexp_t) maxexp2,
                               maxexp2 == MPFR_EXP_MIN ?
                               " (MPFR_EXP_MIN)" : ""));

                diffexp = err - e;
                if (diffexp < 0)
                  diffexp = 0;
                /* diffexp = max(0, err - e) */

                MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d\n",
                               (mpfr_eexp_t) diffexp));

                MPFR_ASSERTD (diffexp < cancel - 2);
                shiftq = cancel - 2 - (mpfr_prec_t) diffexp;
                /* equivalent to: minexp + wq - 2 - max(e,err) */
                MPFR_ASSERTD (shiftq > 0);
                shifts = shiftq / GMP_NUMB_BITS;
                shiftc = shiftq % GMP_NUMB_BITS;
                MPFR_LOG_MSG (("shiftq = %Pd = %Pd * GMP_NUMB_BITS + %d\n",
                               shiftq, (mpfr_prec_t) shifts, shiftc));
                /* A bit count of 0 is handled by a plain limb copy
                   instead of mpn_lshift. */
                if (MPFR_LIKELY (shiftc != 0))
                  mpn_lshift (wp + shifts, wp, ws - shifts, shiftc);
                else
                  mpn_copyd (wp + shifts, wp, ws - shifts);
                MPN_ZERO (wp, shifts);
                /* Compute minexp = minexp - shiftq safely. */
                SAFE_SUB (minexp, minexp, shiftq);
                MPFR_ASSERTD (minexp < maxexp2);
              }
          }
        else if (maxexp2 == MPFR_EXP_MIN)
          {
            /* Accumulator equal to 0 and no ignored input bit. */
            MPFR_LOG_MSG (("accumulator = 0, maxexp2 = MPFR_EXP_MIN\n", 0));
            return 0;
          }
        else
          {
            /* Accumulator equal to 0 but some input bits were ignored:
               choose a new window below maxexp2, the accumulator being
               kept as is (all its limbs are 0). */
            MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0));
            /* Compute minexp = maxexp2 - (wq - (logn + 1)) safely. */
            SAFE_SUB (minexp, maxexp2, wq - (logn + 1));
            /* Note: the logn + 1 corresponds to cq in the main code. */
          }
      }

      /* The next pass handles the window [minexp,maxexp2[. */
      maxexp = maxexp2;
    }
}

/**********************************************************************/

/* Generic case: all the inputs are finite numbers,
   with at least 3 regular numbers.
*/ static int sum_aux (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd, mpfr_exp_t maxexp, unsigned long rn) { mp_limb_t *sump; mp_limb_t *tp; /* pointer to a temporary area */ mp_limb_t *wp; /* pointer to the accumulator */ mp_size_t ts; /* size of the temporary area, in limbs */ mp_size_t ws; /* size of the accumulator, in limbs */ mp_size_t zs; /* size of the TMD accumulator, in limbs */ mpfr_prec_t wq; /* size of the accumulator, in bits */ int logn; /* ceil(log2(rn)) */ int cq; mpfr_prec_t sq; int inex; MPFR_TMP_DECL (marker); MPFR_LOG_FUNC (("n=%lu rnd=%d maxexp=%" MPFR_EXP_FSPEC "d rn=%lu", n, rnd, (mpfr_eexp_t) maxexp, rn), ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum)); MPFR_ASSERTD (rn >= 3 && rn <= n); /* In practice, no integer overflow on the exponent. */ MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX - MPFR_EMAX_MAX >= sizeof (unsigned long) * CHAR_BIT); /* Set up some variables and the accumulator. */ sump = MPFR_MANT (sum); /* rn is the number of regular inputs (the singular ones will be ignored). Compute logn = ceil(log2(rn)). */ logn = MPFR_INT_CEIL_LOG2 (rn); MPFR_ASSERTD (logn >= 2); MPFR_LOG_MSG (("logn=%d maxexp=%" MPFR_EXP_FSPEC "d\n", logn, (mpfr_eexp_t) maxexp)); sq = MPFR_GET_PREC (sum); cq = logn + 1; /* First determine the size of the accumulator. * cq + sq + logn + 2 >= logn + sq + 5, which will be used later. * The assertion wq - cq - sq >= 4 is another way to check that. */ ws = MPFR_PREC2LIMBS (cq + sq + logn + 2); wq = (mpfr_prec_t) ws * GMP_NUMB_BITS; MPFR_ASSERTD (wq - cq - sq >= 4); /* TODO: timings, comparing with a larger zs. */ zs = MPFR_PREC2LIMBS (wq - sq); MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq)); /* An input block will have up to wq - cq bits, and its shifted value (to be correctly aligned) may take GMP_NUMB_BITS - 1 additional bits. 
*/ ts = MPFR_PREC2LIMBS (wq - cq + GMP_NUMB_BITS - 1); MPFR_TMP_MARK (marker); /* Note: If the TMD does not occur, which should be the case for most sums, allocating zs limbs is not necessary. However, we choose to do this now (thus in all cases) because zs is very small, so that the difference on the memory footprint will not be noticeable. More precisely, zs is at most 2 in practice with the current code; we may want to increase it in order to avoid performance issues in some unlikely corner cases, but even in this case, it will remain small. One will have: [------ ts ------][------ ws ------][- zs -] The following would probably be better: [------ ts ------] [------ ws ------] [- zs -] i.e. where the TMD accumulator (partially or completely) takes some unneeded part of the temporary area in order to improve data locality. But * in low precision, data locality is regarded as ensured even with the actual choice; * in high precision, data locality for TMD resolution may not be that important. */ tp = MPFR_TMP_LIMBS_ALLOC (ts + ws + zs); wp = tp + ts; MPN_ZERO (wp, ws); /* zero the accumulator */ { mpfr_exp_t minexp; /* exponent of the LSB of the block for sum_raw */ mpfr_prec_t cancel; /* number of cancelled bits */ mpfr_exp_t e; /* temporary exponent of the result */ mpfr_exp_t u; /* temporary exponent of the ulp (quantum) */ mp_limb_t lbit; /* last bit (useful if even rounding) */ mp_limb_t rbit; /* rounding bit (corrected in halfway case) */ int corr; /* correction term (from -1 to 2) */ int sd, sh; /* shift counts */ mp_size_t sn; /* size of the output number */ int tmd; /* 0: the TMD does not occur 1: the TMD occurs on a machine number 2: the TMD occurs on a midpoint */ int neg; /* 0 if positive sum, 1 if negative */ int sgn; /* +1 if positive sum, -1 if negative */ MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0)); /* Compute minexp = maxexp - (wq - cq) safely. 
*/ SAFE_SUB (minexp, maxexp, wq - cq); MPFR_ASSERTD (wq >= logn + sq + 5); cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts, logn, sq + 3, &e, &minexp, &maxexp); if (MPFR_UNLIKELY (cancel == 0)) { /* The exact sum is zero. Since not all inputs are 0, the sum * is +0 except in MPFR_RNDD, as specified according to the * IEEE 754 rules for the addition of two numbers. */ MPFR_SET_SIGN (sum, (rnd != MPFR_RNDD ? MPFR_SIGN_POS : MPFR_SIGN_NEG)); MPFR_SET_ZERO (sum); MPFR_TMP_FREE (marker); MPFR_RET (0); } /* The absolute value of the truncated sum is in the binade [2^(e-1),2^e] (closed on both ends due to two's complement). The error is strictly less than 2^(maxexp + logn) (and is 0 if maxexp == MPFR_EXP_MIN). */ u = e - sq; /* e being the exponent, u is the ulp of the target */ /* neg = 1 if negative, 0 if positive. */ neg = wp[ws-1] >> (GMP_NUMB_BITS - 1); MPFR_ASSERTD (neg == 0 || neg == 1); sgn = neg ? -1 : 1; MPFR_ASSERTN (sgn == (neg ? MPFR_SIGN_NEG : MPFR_SIGN_POS)); MPFR_LOG_MSG (("neg=%d sgn=%d cancel=%Pd" " e=%" MPFR_EXP_FSPEC "d" " u=%" MPFR_EXP_FSPEC "d" " maxexp=%" MPFR_EXP_FSPEC "d%s\n", neg, sgn, cancel, (mpfr_eexp_t) e, (mpfr_eexp_t) u, (mpfr_eexp_t) maxexp, maxexp == MPFR_EXP_MIN ? " (MPFR_EXP_MIN)" : "")); if (rnd == MPFR_RNDF) { /* Rounding the approximate value to nearest (ties don't matter) is sufficient. We need to get the rounding bit; the code is similar to a part from the generic code (here, corr = rbit). */ if (MPFR_LIKELY (u > minexp)) { mpfr_prec_t tq; mp_size_t wi; int td; tq = u - minexp; MPFR_ASSERTD (tq > 0); /* number of trailing bits */ MPFR_LOG_MSG (("tq=%Pd\n", tq)); wi = tq / GMP_NUMB_BITS; td = tq % GMP_NUMB_BITS; corr = td >= 1 ? 
((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) : (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1)); } else corr = 0; inex = 0; /* not meaningful, but needs to have a value */ } else /* rnd != MPFR_RNDF */ { if (MPFR_LIKELY (u > minexp)) { mpfr_prec_t tq; mp_size_t wi; int td; tq = u - minexp; MPFR_ASSERTD (tq > 0); /* number of trailing bits */ MPFR_LOG_MSG (("tq=%Pd\n", tq)); wi = tq / GMP_NUMB_BITS; /* Determine the rounding bit, which is represented. */ td = tq % GMP_NUMB_BITS; lbit = (wp[wi] >> td) & MPFR_LIMB_ONE; rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) : (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1)); MPFR_ASSERTD (rbit == 0 || rbit == 1); MPFR_LOG_MSG (("rbit=%d\n", (int) rbit)); if (maxexp == MPFR_EXP_MIN) { /* The sum in the accumulator is exact. Determine inex: inex = 0 if the final sum is exact, else 1, i.e. inex = rounding bit || sticky bit. In round to nearest, also determine the rounding direction: obtained from the rounding bit possibly except in halfway cases. Halfway cases are rounded toward -inf iff the last bit of the truncated significand in two's complement is 0 (in precision > 1, because the parity after rounding is the same in two's complement and sign + magnitude; in precision 1, one checks that the rule works for both positive (lbit == 1) and negative (lbit == 0) numbers, rounding halfway cases away from zero). */ if (MPFR_LIKELY (rbit == 0 || (rnd == MPFR_RNDN && lbit == 0))) { /* We need to determine the sticky bit, either to set inex (if the rounding bit is 0) or to possibly "correct" rbit (round to nearest, halfway case rounded downward) from which the rounding direction will be determined. */ MPFR_LOG_MSG (("Determine the sticky bit...\n", 0)); inex = td >= 2 ? (wp[wi] & MPFR_LIMB_MASK (td - 1)) != 0 : td == 0 ? 
(MPFR_ASSERTD (wi >= 1), (wp[--wi] & MPFR_LIMB_MASK (GMP_NUMB_BITS - 1)) != 0) : 0; if (!inex) { while (!inex && wi > 0) inex = wp[--wi] != 0; if (!inex && rbit != 0) { /* sticky bit = 0, rounding bit = 1, i.e. halfway case, which will be rounded downward (see earlier if). */ MPFR_ASSERTD (rnd == MPFR_RNDN); inex = 1; rbit = 0; /* even rounding downward */ MPFR_LOG_MSG (("Halfway case rounded downward;" " set inex=1 rbit=0\n", 0)); } } } else inex = 1; tmd = 0; /* We can round correctly -> no TMD. */ } else /* maxexp > MPFR_EXP_MIN */ { mpfr_exp_t d; mp_limb_t limb, mask; int nbits; /* Since maxexp was set to either the exponent of a x[i] or to minexp... */ MPFR_ASSERTD (maxexp >= MPFR_EMIN_MIN || maxexp == minexp); inex = 1; /* We do not know whether the sum is exact. */ MPFR_ASSERTD (u <= MPFR_EMAX_MAX && u <= minexp + wq); d = u - (maxexp + logn); /* representable */ MPFR_ASSERTD (d >= 3); /* due to prec = sq + 3 in sum_raw */ /* Let's see whether the TMD occurs by looking at the d bits following the ulp bit, or the d-1 bits after the rounding bit. */ /* First chunk after the rounding bit... It starts at: (wi,td-2) if td >= 2, (wi-1,td-2+GMP_NUMB_BITS) if td < 2. */ if (td == 0) { MPFR_ASSERTD (wi >= 1); limb = wp[--wi]; mask = MPFR_LIMB_MASK (GMP_NUMB_BITS - 1); nbits = GMP_NUMB_BITS; } else if (td == 1) { limb = wi >= 1 ? wp[--wi] : MPFR_LIMB_ZERO; mask = MPFR_LIMB_MAX; nbits = GMP_NUMB_BITS + 1; } else /* td >= 2 */ { MPFR_ASSERTD (td >= 2); limb = wp[wi]; mask = MPFR_LIMB_MASK (td - 1); nbits = td; } /* nbits: number of bits of the first chunk + 1 (the +1 is for the rounding bit). */ if (nbits > d) { /* Some low significant bits must be ignored. */ limb >>= nbits - d; mask >>= nbits - d; d = 0; } else { d -= nbits; MPFR_ASSERTD (d >= 0); } limb &= mask; tmd = limb == MPFR_LIMB_ZERO ? (rbit == 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : limb == mask ? (limb = MPFR_LIMB_MAX, rbit != 0 ? 1 : rnd == MPFR_RNDN ? 
2 : 0) : 0; while (tmd != 0 && d != 0) { mp_limb_t limb2; MPFR_ASSERTD (d > 0); if (wi == 0) { /* The non-represented bits are 0's. */ if (limb != MPFR_LIMB_ZERO) tmd = 0; break; } MPFR_ASSERTD (wi > 0); limb2 = wp[--wi]; if (d < GMP_NUMB_BITS) { int c = GMP_NUMB_BITS - d; MPFR_ASSERTD (c > 0 && c < GMP_NUMB_BITS); if ((limb2 >> c) != (limb >> c)) tmd = 0; break; } if (limb2 != limb) tmd = 0; d -= GMP_NUMB_BITS; } } } else /* u <= minexp */ { /* The exact value of the accumulator will be copied. * The TMD occurs if and only if there are bits still * not taken into account, and if it occurs, this is * necessarily on a machine number (-> tmd = 1). */ lbit = u == minexp ? wp[0] & MPFR_LIMB_ONE : 0; rbit = 0; inex = tmd = maxexp != MPFR_EXP_MIN; } MPFR_ASSERTD (rbit == 0 || rbit == 1); MPFR_LOG_MSG (("tmd=%d lbit=%d rbit=%d inex=%d neg=%d\n", tmd, (int) lbit, (int) rbit, inex, neg)); /* Here, if the final sum is known to be exact, inex = 0, otherwise * inex = 1. We have a truncated significand, a trailing term t such * that 0 <= t < 1 ulp, and an error on the trailing term bounded by * t' in absolute value. Thus the error e on the truncated significand * satisfies -t' <= e < 1 ulp + t'. Thus one has 4 correction cases * denoted by a corr value between -1 and 2 depending on e, neg, rbit, * and the rounding mode: * -1: equivalent to nextbelow; * 0: the truncated significand is not corrected; * 1: add 1 ulp; * 2: add 1 ulp, then nextabove. * The nextbelow and nextabove are used here since there may be a * change of the binade. */ if (tmd == 0) /* no TMD */ { switch (rnd) { case MPFR_RNDD: corr = 0; break; case MPFR_RNDU: corr = inex; break; case MPFR_RNDZ: corr = inex && neg; break; case MPFR_RNDA: corr = inex && !neg; break; default: MPFR_ASSERTN (rnd == MPFR_RNDN); /* Note: for halfway cases (maxexp == MPFR_EXP_MIN) that are rounded downward, rbit has been changed to 0 so that corr is set correctly. 
*/ corr = rbit; } MPFR_ASSERTD (corr == 0 || corr == 1); if (inex && corr == 0) /* two's complement significand decreased */ inex = -1; } else /* tmd */ { mpfr_exp_t minexp2; mpfr_prec_t cancel2; mpfr_exp_t err; /* exponent of the error bound */ mp_size_t zz; /* nb of limbs to zero in the TMD accumulator */ mp_limb_t *zp; /* pointer to the TMD accumulator */ mpfr_prec_t zq; /* size of the TMD accumulator, in bits */ int sst; /* sign of the secondary term */ /* TMD case. Here we use a new variable minexp2, with the same meaning as minexp, as we want to keep the minexp value for the copy to the destination. */ MPFR_ASSERTD (maxexp > MPFR_EXP_MIN); MPFR_ASSERTD (tmd == 1 || tmd == 2); /* TMD accumulator */ zp = wp + ws; zq = (mpfr_prec_t) zs * GMP_NUMB_BITS; err = maxexp + logn; MPFR_LOG_MSG (("TMD with" " maxexp=%" MPFR_EXP_FSPEC "d" " err=%" MPFR_EXP_FSPEC "d" " zs=%Pd" " zq=%Pd\n", (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err, (mpfr_prec_t) zs, zq)); /* The d-1 bits from u-2 to u-d (= err) are identical. */ if (err >= minexp) { mpfr_prec_t tq; mp_size_t wi; int td; /* Let's keep the last 2 over the d-1 identical bits and the following bits, i.e. the bits from err+1 to minexp. */ tq = err - minexp + 2; /* tq = number of such bits */ MPFR_LOG_MSG (("[TMD] tq=%Pd\n", tq)); MPFR_ASSERTD (tq >= 2); wi = tq / GMP_NUMB_BITS; td = tq % GMP_NUMB_BITS; /* Note: The "else" (td == 0) branch below can be executed only if tq >= GMP_NUMB_BITS, which is possible only when logn is large enough. Indeed, if tq > logn + some constant, this means that the TMD did not occur. TODO: Find an upper bound on tq, and add a corresponding MPFR_ASSERTD assertion / hint. On some platforms, this branch might be dead code, and such information would allow the compiler to remove it. It seems that this branch is never tested (r12754). 
*/ if (td != 0) { wi++; /* number of words with represented bits */ td = GMP_NUMB_BITS - td; zz = zs - wi; MPFR_ASSERTD (zz >= 0 && zz < zs); mpn_lshift (zp + zz, wp, wi, td); } else { MPFR_ASSERTD (wi > 0); zz = zs - wi; MPFR_ASSERTD (zz >= 0 && zz < zs); if (zz > 0) MPN_COPY (zp + zz, wp, wi); } /* Compute minexp2 = minexp - (zs * GMP_NUMB_BITS + td) safely. */ SAFE_SUB (minexp2, minexp, zz * GMP_NUMB_BITS + td); MPFR_ASSERTD (minexp2 == err + 2 - zq); } else /* err < minexp */ { /* At least one of the identical bits is not represented, meaning that it is 0 and all these bits are 0's. Thus the accumulator will be 0. The new minexp is determined from maxexp, with cq bits reserved to avoid an overflow (as in the early steps). */ MPFR_LOG_MSG (("[TMD] err < minexp\n", 0)); zz = zs; /* Compute minexp2 = maxexp - (zq - cq) safely. */ SAFE_SUB (minexp2, maxexp, zq - cq); MPFR_ASSERTD (minexp2 == err + 1 - zq); } MPN_ZERO (zp, zz); /* We need to determine the sign sst of the secondary term. In sum_raw, since the truncated sum corresponding to this secondary term will be in [2^(e-1),2^e] and the error strictly less than 2^err, we can stop the iterations when e - err >= 1 (this bound is the 11th argument of sum_raw). */ cancel2 = sum_raw (zp, zs, zq, x, n, minexp2, maxexp, tp, ts, logn, 1, NULL, NULL, NULL); if (cancel2 != 0) sst = MPFR_LIMB_MSB (zp[zs-1]) == 0 ? 1 : -1; else if (tmd == 1) sst = 0; else { /* For halfway cases, let's virtually eliminate them by setting a sst equivalent to a non-halfway case, which depends on the last bit of the pre-rounded result. */ MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2); sst = lbit != 0 ? 1 : -1; } MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n", tmd, (int) rbit, sst)); /* Do not consider the corrected sst for MPFR_COV_SET */ MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit] [cancel2 == 0 ? 1 : sst+1][neg][sq > MPFR_PREC_MIN]); inex = MPFR_IS_LIKE_RNDD (rnd, sgn) ? (sst ? -1 : 0) : MPFR_IS_LIKE_RNDU (rnd, sgn) ? (sst ? 
1 : 0) : (MPFR_ASSERTD (rnd == MPFR_RNDN), tmd == 1 ? - sst : sst); if (tmd == 2 && sst == (rbit != 0 ? -1 : 1)) corr = 1 - (int) rbit; else if (MPFR_IS_LIKE_RNDD (rnd, sgn) && sst == -1) corr = (int) rbit - 1; else if (MPFR_IS_LIKE_RNDU (rnd, sgn) && sst == +1) corr = (int) rbit + 1; else corr = (int) rbit; } /* tmd */ } /* rnd != MPFR_RNDF */ MPFR_LOG_MSG (("neg=%d corr=%d inex=%d\n", neg, corr, inex)); /* Sign handling (-> absolute value and sign), together with rounding. The most common cases are corr = 0 and corr = 1 as this is necessarily the case when the TMD did not occur. */ MPFR_ASSERTD (corr >= -1 && corr <= 2); MPFR_SIGN (sum) = sgn; /* Let's copy/shift the bits [max(u,minexp),e) to the most significant part of the destination, and zero the least significant part (there can be one only if u < minexp). The trailing bits of the destination may contain garbage at this point. */ sn = MPFR_PREC2LIMBS (sq); sd = (mpfr_prec_t) sn * GMP_NUMB_BITS - sq; sh = cancel % GMP_NUMB_BITS; MPFR_ASSERTD (sd >= 0 && sd < GMP_NUMB_BITS); if (MPFR_LIKELY (u > minexp)) { mp_size_t wi; /* Recompute the initial value of wi. */ wi = (u - minexp) / GMP_NUMB_BITS; if (MPFR_LIKELY (sh != 0)) { mp_size_t fi; fi = (e - minexp) / GMP_NUMB_BITS - (sn - 1); MPFR_ASSERTD (fi == wi || fi == wi + 1); mpn_lshift (sump, wp + fi, sn, sh); if (fi != wi) sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh); } else { MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS == cancel); MPN_COPY (sump, wp + wi, sn); } } else /* u <= minexp */ { mp_size_t en; en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS; if (MPFR_LIKELY (sh != 0)) mpn_lshift (sump + sn - en, wp, en, sh); else if (MPFR_UNLIKELY (en > 0)) MPN_COPY (sump + sn - en, wp, en); if (sn > en) MPN_ZERO (sump, sn - en); } /* Let's take the complement if the result is negative, and at the same time, do the rounding and zero the trailing bits. As this is valid only for precisions >= 2, there is special code for precision 1 first. 
*/ if (MPFR_UNLIKELY (sq == 1)) /* precision 1 */ { sump[0] = MPFR_LIMB_HIGHBIT; e += neg ? 1 - corr : corr; } else if (neg) /* negative result with sq > 1 */ { MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) == 0); /* abs(x + corr) = - (x + corr) = com(x) + (1 - corr) */ /* We want to avoid separate mpn_com (or mpn_neg) and mpn_add_1 (or mpn_sub_1) operations, as they could yield two loops in some particular cases involving a long sequence of 0's in the low significant bits (except the least significant bit, which doesn't matter). */ if (corr <= 1) { mp_limb_t corr2; /* Here we can just do the correction operation on the least significant limb, then do either a mpn_com or a mpn_neg on the remaining limbs, depending on the carry (BTW, mpn_neg is just a mpn_com with an initial carry propagation: after some point, mpn_neg does a complement). */ corr2 = (mp_limb_t) (1 - corr) << sd; /* Note: If corr = -1, this can overflow to corr2 = 0. This case is taken into account below. */ sump[0] = (~ (sump[0] | MPFR_LIMB_MASK (sd))) + corr2; if (sump[0] < corr2 || (corr2 == 0 && corr < 0)) { if (sn == 1 || ! mpn_neg (sump + 1, sump + 1, sn - 1)) { /* Note: The | is important when sump[sn-1] is not 0 (this can occur with sn = 1 and corr = -1). TODO: Add something to make sure that this is tested. */ sump[sn-1] |= MPFR_LIMB_HIGHBIT; e++; } } else if (sn > 1) mpn_com (sump + 1, sump + 1, sn - 1); } else /* corr == 2 */ { mp_limb_t corr2, c; mp_size_t i = 1; /* We want to compute com(x) - 1, but GMP doesn't have an operation for that. The fact is that a sequence of low significant bits 1 is invariant. Starting at the first low significant bit 0, we can do the complement with mpn_com. 
*/ corr2 = MPFR_LIMB_ONE << sd; c = ~ (sump[0] | MPFR_LIMB_MASK (sd)); sump[0] = c - corr2; if (c == 0) { while (MPFR_ASSERTD (i < sn), sump[i] == MPFR_LIMB_MAX) i++; sump[i] = (~ sump[i]) - 1; i++; } if (i < sn) mpn_com (sump + i, sump + i, sn - i); else if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0)) { /* Happens on 01111...111, whose complement is 10000...000, and com(x) - 1 is 01111...111. */ sump[sn-1] |= MPFR_LIMB_HIGHBIT; e--; } } } else /* positive result with sq > 1 */ { MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0); sump[0] &= ~ MPFR_LIMB_MASK (sd); if (corr > 0) { mp_limb_t corr2, carry_out; corr2 = (mp_limb_t) corr << sd; /* If corr == 2 && sd == GMP_NUMB_BITS - 1, this overflows to corr2 = 0. This case is taken into account below. */ carry_out = corr2 != 0 ? mpn_add_1 (sump, sump, sn, corr2) : (MPFR_ASSERTD (sn > 1), mpn_add_1 (sump + 1, sump + 1, sn - 1, MPFR_LIMB_ONE)); MPFR_ASSERTD (sump[sn-1] >> (GMP_NUMB_BITS - 1) == !carry_out); if (MPFR_UNLIKELY (carry_out)) { /* Note: The | is important when sump[sn-1] is not 0 (this can occur with sn = 1 and corr = 2). TODO: Add something to make sure that this is tested. */ sump[sn-1] |= MPFR_LIMB_HIGHBIT; e++; } } if (corr < 0) { mpn_sub_1 (sump, sump, sn, MPFR_LIMB_ONE << sd); if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0)) { sump[sn-1] |= MPFR_LIMB_HIGHBIT; e--; } } } MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0); MPFR_LOG_MSG (("Set exponent e=%" MPFR_EXP_FSPEC "d\n", (mpfr_eexp_t) e)); /* e may be outside the current exponent range, but this will be checked with mpfr_check_range below. 
*/ MPFR_EXP (sum) = e; } /* main block */ MPFR_TMP_FREE (marker); return mpfr_check_range (sum, inex, rnd); } /**********************************************************************/ int mpfr_sum (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd) { MPFR_LOG_FUNC (("n=%lu rnd=%d", n, rnd), ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum)); if (MPFR_UNLIKELY (n <= 2)) { if (n == 0) { MPFR_SET_ZERO (sum); MPFR_SET_POS (sum); MPFR_RET (0); } else if (n == 1) return mpfr_set (sum, x[0], rnd); else return mpfr_add (sum, x[0], x[1], rnd); } else { mpfr_exp_t maxexp = MPFR_EXP_MIN; /* max(Empty) */ unsigned long i; unsigned long rn = 0; /* will be the number of regular inputs */ /* sign of infinities and zeros (0: currently unknown) */ int sign_inf = 0, sign_zero = 0; MPFR_LOG_MSG (("Check for special inputs (n = %lu >= 3)\n", n)); for (i = 0; i < n; i++) { if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x[i]))) { if (MPFR_IS_NAN (x[i])) { /* The current value x[i] is NaN. Then the sum is NaN. */ nan: MPFR_SET_NAN (sum); MPFR_RET_NAN; } else if (MPFR_IS_INF (x[i])) { /* The current value x[i] is an infinity. There are two cases: 1. This is the first infinity value (sign_inf == 0). Then set sign_inf to its sign, and go on. 2. All the infinities found until now have the same sign sign_inf. If this new infinity has a different sign, then return NaN immediately, else go on. */ if (sign_inf == 0) sign_inf = MPFR_SIGN (x[i]); else if (MPFR_SIGN (x[i]) != sign_inf) goto nan; } else if (MPFR_UNLIKELY (rn == 0)) { /* The current value x[i] is a zero. The code below matters only when all values found until now are zeros, otherwise it is harmless (the test rn == 0 above is just a minor optimization). Here we track the sign of the zero result when all inputs are zeros: if all zeros have the same sign, the result will have this sign, otherwise (i.e. 
if there is at least a zero of each sign), the sign of the zero result depends only on the rounding mode (note that this choice is sticky when new zeros are considered). */ MPFR_ASSERTD (MPFR_IS_ZERO (x[i])); if (sign_zero == 0) sign_zero = MPFR_SIGN (x[i]); else if (MPFR_SIGN (x[i]) != sign_zero) sign_zero = rnd == MPFR_RNDD ? -1 : 1; } } else { /* The current value x[i] is a regular number. */ mpfr_exp_t e = MPFR_GET_EXP (x[i]); if (e > maxexp) maxexp = e; /* maximum exponent found until now */ rn++; /* current number of regular inputs */ } } MPFR_LOG_MSG (("rn=%lu sign_inf=%d sign_zero=%d\n", rn, sign_inf, sign_zero)); /* At this point the result cannot be NaN (this case has already been filtered out). */ if (MPFR_UNLIKELY (sign_inf != 0)) { /* At least one infinity, and all of them have the same sign sign_inf. The sum is the infinity of this sign. */ MPFR_SET_INF (sum); MPFR_SET_SIGN (sum, sign_inf); MPFR_RET (0); } /* At this point, all the inputs are finite numbers. */ if (MPFR_UNLIKELY (rn == 0)) { /* All the numbers were zeros (and there is at least one). The sum is zero with sign sign_zero. */ MPFR_ASSERTD (sign_zero != 0); MPFR_SET_ZERO (sum); MPFR_SET_SIGN (sum, sign_zero); MPFR_RET (0); } /* Optimize the case where there are only two regular numbers. */ if (MPFR_UNLIKELY (rn <= 2)) { unsigned long h = ULONG_MAX; for (i = 0; i < n; i++) if (! MPFR_IS_SINGULAR (x[i])) { if (rn == 1) return mpfr_set (sum, x[i], rnd); if (h != ULONG_MAX) return mpfr_add (sum, x[h], x[i], rnd); h = i; } MPFR_RET_NEVER_GO_HERE(); } return sum_aux (sum, x, n, rnd, maxexp, rn); } }