00001
00002
00003
00004
00005
00006 #include "update/molecdyn/monomial/remez_gmp.h"
00007
00008 #define JMAX 1000 //Maximum number of iterations of Newton's approximation
00009
00010 namespace Chroma
00011 {
00012
00013
00014 RemezGMP::RemezGMP()
00015 {
00016 START_CODE();
00017
00018 alloc = false;
00019
00020 END_CODE();
00021 }
00022
00023
00024
00025 RemezGMP::RemezGMP(const Real& lower, const Real& upper, long precision)
00026 {
00027 START_CODE();
00028
00029 prec = precision;
00030 bigfloat::setDefaultPrecision(prec);
00031
00032 alloc = false;
00033 apstrt = lower;
00034 apend = upper;
00035 apwidt = apend - apstrt;
00036 QDPIO::cout << __func__ << ": approximation bounds are ["
00037 << (double)apstrt << " , " << (double)apend << "]"<< endl;
00038
00039 n = 0;
00040 d = 0;
00041
00042 tolerance = 1e-15;
00043
00044
00045 if (sizeof(Real)==sizeof(double)) {
00046 tolerance = 1e-15;
00047 } else if (sizeof(Real)==sizeof(float)) {
00048 tolerance = 1e-8;
00049 }
00050
00051 END_CODE();
00052 }
00053
00054
00055 void RemezGMP::allocate(int num_degree, int den_degree)
00056 {
00057 START_CODE();
00058
00059
00060 param.resize(num_degree+den_degree+1);
00061 roots.resize(num_degree);
00062 poles.resize(den_degree);
00063 xx.resize(num_degree+den_degree+3);
00064 mm.resize(num_degree+den_degree+2);
00065
00066 alloc = true;
00067
00068 END_CODE();
00069 }
00070
00071
00072 void RemezGMP::setBounds(const Real& lower, const Real& upper)
00073 {
00074 START_CODE();
00075
00076 apstrt = lower;
00077 apend = upper;
00078 apwidt = apend - apstrt;
00079
00080 END_CODE();
00081 }
00082
00083
00084 Real RemezGMP::generateApprox(int degree, unsigned long pnum, unsigned long pden)
00085 {
00086 return generateApprox(degree, degree, pnum, pden);
00087 }
00088
00089
00090 Real RemezGMP::generateApprox(int num_degree, int den_degree,
00091 unsigned long pnum, unsigned long pden)
00092 {
00093 START_CODE();
00094
00095
00096 if (num_degree != n || den_degree != d) allocate(num_degree,den_degree);
00097
00098 step.resize(num_degree+den_degree+2);
00099
00100 power_num = pnum;
00101 power_den = pden;
00102 spread = 1.0e37;
00103 iter = 0;
00104
00105 n = num_degree;
00106 d = den_degree;
00107 neq = n + d + 1;
00108
00109 initialGuess();
00110 stpini(step);
00111
00112 while (spread > tolerance)
00113 {
00114
00115
00116 if (iter++%100==0)
00117 QDPIO::cout << __func__ << ": Iteration " << iter-1
00118 << " spread " << spread << " delta " << delta << endl;
00119
00120 equations();
00121
00122 if (delta < tolerance)
00123 {
00124 QDPIO::cerr << __func__ << ":Delta too small, try increasing precision" << endl;;
00125 QDP_abort(1);
00126 }
00127
00128 search(step);
00129
00130 }
00131
00132 int sign;
00133 Real error = (Real)getErr(mm[0],sign);
00134 QDPIO::cout << __func__ << " Converged at " << iter << " iterations, error = " << error << endl;
00135
00136
00137 if (!root())
00138 {
00139 QDPIO::cerr << __func__ << ": Root finding failed" << endl;
00140 QDP_abort(1);
00141 }
00142
00143 END_CODE();
00144
00145
00146 return error;
00147 }
00148
00149
00150 RemezCoeff_t RemezGMP::getPFE()
00151 {
00152 START_CODE();
00153
00154 if (n!=d)
00155 {
00156 QDPIO::cerr << __func__ << ": Cannot handle case: Numerator degree neq Denominator degree" << endl;
00157 QDP_abort(1);
00158 }
00159
00160 if (!alloc)
00161 {
00162 QDPIO::cerr << __func__ << ": Approximation not yet generated" << endl;
00163 QDP_abort(1);
00164 }
00165
00166 RemezCoeff_t coeff;
00167 coeff.res.resize(n);
00168 coeff.pole.resize(d);
00169
00170 multi1d<bigfloat> r(n);
00171 multi1d<bigfloat> p(d);
00172
00173 for (int i=0; i<n; i++) r[i] = roots[i];
00174 for (int i=0; i<d; i++) p[i] = poles[i];
00175
00176
00177 pfe(r, p, norm);
00178
00179
00180 coeff.norm = (double)norm;
00181 for (int i=0; i<n; i++) coeff.res[i] = (double)r[i];
00182 for (int i=0; i<d; i++) coeff.pole[i] = (double)p[i];
00183
00184 END_CODE();
00185
00186 return coeff;
00187 }
00188
00189
00190 RemezCoeff_t RemezGMP::getIPFE()
00191 {
00192 START_CODE();
00193
00194 if (!alloc)
00195 {
00196 QDPIO::cerr << __func__ << ": Approximation not yet generated" << endl;
00197 QDP_abort(1);
00198 }
00199
00200 RemezCoeff_t coeff;
00201 coeff.res.resize(n);
00202 coeff.pole.resize(d);
00203
00204 multi1d<bigfloat> r(d);
00205 multi1d<bigfloat> p(n);
00206
00207
00208 for (int i=0; i<n; i++) {
00209 r[i] = poles[i];
00210 p[i] = roots[i];
00211 }
00212
00213
00214 pfe(r, p, (bigfloat)1l/norm);
00215
00216
00217 coeff.norm = (double)((bigfloat)1l/(norm));
00218 for (int i=0; i<n; i++) {
00219 coeff.res[i] = (double)r[i];
00220 coeff.pole[i] = (double)p[i];
00221 }
00222
00223 END_CODE();
00224
00225 return coeff;
00226 }
00227
00228
00229 void RemezGMP::initialGuess()
00230 {
00231 START_CODE();
00232
00233
00234 long ncheb = neq;
00235 bigfloat a, r;
00236
00237
00238
00239 a = ncheb;
00240 mm[0] = apstrt;
00241 for (long i = 1; i < ncheb; i++) {
00242 r = 0.5 * (1 - cos((M_PI * i)/(double) a));
00243
00244 r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
00245 mm[i] = apstrt + r * apwidt;
00246 }
00247 mm[ncheb] = apend;
00248
00249 a = 2.0 * ncheb;
00250 for (long i = 0; i <= ncheb; i++) {
00251 r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
00252
00253 r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
00254 xx[i] = apstrt + r * apwidt;
00255 }
00256
00257 END_CODE();
00258 }
00259
00260
00261 void RemezGMP::stpini(multi1d<bigfloat>& step)
00262 {
00263 START_CODE();
00264
00265 if (step.size() == 0)
00266 QDP_error_exit("%s: step not allocated", __func__);
00267
00268 xx[neq+1] = apend;
00269 delta = 0.25;
00270 step[0] = xx[0] - apstrt;
00271 for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
00272 step[neq] = step[neq-1];
00273
00274 END_CODE();
00275 }
00276
00277
00278 void RemezGMP::search(multi1d<bigfloat>& step)
00279 {
00280 START_CODE();
00281
00282 if (step.size() == 0)
00283 QDP_error_exit("%s: step not allocated", __func__);
00284
00285 bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
00286 int i, j, meq, emsign, ensign, steps;
00287
00288 meq = neq + 1;
00289 multi1d<bigfloat> yy(meq);
00290
00291 bigfloat eclose = 1.0e30;
00292 bigfloat farther = 0l;
00293
00294 j = 1;
00295 xx0 = apstrt;
00296
00297 for (i = 0; i < meq; i++) {
00298 steps = 0;
00299 xx1 = xx[i];
00300 if (i == meq-1) xx1 = apend;
00301 xm = mm[i];
00302 ym = getErr(xm,emsign);
00303 q = step[i];
00304 xn = xm + q;
00305 if (xn < xx0 || xn >= xx1) {
00306 q = -q;
00307 xn = xm;
00308 yn = ym;
00309 ensign = emsign;
00310 } else {
00311 yn = getErr(xn,ensign);
00312 if (yn < ym) {
00313 q = -q;
00314 xn = xm;
00315 yn = ym;
00316 ensign = emsign;
00317 }
00318 }
00319
00320 while(yn >= ym) {
00321 if (++steps > 10) break;
00322 ym = yn;
00323 xm = xn;
00324 emsign = ensign;
00325 a = xm + q;
00326 if (a == xm || a <= xx0 || a >= xx1) break;
00327 xn = a;
00328 yn = getErr(xn,ensign);
00329 }
00330
00331 mm[i] = xm;
00332 yy[i] = ym;
00333
00334 if (eclose > ym) eclose = ym;
00335 if (farther < ym) farther = ym;
00336
00337 xx0 = xx1;
00338 }
00339
00340 q = (farther - eclose);
00341 if (eclose != 0.0) q /= eclose;
00342 if (q >= spread) delta *= 0.5;
00343 spread = q;
00344
00345 for (i = 0; i < neq; i++) {
00346 q = yy[i+1];
00347 if (q != 0.0) q = yy[i] / q - (bigfloat)1l;
00348 else q = 0.0625;
00349 if (q > (bigfloat)0.25) q = 0.25;
00350 q *= mm[i+1] - mm[i];
00351 step[i] = q * delta;
00352 }
00353 step[neq] = step[neq-1];
00354
00355 for (i = 0; i < neq; i++) {
00356 xm = xx[i] - step[i];
00357 if (xm <= apstrt) continue;
00358 if (xm >= apend) continue;
00359 if (xm <= mm[i]) xm = (bigfloat)0.5 * (mm[i] + xx[i]);
00360 if (xm >= mm[i+1]) xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
00361 xx[i] = xm;
00362 }
00363
00364 END_CODE();
00365 }
00366
00367
00368 void RemezGMP::equations(void)
00369 {
00370 START_CODE();
00371
00372 bigfloat x, y, z;
00373 int i, j, ip;
00374 bigfloat *aa;
00375
00376 multi1d<bigfloat> AA(neq*neq);
00377 multi1d<bigfloat> BB(neq);
00378
00379 for (i = 0; i < neq; i++) {
00380 ip = neq * i;
00381 x = xx[i];
00382 y = func(x);
00383
00384 z = (bigfloat)1l;
00385 aa = &AA[ip];
00386 for (j = 0; j <= n; j++) {
00387 *aa++ = z;
00388 z *= x;
00389 }
00390
00391 z = (bigfloat)1l;
00392 for (j = 0; j < d; j++) {
00393 *aa++ = -y * z;
00394 z *= x;
00395 }
00396 BB[i] = y * z;
00397 }
00398
00399
00400 if (simq(AA, BB, param, neq))
00401 {
00402 QDPIO::cerr << __func__ << ": simq failed" << endl;
00403 QDP_abort(1);
00404 }
00405
00406 END_CODE();
00407 }
00408
00409
00410
00411 bigfloat RemezGMP::approx(const bigfloat& x)
00412 {
00413 START_CODE();
00414
00415 bigfloat yn, yd;
00416 int i;
00417
00418
00419 yn = param[n];
00420 for (i = n-1; i >= 0; i--) yn = x * yn + param[i];
00421 yd = x + param[n+d];
00422 for (i = n+d-1; i > n; i--) yd = x * yd + param[i];
00423
00424 END_CODE();
00425
00426 return(yn/yd);
00427 }
00428
00429
00430 bigfloat RemezGMP::getErr(const bigfloat& x, int& sign)
00431 {
00432 bigfloat e, f;
00433
00434 f = func(x);
00435 e = approx(x) - f;
00436 if (f != 0) e /= f;
00437 if (e < (bigfloat)0.0) {
00438 sign = -1;
00439 e = -e;
00440 }
00441 else
00442 sign = 1;
00443
00444 return(e);
00445 }
00446
00447
00448 bigfloat RemezGMP::func(const bigfloat& x)
00449 {
00450 START_CODE();
00451
00452 bigfloat y,dy,f=1l,df;
00453
00454
00455 y = (bigfloat)pow((double)x,(double)((bigfloat)1l/(bigfloat)power_den));
00456 while (abs_bf(f)>(bigfloat)1l/pow_bf((bigfloat)10,prec)) {
00457 f = pow_bf(y,power_den) - x;
00458 df = (bigfloat)power_den*pow_bf(y,power_den-1);
00459 dy = f/df;
00460 y -= dy;
00461 }
00462
00463 END_CODE();
00464
00465 return pow_bf(y,power_num);
00466 }
00467
00468
00469 int RemezGMP::simq(multi1d<bigfloat>& A, multi1d<bigfloat>& B, multi1d<bigfloat>& X, int n)
00470 {
00471 START_CODE();
00472
00473 if (A.size() == 0 || B.size() == 0)
00474 QDP_error_exit("%s: A or B not allocated", __func__);
00475
00476 int i, j, ij, ip, ipj, ipk, ipn;
00477 int idxpiv, iback;
00478 int k, kp, kp1, kpk, kpn;
00479 int nip, nkp, nm1;
00480 bigfloat em, q, rownrm, big, size, pivot, sum;
00481 bigfloat *aa;
00482
00483 multi1d<int> IPS(neq);
00484
00485 nm1 = n - 1;
00486
00487
00488 ij = 0;
00489 for (i = 0; i < n; i++) {
00490 IPS[i] = i;
00491 rownrm = 0.0;
00492 for(j = 0; j < n; j++) {
00493 q = abs_bf(A[ij]);
00494 if(rownrm < q) rownrm = q;
00495 ++ij;
00496 }
00497 if (rownrm == (bigfloat)0l)
00498 {
00499 QDPIO::cout << "simq rownrm=0\n" << endl;
00500 return(1);
00501 }
00502 X[i] = (bigfloat)1.0 / rownrm;
00503 }
00504
00505 for (k = 0; k < nm1; k++) {
00506 big = 0.0;
00507 idxpiv = 0;
00508
00509 for (i = k; i < n; i++) {
00510 ip = IPS[i];
00511 ipk = n*ip + k;
00512 size = abs_bf(A[ipk]) * X[ip];
00513 if (size > big) {
00514 big = size;
00515 idxpiv = i;
00516 }
00517 }
00518
00519 if (big == (bigfloat)0l)
00520 {
00521 QDPIO::cout << "simq big=0" << endl;
00522 return(2);
00523 }
00524 if (idxpiv != k) {
00525 j = IPS[k];
00526 IPS[k] = IPS[idxpiv];
00527 IPS[idxpiv] = j;
00528 }
00529 kp = IPS[k];
00530 kpk = n*kp + k;
00531 pivot = A[kpk];
00532 kp1 = k+1;
00533 for (i = kp1; i < n; i++) {
00534 ip = IPS[i];
00535 ipk = n*ip + k;
00536 em = -A[ipk] / pivot;
00537 A[ipk] = -em;
00538 nip = n*ip;
00539 nkp = n*kp;
00540 aa = &A[nkp+kp1];
00541 for (j = kp1; j < n; j++) {
00542 ipj = nip + j;
00543 A[ipj] = A[ipj] + em * *aa++;
00544 }
00545 }
00546 }
00547 kpn = n * IPS[n-1] + n - 1;
00548 if (A[kpn] == (bigfloat)0l)
00549 {
00550 QDPIO::cout << "simq A[kpn]=0" << endl;
00551 return(3);
00552 }
00553
00554
00555 ip = IPS[0];
00556 X[0] = B[ip];
00557 for (i = 1; i < n; i++) {
00558 ip = IPS[i];
00559 ipj = n * ip;
00560 sum = 0.0;
00561 for (j = 0; j < i; j++) {
00562 sum += A[ipj] * X[j];
00563 ++ipj;
00564 }
00565 X[i] = B[ip] - sum;
00566 }
00567
00568 ipn = n * IPS[n-1] + n - 1;
00569 X[n-1] = X[n-1] / A[ipn];
00570
00571 for (iback = 1; iback < n; iback++) {
00572
00573 i = nm1 - iback;
00574 ip = IPS[i];
00575 nip = n*ip;
00576 sum = 0.0;
00577 aa = &A[nip+i+1];
00578 for (j= i + 1; j < n; j++)
00579 sum += *aa++ * X[j];
00580 X[i] = (X[i] - sum) / A[nip+i];
00581 }
00582
00583 END_CODE();
00584
00585 return(0);
00586 }
00587
00588
00589 int RemezGMP::root()
00590 {
00591 START_CODE();
00592
00593 long i,j;
00594 bigfloat x,dx=0.05;
00595 bigfloat upper=1, lower=-100000;
00596 bigfloat tol = 1e-20;
00597
00598 multi1d<bigfloat> poly(neq+1);
00599
00600
00601 for (i=0; i<=n; i++) poly[i] = param[i];
00602 for (i=n-1; i>=0; i--) {
00603 roots[i] = rtnewt(poly,i+1,lower,upper,tol);
00604 if (roots[i] == 0.0)
00605 {
00606 QDPIO::cerr << __func__ << ": Failure to converge on root" << endl;
00607 return 0;
00608 }
00609 poly[0] = -poly[0]/roots[i];
00610 for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/roots[i];
00611 }
00612
00613
00614 poly[d] = 1l;
00615 for (i=0; i<d; i++) poly[i] = param[n+1+i];
00616 for (i=d-1; i>=0; i--) {
00617 poles[i]=rtnewt(poly,i+1,lower,upper,tol);
00618 if (poles[i] == 0.0) {
00619 QDPIO::cerr << __func__ << ": Failure to converge on root" << endl;
00620 return 0;
00621 }
00622 poly[0] = -poly[0]/poles[i];
00623 for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/poles[i];
00624 }
00625
00626 norm = param[n];
00627 QDPIO::cout << __func__ << ": Normalisation constant is " << (double)norm << endl;
00628 for (i=0; i<n; i++)
00629 QDPIO::cout << "root[" << i << "] = " << (double)roots[i] << endl;
00630 for (i=0; i<d; i++)
00631 QDPIO::cout << "pole[" << i << "] = " << (double)poles[i] << endl;
00632
00633 END_CODE();
00634
00635 return 1;
00636 }
00637
00638
00639 bigfloat RemezGMP::polyEval(const bigfloat& x, const multi1d<bigfloat>& poly, long size)
00640 {
00641 bigfloat f = poly[size];
00642 for (int i=size-1; i>=0; i--) f = f*x + poly[i];
00643 return f;
00644 }
00645
00646
00647 bigfloat RemezGMP::polyDiff(const bigfloat& x, const multi1d<bigfloat>& poly, long size)
00648 {
00649 bigfloat df = (bigfloat)size*poly[size];
00650 for (int i=size-1; i>0; i--)
00651 df = df*x + (bigfloat)i*poly[i];
00652 return df;
00653 }
00654
00655
00656 bigfloat RemezGMP::rtnewt(const multi1d<bigfloat>& poly, long i,
00657 const bigfloat& x1, const bigfloat& x2, const bigfloat& xacc)
00658 {
00659 START_CODE();
00660
00661 int j;
00662 bigfloat df, dx, f, rtn;
00663
00664 rtn=(bigfloat)0.5*(x1+x2);
00665 for (j=1; j<=JMAX;j++)
00666 {
00667 f = polyEval(rtn, poly, i);
00668 df = polyDiff(rtn, poly, i);
00669 dx = f/df;
00670 rtn -= dx;
00671 if ((x1-rtn)*(rtn-x2) < (bigfloat)0.0)
00672 QDPIO::cerr << __func__ << ": Jumped out of brackets in rtnewt" << endl;
00673 if (abs_bf(dx) < xacc) return rtn;
00674 }
00675 QDPIO::cerr << __func__ << ": Maximum number of iterations exceeded in rtnewt" << endl;
00676
00677 END_CODE();
00678 return 0.0;
00679 }
00680
00681
00682
00683
00684 void RemezGMP::pfe(multi1d<bigfloat>& res, multi1d<bigfloat>& poles, const bigfloat& norm)
00685 {
00686 START_CODE();
00687
00688
00689
00690 if (res.size() == 0 || poles.size() == 0)
00691 QDP_error_exit("%s: res or poles not allocated", __func__);
00692
00693 int i,j,small;
00694 bigfloat temp;
00695 multi1d<bigfloat> numerator(n);
00696 multi1d<bigfloat> denominator(d);
00697
00698
00699 for (i=1; i<n; i++) {
00700 numerator[i] = 0l;
00701 denominator[i] = 0l;
00702 }
00703 numerator[0]=1l;
00704 denominator[0]=1l;
00705
00706 for (j=0; j<n; j++) {
00707 for (i=n-1; i>=0; i--) {
00708 numerator[i] *= -res[j];
00709 denominator[i] *= -poles[j];
00710 if (i>0) {
00711 numerator[i] += numerator[i-1];
00712 denominator[i] += denominator[i-1];
00713 }
00714 }
00715 }
00716
00717
00718
00719 for (i=0; i<n; i++) numerator[i] -= denominator[i];
00720
00721
00722
00723 for (i=0; i<n; i++) {
00724 res[i] = 0l;
00725 for (j=n-1; j>=0; j--) {
00726 res[i] = poles[i]*res[i]+numerator[j];
00727 }
00728 for (j=n-1; j>=0; j--) {
00729 if (i!=j) res[i] /= poles[i]-poles[j];
00730 }
00731 res[i] *= norm;
00732 }
00733
00734
00735 j = 0;
00736 for (i=0; i<n; i++) poles[i] = -poles[i];
00737
00738
00739 for (j=0; j<n; j++)
00740 {
00741 small = j;
00742 for (i=j+1; i<n; i++) {
00743 if (poles[i] < poles[small]) small = i;
00744 }
00745 if (small != j) {
00746 temp = poles[small];
00747 poles[small] = poles[j];
00748 poles[j] = temp;
00749 temp = res[small];
00750 res[small] = res[j];
00751 res[j] = temp;
00752 }
00753 QDPIO::cout << __func__ << ": Residue = " << (double)res[j] << " Pole = " << (double)poles[j] << endl;
00754 }
00755
00756
00757
00758 END_CODE();
00759 }
00760
00761
00762
00763 Real RemezGMP::evalPFE(const Real& x, const RemezCoeff_t& coeff)
00764 {
00765 if ((coeff.res.size() != coeff.pole.size()) || coeff.res.size() == 0)
00766 {
00767 QDPIO::cerr << __func__ << ": invalid res and pole" << endl;
00768 QDP_abort(1);
00769 }
00770
00771 Real f = coeff.norm;
00772 for (int i=0; i < coeff.res.size(); ++i)
00773 f += coeff.res[i] / (x + coeff.pole[i]);
00774
00775 return f;
00776 }
00777
00778 }
00779