00001
00002
00003
00004
00005
00006
00007 #include <qdp-lapack.h>
00008
00009
00010
00011 #include "actions/ferm/invert/inv_eigcg2_array.h"
00012
00013 #include "actions/ferm/invert/containers.h"
00014 #include "actions/ferm/invert/norm_gram_schm.h"
00015
00016
00017 #define DEBUG_FINAL
00018
00019
00020
00021 namespace Chroma
00022 {
00023 using namespace LinAlg;
00024
00025
00026 namespace InvEigCG2ArrayEnv
00027 {
00028
00029
00030
00031 template<typename T>
00032 void SubSpaceMatrix_T(LinAlg::Matrix<DComplex>& H,
00033 const LinearOperatorArray<T>& A,
00034 const multi2d<T>& evec,
00035 int Nvecs)
00036 {
00037 H.N = Nvecs;
00038 if(Nvecs>evec.size2()){
00039 H.N = evec.size2();
00040 }
00041 if(H.N>H.size()){
00042 QDPIO::cerr<<"OOPS! your matrix can't take this many matrix elements\n";
00043 exit(1);
00044 }
00045
00046 H.mat = 0.0;
00047
00048 multi1d<T> Ap;
00049 for(int i(0);i<H.N;i++)
00050 {
00051 A(Ap,evec[i],PLUS);
00052 for(int j(i);j<H.N;j++)
00053 {
00054 H(j,i) = innerProduct(evec[j], Ap, A.subset());
00055
00056 H(i,j) = conj(H(j,i));
00057 if(i==j) H(i,j) = real(H(i,j));
00058 }
00059 }
00060 }
00061
00062
00063 template<typename T>
00064 SystemSolverResults_t InvEigCG2_T(const LinearOperatorArray<T>& A,
00065 multi1d<T>& x,
00066 const multi1d<T>& b,
00067 multi1d<Double>& eval,
00068 multi2d<T>& evec,
00069 int Neig,
00070 int Nmax,
00071 const Real& RsdCG, int MaxCG)
00072 {
00073 START_CODE();
00074
00075 FlopCounter flopcount;
00076 flopcount.reset();
00077 StopWatch swatch;
00078 swatch.reset();
00079 swatch.start();
00080
00081 const int Ls = A.size();
00082
00083 SystemSolverResults_t res;
00084
00085 multi1d<T> p;
00086 multi1d<T> Ap;
00087 multi1d<T> r;
00088
00089
00090 Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
00091 Double alphaprev, alpha,pAp;
00092 Real beta;
00093 Double betaprev ;
00094 Double r_dot_z, r_dot_z_old;
00095
00096 int k = 0;
00097 A(Ap,x,PLUS);
00098 for(int l=0; l < Ls; ++l)
00099 r[l][A.subset()] = b[l] - Ap[l];
00100
00101
00102 #if 1
00103 QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size2() << endl;
00104 QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_dot_z << endl;
00105 #endif
00106 Matrix<DComplex> H(Nmax);
00107 VectorArrays<T> vec(Nmax,Ls);
00108
00109
00110 beta=0.0;
00111 alpha = 1.0;
00112 multi1d<T> Ap_prev;
00113 multi1d<T> tt;
00114
00115
00116
00117 while(toBool(r_dot_z>rsd_sq))
00118 {
00119
00120
00121
00122
00123 r_dot_z_old = r_dot_z;
00124 r_dot_z = norm2(r,A.subset());
00125 Double inv_sqrt_r_dot_z = 1.0/sqrt(r_dot_z);
00126 k++;
00127 if(k==1){
00128
00129 for(int l=0; l < Ls; ++l)
00130 p[l][A.subset()] = r[l];
00131 }
00132 else{
00133 betaprev = beta;
00134 beta = r_dot_z/r_dot_z_old;
00135
00136 for(int l=0; l < Ls; ++l)
00137 p[l][A.subset()] = r[l] + beta*p[l];
00138 }
00139
00140 if((Neig>0)&& (H.N == Nmax))
00141 for(int l=0; l < Ls; ++l)
00142 Ap_prev[l][A.subset()] = Ap[l];
00143
00144 A(Ap,p,PLUS);
00145
00146
00147 if(Neig>0)
00148 {
00149 if(k>1)
00150 H(H.N-1,H.N-1) = 1/alpha + betaprev/alphaprev;
00151 if(vec.N==Nmax)
00152 {
00153 QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<endl;
00154 #ifdef DEBUG
00155 {
00156 stringstream tag;
00157 tag<<"H"<<k;
00158 OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
00159 }
00160 {
00161 Matrix<DComplex> tmp(Nmax);
00162 SubSpaceMatrix(tmp,A,vec.vec,vec.N);
00163 stringstream tag;
00164 tag<<"H"<<k<<"ex";
00165 OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
00166 }
00167 #endif
00168 multi2d<DComplex> Hevecs(H.mat);
00169 multi1d<Double> Heval;
00170 char V = 'V'; char U = 'U';
00171 QDPLapack::zheev(V,U,Nmax,Hevecs,Heval);
00172 #ifdef DEBUG
00173 {
00174 stringstream tag;
00175 tag<<"Hevecs"<<k;
00176 OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
00177 }
00178 for(int i(0);i<Nmax;i++)
00179 QDPIO::cout<<" eignvalue: "<<Heval[i]<<endl;
00180 #endif
00181 multi2d<DComplex> Hevecs_old(H.mat);
00182 multi1d<Double> Heval_old;
00183 QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
00184 for(int i(0);i<Nmax;i++)
00185 Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0;
00186
00187
00188 H.N = Neig + Neig;
00189
00190 for(int i(Neig);i<2*Neig;i++)
00191 for(int j(0);j<Nmax;j++)
00192 Hevecs(i,j) = Hevecs_old(i-Neig,j);
00193
00194
00195
00196
00197 multi1d<DComplex> TAU;
00198 QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
00199 char R = 'R'; char L = 'L'; char N ='N'; char C = 'C';
00200 multi2d<DComplex> Htmp = H.mat;
00201 QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
00202 QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
00203
00204 QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
00205 #ifdef DEBUG
00206 {
00207 stringstream tag;
00208 tag<<"Htmp"<<k;
00209 OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00210 }
00211 #endif
00212 for(int i(Neig); i< 2*Neig;i++ )
00213 for(int j(2*Neig); j<Nmax; j++)
00214 Htmp(i,j) =0.0;
00215
00216 #ifdef DEBUG
00217 {
00218 stringstream tag;
00219 tag<<"HtmpBeforeZUM"<<k;
00220 OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00221 }
00222 #endif
00223 QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
00224 #ifdef DEBUG
00225 {
00226 stringstream tag;
00227 tag<<"HtmpAfeterZUM"<<k;
00228 OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00229 }
00230 #endif
00231
00232 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
00233 multi2d<T> tt_vec(2*Neig,Ls);
00234 for(int i(0);i<2*Neig;i++){
00235 for(int l=0; l < Ls; ++l)
00236 tt_vec[i][l][A.subset()] = zero;
00237 for(int j(0);j<Nmax;j++)
00238 for(int l=0; l < Ls; ++l)
00239 tt_vec[i][l][A.subset()] +=Htmp(i,j)*vec[j][l];
00240 }
00241 for(int i(0);i<2*Neig;i++)
00242 for(int l=0; l < Ls; ++l)
00243 vec[i][l][A.subset()] = tt_vec[i][l];
00244 #else
00245
00246
00247
00248
00249
00250 #endif
00251 vec.N = 2*Neig;
00252
00253 H.mat = 0.0;
00254 for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
00255
00256
00257 for(int l=0; l < Ls; ++l)
00258 tt[l] = Ap[l] - beta*Ap_prev[l];
00259
00260 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
00261 for (int i=0;i<2*Neig;i++){
00262 H(2*Neig,i)=innerProduct(vec[i],tt,A.subset())*inv_sqrt_r_dot_z;
00263 H(i,2*Neig)=conj(H(2*Neig,i));
00264
00265
00266 }
00267 #else
00268
00269
00270
00271
00272
00273
00274 #endif
00275 }
00276 else{
00277 if(k>1)
00278 H(H.N-1,H.N) = H(H.N,H.N-1) = -sqrt(beta)/alpha;
00279 }
00280 H.N++;
00281
00282 vec.NormalizeAndAddVector(r,inv_sqrt_r_dot_z,A.subset());
00283 }
00284
00285 pAp = innerProductReal(p,Ap,A.subset());
00286 alphaprev = alpha;
00287 alpha = r_dot_z/pAp;
00288 for(int l=0; l < Ls; ++l)
00289 x[l][A.subset()] += alpha*p[l];
00290 for(int l=0; l < Ls; ++l)
00291 r[l][A.subset()] -= alpha*Ap[l];
00292
00293
00294
00295 if(k>MaxCG){
00296 res.n_count = k;
00297 res.resid = sqrt(r_dot_z);
00298 QDP_error_exit("too many CG iterations: count = %d", res.n_count);
00299 END_CODE();
00300 return res;
00301 }
00302 #if 1
00303 QDPIO::cout << "InvEigCG2: k = " << k ;
00304 QDPIO::cout << " r_dot_z = " << r_dot_z << endl;
00305 #endif
00306 }
00307
00308 if(Neig>0)
00309 {
00310 evec.resize(Neig,Ls);
00311 eval.resize(Neig);
00312
00313 #define USE_LAST_VECTORS
00314 #ifdef USE_LAST_VECTORS
00315
00316 multi2d<DComplex> Hevecs(H.mat);
00317 multi1d<Double> Heval;
00318 char V = 'V'; char U = 'U';
00319 QDPLapack::zheev(V,U,H.N-1,Hevecs,Heval);
00320
00321 for(int i(0);i<Neig;i++){
00322 for(int l=0; l < Ls; ++l)
00323 evec[i][l][A.subset()] = zero;
00324 eval[i] = Heval[i];
00325 for(int j(0);j<H.N-1;j++)
00326 for(int l=0; l < Ls; ++l)
00327 evec[i][l][A.subset()] +=Hevecs(i,j)*vec[j][l];
00328 }
00329 #else
00330 for(int i(0);i<Neig;i++){
00331 evec[i][A.subset()] = vec[i];
00332 eval[i] = real(H(i,i));
00333 }
00334 #endif
00335 }
00336 res.n_count = k;
00337 res.resid = sqrt(r_dot_z);
00338 swatch.stop();
00339 QDPIO::cout << "InvEigCG2: k = " << k << endl;
00340 flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
00341 END_CODE();
00342 return res;
00343 }
00344
00345
00346 template<typename T>
00347 SystemSolverResults_t old_InvEigCG2_T(const LinearOperatorArray<T>& A,
00348 multi1d<T>& x,
00349 const multi1d<T>& b,
00350 multi1d<Double>& eval,
00351 multi2d<T>& evec,
00352 int Neig,
00353 int Nmax,
00354 const Real& RsdCG, int MaxCG)
00355 {
00356 START_CODE();
00357
00358 FlopCounter flopcount;
00359 flopcount.reset();
00360 StopWatch swatch;
00361 swatch.reset();
00362 swatch.start();
00363
00364 const int Ls = A.size();
00365
00366 SystemSolverResults_t res;
00367
00368 multi1d<T> p;
00369 multi1d<T> Ap;
00370 multi1d<T> r,z;
00371
00372 Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
00373 Double alphaprev, alpha,pAp;
00374 Real beta;
00375 Double r_dot_z, r_dot_z_old;
00376
00377
00378
00379 int k = 0;
00380 A(Ap,x,PLUS);
00381 r[A.subset()] = b - Ap;
00382 Double r_norm2 = norm2(r,A.subset());
00383
00384 #if 1
00385 QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size2() << endl;
00386 QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_norm2 << endl;
00387 #endif
00388 Real inorm(Real(1.0/sqrt(r_norm2)));
00389 bool FindEvals = (Neig>0);
00390 int tr;
00391 bool from_restart;
00392 Matrix<DComplex> H(Nmax);
00393 VectorArrays<T> vec(Nmax,Ls);
00394
00395 vec.AddVectors(evec,A.subset());
00396
00397 p[A.subset()] = inorm*r;
00398 vec.AddOrReplaceVector(p,A.subset());
00399
00400 normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset());
00401 normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset());
00402 SubSpaceMatrix(H,A,vec.vec,vec.N);
00403 from_restart = true;
00404 tr = H.N - 1;
00405
00406
00407 Double betaprev;
00408 beta=0.0;
00409 alpha = 1.0;
00410
00411
00412 while(toBool(r_norm2>rsd_sq))
00413 {
00414
00415 z[A.subset()]=r;
00416
00417 r_dot_z = innerProductReal(r,z,A.subset());
00418 k++;
00419 betaprev = beta;
00420 if(k==1){
00421 p[A.subset()] = z;
00422 H.N++;
00423 }
00424 else{
00425 beta = r_dot_z/r_dot_z_old;
00426 p[A.subset()] = z + beta*p;
00427
00428
00429 if(FindEvals){
00430 if(!((from_restart)&&(H.N == tr+1))){
00431 if(!from_restart){
00432 H(H.N-2,H.N-2) = 1/alpha + betaprev/alphaprev;
00433 }
00434 from_restart = false;
00435 H(H.N-1,H.N-2) = -sqrt(beta)/alpha;
00436 H(H.N-2,H.N-1) = -sqrt(beta)/alpha;
00437 }
00438 H.N++;
00439 }
00440
00441 }
00442 A(Ap,p,PLUS);
00443 pAp = innerProductReal(p,Ap,A.subset());
00444
00445 alphaprev = alpha;
00446 alpha = r_dot_z/pAp;
00447 x[A.subset()] += alpha*p;
00448 r[A.subset()] -= alpha*Ap;
00449 r_norm2 = norm2(r,A.subset());
00450 r_dot_z_old = r_dot_z;
00451
00452
00453
00454
00455 if(FindEvals){
00456 if (vec.N==Nmax){
00457
00458 QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<endl;
00459 H(Nmax-1,Nmax-1) = 1/alpha + beta/alphaprev;
00460
00461 #ifdef DEBUG
00462 {
00463 stringstream tag;
00464 tag<<"H"<<k;
00465 OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
00466 }
00467 {
00468 Matrix<DComplex> tmp(Nmax);
00469 SubSpaceMatrix(tmp,A,vec.vec,vec.N);
00470 stringstream tag;
00471 tag<<"H"<<k<<"ex";
00472 OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
00473 }
00474 #endif
00475
00476 multi2d<DComplex> Hevecs(H.mat);
00477 multi1d<Double> Heval;
00478 char V = 'V'; char U = 'U';
00479 QDPLapack::zheev(V,U,Hevecs,Heval);
00480 multi2d<DComplex> Hevecs_old(H.mat);
00481
00482 #ifdef DEBUG
00483 {
00484 multi1d<T> tt_vec(vec.size());
00485 for(int i(0);i<Nmax;i++){
00486 tt_vec[i][A.subset()] = zero;
00487 for(int j(0);j<Nmax;j++)
00488 tt_vec[i][A.subset()] +=Hevecs(i,j)*vec[j];
00489 }
00490
00491
00492 multi1d<T> Av;
00493 for(int i(0);i<Nmax;i++){
00494 A(Av,tt_vec[i],PLUS);
00495 DComplex rq = innerProduct(tt_vec[i],Av,A.subset());
00496 Av[A.subset()] -= Heval[i]*tt_vec[i];
00497 Double tt = sqrt(norm2(Av,A.subset()));
00498 QDPIO::cout<<"1 error eigenvector["<<i<<"] = "<<tt<<" ";
00499 tt = sqrt(norm2(tt_vec[i],A.subset()));
00500 QDPIO::cout<<" --- eval = "<<Heval[i]<<" ";
00501 QDPIO::cout<<" --- rq = "<<real(rq)<<" ";
00502 QDPIO::cout<<"--- norm = "<<tt<<endl ;
00503 }
00504 }
00505 #endif
00506 multi1d<Double> Heval_old;
00507 QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
00508 for(int i(0);i<Nmax;i++)
00509 Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0;
00510 #ifdef DEBUG
00511 {
00512 stringstream tag;
00513 tag<<"Hevecs_old"<<k;
00514 OctavePrintOut(Hevecs_old,Nmax,tag.str(),"Hmatrix.m");
00515 }
00516
00517 #endif
00518 tr = Neig + Neig;
00519
00520 for(int i(Neig);i<tr;i++)
00521 for(int j(0);j<Nmax;j++)
00522 Hevecs(i,j) = Hevecs_old(i-Neig,j);
00523 #ifdef DEBUG
00524 {
00525 stringstream tag;
00526 tag<<"Hevecs"<<k;
00527 OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
00528 }
00529 #endif
00530
00531
00532
00533
00534 multi1d<DComplex> TAU;
00535 QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
00536 char R = 'R'; char L = 'L'; char N ='N'; char C = 'C';
00537 multi2d<DComplex> Htmp = H.mat;
00538 QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
00539 QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
00540
00541
00542 #ifdef DEBUG
00543 {
00544 stringstream tag;
00545 tag<<"Htmp"<<k;
00546 OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00547 }
00548 #endif
00549 QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
00550 for(int i(Neig); i< 2*Neig;i++ )
00551 for(int j(2*Neig); j<Nmax; j++)
00552 Htmp(i,j) =0.0;
00553 #ifdef DEBUG
00554 {
00555 stringstream tag;
00556 tag<<"evecstmp"<<k;
00557 OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00558 }
00559 #endif
00560 QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
00561 #ifdef DEBUG
00562 {
00563 stringstream tag;
00564 tag<<"Yevecstmp"<<k;
00565 OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00566 }
00567 #endif
00568
00569 multi1d<T> tt_vec = vec.vec;
00570 for(int i(0);i<2*Neig;i++){
00571 vec[i][A.subset()] = zero;
00572 for(int j(0);j<Nmax;j++)
00573 vec[i][A.subset()] +=Htmp(i,j)*tt_vec[j];
00574 }
00575
00576 #ifdef DEBUG
00577
00578 {
00579 multi1d<T> Av;
00580 for(int i(0);i<2*Neig;i++){
00581 A(Av,vec[i],PLUS);
00582 DComplex rq = innerProduct(vec[i],Av,A.subset());
00583 Av[A.subset()] -= Heval[i]*vec[i];
00584 Double tt = sqrt(norm2(Av,A.subset()));
00585 QDPIO::cout<<"error eigenvector["<<i<<"] = "<<tt<<" ";
00586 tt = sqrt(norm2(vec[i],A.subset()));
00587 QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
00588 QDPIO::cout<<"--- norm = "<<tt<<endl ;
00589 }
00590
00591 }
00592 #endif
00593 H.mat = 0.0;
00594 for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
00595
00596 multi1d<T> Ar;
00597
00598 A(Ar,r,PLUS);
00599 Ar /= sqrt(r_norm2);
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611 for (int i=0;i<2*Neig;i++){
00612 H(2*Neig,i) = innerProduct(vec[i], Ar, A.subset());
00613 H(i,2*Neig) = conj(H(2*Neig,i));
00614 }
00615 H(2*Neig,2*Neig) = innerProduct(r, Ar, A.subset())/sqrt(r_norm2);
00616
00617 H.N = 2*Neig + 1 ;
00618 from_restart = true;
00619 vec.N = 2*Neig;
00620 #ifdef DEBUG
00621 {
00622 stringstream tag;
00623 tag<<"finalH"<<k;
00624 OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
00625 }
00626 #endif
00627 }
00628
00629 Double inorm = 1.0/sqrt(r_norm2);
00630 vec.NormalizeAndAddVector(r,inorm,A.subset());
00631
00632 }
00633
00634
00635 if(k>MaxCG){
00636 res.n_count = k;
00637 res.resid = sqrt(r_norm2);
00638 QDP_error_exit("too many CG iterations: count = %d", res.n_count);
00639 END_CODE();
00640 return res;
00641 }
00642 #if 1
00643 QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_norm2;
00644 QDPIO::cout << " r_dot_z = " << r_dot_z << endl;
00645 #endif
00646 }
00647 res.n_count = k;
00648 res.resid = sqrt(r_norm2);
00649 if(FindEvals)
00650 {
00651
00652
00653
00654 normGramSchmidt(vec.vec,0,2*Neig,A.subset());
00655 normGramSchmidt(vec.vec,0,2*Neig,A.subset());
00656 Matrix<DComplex> Htmp(2*Neig);
00657 SubSpaceMatrix(Htmp,A,vec.vec,2*Neig);
00658
00659 char V = 'V'; char U = 'U';
00660 multi1d<Double> tt_eval;
00661 QDPLapack::zheev(V,U,Htmp.mat,tt_eval);
00662 evec.resize(Neig,Ls);
00663 eval.resize(Neig);
00664 for(int i(0);i<Neig;i++){
00665 evec[i][A.subset()] = zero;
00666 eval[i] = tt_eval[i];
00667 for(int j(0);j<2*Neig;j++)
00668 evec[i][A.subset()] += Htmp(i,j)*vec[j];
00669 }
00670
00671
00672
00673 #ifdef DEBUG_FINAL
00674
00675 {
00676 multi1d<T> Av;
00677 for(int i(0);i<Neig;i++)
00678 {
00679 A(Av,evec[i],PLUS);
00680 DComplex rq = innerProduct(evec[i],Av,A.subset());
00681 Av[A.subset()] -= eval[i]*evec[i];
00682 Double tt = sqrt(norm2(Av,A.subset()));
00683 QDPIO::cout<<"FINAL: error eigenvector["<<i<<"] = "<<tt<<" ";
00684 tt = sqrt(norm2(evec[i],A.subset()));
00685 QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
00686 QDPIO::cout<<"--- norm = "<<tt<<endl ;
00687 }
00688
00689 }
00690 #endif
00691 }
00692
00693 res.n_count = k;
00694 res.resid = sqrt(r_norm2);
00695 swatch.stop();
00696 QDPIO::cout << "InvEigCG2: k = " << k << endl;
00697 flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
00698 END_CODE();
00699 return res;
00700 }
00701
00702
00703 template<typename T>
00704 SystemSolverResults_t vecPrecondCG_T(const LinearOperatorArray<T>& A,
00705 multi1d<T>& x,
00706 const multi1d<T>& b,
00707 const multi1d<Double>& eval,
00708 const multi2d<T>& evec,
00709 int startV, int endV,
00710 const Real& RsdCG, int MaxCG)
00711 {
00712 START_CODE();
00713
00714 FlopCounter flopcount;
00715 flopcount.reset();
00716 StopWatch swatch;
00717 swatch.reset();
00718 swatch.start();
00719
00720 const int Ls = A.size();
00721
00722 if(endV>eval.size()){
00723 QDP_error_exit("vPrecondCG: not enought evecs eval.size()=%d",eval.size());
00724 }
00725
00726 SystemSolverResults_t res;
00727
00728 multi1d<T> p;
00729 multi1d<T> Ap;
00730 multi1d<T> r,z;
00731
00732 Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
00733 Double alpha,pAp;
00734 Real beta;
00735 Double r_dot_z, r_dot_z_old;
00736
00737
00738
00739 int k = 0;
00740 A(Ap,x,PLUS);
00741 for(int l=0; l < Ls; ++l)
00742 r[l][A.subset()] = b[l] - Ap[l];
00743 Double r_norm2 = norm2(r,A.subset());
00744
00745 #if 1
00746 QDPIO::cout << "vecPrecondCG: k = " << k << " res^2 = " << r_norm2 << endl;
00747 #endif
00748
00749
00750 while(toBool(r_norm2>rsd_sq)){
00751
00752 for(int l=0; l < Ls; ++l)
00753 z[l][A.subset()] = r[l];
00754
00755 for(int i(startV);i<endV;i++){
00756 DComplex d = innerProduct(evec[i],r,A.subset());
00757
00758 for(int l=0; l < Ls; ++l)
00759 z[l][A.subset()] += (1.0/eval[i]-1.0)*d*evec[i][l];
00760 }
00761
00762
00763 r_dot_z = innerProductReal(r,z,A.subset());
00764 k++;
00765 if(k==1){
00766 for(int l=0; l < Ls; ++l)
00767 p[l][A.subset()] = z[l];
00768 }
00769 else{
00770 beta = r_dot_z/r_dot_z_old;
00771 for(int l=0; l < Ls; ++l)
00772 p[l][A.subset()] = z[l] + beta*p[l];
00773 }
00774
00775 A(Ap,p,PLUS);
00776 pAp = innerProductReal(p,Ap,A.subset());
00777
00778 alpha = r_dot_z/pAp;
00779 for(int l=0; l < Ls; ++l)
00780 x[l][A.subset()] += alpha*p[l];
00781 for(int l=0; l < Ls; ++l)
00782 r[l][A.subset()] -= alpha*Ap[l];
00783 r_norm2 = norm2(r,A.subset());
00784 r_dot_z_old = r_dot_z;
00785
00786 if(k>MaxCG){
00787 res.n_count = k;
00788 QDP_error_exit("too many CG iterations: count = %d", res.n_count);
00789 return res;
00790 }
00791 #if 1
00792 QDPIO::cout << "vecPrecondCG: k = " << k << " res^2 = " << r_norm2;
00793 QDPIO::cout << " r_dot_z = " << r_dot_z << endl;
00794 #endif
00795 }
00796
00797 res.n_count = k;
00798 res.resid = sqrt(r_norm2);
00799 swatch.stop();
00800 QDPIO::cout << "vPreconfCG: k = " << k << endl;
00801 flopcount.report("vPrecondCG", swatch.getTimeInSeconds());
00802 END_CODE();
00803 return res;
00804 }
00805
00806 template<typename T>
00807 void InitGuess_T(const LinearOperatorArray<T>& A,
00808 multi1d<T>& x,
00809 const multi1d<T>& b,
00810 const multi1d<Double>& eval,
00811 const multi2d<T>& evec,
00812 int& n_count)
00813 {
00814 int N = evec.size2();
00815 InitGuess(A,x,b,eval,evec,N,n_count);
00816 }
00817
00818 template<typename T>
00819 void InitGuess_T(const LinearOperatorArray<T>& A,
00820 multi1d<T>& x,
00821 const multi1d<T>& b,
00822 const multi1d<Double>& eval,
00823 const multi2d<T>& evec,
00824 int N,
00825 int& n_count)
00826 {
00827 multi1d<T> p;
00828 multi1d<T> Ap;
00829 multi1d<T> r;
00830
00831 const int Ls = A.size();
00832
00833 StopWatch snoop;
00834 snoop.reset();
00835 snoop.start();
00836
00837 A(Ap,x,PLUS);
00838 for(int l=0; l < Ls; ++l)
00839 r[l][A.subset()] = b[l] - Ap[l];
00840
00841
00842 for(int i(0);i<N;i++)
00843 {
00844 DComplex d = innerProduct(evec[i],r,A.subset());
00845 for(int l=0; l < Ls; ++l)
00846 x[l][A.subset()] += (d/eval[i])*evec[i][l];
00847
00848
00849 }
00850
00851 snoop.stop();
00852 QDPIO::cout << "InitGuess: time = "
00853 << snoop.getTimeInSeconds()
00854 << " secs" << endl;
00855
00856 n_count = 1;
00857 }
00858
00859
00860
00861
00862
00863
00864 void SubSpaceMatrix(LinAlg::Matrix<DComplex>& H,
00865 const LinearOperatorArray<LatticeFermionF>& A,
00866 const multi2d<LatticeFermionF>& evec,
00867 int Nvecs)
00868 {
00869 SubSpaceMatrix_T(H, A, evec, Nvecs);
00870 }
00871
00872 SystemSolverResults_t InvEigCG2(const LinearOperatorArray<LatticeFermionF>& A,
00873 multi1d<LatticeFermionF>& x,
00874 const multi1d<LatticeFermionF>& b,
00875 multi1d<Double>& eval,
00876 multi2d<LatticeFermionF>& evec,
00877 int Neig,
00878 int Nmax,
00879 const Real& RsdCG, int MaxCG)
00880 {
00881 InvEigCG2_T(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG);
00882 }
00883
00884 SystemSolverResults_t vecPrecondCG(const LinearOperatorArray<LatticeFermionF>& A,
00885 multi1d<LatticeFermionF>& x,
00886 const multi1d<LatticeFermionF>& b,
00887 const multi1d<Double>& eval,
00888 const multi2d<LatticeFermionF>& evec,
00889 int startV, int endV,
00890 const Real& RsdCG, int MaxCG)
00891 {
00892 vecPrecondCG_T(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
00893 }
00894
00895 void InitGuess(const LinearOperatorArray<LatticeFermionF>& A,
00896 multi1d<LatticeFermionF>& x,
00897 const multi1d<LatticeFermionF>& b,
00898 const multi1d<Double>& eval,
00899 const multi2d<LatticeFermionF>& evec,
00900 int& n_count)
00901 {
00902 InitGuess_T(A, x, b, eval, evec, n_count);
00903 }
00904
00905 void InitGuess(const LinearOperatorArray<LatticeFermionF>& A,
00906 multi1d<LatticeFermionF>& x,
00907 const multi1d<LatticeFermionF>& b,
00908 const multi1d<Double>& eval,
00909 const multi2d<LatticeFermionF>& evec,
00910 int N,
00911 int& n_count)
00912 {
00913 InitGuess_T(A, x, b, eval, evec, N, n_count);
00914 }
00915
00916
00917
00918 void SubSpaceMatrix(LinAlg::Matrix<DComplex>& H,
00919 const LinearOperatorArray<LatticeFermionD>& A,
00920 const multi2d<LatticeFermionD>& evec,
00921 int Nvecs)
00922 {
00923 SubSpaceMatrix_T(H, A, evec, Nvecs);
00924 }
00925
00926 SystemSolverResults_t InvEigCG2(const LinearOperatorArray<LatticeFermionD>& A,
00927 multi1d<LatticeFermionD>& x,
00928 const multi1d<LatticeFermionD>& b,
00929 multi1d<Double>& eval,
00930 multi2d<LatticeFermionD>& evec,
00931 int Neig,
00932 int Nmax,
00933 const Real& RsdCG, int MaxCG)
00934 {
00935 InvEigCG2_T(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG);
00936 }
00937
00938 SystemSolverResults_t vecPrecondCG(const LinearOperatorArray<LatticeFermionD>& A,
00939 multi1d<LatticeFermionD>& x,
00940 const multi1d<LatticeFermionD>& b,
00941 const multi1d<Double>& eval,
00942 const multi2d<LatticeFermionD>& evec,
00943 int startV, int endV,
00944 const Real& RsdCG, int MaxCG)
00945 {
00946 vecPrecondCG_T(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
00947 }
00948
00949 void InitGuess(const LinearOperatorArray<LatticeFermionD>& A,
00950 multi1d<LatticeFermionD>& x,
00951 const multi1d<LatticeFermionD>& b,
00952 const multi1d<Double>& eval,
00953 const multi2d<LatticeFermionD>& evec,
00954 int& n_count)
00955 {
00956 InitGuess_T(A, x, b, eval, evec, n_count);
00957 }
00958
00959 void InitGuess(const LinearOperatorArray<LatticeFermionD>& A,
00960 multi1d<LatticeFermionD>& x,
00961 const multi1d<LatticeFermionD>& b,
00962 const multi1d<Double>& eval,
00963 const multi2d<LatticeFermionD>& evec,
00964 int N,
00965 int& n_count)
00966 {
00967 InitGuess_T(A, x, b, eval, evec, N, n_count);
00968 }
00969 }
00970
00971 }
00972