inv_eigcg2_array.cc

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 // $Id: inv_eigcg2_array.cc,v 1.3 2008/01/16 19:03:57 edwards Exp $
00003 /*! \file
00004  *  \brief Conjugate-Gradient algorithm with eigenvector acceleration
00005  */
00006 
00007 #include <qdp-lapack.h>
00008 //#include "octave_debug.h"
00009 //#include "octave_debug.cc"
00010 
00011 #include "actions/ferm/invert/inv_eigcg2_array.h"
00012 
00013 #include "actions/ferm/invert/containers.h"
00014 #include "actions/ferm/invert/norm_gram_schm.h"
00015 
00016 //#define DEBUG
00017 #define DEBUG_FINAL
00018 
00019 
00020 // NEEDS A LOT OF CLEAN UP
00021 namespace Chroma 
00022 {
00023   using namespace LinAlg;
00024   //using namespace Octave;
00025 
00026   namespace InvEigCG2ArrayEnv
00027   {
00028     // needs a little clean up... for possible exceptions if the size of H
00029     // is smaller than hind 
00030     // LatticeFermionF
00031     template<typename T>
00032     void SubSpaceMatrix_T(LinAlg::Matrix<DComplex>& H,
00033                           const LinearOperatorArray<T>& A,
00034                           const multi2d<T>& evec,
00035                           int Nvecs)
00036     {
00037       H.N = Nvecs;
00038       if(Nvecs>evec.size2()){
00039         H.N = evec.size2();
00040       }
00041       if(H.N>H.size()){
00042         QDPIO::cerr<<"OOPS! your matrix can't take this many matrix elements\n";
00043         exit(1);        
00044       }
00045       // fill H  with zeros
00046       H.mat = 0.0;
00047     
00048       multi1d<T> Ap;
00049       for(int i(0);i<H.N;i++)
00050       {
00051         A(Ap,evec[i],PLUS);
00052         for(int j(i);j<H.N;j++)
00053         {
00054           H(j,i) = innerProduct(evec[j], Ap, A.subset());
00055           //enforce hermiticity
00056           H(i,j) = conj(H(j,i));
00057           if(i==j) H(i,j) = real(H(i,j));
00058         }
00059       } 
00060     }
00061 
00062 
00063     template<typename T>
00064     SystemSolverResults_t InvEigCG2_T(const LinearOperatorArray<T>& A,
00065                                       multi1d<T>& x, 
00066                                       const multi1d<T>& b,
00067                                       multi1d<Double>& eval, 
00068                                       multi2d<T>& evec,
00069                                       int Neig,
00070                                       int Nmax,
00071                                       const Real& RsdCG, int MaxCG)
00072     {
00073       START_CODE();
00074 
00075       FlopCounter flopcount;
00076       flopcount.reset();
00077       StopWatch swatch;
00078       swatch.reset();
00079       swatch.start();
00080     
00081       const int Ls = A.size();
00082 
00083       SystemSolverResults_t  res;
00084     
00085       multi1d<T> p; 
00086       multi1d<T> Ap; 
00087       multi1d<T> r;
00088       //multi1d<T> z;
00089 
00090       Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
00091       Double alphaprev, alpha,pAp;     
00092       Real beta; 
00093       Double betaprev ;
00094       Double r_dot_z, r_dot_z_old;
00095 
00096       int k = 0;
00097       A(Ap,x,PLUS);
00098       for(int l=0; l < Ls; ++l)
00099         r[l][A.subset()] = b[l] - Ap[l];
00100 
00101 
00102 #if 1
00103       QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size2() << endl;
00104       QDPIO::cout << "InvEigCG2: k = " << k << "  res^2 = " << r_dot_z << endl;
00105 #endif
00106       Matrix<DComplex> H(Nmax); // square matrix containing the tridiagonal
00107       VectorArrays<T> vec(Nmax,Ls); // contains the vectors we use...
00108       
00109 
00110       beta=0.0;
00111       alpha = 1.0;
00112       multi1d<T> Ap_prev;
00113       multi1d<T> tt;
00114 
00115       // Algorithm from page 529 of Golub and Van Loan
00116       // Modified to match the m-code from A. Stathopoulos
00117       while(toBool(r_dot_z>rsd_sq))
00118       {
00119         /** preconditioning algorithm **/
00120         //z[A.subset()]=r; //preconditioning can be added here
00121         //r_dot_z = innerProductReal(r,z,A.subset());
00122         //****//
00123         r_dot_z_old = r_dot_z;
00124         r_dot_z = norm2(r,A.subset());
00125         Double inv_sqrt_r_dot_z = 1.0/sqrt(r_dot_z);
00126         k++;
00127         if(k==1){
00128           //p[A.subset()] = z;  
00129           for(int l=0; l < Ls; ++l)
00130             p[l][A.subset()] = r[l];    
00131         }
00132         else{
00133           betaprev = beta;
00134           beta = r_dot_z/r_dot_z_old;
00135           //p[A.subset()] = z + beta*p; 
00136           for(int l=0; l < Ls; ++l)
00137             p[l][A.subset()] = r[l] + beta*p[l];
00138         }
00139         //-------- Eigenvalue eigenvector finding code ------
00140         if((Neig>0)&& (H.N == Nmax)) 
00141           for(int l=0; l < Ls; ++l)
00142             Ap_prev[l][A.subset()] = Ap[l];
00143         //---------------------------------------------------
00144         A(Ap,p,PLUS);
00145         
00146         //-------- Eigenvalue eigenvector finding code ------
00147         if(Neig>0)
00148         {
00149           if(k>1)
00150             H(H.N-1,H.N-1) = 1/alpha + betaprev/alphaprev;
00151           if(vec.N==Nmax)
00152           {
00153             QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<endl;
00154 #ifdef DEBUG
00155             {
00156               stringstream tag;
00157               tag<<"H"<<k;
00158               OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
00159             }
00160             {
00161               Matrix<DComplex> tmp(Nmax); 
00162               SubSpaceMatrix(tmp,A,vec.vec,vec.N);
00163               stringstream tag;
00164               tag<<"H"<<k<<"ex";
00165               OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
00166             }
00167 #endif
00168             multi2d<DComplex> Hevecs(H.mat);
00169             multi1d<Double> Heval;
00170             char V = 'V'; char U = 'U';
00171             QDPLapack::zheev(V,U,Nmax,Hevecs,Heval);
00172 #ifdef DEBUG
00173             {
00174               stringstream tag;
00175               tag<<"Hevecs"<<k;
00176               OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
00177             }
00178             for(int i(0);i<Nmax;i++)
00179               QDPIO::cout<<" eignvalue: "<<Heval[i]<<endl;
00180 #endif
00181             multi2d<DComplex> Hevecs_old(H.mat);
00182             multi1d<Double> Heval_old;
00183             QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
00184             for(int i(0);i<Nmax;i++)        
00185               Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0;
00186             
00187             //Reduce to 2*Neig vectors
00188             H.N = Neig + Neig; // Thickness of restart 2*Neig
00189 
00190             for(int i(Neig);i<2*Neig;i++) 
00191               for(int j(0);j<Nmax;j++)      
00192                 Hevecs(i,j) = Hevecs_old(i-Neig,j);
00193 
00194             // Orthogonalize the last Neig vecs (keep the first Neig fixed)
00195             // zgeqrf(Nmax, 2*Neig, Hevecs, Nmax,
00196             //  TAU_CMPLX_ofsize_2Neig, Workarr, 2*Neig*Lapackblocksize, info);
00197             multi1d<DComplex> TAU;
00198             QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
00199             char R = 'R'; char L = 'L'; char N ='N'; char C = 'C';
00200             multi2d<DComplex> Htmp = H.mat;
00201             QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
00202             QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
00203 
00204             QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
00205 #ifdef DEBUG
00206             {
00207               stringstream tag;
00208               tag<<"Htmp"<<k;
00209               OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00210             }
00211 #endif
00212             for(int i(Neig); i< 2*Neig;i++ ) // mhpws prepei na einai 0..2*Neig
00213               for(int j(2*Neig); j<Nmax; j++) 
00214                 Htmp(i,j) =0.0;
00215 
00216 #ifdef DEBUG
00217             {
00218               stringstream tag;
00219               tag<<"HtmpBeforeZUM"<<k;
00220               OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00221             }
00222 #endif
00223             QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
00224 #ifdef DEBUG
00225             {
00226               stringstream tag;
00227               tag<<"HtmpAfeterZUM"<<k;
00228               OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00229             }
00230 #endif
00231             
00232 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
00233             multi2d<T> tt_vec(2*Neig,Ls);
00234             for(int i(0);i<2*Neig;i++){
00235               for(int l=0; l < Ls; ++l)
00236                 tt_vec[i][l][A.subset()] = zero;
00237               for(int j(0);j<Nmax;j++)
00238                 for(int l=0; l < Ls; ++l)
00239                   tt_vec[i][l][A.subset()] +=Htmp(i,j)*vec[j][l]; 
00240             }
00241             for(int i(0);i<2*Neig;i++)
00242               for(int l=0; l < Ls; ++l)
00243                 vec[i][l][A.subset()] = tt_vec[i][l];
00244 #else
00245             // Here I need the restart_X bit
00246             // zgemm("N", "N", Ns*Nc*Vol/2, 2*Neig, Nmax, 1.0,
00247             //      vec.vec, Ns*Nc*Vol, Htmp, Nmax+1, 0.0, tt_vec, Ns*Nc*Vol);
00248             // copy apo tt_vec se vec.vec
00249 
00250 #endif
00251             vec.N = 2*Neig; // restart the vectors to keep
00252 
00253             H.mat = 0.0; // zero out H 
00254             for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
00255 
00256             //A(tt,r,PLUS);
00257             for(int l=0; l < Ls; ++l)
00258               tt[l] = Ap[l] - beta*Ap_prev[l]; //avoid the matvec
00259 
00260 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
00261             for (int i=0;i<2*Neig;i++){
00262               H(2*Neig,i)=innerProduct(vec[i],tt,A.subset())*inv_sqrt_r_dot_z;
00263               H(i,2*Neig)=conj(H(2*Neig,i));
00264               //H(i,2*Neig)=innerProduct(vec[i],tt,A.subset())*inv_sqrt_r_dot_z;
00265               //H(2*Neig,i)=conj(H(i,2*Neig));
00266             }
00267 #else  
00268             // Optimized code for creating the H matrix row and column
00269             // asume even-odd layout: Can I check if this is the case at 
00270             // compile time? or even at run time? This is a good test to 
00271             // have to avoid wrong results.
00272             
00273             
00274 #endif
00275           }//H.N==Nmax
00276           else{
00277             if(k>1)
00278               H(H.N-1,H.N) = H(H.N,H.N-1) = -sqrt(beta)/alpha;
00279           }
00280           H.N++;
00281           //vec.NormalizeAndAddVector(z,inv_sqrt_r_dot_z,A.subset());
00282           vec.NormalizeAndAddVector(r,inv_sqrt_r_dot_z,A.subset());
00283         }
00284 
00285         pAp = innerProductReal(p,Ap,A.subset());
00286         alphaprev = alpha;// additional line for Eigenvalue eigenvector code
00287         alpha = r_dot_z/pAp;
00288         for(int l=0; l < Ls; ++l)
00289           x[l][A.subset()] += alpha*p[l];
00290         for(int l=0; l < Ls; ++l)
00291           r[l][A.subset()] -= alpha*Ap[l];
00292               
00293         
00294         //---------------------------------------------------
00295         if(k>MaxCG){
00296           res.n_count = k;
00297           res.resid   = sqrt(r_dot_z);
00298           QDP_error_exit("too many CG iterations: count = %d", res.n_count);
00299           END_CODE();
00300           return res;
00301         }
00302 #if 1
00303         QDPIO::cout << "InvEigCG2: k = " << k ;
00304         QDPIO::cout << "  r_dot_z = " << r_dot_z << endl;
00305 #endif
00306       }//end CG loop
00307 
00308       if(Neig>0)
00309       {
00310         evec.resize(Neig,Ls);
00311         eval.resize(Neig);
00312         
00313 #define  USE_LAST_VECTORS
00314 #ifdef USE_LAST_VECTORS
00315         
00316         multi2d<DComplex> Hevecs(H.mat);
00317         multi1d<Double> Heval;
00318         char V = 'V'; char U = 'U';
00319         QDPLapack::zheev(V,U,H.N-1,Hevecs,Heval);
00320         
00321         for(int i(0);i<Neig;i++){
00322           for(int l=0; l < Ls; ++l)
00323             evec[i][l][A.subset()] = zero;
00324           eval[i] = Heval[i];
00325           for(int j(0);j<H.N-1;j++)
00326             for(int l=0; l < Ls; ++l)
00327               evec[i][l][A.subset()] +=Hevecs(i,j)*vec[j][l];
00328         }
00329 #else
00330         for(int i(0);i<Neig;i++){
00331           evec[i][A.subset()] = vec[i];
00332           eval[i] = real(H(i,i));
00333         }
00334 #endif
00335       }
00336       res.n_count = k;
00337       res.resid = sqrt(r_dot_z);
00338       swatch.stop();
00339       QDPIO::cout << "InvEigCG2: k = " << k << endl;
00340       flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
00341       END_CODE();
00342       return res;
00343     }
00344 
00345    
00346     template<typename T>
00347     SystemSolverResults_t old_InvEigCG2_T(const LinearOperatorArray<T>& A,
00348                                           multi1d<T>& x, 
00349                                           const multi1d<T>& b,
00350                                           multi1d<Double>& eval, 
00351                                           multi2d<T>& evec,
00352                                           int Neig,
00353                                           int Nmax,
00354                                           const Real& RsdCG, int MaxCG)
00355     {
00356       START_CODE();
00357 
00358       FlopCounter flopcount;
00359       flopcount.reset();
00360       StopWatch swatch;
00361       swatch.reset();
00362       swatch.start();
00363 
00364       const int Ls = A.size();
00365     
00366       SystemSolverResults_t  res;
00367     
00368       multi1d<T> p; 
00369       multi1d<T> Ap; 
00370       multi1d<T> r,z;
00371 
00372       Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
00373       Double alphaprev, alpha,pAp;
00374       Real beta;
00375       Double r_dot_z, r_dot_z_old;
00376       //Complex r_dot_z, r_dot_z_old,beta;
00377       //Complex alpha,pAp;
00378 
00379       int k = 0;
00380       A(Ap,x,PLUS);
00381       r[A.subset()] = b - Ap;
00382       Double r_norm2 = norm2(r,A.subset());
00383 
00384 #if 1
00385       QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size2() << endl;
00386       QDPIO::cout << "InvEigCG2: k = " << k << "  res^2 = " << r_norm2 << endl;
00387 #endif
00388       Real inorm(Real(1.0/sqrt(r_norm2)));
00389       bool FindEvals = (Neig>0);
00390       int tr; // Don't know what this does...
00391       bool from_restart;
00392       Matrix<DComplex> H(Nmax); // square matrix containing the tridiagonal
00393       VectorArrays<T> vec(Nmax,Ls); // contains the vectors we use...
00394       //-------- Eigenvalue eigenvector finding code ------
00395       vec.AddVectors(evec,A.subset());
00396       //QDPIO::cout<<"vec.N="<<vec.N<<endl;
00397       p[A.subset()] = inorm*r; // this is not needed GramSchmidt will take care of it
00398       vec.AddOrReplaceVector(p,A.subset());
00399       //QDPIO::cout<<"vec.N="<<vec.N<<endl;
00400       normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset());
00401       normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset()); //call twice: need to improve...
00402       SubSpaceMatrix(H,A,vec.vec,vec.N);
00403       from_restart = true;
00404       tr = H.N - 1; // this is just a flag as far as I can tell  
00405       //-------
00406 
00407       Double betaprev;
00408       beta=0.0;
00409       alpha = 1.0;
00410       // Algorithm from page 529 of Golub and Van Loan
00411       // Modified to match the m-code from A. Stathopoulos
00412       while(toBool(r_norm2>rsd_sq))
00413       {
00414         /** preconditioning algorithm **/
00415         z[A.subset()]=r; //preconditioning can be added here
00416         /**/
00417         r_dot_z = innerProductReal(r,z,A.subset());
00418         k++;
00419         betaprev = beta; // additional line for Eigenvalue eigenvector code
00420         if(k==1){
00421           p[A.subset()] = z;    
00422           H.N++;
00423         }
00424         else{
00425           beta = r_dot_z/r_dot_z_old;
00426           p[A.subset()] = z + beta*p; 
00427           //-------- Eigenvalue eigenvector finding code ------
00428           // fist block
00429           if(FindEvals){
00430             if(!((from_restart)&&(H.N == tr+1))){
00431               if(!from_restart){
00432                 H(H.N-2,H.N-2) = 1/alpha + betaprev/alphaprev;
00433               } 
00434               from_restart = false; 
00435               H(H.N-1,H.N-2) = -sqrt(beta)/alpha;
00436               H(H.N-2,H.N-1) = -sqrt(beta)/alpha;
00437             }
00438             H.N++;
00439           }
00440           //---------------------------------------------------
00441         }
00442         A(Ap,p,PLUS);
00443         pAp = innerProductReal(p,Ap,A.subset());
00444       
00445         alphaprev = alpha;// additional line for Eigenvalue eigenvector code
00446         alpha = r_dot_z/pAp;
00447         x[A.subset()] += alpha*p;
00448         r[A.subset()] -= alpha*Ap;
00449         r_norm2 =  norm2(r,A.subset());
00450         r_dot_z_old = r_dot_z;
00451 
00452       
00453         //-------- Eigenvalue eigenvector finding code ------
00454         // second block
00455         if(FindEvals){
00456           if (vec.N==Nmax){//we already have stored the maximum number of vectors
00457             // The magic begins here....
00458             QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<endl;
00459             H(Nmax-1,Nmax-1) = 1/alpha + beta/alphaprev;
00460 
00461 #ifdef DEBUG
00462             {
00463               stringstream tag;
00464               tag<<"H"<<k;
00465               OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
00466             }
00467             {
00468               Matrix<DComplex> tmp(Nmax); 
00469               SubSpaceMatrix(tmp,A,vec.vec,vec.N);
00470               stringstream tag;
00471               tag<<"H"<<k<<"ex";
00472               OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
00473             }
00474 #endif
00475             //exit(1);
00476             multi2d<DComplex> Hevecs(H.mat);
00477             multi1d<Double> Heval;
00478             char V = 'V'; char U = 'U';
00479             QDPLapack::zheev(V,U,Hevecs,Heval);
00480             multi2d<DComplex> Hevecs_old(H.mat);
00481 
00482 #ifdef DEBUG
00483             {
00484               multi1d<T> tt_vec(vec.size());
00485               for(int i(0);i<Nmax;i++){
00486                 tt_vec[i][A.subset()] = zero;
00487                 for(int j(0);j<Nmax;j++)
00488                   tt_vec[i][A.subset()] +=Hevecs(i,j)*vec[j]; // NEED TO CHECK THE INDEXINT
00489               }
00490               // CHECK IF vec are eigenvectors...
00491           
00492               multi1d<T> Av;
00493               for(int i(0);i<Nmax;i++){
00494                 A(Av,tt_vec[i],PLUS);
00495                 DComplex rq = innerProduct(tt_vec[i],Av,A.subset());
00496                 Av[A.subset()] -= Heval[i]*tt_vec[i];
00497                 Double tt = sqrt(norm2(Av,A.subset()));
00498                 QDPIO::cout<<"1 error eigenvector["<<i<<"] = "<<tt<<" ";
00499                 tt =  sqrt(norm2(tt_vec[i],A.subset()));
00500                 QDPIO::cout<<" --- eval = "<<Heval[i]<<" ";
00501                 QDPIO::cout<<" --- rq = "<<real(rq)<<" ";
00502                 QDPIO::cout<<"--- norm = "<<tt<<endl ;
00503               }
00504             }
00505 #endif
00506             multi1d<Double> Heval_old;
00507             QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
00508             for(int i(0);i<Nmax;i++)        
00509               Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0;
00510 #ifdef DEBUG
00511             {
00512               stringstream tag;
00513               tag<<"Hevecs_old"<<k;
00514               OctavePrintOut(Hevecs_old,Nmax,tag.str(),"Hmatrix.m");
00515             }
00516           
00517 #endif
00518             tr = Neig + Neig; // v_old = Neig optimal choice
00519            
00520             for(int i(Neig);i<tr;i++)
00521               for(int j(0);j<Nmax;j++)      
00522                 Hevecs(i,j) = Hevecs_old(i-Neig,j);
00523 #ifdef DEBUG
00524             {
00525               stringstream tag;
00526               tag<<"Hevecs"<<k;
00527               OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
00528             }
00529 #endif
00530 
00531             // Orthogonalize the last Neig vecs (keep the first Neig fixed)
00532             // zgeqrf(Nmax, 2*Neig, Hevecs, Nmax,
00533             //    TAU_CMPLX_ofsize_2Neig, Workarr, 2*Neig*Lapackblocksize, info);
00534             multi1d<DComplex> TAU;
00535             QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
00536             char R = 'R'; char L = 'L'; char N ='N'; char C = 'C';
00537             multi2d<DComplex> Htmp = H.mat;
00538             QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
00539             QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
00540             // Notice that now H is of size 2Neig x 2Neig, 
00541             // but still with LDA = Nmax 
00542 #ifdef DEBUG
00543             {
00544               stringstream tag;
00545               tag<<"Htmp"<<k;
00546               OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00547             }
00548 #endif
00549             QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
00550             for(int i(Neig); i< 2*Neig;i++ ) 
00551               for(int j(2*Neig); j<Nmax; j++)
00552                 Htmp(i,j) =0.0;
00553 #ifdef DEBUG
00554             {
00555               stringstream tag;
00556               tag<<"evecstmp"<<k;
00557               OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00558             }
00559 #endif
00560             QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
00561 #ifdef DEBUG
00562             {
00563               stringstream tag;
00564               tag<<"Yevecstmp"<<k;
00565               OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
00566             }
00567 #endif
00568             // Here I need the restart_X bit
00569             multi1d<T> tt_vec = vec.vec;
00570             for(int i(0);i<2*Neig;i++){
00571               vec[i][A.subset()] = zero;
00572               for(int j(0);j<Nmax;j++)
00573                 vec[i][A.subset()] +=Htmp(i,j)*tt_vec[j]; // NEED TO CHECK THE INDEXING
00574             }
00575 
00576 #ifdef DEBUG
00577             // CHECK IF vec are eigenvectors...
00578             {
00579               multi1d<T> Av;
00580               for(int i(0);i<2*Neig;i++){
00581                 A(Av,vec[i],PLUS);
00582                 DComplex rq = innerProduct(vec[i],Av,A.subset());
00583                 Av[A.subset()] -= Heval[i]*vec[i];
00584                 Double tt = sqrt(norm2(Av,A.subset()));
00585                 QDPIO::cout<<"error eigenvector["<<i<<"] = "<<tt<<" ";
00586                 tt =  sqrt(norm2(vec[i],A.subset()));
00587                 QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
00588                 QDPIO::cout<<"--- norm = "<<tt<<endl ;
00589               }
00590 
00591             }
00592 #endif
00593             H.mat = 0.0; // zero out H 
00594             for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
00595           
00596             multi1d<T> Ar;
00597             // Need to reorganize so that we aboid this extra matvec
00598             A(Ar,r,PLUS);
00599             Ar /= sqrt(r_norm2); 
00600             // this has the oposite convension than the subspace matrix
00601             // this is the reason the vector reconstruction in here does not
00602             // need the conj(H) while in the Rayleigh Ritz refinement 
00603             // it does need it. 
00604             // In here the only complex matrix elements are these computined
00605             // in the next few lines. This is why the inconsistency 
00606             //  does not matter anywhere else. 
00607             // In the refinement step though all matrix elements are complex
00608             // hence things break down.
00609             // Need to fix the convension so that we do not
00610             // have these inconsistencies.
00611             for (int i=0;i<2*Neig;i++){
00612               H(2*Neig,i) = innerProduct(vec[i], Ar, A.subset());
00613               H(i,2*Neig) = conj(H(2*Neig,i));
00614             }
00615             H(2*Neig,2*Neig) = innerProduct(r, Ar, A.subset())/sqrt(r_norm2);
00616           
00617             H.N = 2*Neig + 1 ; // why this ?
00618             from_restart = true;
00619             vec.N = 2*Neig; // only keep the lowest Neig. Is this correct???
00620 #ifdef DEBUG
00621             {
00622               stringstream tag;
00623               tag<<"finalH"<<k;
00624               OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
00625             }
00626 #endif
00627           }
00628           // Here we add a vector in the list
00629           Double inorm = 1.0/sqrt(r_norm2);
00630           vec.NormalizeAndAddVector(r,inorm,A.subset());
00631           // Shouldn't be the z vector when a preconditioner is used?
00632         }
00633         //---------------------------------------------------
00634 
00635         if(k>MaxCG){
00636           res.n_count = k;
00637           res.resid   = sqrt(r_norm2);
00638           QDP_error_exit("too many CG iterations: count = %d", res.n_count);
00639           END_CODE();
00640           return res;
00641         }
00642 #if 1
00643         QDPIO::cout << "InvEigCG2: k = " << k << "  res^2 = " << r_norm2;
00644         QDPIO::cout << "  r_dot_z = " << r_dot_z << endl;
00645 #endif
00646       }
00647       res.n_count = k;
00648       res.resid = sqrt(r_norm2);
00649       if(FindEvals)
00650       {
00651         // Evec Code ------ Before we return --------
00652         // vs is the current number of vectors stored
00653         // Neig is the number of eigenvector we want to compute
00654         normGramSchmidt(vec.vec,0,2*Neig,A.subset());
00655         normGramSchmidt(vec.vec,0,2*Neig,A.subset());
00656         Matrix<DComplex> Htmp(2*Neig);
00657         SubSpaceMatrix(Htmp,A,vec.vec,2*Neig);
00658 
00659         char V = 'V'; char U = 'U';
00660         multi1d<Double> tt_eval;
00661         QDPLapack::zheev(V,U,Htmp.mat,tt_eval);
00662         evec.resize(Neig,Ls);
00663         eval.resize(Neig);
00664         for(int i(0);i<Neig;i++){
00665           evec[i][A.subset()] = zero;
00666           eval[i] = tt_eval[i];
00667           for(int j(0);j<2*Neig;j++)
00668             evec[i][A.subset()] += Htmp(i,j)*vec[j];
00669         }
00670       
00671         // I will do the checking of eigenvector quality outside this routine
00672         // -------------------------------------------
00673 #ifdef DEBUG_FINAL
00674         // CHECK IF vec are eigenvectors...                                   
00675         {
00676           multi1d<T> Av;
00677           for(int i(0);i<Neig;i++)
00678           {
00679             A(Av,evec[i],PLUS);
00680             DComplex rq = innerProduct(evec[i],Av,A.subset());
00681             Av[A.subset()] -= eval[i]*evec[i];
00682             Double tt = sqrt(norm2(Av,A.subset()));
00683             QDPIO::cout<<"FINAL: error eigenvector["<<i<<"] = "<<tt<<" ";
00684             tt =  sqrt(norm2(evec[i],A.subset()));
00685             QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
00686             QDPIO::cout<<"--- norm = "<<tt<<endl ;
00687           }
00688         
00689         }
00690 #endif
00691       }
00692 
00693       res.n_count = k;
00694       res.resid   = sqrt(r_norm2);
00695       swatch.stop();
00696       QDPIO::cout << "InvEigCG2: k = " << k << endl;
00697       flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
00698       END_CODE();
00699       return res;
00700     }
00701 
00702     // A  should be Hermitian positive definite
00703     template<typename T>
00704     SystemSolverResults_t vecPrecondCG_T(const LinearOperatorArray<T>& A, 
00705                                          multi1d<T>& x, 
00706                                          const multi1d<T>& b, 
00707                                          const multi1d<Double>& eval, 
00708                                          const multi2d<T>& evec, 
00709                                          int startV, int endV,
00710                                          const Real& RsdCG, int MaxCG)
00711     {
00712       START_CODE();
00713 
00714       FlopCounter flopcount;
00715       flopcount.reset();
00716       StopWatch swatch;
00717       swatch.reset();
00718       swatch.start();
00719 
00720       const int Ls = A.size();
00721 
00722       if(endV>eval.size()){
00723         QDP_error_exit("vPrecondCG: not enought evecs eval.size()=%d",eval.size());
00724       }
00725  
00726       SystemSolverResults_t  res;
00727     
00728       multi1d<T> p; 
00729       multi1d<T> Ap; 
00730       multi1d<T> r,z;
00731 
00732       Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
00733       Double alpha,pAp;
00734       Real beta;
00735       Double r_dot_z, r_dot_z_old;
00736       //Complex r_dot_z, r_dot_z_old,beta;
00737       //Complex alpha,pAp;
00738 
00739       int k = 0;
00740       A(Ap,x,PLUS);
00741       for(int l=0; l < Ls; ++l)
00742         r[l][A.subset()] = b[l] - Ap[l];
00743       Double r_norm2 = norm2(r,A.subset());
00744 
00745 #if 1
00746       QDPIO::cout << "vecPrecondCG: k = " << k << "  res^2 = " << r_norm2 << endl;
00747 #endif
00748 
00749       // Algorithm from page 529 of Golub and Van Loan
00750       while(toBool(r_norm2>rsd_sq)){
00751         /** preconditioning algorithm **/
00752         for(int l=0; l < Ls; ++l)
00753           z[l][A.subset()] = r[l]; // not optimal but do it for now...
00754         /**/
00755         for(int i(startV);i<endV;i++){
00756           DComplex d = innerProduct(evec[i],r,A.subset());
00757           //QDPIO::cout<<"vecPrecondCG: "<< d<<" "<<(1.0/eval[i]-1.0)<<endl;
00758           for(int l=0; l < Ls; ++l)
00759             z[l][A.subset()] += (1.0/eval[i]-1.0)*d*evec[i][l];
00760         }
00761         /**/
00762         /**/
00763         r_dot_z = innerProductReal(r,z,A.subset());
00764         k++;
00765         if(k==1){
00766           for(int l=0; l < Ls; ++l)
00767             p[l][A.subset()] = z[l];
00768         }
00769         else{
00770           beta = r_dot_z/r_dot_z_old;
00771           for(int l=0; l < Ls; ++l)
00772             p[l][A.subset()] = z[l] + beta*p[l]; 
00773         }
00774         //Cheb.Qsq(Ap,p);
00775         A(Ap,p,PLUS);
00776         pAp = innerProductReal(p,Ap,A.subset());
00777       
00778         alpha = r_dot_z/pAp;
00779         for(int l=0; l < Ls; ++l)
00780           x[l][A.subset()] += alpha*p[l];
00781         for(int l=0; l < Ls; ++l)
00782           r[l][A.subset()] -= alpha*Ap[l];
00783         r_norm2 =  norm2(r,A.subset());
00784         r_dot_z_old = r_dot_z;
00785 
00786         if(k>MaxCG){
00787           res.n_count = k;
00788           QDP_error_exit("too many CG iterations: count = %d", res.n_count);
00789           return res;
00790         }
00791 #if 1
00792         QDPIO::cout << "vecPrecondCG: k = " << k << "  res^2 = " << r_norm2;
00793         QDPIO::cout << "  r_dot_z = " << r_dot_z << endl;
00794 #endif
00795       }
00796     
00797       res.n_count = k;
00798       res.resid   = sqrt(r_norm2); 
00799       swatch.stop();
00800       QDPIO::cout << "vPreconfCG: k = " << k << endl;
00801       flopcount.report("vPrecondCG", swatch.getTimeInSeconds());
00802       END_CODE();
00803       return res;
00804     }
00805   
00806     template<typename T>
00807     void InitGuess_T(const LinearOperatorArray<T>& A, 
00808                      multi1d<T>& x, 
00809                      const multi1d<T>& b, 
00810                      const multi1d<Double>& eval, 
00811                      const multi2d<T>& evec, 
00812                      int& n_count)
00813     {
00814       int N = evec.size2();
00815       InitGuess(A,x,b,eval,evec,N,n_count);
00816     }
00817 
00818     template<typename T>
00819     void InitGuess_T(const LinearOperatorArray<T>& A, 
00820                      multi1d<T>& x, 
00821                      const multi1d<T>& b, 
00822                      const multi1d<Double>& eval, 
00823                      const multi2d<T>& evec, 
00824                      int N, // number of vectors to use
00825                      int& n_count)
00826     {
00827       multi1d<T> p; 
00828       multi1d<T> Ap; 
00829       multi1d<T> r;
00830 
00831       const int Ls = A.size();
00832 
00833       StopWatch snoop;
00834       snoop.reset();
00835       snoop.start();
00836 
00837       A(Ap,x,PLUS);
00838       for(int l=0; l < Ls; ++l)
00839         r[l][A.subset()] = b[l] - Ap[l];
00840       // Double r_norm2 = norm2(r,A.subset());
00841    
00842       for(int i(0);i<N;i++)
00843       {
00844         DComplex d = innerProduct(evec[i],r,A.subset());
00845         for(int l=0; l < Ls; ++l)
00846           x[l][A.subset()] += (d/eval[i])*evec[i][l];
00847         //QDPIO::cout<<"InitCG: "<<d<<" "<<eval[i]<<endl;
00848 
00849       }
00850    
00851       snoop.stop();
00852       QDPIO::cout << "InitGuess:  time = "
00853                   << snoop.getTimeInSeconds() 
00854                   << " secs" << endl;
00855 
00856       n_count = 1;
00857     }
00858 
00859     
00860     //
00861     // Wrappers
00862     //
00863     // LatticeFermionF
00864     void SubSpaceMatrix(LinAlg::Matrix<DComplex>& H,
00865                         const LinearOperatorArray<LatticeFermionF>& A,
00866                         const multi2d<LatticeFermionF>& evec,
00867                         int Nvecs)
00868     {
00869       SubSpaceMatrix_T(H, A, evec, Nvecs);
00870     }
00871 
00872     SystemSolverResults_t InvEigCG2(const LinearOperatorArray<LatticeFermionF>& A,
00873                                     multi1d<LatticeFermionF>& x, 
00874                                     const multi1d<LatticeFermionF>& b,
00875                                     multi1d<Double>& eval, 
00876                                     multi2d<LatticeFermionF>& evec,
00877                                     int Neig,
00878                                     int Nmax,
00879                                     const Real& RsdCG, int MaxCG)
00880     {
00881       InvEigCG2_T(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG);
00882     }
00883   
00884     SystemSolverResults_t vecPrecondCG(const LinearOperatorArray<LatticeFermionF>& A, 
00885                                        multi1d<LatticeFermionF>& x, 
00886                                        const multi1d<LatticeFermionF>& b, 
00887                                        const multi1d<Double>& eval, 
00888                                        const multi2d<LatticeFermionF>& evec, 
00889                                        int startV, int endV,
00890                                        const Real& RsdCG, int MaxCG)
00891     {
00892       vecPrecondCG_T(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
00893     }
00894 
00895     void InitGuess(const LinearOperatorArray<LatticeFermionF>& A, 
00896                    multi1d<LatticeFermionF>& x, 
00897                    const multi1d<LatticeFermionF>& b, 
00898                    const multi1d<Double>& eval, 
00899                    const multi2d<LatticeFermionF>& evec, 
00900                    int& n_count)
00901     {
00902       InitGuess_T(A, x, b, eval, evec, n_count);
00903     }
00904   
00905     void InitGuess(const LinearOperatorArray<LatticeFermionF>& A, 
00906                    multi1d<LatticeFermionF>& x, 
00907                    const multi1d<LatticeFermionF>& b, 
00908                    const multi1d<Double>& eval, 
00909                    const multi2d<LatticeFermionF>& evec, 
00910                    int N, // number of vectors to use
00911                    int& n_count)
00912     {
00913       InitGuess_T(A, x, b, eval, evec, N, n_count);
00914     }
00915 
00916 
00917     // LatticeFermionD
00918     void SubSpaceMatrix(LinAlg::Matrix<DComplex>& H,
00919                         const LinearOperatorArray<LatticeFermionD>& A,
00920                         const multi2d<LatticeFermionD>& evec,
00921                         int Nvecs)
00922     {
00923       SubSpaceMatrix_T(H, A, evec, Nvecs);
00924     }
00925 
00926     SystemSolverResults_t InvEigCG2(const LinearOperatorArray<LatticeFermionD>& A,
00927                                     multi1d<LatticeFermionD>& x, 
00928                                     const multi1d<LatticeFermionD>& b,
00929                                     multi1d<Double>& eval, 
00930                                     multi2d<LatticeFermionD>& evec,
00931                                     int Neig,
00932                                     int Nmax,
00933                                     const Real& RsdCG, int MaxCG)
00934     {
00935       InvEigCG2_T(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG);
00936     }
00937   
00938     SystemSolverResults_t vecPrecondCG(const LinearOperatorArray<LatticeFermionD>& A, 
00939                                        multi1d<LatticeFermionD>& x, 
00940                                        const multi1d<LatticeFermionD>& b, 
00941                                        const multi1d<Double>& eval, 
00942                                        const multi2d<LatticeFermionD>& evec, 
00943                                        int startV, int endV,
00944                                        const Real& RsdCG, int MaxCG)
00945     {
00946       vecPrecondCG_T(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
00947     }
00948 
00949     void InitGuess(const LinearOperatorArray<LatticeFermionD>& A, 
00950                    multi1d<LatticeFermionD>& x, 
00951                    const multi1d<LatticeFermionD>& b, 
00952                    const multi1d<Double>& eval, 
00953                    const multi2d<LatticeFermionD>& evec, 
00954                    int& n_count)
00955     {
00956       InitGuess_T(A, x, b, eval, evec, n_count);
00957     }
00958   
00959     void InitGuess(const LinearOperatorArray<LatticeFermionD>& A, 
00960                    multi1d<LatticeFermionD>& x, 
00961                    const multi1d<LatticeFermionD>& b, 
00962                    const multi1d<Double>& eval, 
00963                    const multi2d<LatticeFermionD>& evec, 
00964                    int N, // number of vectors to use
00965                    int& n_count)
00966     {
00967       InitGuess_T(A, x, b, eval, evec, N, n_count);
00968     }
00969   } // namespace InvEigCG2ArrayEnv
00970   
00971 }// End Namespace Chroma
00972 

Generated on Sun Nov 22 04:33:18 2009 for CHROMA by  doxygen 1.4.7