clover_term_bagel_clover.cc

Go to the documentation of this file.
00001 // $Id: clover_term_bagel_clover.cc,v 1.6 2008/01/21 20:18:50 edwards Exp $
00002 /*! \file
00003  *  \brief Clover term linear operator
00004  *
00005  *  This particular implementation is specific to a scalar-like
00006  *  architecture
00007  */
00008 
00009 #include "chromabase.h"
00010 #include "actions/ferm/linop/clover_term_bagel_clover.h"
00011 #include "meas/glue/mesfield.h"
00012 
00013 #include <bagel_clover.h>
00014 
00015 
00016 namespace Chroma 
00017 { 
00018 
00019 #if 0
00020   //! XML output
00021   inline
00022   XMLWriter& operator<<(XMLWriter& xml, const PrimitiveClovTriang& d)
00023   {
00024     xml.openTag("PrimClovTriang");
00025 
00026     XMLWriterAPI::AttributeList alist;
00027 
00028     xml.openTag("Diag");
00029     for(int i=0; i < 2; ++i)
00030     {
00031       for(int j=0; j < 2*Nc; ++j)
00032       {
00033         alist.clear();
00034         alist.push_back(XMLWriterAPI::Attribute("block", i));
00035         alist.push_back(XMLWriterAPI::Attribute("col", j));
00036 
00037         xml.openTag("elem", alist);
00038         xml << d.diag[i][j];
00039         xml.closeTag();
00040       }
00041     }
00042     xml.closeTag(); // Diag
00043 
00044     xml.openTag("Offd");
00045     for(int i=0; i < 2; ++i)
00046     {
00047       for(int j=0; j < 2*Nc*Nc-Nc; ++j)
00048       {
00049         alist.clear();
00050         alist.push_back(XMLWriterAPI::Attribute("block", i));
00051         alist.push_back(XMLWriterAPI::Attribute("col", j));
00052 
00053         xml.openTag("elem", alist);
00054         xml << d.offd[i][j];
00055         xml.closeTag();
00056       }
00057     }
00058     xml.closeTag(); // Offd
00059 
00060     xml.closeTag(); // PrimClovTriang
00061     return xml;
00062   }
00063 
00064 
00065   // Reader/writers
00066   void read(XMLReader& xml, const string& path, PrimitiveClovTriang& param)
00067   {
00068     QDP_error_exit("clover reader not implemented");
00069   }
00070 
00071   void write(XMLWriter& xml, const string& path, const PrimitiveClovTriang& param)
00072   {
00073     push(xml,path);
00074     xml << param;
00075     pop(xml);
00076   }
00077 #endif
00078 
00079   // Empty constructor. Must use create later
00080   BAGELCloverTerm::BAGELCloverTerm() {
00081     bool retry = false;
00082 
00083     // Allocate the arrays, with proper alignment
00084 
00085     try { 
00086       tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::FAST );
00087     }
00088     catch( std::bad_alloc ) { 
00089       retry = true;
00090     }
00091 
00092     if( retry ) { 
00093       try { 
00094         tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::DEFAULT );
00095       }
00096       catch( std::bad_alloc ) { 
00097         QDPIO::cerr << "Failed to allocate the tri_diag" << endl << flush ;
00098         QDP_abort(1);
00099       }
00100     }
00101 
00102 
00103     retry = false;
00104     try { 
00105       tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::FAST );
00106     }
00107     catch( std::bad_alloc ) { 
00108       retry = true;
00109     }
00110 
00111     if( retry ) { 
00112       try { 
00113         tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::DEFAULT );
00114       }
00115       catch( std::bad_alloc ) { 
00116         QDPIO::cerr << "Failed to allocate the tri_off_diag" << endl << flush ;
00117         QDP_abort(1);
00118       }
00119     }
00120 
00121 
00122   }
00123 
00124 
00125   BAGELCloverTerm::~BAGELCloverTerm() {
00126     if ( tri_diag != 0x0 ) { 
00127       QDP::Allocator::theQDPAllocator::Instance().free(tri_diag);
00128     }
00129     
00130     if ( tri_off_diag != 0x0 ) {        
00131       QDP::Allocator::theQDPAllocator::Instance().free(tri_off_diag);
00132     }   
00133   }
00134 
00135   //! Creation routine
00136   void BAGELCloverTerm::create(Handle< FermState<T,P,Q> > fs,
00137                                const CloverFermActParams& param_)
00138   {
00139     START_CODE();
00140 
00141     u = fs->getLinks();
00142     fbc = fs->getFermBC();
00143     param = param_;
00144 
00145     // Sanity check
00146     if (fbc.operator->() == 0)
00147     {
00148       QDPIO::cerr << "BAGELCloverTerm: error: fbc is null" << endl;
00149       QDP_abort(1);
00150     }
00151 
00152     {
00153       Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
00154       param.clovCoeffR *= Real(0.5) * ff;
00155       param.clovCoeffT *= Real(0.5);
00156     }
00157 
00158     //
00159     // Yuk. Some bits of knowledge of the dslash term are buried in the 
00160     // effective mass term. They show up here. If I wanted some more 
00161     // complicated dslash then this will have to be fixed/adjusted.
00162     //
00163     Real diag_mass;
00164     {
00165       Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
00166       diag_mass = 1 + (Nd-1)*ff + param.Mass;
00167     }
00168 
00169     /* Calculate F(mu,nu) */
00170     multi1d<LatticeColorMatrix> f;
00171     mesField(f, u);
00172     makeClov(f, diag_mass);
00173     
00174     choles_done.resize(rb.numSubsets());
00175     for(int i=0; i < rb.numSubsets(); i++) {
00176       choles_done[i] = false;
00177     }
00178 
00179 #if 0
00180     // Testing code
00181     LatticeFermion foo;
00182     LatticeFermion res1=zero;
00183     LatticeFermion res2=zero;
00184     gaussian(foo);
00185     apply(res1,foo,PLUS, 0);
00186     applySite(res2,foo,PLUS, 0);
00187 
00188     LatticeFermion diff=res1-res2;
00189     XMLFileWriter fred("fred");
00190     push(fred, "stuff");
00191     write(fred, "diff", diff);
00192     pop(fred);
00193 
00194     QDPIO::cout << "sqrt( norm2( diff))= " << sqrt(norm2(diff)) << endl << flush;
00195     QDP_abort(1);
00196 #endif
00197 
00198     END_CODE();
00199   }
00200 
00201     // Now copy
00202   void BAGELCloverTerm::create(Handle< FermState<T,P,Q> > fs,
00203                              const CloverFermActParams& param_,
00204                              const BAGELCloverTerm& from_)
00205   {
00206     START_CODE();
00207 
00208     const BAGELCloverTerm& from = dynamic_cast<const BAGELCloverTerm&>(from_);
00209     u = fs->getLinks();
00210     fbc = fs->getFermBC();
00211     param = param_;
00212     
00213     // Sanity check
00214     if (fbc.operator->() == 0) {
00215       QDPIO::cerr << "BAGELCloverTerm: error: fbc is null" << endl;
00216       QDP_abort(1);
00217     }
00218     
00219     {
00220       Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
00221       param.clovCoeffR *= Real(0.5) * ff;
00222       param.clovCoeffT *= Real(0.5);
00223     }
00224     
00225     //
00226     // Yuk. Some bits of knowledge of the dslash term are buried in the 
00227     // effective mass term. They show up here. If I wanted some more 
00228     // complicated dslash then this will have to be fixed/adjusted.
00229     //
00230     Real diag_mass;
00231     {
00232       Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
00233       diag_mass = 1 + (Nd-1)*ff + param.Mass;
00234     }
00235     
00236     
00237     /* Calculate F(mu,nu) */
00238     //multi1d<LatticeColorMatrix> f;
00239     //mesField(f, u);
00240     //makeClov(f, diag_mass);
00241     
00242     choles_done.resize(rb.numSubsets());
00243     for(int i=0; i < rb.numSubsets(); i++) {
00244       choles_done[i] = from.choles_done[i];
00245     }
00246     
00247     tr_log_diag_ = from.tr_log_diag_;
00248     
00249     // Should be allocated by constructor.
00250     // tri_diag.resize(from.tri_diag.size());
00251     // tri_off_diag.resize(from.tri_off_diag.size());
00252     for(int site=0; site < Layout::sitesOnNode(); site++) {
00253       for(int block = 0; block < 2; block++) { 
00254         for(int comp=0; comp< 6; comp++) { 
00255           tri_diag[site][block][comp] = from.tri_diag[site][block][comp];
00256         }
00257         for(int comp=0; comp < 15; comp++) { 
00258           tri_off_diag[site][block][comp] = from.tri_off_diag[site][block][comp];
00259         }
00260       }
00261     }
00262                             
00263     END_CODE();
00264   }
00265 
00266 
00267   /*
00268    * MAKCLOV 
00269    *
00270    *  In this routine, MAKCLOV calculates
00271 
00272    *    1 - (1/4)*sigma(mu,nu) F(mu,nu)
00273 
00274    *  using F from mesfield
00275 
00276    *    F(mu,nu) =  (1/4) sum_p (1/2) [ U_p(x) - U^dag_p(x) ]
00277 
00278    *  using basis of SPPROD and stores in a lower triangular matrix
00279    *  (no diagonal) plus real diagonal
00280 
00281    *  where
00282    *    U_1 = u(x,mu)*u(x+mu,nu)*u_dag(x+nu,mu)*u_dag(x,nu)
00283    *    U_2 = u(x,nu)*u_dag(x-mu+nu,mu)*u_dag(x-mu,nu)*u(x-mu,mu)
00284    *    U_3 = u_dag(x-mu,mu)*u_dag(x-mu-nu,nu)*u(x-mu-nu,mu)*u(x-nu,nu)
00285    *    U_4 = u_dag(x-nu,nu)*u(x-nu,mu)*u(x-nu+mu,nu)*u_dag(x,mu)
00286 
00287    *  and
00288 
00289    *         | sigF(1)   sigF(3)     0         0     |
00290    *  sigF = | sigF(5)  -sigF(1)     0         0     |
00291    *         |   0         0      -sigF(0)  -sigF(2) |
00292    *         |   0         0      -sigF(4)   sigF(0) |
00293    *  where
00294    *    sigF(i) is a color matrix
00295 
00296    *  sigF(0) = i*(ClovT*E_z + ClovR*B_z)
00297    *          = i*(ClovT*F(3,2) + ClovR*F(1,0))
00298    *  sigF(1) = i*(ClovT*E_z - ClovR*B_z)
00299    *          = i*(ClovT*F(3,2) - ClovR*F(1,0))
00300    *  sigF(2) = i*(E_+ + B_+)
00301    *  sigF(3) = i*(E_+ - B_+)
00302    *  sigF(4) = i*(E_- + B_-)
00303    *  sigF(5) = i*(E_- - B_-)
00304    *  i*E_+ = (i*ClovT*E_x - ClovT*E_y)
00305    *        = (i*ClovT*F(3,0) - ClovT*F(3,1))
00306    *  i*E_- = (i*ClovT*E_x + ClovT*E_y)
00307    *        = (i*ClovT*F(3,0) + ClovT*F(3,1))
00308    *  i*B_+ = (i*ClovR*B_x - ClovR*B_y)
00309    *        = (i*ClovR*F(2,1) + ClovR*F(2,0))
00310    *  i*B_- = (i*ClovR*B_x + ClovR*B_y)
00311    *        = (i*ClovR*F(2,1) - ClovR*F(2,0))
00312 
00313    *  NOTE: I am using  i*F  of the usual F defined by UKQCD, Heatlie et.al.
00314 
00315    *  NOTE: the above definitions assume that the time direction, t_dir,
00316    *        is 3. In general F(k,j) is multiplied with ClovT if either
00317    *        k=t_dir or j=t_dir, and with ClovR otherwise.
00318 
00319    *+++
00320    *  Here are some notes on the origin of this routine. NOTE, ClovCoeff or u0
00321    *  are not actually used in MAKCLOV.
00322    *
00323    *  The clover mass term is suppose to act on a vector like
00324    *
00325    *  chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
00326 
00327    *  Definitions used here (NOTE: no "i")
00328    *   sigma(mu,nu) = gamma(mu)*gamma(nu) - gamma(nu)*gamma(mu)
00329    *                = 2*gamma(mu)*gamma(nu)   for mu != nu
00330    *       
00331    *   chi = sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi   for mu < nu
00332    *       = (1/2) * sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi   for mu != nu
00333    *       = (1/4) * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)*psi
00334    *
00335    *
00336    * chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
00337    *     = psi - (ClovCoeff/u0^3) * kappa * chi
00338    *     == psi - kappa * chi
00339    *
00340    *  We have absorbed ClovCoeff/u0^3 into kappa. A u0 was previously absorbed into kappa
00341    *  for compatibility to ancient conventions. 
00342    *---
00343 
00344    * Arguments:
00345    *  \param f         field strength tensor F(cb,mu,nu)        (Read)
00346    *  \param diag_mass effective mass term                      (Read)
00347    */
00348   void BAGELCloverTerm::makeClov(const multi1d<LatticeColorMatrix>& f, const Real& diag_mass)
00349   {
00350     START_CODE();
00351 
00352     LatticeColorMatrix f0;
00353     LatticeColorMatrix f1;
00354     LatticeColorMatrix f2;
00355     LatticeColorMatrix f3;
00356     LatticeColorMatrix f4;
00357     LatticeColorMatrix f5;
00358 
00359     const int nodeSites = QDP::Layout::sitesOnNode();
00360 
00361     START_CODE();
00362   
00363     if ( Nd != 4 )
00364       QDP_error_exit("expecting Nd == 4", Nd);
00365   
00366     if ( Ns != 4 )
00367       QDP_error_exit("expecting Ns == 4", Ns);
00368   
00369     f0 = f[0] * getCloverCoeff(0,1);
00370     f1 = f[1] * getCloverCoeff(0,2);
00371     f2 = f[2] * getCloverCoeff(0,3);
00372     f3 = f[3] * getCloverCoeff(1,2);
00373     f4 = f[4] * getCloverCoeff(1,3);
00374     f5 = f[5] * getCloverCoeff(2,3);    
00375 
00376     /* Multiply in the appropriate clover coefficient */
00377     /*
00378       switch (param.anisoParam.t_dir)
00379       {
00380       case 1:
00381       f0 = f[0] * param.clovCoeffT;
00382       f1 = f[1] * param.clovCoeffR;
00383       f2 = f[2] * param.clovCoeffR;
00384       f3 = f[3] * param.clovCoeffT;
00385       f4 = f[4] * param.clovCoeffT;
00386       f5 = f[5] * param.clovCoeffR;
00387       break;
00388 
00389       case 2:
00390       f0 = f[0] * param.clovCoeffR;
00391       f1 = f[1] * param.clovCoeffT;
00392       f2 = f[2] * param.clovCoeffR;
00393       f3 = f[3] * param.clovCoeffT;
00394       f4 = f[4] * param.clovCoeffR;
00395       f5 = f[5] * param.clovCoeffT;
00396       break;
00397 
00398       case 3:
00399       f0 = f[0] * param.clovCoeffR;
00400       f1 = f[1] * param.clovCoeffR;
00401       f2 = f[2] * param.clovCoeffT;
00402       f3 = f[3] * param.clovCoeffR;
00403       f4 = f[4] * param.clovCoeffT;
00404       f5 = f[5] * param.clovCoeffT;
00405       break;
00406 
00407       default:
00408       QDPIO::cerr << __func__ << ": invalid time direction: t_dir= " 
00409       << param.anisoParam.t_dir << endl;
00410       QDP_abort(1);
00411       }
00412     */
00413 
00414     // These are now preallocated
00415     //tri_diag.resize(nodeSites);  // hold local lattice
00416     // tri_off_diag.resize(nodeSites);
00417 
00418     /*# Construct diagonal */
00419     for(int site = 0; site < nodeSites; ++site)
00420     {
00421 
00422       for(int jj = 0; jj < 2; jj++)
00423       {
00424         for(int ii = 0; ii < 2*Nc; ii++)
00425         {
00426           tri_diag[site][jj][ii] = diag_mass.elem().elem().elem();
00427         }
00428       }
00429     }
00430 
00431 
00432 
00433     /* The appropriate clover coeffients are already included in the
00434        field strengths F(mu,nu)! */
00435     for(int site = 0; site < nodeSites; ++site)
00436     {
00437       RComplex<REAL> E_minus;
00438       RComplex<REAL> B_minus;
00439       RComplex<REAL> ctmp_0;
00440       RComplex<REAL> ctmp_1;
00441       RScalar<REAL> rtmp_0;
00442       RScalar<REAL> rtmp_1;
00443   
00444       for(int i = 0; i < Nc; ++i)
00445       {
00446         /*# diag_L(i,0) = 1 - i*diag(E_z - B_z) */
00447         /*#             = 1 - i*diag(F(3,2) - F(1,0)) */
00448         ctmp_0 = f5.elem(site).elem().elem(i,i);
00449         ctmp_0 -= f0.elem(site).elem().elem(i,i);
00450         rtmp_0 = imag(ctmp_0);
00451         tri_diag[site][0][i] += rtmp_0;
00452 
00453         /*# diag_L(i+Nc,0) = 1 + i*diag(E_z - B_z) */
00454         /*#                = 1 + i*diag(F(3,2) - F(1,0)) */
00455         tri_diag[site][0][i+Nc] -= rtmp_0;
00456 
00457         /*# diag_L(i,1) = 1 + i*diag(E_z + B_z) */
00458         /*#             = 1 + i*diag(F(3,2) + F(1,0)) */
00459         ctmp_1 = f5.elem(site).elem().elem(i,i);
00460         ctmp_1 += f0.elem(site).elem().elem(i,i);
00461         rtmp_1 = imag(ctmp_1);
00462         tri_diag[site][1][i] -= rtmp_1;
00463 
00464         /*# diag_L(i+Nc,1) = 1 - i*diag(E_z + B_z) */
00465         /*#                = 1 - i*diag(F(3,2) + F(1,0)) */
00466         tri_diag[site][1][i+Nc] += rtmp_1;
00467       }
00468 
00469       /*# Construct lower triangular portion */
00470       /*# Block diagonal terms */
00471       for(int i = 1; i < Nc; ++i)
00472       {
00473         for(int j = 0; j < i; ++j)
00474         {
00475           int elem_ij  = i*(i-1)/2 + j;
00476           int elem_tmp = (i+Nc)*(i+Nc-1)/2 + j+Nc;
00477 
00478           /*# L(i,j,0) = -i*(E_z - B_z)[i,j] */
00479           /*#          = -i*(F(3,2) - F(1,0)) */
00480           ctmp_0 = f0.elem(site).elem().elem(i,j);
00481           ctmp_0 -= f5.elem(site).elem().elem(i,j);
00482           tri_off_diag[site][0][elem_ij] = timesI(ctmp_0);
00483 
00484           /*# L(i+Nc,j+Nc,0) = +i*(E_z - B_z)[i,j] */
00485           /*#                = +i*(F(3,2) - F(1,0)) */
00486           tri_off_diag[site][0][elem_tmp] = -tri_off_diag[site][0][elem_ij];
00487 
00488           /*# L(i,j,1) = i*(E_z + B_z)[i,j] */
00489           /*#          = i*(F(3,2) + F(1,0)) */
00490           ctmp_1 = f5.elem(site).elem().elem(i,j);
00491           ctmp_1 += f0.elem(site).elem().elem(i,j);
00492           tri_off_diag[site][1][elem_ij] = timesI(ctmp_1);
00493 
00494           /*# L(i+Nc,j+Nc,1) = -i*(E_z + B_z)[i,j] */
00495           /*#                = -i*(F(3,2) + F(1,0)) */
00496           tri_off_diag[site][1][elem_tmp] = -tri_off_diag[site][1][elem_ij];
00497         }
00498       }
00499       
00500       /*# Off-diagonal */
00501       for(int i = 0; i < Nc; ++i)
00502       {
00503         for(int j = 0; j < Nc; ++j)
00504         {
00505           // Flipped index
00506           // by swapping i <-> j. In the past i would run slow
00507           // and now j runs slow
00508           int elem_ij  = (i+Nc)*(i+Nc-1)/2 + j;
00509 
00510           /*# i*E_- = (i*E_x + E_y) */
00511           /*#       = (i*F(3,0) + F(3,1)) */
00512           E_minus = timesI(f2.elem(site).elem().elem(i,j));
00513           E_minus += f4.elem(site).elem().elem(i,j);
00514 
00515           /*# i*B_- = (i*B_x + B_y) */
00516           /*#       = (i*F(2,1) - F(2,0)) */
00517           B_minus = timesI(f3.elem(site).elem().elem(i,j));
00518           B_minus -= f1.elem(site).elem().elem(i,j);
00519 
00520           /*# L(i+Nc,j,0) = -i*(E_- - B_-)  */
00521           tri_off_diag[site][0][elem_ij] = B_minus - E_minus;
00522 
00523           /*# L(i+Nc,j,1) = +i*(E_- + B_-)  */
00524           tri_off_diag[site][1][elem_ij] = E_minus + B_minus;
00525         }
00526       }
00527         
00528 
00529     }
00530 
00531 
00532     END_CODE();
00533   }
00534   
00535 
00536   //! Invert
00537   /*!
00538    * Computes the inverse of the term on cb using Cholesky
00539    */
00540   void BAGELCloverTerm::choles(int cb)
00541   {
00542     START_CODE();
00543 
00544     // When you are doing the cholesky - also fill out the trace_log_diag piece)
00545     // chlclovms(tr_log_diag_, cb);
00546     ldagdlinv(tr_log_diag_,cb);
00547     
00548     END_CODE();
00549   }
00550 
00551 
00552   //! Invert
00553   /*!
00554    * Computes the inverse of the term on cb using Cholesky
00555    *
00556    * \return logarithm of the determinant  
00557    */
00558   Double BAGELCloverTerm::cholesDet(int cb) const
00559   {
00560     START_CODE();
00561 
00562     if( choles_done[cb] == false ) 
00563     {
00564       QDPIO::cout << "Error: YOu have not done the Cholesky.on this operator on this subset" << endl;
00565       QDPIO::cout << "You sure you shouldn't be asking invclov?" << endl;
00566       QDP_abort(1);
00567     }
00568 
00569     END_CODE();
00570 
00571     return sum(tr_log_diag_, rb[cb]);
00572   }    
00573 
00574    /*! An LDL^\dag decomposition and inversion? */
00575   void BAGELCloverTerm::ldagdlinv(LatticeReal& tr_log_diag, int cb)
00576   {
00577     START_CODE();
00578 
00579     if ( 2*Nc < 3 )
00580       QDP_error_exit("Matrix is too small", Nc, Ns);
00581 
00582     // Zero trace log
00583     tr_log_diag = zero;
00584     RScalar<REAL> zip=0;
00585 
00586     int N = 2*Nc;
00587 
00588     // Loop through the sites.
00589     for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
00590     {
00591       int site = rb[cb].siteTable()[ssite];
00592 
00593       int site_neg_logdet=0;
00594       // Loop through the blocks on the site.
00595       for(int block=0; block < 2; block++) { 
00596 
00597         // Triangular storage 
00598         // Big arrays to get good alignment...
00599 
00600         RScalar<REAL> inv_d[8] QDP_ALIGN16;
00601         RComplex<REAL> inv_offd[16] QDP_ALIGN16;
00602         RComplex<REAL> v[8] QDP_ALIGN16;
00603         RScalar<REAL>  diag_g[8] QDP_ALIGN16;
00604         // Algorithm 4.1.2 LDL^\dagger Decomposition
00605         // From Golub, van Loan 3rd ed, page 139
00606         for(int i=0; i < N; i++) { 
00607           inv_d[i] = tri_diag[site][block][i];
00608         }
00609         for(int i=0; i < 15; i++) { 
00610           inv_offd[i]  =tri_off_diag[site][block][i];
00611         }
00612 
00613         for(int j=0; j < N; ++j) { 
00614 
00615           // Compute v(0:j-1)
00616           //
00617           // for i=0:j-2
00618           //   v(i) = A(j,i) A(i,i)
00619           // end
00620 
00621 
00622           for(int i=0; i < j; i++) { 
00623             int elem_ji = j*(j-1)/2 + i;
00624 
00625             RComplex<REAL> A_ii = cmplx( inv_d[i], zip );
00626             v[i] = A_ii*adj(inv_offd[elem_ji]);
00627           }
00628           
00629           // v(j) = A(j,j) - A(j, 0:j-2) v(0:j-2)
00630           //                 ^ This is done with a loop over k ie:
00631           //
00632           // v(j) = A(j,j) - sum_k A*(j,k) v(k)     k=0...j-2
00633           //
00634           //      = A(j,j) - sum_k A*(j,k) A(j,k) A(k,k)
00635           //      = A(j,j) - sum_k | A(j,k) |^2 A(k,k)
00636  
00637           v[j] = cmplx(inv_d[j],zip);
00638 
00639           for(int k=0; k < j; k++) { 
00640             int elem_jk = j*(j-1)/2 + k;
00641             v[j] -= inv_offd[elem_jk]*v[k];
00642           }
00643           
00644 
00645           // At this point in time v[j] has to be real, since
00646           // A(j,j) is from diag ie real and all | A(j,k) |^2 is real
00647           // as is A(k,k)
00648           
00649           // A(j,j) is the diagonal element - so store it.
00650           inv_d[j] = real( v[j] );
00651 
00652           // Last line of algorithm:
00653           // A( j+1 : n, j) = ( A(j+1:n, j) - A(j+1:n, 1:j-1)v(1:k-1) ) / v(j)
00654           //
00655           // use k as first colon notation and l as second so
00656           // 
00657           // for k=j+1 < n-1
00658           //      A(k,j) = A(k,j) ;
00659           //      for l=0 < j-1
00660           //         A(k,j) -= A(k, l) v(l)
00661           //      end
00662           //      A(k,j) /= v(j);
00663           //
00664           for(int k=j+1; k < N; k++) { 
00665             int elem_kj = k*(k-1)/2 + j;
00666             for(int l=0; l < j; l++) { 
00667               int elem_kl = k*(k-1)/2 + l;
00668               inv_offd[elem_kj] -= inv_offd[elem_kl] * v[l];
00669             }
00670             inv_offd[elem_kj] /= v[j];
00671           }
00672         }
00673 
00674         // Now fix up the inverse
00675         RScalar<REAL> one;
00676         one.elem() = (REAL)1;
00677 
00678         for(int i=0; i < N; i++) { 
00679           diag_g[i] = one/inv_d[i];
00680         
00681           // Compute the trace log
00682           // NB we are always doing trace log | A | 
00683           // (because we are always working with actually A^\dagger A
00684           //  even in one flavour case where we square root)
00685           tr_log_diag.elem(site).elem().elem().elem() += log(fabs(inv_d[i].elem()));
00686           // However, it is worth counting just the no of negative logdets
00687           // on site
00688           if( inv_d[i].elem() < 0 ) { 
00689             site_neg_logdet++;
00690           }
00691         }
00692         // Now we need to invert the L D L^\dagger 
00693         // We can do this by solving:
00694         //
00695         //  L D L^\dagger M^{-1} = 1   
00696         //
00697         // This can be done by solving L D X = 1  (X = L^\dagger M^{-1})
00698         //
00699         // Then solving L^\dagger M^{-1} = X
00700         //
00701         // LD is lower diagonal and so X will also be lower diagonal.
00702         // LD X = 1 can be solved by forward substitution.
00703         //
00704         // Likewise L^\dagger is strictly upper triagonal and so
00705         // L^\dagger M^{-1} = X can be solved by forward substitution.
00706         RComplex<REAL> sum;
00707         for(int k = 0; k < N; ++k)
00708         {
00709           for(int i = 0; i < k; ++i) {
00710             zero_rep(v[i]);
00711           }
00712           
00713           /*# Forward substitution */
00714 
00715           // The first element is the inverse of the diagonal
00716           v[k] = cmplx(diag_g[k],zip);
00717       
00718           for(int i = k+1; i < N; ++i) {
00719             zero_rep(v[i]);
00720 
00721             for(int j = k; j < i; ++j) {
00722               int elem_ij = i*(i-1)/2+j;        
00723 
00724               // subtract l_ij*d_j*x_{kj}
00725               v[i] -= inv_offd[elem_ij] *inv_d[j]*v[j];
00726 
00727             }
00728         
00729             // scale out by 1/d_i
00730             v[i] *= diag_g[i];
00731           }
00732       
00733           /*# Backward substitution */
00734           // V[N-1] remains unchanged
00735           // Start from V[N-2]
00736 
00737           for(int i = N-2; (int)i >= (int)k; --i) {
00738             for(int j = i+1; j < N; ++j) {
00739               int elem_ji = j*(j-1)/2 + i;
00740               // Subtract terms of typ (l_ji)*x_kj
00741               v[i] -= adj(inv_offd[elem_ji]) * v[j];
00742             }
00743           }
00744 
00745           /*# Overwrite column k of invcl.offd */
00746           inv_d[k] = real(v[k]);
00747           for(int i = k+1; i < N; ++i)
00748           {
00749             int elem_ik = i*(i-1)/2+k;
00750             inv_offd[elem_ik] = v[i];
00751           }
00752         }
00753 
00754 
00755         // Overwrite original data
00756         for(int i=0; i < N; i++) { 
00757           tri_diag[site][block][i] = inv_d[i];
00758         }
00759         for(int i=0; i < 15; i++) { 
00760           tri_off_diag[site][block][i] = inv_offd[i];
00761         }
00762       }
00763 
00764       if( site_neg_logdet != 0 ) { 
00765         // Report if site had odd number of negative terms. (-ve def)
00766         std::cout << "WARNING: Odd number of negative terms in Clover DET (" 
00767                   << site_neg_logdet<< ") at site: " << site << endl;
00768       }
00769     }
00770     
00771     // This comes from the days when we used to do Cholesky
00772     choles_done[cb] = true;
00773     END_CODE();
00774   }
00775 
00776   
00777 
00778 
00779   /*! CHLCLOVMS - Cholesky decompose the clover mass term and uses it to
00780    *              compute  lower(A^-1) = lower((L.L^dag)^-1)
00781    *              Adapted from Golub and Van Loan, Matrix Computations, 2nd, Sec 4.2.4
00782    *
00783    * Arguments:
00784    *
00785    * \param DetP         flag whether to compute determinant (Read)
00786    * \param logdet       logarithm of the determinant        (Write)
00787    * \param cb           checkerboard of work                (Read)
00788    */
00789   void BAGELCloverTerm::chlclovms(LatticeReal& tr_log_diag, int cb)
00790   {
00791     START_CODE();
00792 
00793     if ( 2*Nc < 3 )
00794       QDP_error_exit("Matrix is too small", Nc, Ns);
00795   
00796     tr_log_diag = zero;
00797   
00798     int n = 2*Nc;
00799 
00800     /*# Cholesky decompose  A = L.L^dag */
00801     /*# NOTE!!: I can store this matrix in  invclov, but will need a */
00802     /*#   temporary  diag */
00803     for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
00804     {
00805       int site = rb[cb].siteTable()[ssite];
00806 
00807       PrimitiveClovDiag  invclov_diag;
00808       PrimitiveClovOffDiag  invclov_off_diag;
00809 
00810       multi1d< RScalar<REAL> > diag_g(n);
00811       multi1d< RComplex<REAL> > v1(n);
00812       RComplex<REAL> sum;
00813       RScalar<REAL> one;
00814       RScalar<REAL> zero;
00815       RScalar<REAL> lrtmp;
00816 
00817       one = 1;
00818       zero = 0;
00819   
00820       for(int s = 0; s < 2; ++s)
00821       {
00822         int elem_jk = 0;
00823         int elem_ij;
00824         
00825         for(int j = 0; j <  n; ++j)
00826         {
00827           /*# Multiply clover mass term against basis vector.  */
00828           /*# Actually, I need a column of the lower triang matrix clov. */
00829           v1[j] = cmplx(tri_diag[site][s][j],zero);
00830     
00831           elem_ij = elem_jk + 2*j;
00832       
00833           for(int i = j+1; i < n; ++i)
00834           {
00835             v1[i] = tri_off_diag[site][s][elem_ij];
00836             elem_ij += i;
00837           }
00838       
00839           /*# Back to cholesky */
00840           /*# forward substitute */
00841           for(int k = 0; k < j; ++k)
00842           {
00843             int elem_ik = elem_jk;
00844         
00845             for(int i = j; i < n; ++i)
00846             {
00847               v1[i] -= adj(invclov_off_diag[s][elem_jk]) * invclov_off_diag[s][elem_ik];
00848               elem_ik += i;
00849             }
00850             elem_jk++;
00851           }
00852 
00853           /*# The diagonal is (should be!!) real and positive */
00854           diag_g[j] = real(v1[j]);
00855           
00856           /*#+ */
00857           /*# Squeeze in computation of the trace log of the diagonal term */
00858           /*#- */
00859           if ( diag_g[j].elem() > 0 ) 
00860           { 
00861             lrtmp = log(diag_g[j]);
00862           }
00863           else
00864           { 
00865             // Make sure any node can print this message
00866             cerr << "Clover term has negative diagonal element: "
00867                  << "diag_g[" << j << "]= " << diag_g[j] 
00868                  << " at site: " << site << endl;
00869             QDP_abort(1);
00870           }
00871 
00872           tr_log_diag.elem(site).elem().elem() += lrtmp;
00873           
00874           diag_g[j] = sqrt(diag_g[j]);
00875           diag_g[j] = one / diag_g[j];
00876       
00877           /*# backward substitute */
00878           elem_ij = elem_jk + j;
00879           for(int i = j+1; i < n; ++i)
00880           {
00881             invclov_off_diag[s][elem_ij] = v1[i] * diag_g[j];
00882             elem_ij += i;
00883           }
00884         }
00885 
00886         /*# Use forward and back substitution to construct  invcl.offd = lower(A^-1) */
00887         for(int k = 0; k < n; ++k)
00888         {
00889           for(int i = 0; i < k; ++i)
00890             zero_rep(v1[i]);
00891           
00892           /*# Forward substitution */
00893           v1[k] = cmplx(diag_g[k],zero);
00894       
00895           for(int i = k+1; i < n; ++i)
00896           {
00897             zero_rep(sum);
00898             elem_ij = i*(i-1)/2+k;      
00899             for(int j = k; j < i; ++j)
00900             {
00901               sum -= invclov_off_diag[s][elem_ij] * v1[j];
00902               elem_ij++;
00903             }
00904         
00905             v1[i] = sum * diag_g[i];
00906           }
00907       
00908           /*# Backward substitution */
00909           v1[n-1] = v1[n-1] * diag_g[n-1];
00910      
00911           for(int i = n-2; (int)i >= (int)k; --i)
00912           {
00913             sum = v1[i];
00914             
00915             int elem_ji = ((i+1)*i)/2+i;
00916             for(int j = i+1; j < n; ++j)
00917             {
00918               sum -= adj(invclov_off_diag[s][elem_ji]) * v1[j];
00919               elem_ji += j;
00920             }
00921             v1[i] = sum * diag_g[i];
00922           }
00923 
00924           /*# Overwrite column k of invcl.offd */
00925           invclov_diag[s][k] = real(v1[k]);
00926 
00927           int elem_ik = ((k+1)*k)/2+k;
00928       
00929           for(int i = k+1; i < n; ++i)
00930           {
00931             invclov_off_diag[s][elem_ik] = v1[i];
00932             elem_ik += i;
00933           }
00934         }
00935       }
00936 
00937       // Overwrite original element
00938       for(int s=0; s < 2; s++) { 
00939         for(int i=0; i < 6; i++) { 
00940           tri_diag[site][s][i] = invclov_diag[s][i];
00941         }
00942         for(int i=0; i < 15; i++) {
00943           tri_off_diag[site][s][i] = invclov_off_diag[s][i];
00944         }
00945       }
00946     }
00947   
00948   
00949     choles_done[cb] = true;
00950     END_CODE();
00951   }
00952 
00953 
00954   /**
00955    * Apply a dslash
00956    *
00957    * Performs the operation
00958    *
00959    *  chi <-   (L + D + L^dag) . psi
00960    *
00961    * where
00962    *   L       is a lower triangular matrix
00963    *   D       is the real diagonal. (stored together in type TRIANG)
00964    *
00965    * Arguments:
00966    * \param chi     result                                      (Write)
00967    * \param psi     source                                      (Read)
00968    * \param isign   D'^dag or D'  ( MINUS | PLUS ) resp.        (Read)
00969    * \param cb      Checkerboard of OUTPUT vector               (Read) 
00970    */
00971   void BAGELCloverTerm::apply(LatticeFermion& chi, const LatticeFermion& psi, 
00972                             enum PlusMinus isign, int cb) const
00973   {
00974     START_CODE();
00975 
00976     if ( Ns != 4 )
00977       QDP_error_exit("code requires Ns == 4", Ns);
00978 
00979     int n = 2*Nc;
00980 
00981     
00982     if( rb[cb].hasOrderedRep() ) {
00983       // unsigned int start = rb[cb].start();
00984       // unsigned long n_sites = rb[cb].siteTable().size();
00985       int start = rb[cb].start();
00986       unsigned long n_sites=rb[cb].siteTable().size();
00987 #if 0
00988       //Testing code do only one site
00989       unsigned long n_sites =1;
00990 #endif
00991       // Need to unroll over sites, so instead of having : site, struct { diag, offdiag } 
00992       // we have site,diag,  and site, offdiag arrays
00993       //
00994 
00995       BAGELCloverFloat* chiptr = (BAGELCloverFloat *)&( chi.elem(start).elem(0).elem(0).real());
00996       const BAGELCloverFloat* psiptr = (const BAGELCloverFloat *)&(psi.elem(start).elem(0).elem(0).real());
00997       const BAGELCloverFloat* offd = (const BAGELCloverFloat *)&(tri_off_diag[start][0][0].real());
00998       const BAGELCloverFloat* diag = (const BAGELCloverFloat *)&(tri_diag[start][0][0].elem());
00999       bagel_clover(diag, offd, psiptr, chiptr, n_sites);
01000 
01001     }
01002     else {
01003       const multi1d<int>& tab = rb[cb].siteTable();
01004 
01005 
01006       for(int ssite=0; ssite < tab.size(); ++ssite) {
01007         
01008         int site = tab[ssite];
01009         unsigned long n_sites=1;
01010         // RComplex<REAL>* cchi = (RComplex<REAL>*)&(chi.elem(site).elem(0).elem(0));
01011         // const RComplex<REAL>* ppsi = (const RComplex<REAL>*)&(psi.elem(site).elem(0).elem(0));
01012         
01013         BAGELCloverFloat* chiptr = (BAGELCloverFloat *)&( chi.elem(site).elem(0).elem(0).real());
01014         const BAGELCloverFloat* psiptr = (const BAGELCloverFloat *)&(psi.elem(site).elem(0).elem(0).real());
01015         const BAGELCloverFloat* offd = (const BAGELCloverFloat *)&(tri_off_diag[site][0][0].real());
01016         const BAGELCloverFloat* diag = (const BAGELCloverFloat *)&(tri_diag[site][0][0].elem());
01017         bagel_clover(diag, offd, psiptr, chiptr, n_sites);
01018         
01019       }
01020     }
01021 
01022     getFermBC().modifyF(chi, QDP::rb[cb]);
01023 
01024     END_CODE();
01025   }
01026 
01027   /**
01028    * Apply a dslash
01029    *
01030    * Performs the operation
01031    *
01032    *  chi <-   (L + D + L^dag) . psi
01033    *
01034    * where
01035    *   L       is a lower triangular matrix
01036    *   D       is the real diagonal. (stored together in type TRIANG)
01037    *
01038    * Arguments:
01039    * \param chi     result                                      (Write)
01040    * \param psi     source                                      (Read)
01041    * \param isign   D'^dag or D'  ( MINUS | PLUS ) resp.        (Read)
01042    * \param cb      Checkerboard of OUTPUT vector               (Read) 
01043    */
01044   void BAGELCloverTerm::applySite(LatticeFermion& chi, const LatticeFermion& psi, 
01045                             enum PlusMinus isign, int site) const
01046   {
01047     START_CODE();
01048 
01049     if ( Ns != 4 )
01050       QDP_error_exit("code requires Ns == 4", Ns);
01051 
01052     int n = 2*Nc;
01053 
01054     RComplex<REAL>* cchi = (RComplex<REAL>*)&(chi.elem(site).elem(0).elem(0));
01055     const RComplex<REAL>* ppsi = (const RComplex<REAL>*)&(psi.elem(site).elem(0).elem(0));
01056 
01057 
01058     cchi[ 0] = tri_diag[site][0][ 0]  * ppsi[ 0]
01059       +   conj(tri_off_diag[site][0][ 0]) * ppsi[ 1]
01060       +   conj(tri_off_diag[site][0][ 1]) * ppsi[ 2]
01061       +   conj(tri_off_diag[site][0][ 3]) * ppsi[ 3]
01062       +   conj(tri_off_diag[site][0][ 6]) * ppsi[ 4]
01063       +   conj(tri_off_diag[site][0][10]) * ppsi[ 5];
01064     
01065     cchi[ 1] = tri_diag[site][0][ 1]  * ppsi[ 1]
01066       +        tri_off_diag[site][0][ 0]  * ppsi[ 0]
01067       +   conj(tri_off_diag[site][0][ 2]) * ppsi[ 2]
01068       +   conj(tri_off_diag[site][0][ 4]) * ppsi[ 3]
01069       +   conj(tri_off_diag[site][0][ 7]) * ppsi[ 4]
01070       +   conj(tri_off_diag[site][0][11]) * ppsi[ 5];
01071     
01072     cchi[ 2] = tri_diag[site][0][ 2]  * ppsi[ 2]
01073       +        tri_off_diag[site][0][ 1]  * ppsi[ 0]
01074       +        tri_off_diag[site][0][ 2]  * ppsi[ 1]
01075       +   conj(tri_off_diag[site][0][ 5]) * ppsi[ 3]
01076       +   conj(tri_off_diag[site][0][ 8]) * ppsi[ 4]
01077       +   conj(tri_off_diag[site][0][12]) * ppsi[ 5];
01078     
01079     cchi[ 3] = tri_diag[site][0][ 3]  * ppsi[ 3]
01080       +        tri_off_diag[site][0][ 3]  * ppsi[ 0]
01081       +        tri_off_diag[site][0][ 4]  * ppsi[ 1]
01082       +        tri_off_diag[site][0][ 5]  * ppsi[ 2]
01083       +   conj(tri_off_diag[site][0][ 9]) * ppsi[ 4]
01084       +   conj(tri_off_diag[site][0][13]) * ppsi[ 5];
01085     
01086     cchi[ 4] = tri_diag[site][0][ 4]  * ppsi[ 4]
01087       +        tri_off_diag[site][0][ 6]  * ppsi[ 0]
01088       +        tri_off_diag[site][0][ 7]  * ppsi[ 1]
01089       +        tri_off_diag[site][0][ 8]  * ppsi[ 2]
01090       +        tri_off_diag[site][0][ 9]  * ppsi[ 3]
01091       +   conj(tri_off_diag[site][0][14]) * ppsi[ 5];
01092     
01093     cchi[ 5] = tri_diag[site][0][ 5]  * ppsi[ 5]
01094       +        tri_off_diag[site][0][10]  * ppsi[ 0]
01095       +        tri_off_diag[site][0][11]  * ppsi[ 1]
01096       +        tri_off_diag[site][0][12]  * ppsi[ 2]
01097       +        tri_off_diag[site][0][13]  * ppsi[ 3]
01098       +        tri_off_diag[site][0][14]  * ppsi[ 4];
01099     
01100     cchi[ 6] = tri_diag[site][1][ 0]  * ppsi[ 6]
01101       +   conj(tri_off_diag[site][1][ 0]) * ppsi[ 7]
01102       +   conj(tri_off_diag[site][1][ 1]) * ppsi[ 8]
01103       +   conj(tri_off_diag[site][1][ 3]) * ppsi[ 9]
01104       +   conj(tri_off_diag[site][1][ 6]) * ppsi[10]
01105       +   conj(tri_off_diag[site][1][10]) * ppsi[11];
01106     
01107     cchi[ 7] = tri_diag[site][1][ 1]  * ppsi[ 7]
01108       +        tri_off_diag[site][1][ 0]  * ppsi[ 6]
01109       +   conj(tri_off_diag[site][1][ 2]) * ppsi[ 8]
01110       +   conj(tri_off_diag[site][1][ 4]) * ppsi[ 9]
01111       +   conj(tri_off_diag[site][1][ 7]) * ppsi[10]
01112       +   conj(tri_off_diag[site][1][11]) * ppsi[11];
01113     
01114     cchi[ 8] = tri_diag[site][1][ 2]  * ppsi[ 8]
01115       +        tri_off_diag[site][1][ 1]  * ppsi[ 6]
01116       +        tri_off_diag[site][1][ 2]  * ppsi[ 7]
01117       +   conj(tri_off_diag[site][1][ 5]) * ppsi[ 9]
01118       +   conj(tri_off_diag[site][1][ 8]) * ppsi[10]
01119       +   conj(tri_off_diag[site][1][12]) * ppsi[11];
01120     
01121     cchi[ 9] = tri_diag[site][1][ 3]  * ppsi[ 9]
01122       +        tri_off_diag[site][1][ 3]  * ppsi[ 6]
01123       +        tri_off_diag[site][1][ 4]  * ppsi[ 7]
01124       +        tri_off_diag[site][1][ 5]  * ppsi[ 8]
01125       +   conj(tri_off_diag[site][1][ 9]) * ppsi[10]
01126       +   conj(tri_off_diag[site][1][13]) * ppsi[11];
01127     
01128     cchi[10] = tri_diag[site][1][ 4]  * ppsi[10]
01129       +        tri_off_diag[site][1][ 6]  * ppsi[ 6]
01130       +        tri_off_diag[site][1][ 7]  * ppsi[ 7]
01131       +        tri_off_diag[site][1][ 8]  * ppsi[ 8]
01132       +        tri_off_diag[site][1][ 9]  * ppsi[ 9]
01133       +   conj(tri_off_diag[site][1][14]) * ppsi[11];
01134     
01135     cchi[11] = tri_diag[site][1][ 5]  * ppsi[11]
01136       +        tri_off_diag[site][1][10]  * ppsi[ 6]
01137       +        tri_off_diag[site][1][11]  * ppsi[ 7]
01138       +        tri_off_diag[site][1][12]  * ppsi[ 8]
01139       +        tri_off_diag[site][1][13]  * ppsi[ 9]
01140       +        tri_off_diag[site][1][14]  * ppsi[10];
01141 
01142 
01143     END_CODE();
01144   }
01145 
01146 
01147 
01148 
01149   //! TRIACNTR 
01150   /*! 
01151    * \ingroup linop
01152    *
01153    *  Calculates
01154    *     Tr_D ( Gamma_mat L )
01155    *
01156    * This routine is specific to Wilson fermions!
01157    * 
01158    *  the trace over the Dirac indices for one of the 16 Gamma matrices
01159    *  and a hermitian color x spin matrix A, stored as a block diagonal
01160    *  complex lower triangular matrix L and a real diagonal diag_L.
01161 
01162    *  Here 0 <= mat <= 15 and
01163    *  if mat = mat_1 + mat_2 * 2 + mat_3 * 4 + mat_4 * 8
01164    *
01165    *  Gamma(mat) = gamma(1)^(mat_1) * gamma(2)^(mat_2) * gamma(3)^(mat_3)
01166    *             * gamma(4)^(mat_4)
01167    *
01168    *  Further, in basis for the Gamma matrices used, A is of the form
01169    *
01170    *      | A_0 |  0  |
01171    *  A = | --------- |
01172    *      |  0  | A_1 |
01173    *
01174    *
01175    * Arguments:
01176    *
01177    *  \param B         the resulting SU(N) color matrix   (Write) 
01178    *  \param clov      clover term                        (Read) 
01179    *  \param mat       label of the Gamma matrix          (Read)
01180    */
01181   void BAGELCloverTerm::triacntr(LatticeColorMatrix& B, int mat, int cb) const
01182   {
01183     START_CODE();
01184 
01185     B = zero;
01186 
01187     if ( mat < 0  ||  mat > 15 )
01188     {
01189       QDPIO::cerr << __func__ << ": Gamma out of range: mat = " << mat << endl;
01190       QDP_abort(1);
01191     }
01192   
01193     switch( mat )
01194     {
01195     case 0:
01196       /*# gamma(   0)   1  0  0  0            # ( 0000 )  --> 0 */
01197       /*#               0  1  0  0 */
01198       /*#               0  0  1  0 */
01199       /*#               0  0  0  1 */
01200       /*# From diagonal part */
01201       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
01202       {
01203         int site = rb[cb].siteTable()[ssite];
01204 
01205         RComplex<REAL> lctmp0;
01206         RScalar<REAL> lr_zero0;
01207         RScalar<REAL> lrtmp0;
01208   
01209         lr_zero0 = 0;
01210   
01211         for(int i0 = 0; i0 < Nc; ++i0)
01212         {
01213           lrtmp0 = tri_diag[site][0][i0];
01214           lrtmp0 += tri_diag[site][0][i0+Nc];
01215           lrtmp0 += tri_diag[site][1][i0];
01216           lrtmp0 += tri_diag[site][1][i0+Nc];
01217           B.elem(site).elem().elem(i0,i0) = cmplx(lrtmp0,lr_zero0);
01218         }
01219 
01220         /*# From lower triangular portion */
01221         int elem_ij0 = 0;
01222         for(int i0 = 1; i0 < Nc; ++i0)
01223         {
01224           int elem_ijb0 = (i0+Nc)*(i0+Nc-1)/2 + Nc;
01225 
01226           for(int j0 = 0; j0 < i0; ++j0)
01227           {
01228             lctmp0 = tri_off_diag[site][0][elem_ij0];
01229             lctmp0 += tri_off_diag[site][0][elem_ijb0];
01230             lctmp0 += tri_off_diag[site][1][elem_ij0];
01231             lctmp0 += tri_off_diag[site][1][elem_ijb0];
01232 
01233             B.elem(site).elem().elem(j0,i0) = lctmp0;
01234             B.elem(site).elem().elem(i0,j0) = adj(lctmp0);
01235 
01236 
01237             elem_ij0++;
01238             elem_ijb0++;
01239           }
01240         }
01241       }
01242       break;
01243 
01244     case 3:
01245       /*# gamma(  12)  -i  0  0  0            # ( 0011 )  --> 3 */
01246       /*#               0  i  0  0 */
01247       /*#               0  0 -i  0 */
01248       /*#               0  0  0  i */
01249       /*# From diagonal part */
01250       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
01251       {
01252         int site = rb[cb].siteTable()[ssite];
01253 
01254         RComplex<REAL> lctmp3;
01255         RScalar<REAL> lr_zero3;
01256         RScalar<REAL> lrtmp3;
01257                           
01258         lr_zero3 = 0;
01259   
01260         for(int i3 = 0; i3 < Nc; ++i3)
01261         {
01262           lrtmp3 = tri_diag[site][0][i3+Nc];
01263           lrtmp3 -= tri_diag[site][0][i3];
01264           lrtmp3 -= tri_diag[site][1][i3];
01265           lrtmp3 += tri_diag[site][1][i3+Nc];
01266           B.elem(site).elem().elem(i3,i3) = cmplx(lr_zero3,lrtmp3);
01267         }
01268         
01269         /*# From lower triangular portion */
01270         int elem_ij3 = 0;
01271         for(int i3 = 1; i3 < Nc; ++i3)
01272         {
01273           int elem_ijb3 = (i3+Nc)*(i3+Nc-1)/2 + Nc;
01274 
01275           for(int j3 = 0; j3 < i3; ++j3)
01276           {
01277             lctmp3 = tri_off_diag[site][0][elem_ijb3];
01278             lctmp3 -= tri_off_diag[site][0][elem_ij3];
01279             lctmp3 -= tri_off_diag[site][1][elem_ij3];
01280             lctmp3 += tri_off_diag[site][1][elem_ijb3];
01281 
01282             B.elem(site).elem().elem(j3,i3) = timesI(adj(lctmp3));
01283             B.elem(site).elem().elem(i3,j3) = timesI(lctmp3);
01284             
01285             elem_ij3++;
01286             elem_ijb3++;
01287           }
01288         }
01289       }
01290       break;
01291 
01292     case 5:
01293       /*# gamma(  13)   0 -1  0  0            # ( 0101 )  --> 5 */
01294       /*#               1  0  0  0 */
01295       /*#               0  0  0 -1 */
01296       /*#               0  0  1  0 */
01297       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
01298       {
01299         int site = rb[cb].siteTable()[ssite];
01300 
01301         RComplex<REAL> lctmp5;
01302         RScalar<REAL> lrtmp5;
01303                           
01304         for(int i5 = 0; i5 < Nc; ++i5)
01305         {
01306           int elem_ij5 = (i5+Nc)*(i5+Nc-1)/2;
01307 
01308           for(int j5 = 0; j5 < Nc; ++j5)
01309           {
01310             int elem_ji5 = (j5+Nc)*(j5+Nc-1)/2 + i5;
01311 
01312           
01313             lctmp5 = adj(tri_off_diag[site][0][elem_ji5]);
01314             lctmp5 -= tri_off_diag[site][0][elem_ij5];
01315             lctmp5 += adj(tri_off_diag[site][1][elem_ji5]);
01316             lctmp5 -= tri_off_diag[site][1][elem_ij5];
01317 
01318 
01319             B.elem(site).elem().elem(i5,j5) = lctmp5;
01320 
01321             elem_ij5++;
01322           }
01323         }
01324       }
01325       break;
01326 
01327     case 6:
01328       /*# gamma(  23)   0 -i  0  0            # ( 0110 )  --> 6 */
01329       /*#              -i  0  0  0 */
01330       /*#               0  0  0 -i */
01331       /*#               0  0 -i  0 */
01332       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
01333       {
01334         int site = rb[cb].siteTable()[ssite];
01335 
01336         RComplex<REAL> lctmp6;
01337         RScalar<REAL> lrtmp6;
01338                           
01339         for(int i6 = 0; i6 < Nc; ++i6)
01340         {
01341           int elem_ij6 = (i6+Nc)*(i6+Nc-1)/2;
01342 
01343           for(int j6 = 0; j6 < Nc; ++j6)
01344           {
01345             int elem_ji6 = (j6+Nc)*(j6+Nc-1)/2 + i6;
01346 
01347             lctmp6 = adj(tri_off_diag[site][0][elem_ji6]);
01348             lctmp6 += tri_off_diag[site][0][elem_ij6];
01349             lctmp6 += adj(tri_off_diag[site][1][elem_ji6]);
01350             lctmp6 += tri_off_diag[site][1][elem_ij6];
01351 
01352             B.elem(site).elem().elem(i6,j6) = timesMinusI(lctmp6);
01353 
01354             elem_ij6++;
01355           }
01356         }
01357       }
01358       break;
01359 
01360     case 9:
01361       /*# gamma(  14)   0  i  0  0            # ( 1001 )  --> 9 */
01362       /*#               i  0  0  0 */
01363       /*#               0  0  0 -i */
01364       /*#               0  0 -i  0 */
01365       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
01366       {
01367         int site = rb[cb].siteTable()[ssite];
01368 
01369         RComplex<REAL> lctmp9;
01370         RScalar<REAL> lrtmp9;
01371                           
01372         for(int i9 = 0; i9 < Nc; ++i9)
01373         {
01374           int elem_ij9 = (i9+Nc)*(i9+Nc-1)/2;
01375 
01376           for(int j9 = 0; j9 < Nc; ++j9)
01377           {
01378             int elem_ji9 = (j9+Nc)*(j9+Nc-1)/2 + i9;
01379 
01380             lctmp9 = adj(tri_off_diag[site][0][elem_ji9]);
01381             lctmp9 += tri_off_diag[site][0][elem_ij9];
01382             lctmp9 -= adj(tri_off_diag[site][1][elem_ji9]);
01383             lctmp9 -= tri_off_diag[site][1][elem_ij9];
01384 
01385             B.elem(site).elem().elem(i9,j9) = timesI(lctmp9);
01386 
01387             elem_ij9++;
01388           }
01389         }
01390       }
01391       break;
01392 
01393     case 10:
01394       /*# gamma(  24)   0 -1  0  0            # ( 1010 )  --> 10 */
01395       /*#               1  0  0  0 */
01396       /*#               0  0  0  1 */
01397       /*#               0  0 -1  0 */
01398       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
01399       {
01400         int site = rb[cb].siteTable()[ssite];
01401 
01402         RComplex<REAL> lctmp10;
01403         RScalar<REAL> lrtmp10;
01404                           
01405         for(int i10 = 0; i10 < Nc; ++i10)
01406         {
01407           int elem_ij10 = (i10+Nc)*(i10+Nc-1)/2;
01408         
01409           for(int j10 = 0; j10 < Nc; ++j10)
01410           {
01411             int elem_ji10 = (j10+Nc)*(j10+Nc-1)/2 + i10;
01412 
01413             lctmp10 = adj(tri_off_diag[site][0][elem_ji10]);
01414             lctmp10 -= tri_off_diag[site][0][elem_ij10];
01415             lctmp10 -= adj(tri_off_diag[site][1][elem_ji10]);
01416             lctmp10 += tri_off_diag[site][1][elem_ij10];
01417 
01418             B.elem(site).elem().elem(i10,j10) = lctmp10;
01419 
01420             elem_ij10++;
01421           }
01422         }
01423       }
01424       break;
01425     
01426     case 12:
01427       /*# gamma(  34)   i  0  0  0            # ( 1100 )  --> 12 */
01428       /*#               0 -i  0  0 */
01429       /*#               0  0 -i  0 */
01430       /*#               0  0  0  i */
01431       /*# From diagonal part */
01432       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) 
01433       {
01434         int site = rb[cb].siteTable()[ssite];
01435 
01436         RComplex<REAL> lctmp12;
01437         RScalar<REAL> lr_zero12;
01438         RScalar<REAL> lrtmp12;
01439                           
01440         lr_zero12 = 0;
01441   
01442         for(int i12 = 0; i12 < Nc; ++i12)
01443         {
01444           lrtmp12 = tri_diag[site][0][i12];
01445           lrtmp12 -= tri_diag[site][0][i12+Nc];
01446           lrtmp12 -= tri_diag[site][1][i12];
01447           lrtmp12 += tri_diag[site][1][i12+Nc];
01448           B.elem(site).elem().elem(i12,i12) = cmplx(lr_zero12,lrtmp12);
01449         }
01450     
01451         /*# From lower triangular portion */
01452         int elem_ij12 = 0;
01453         for(int i12 = 1; i12 < Nc; ++i12)
01454         {
01455           int elem_ijb12 = (i12+Nc)*(i12+Nc-1)/2 + Nc;
01456 
01457           for(int j12 = 0; j12 < i12; ++j12)
01458           {
01459             lctmp12 = tri_off_diag[site][0][elem_ij12];
01460             lctmp12 -= tri_off_diag[site][0][elem_ijb12];
01461             lctmp12 -= tri_off_diag[site][1][elem_ij12];
01462             lctmp12 += tri_off_diag[site][1][elem_ijb12];
01463         
01464             B.elem(site).elem().elem(i12,j12) = timesI(lctmp12);
01465             B.elem(site).elem().elem(j12,i12) = timesI(adj(lctmp12));
01466         
01467             elem_ij12++;
01468             elem_ijb12++;
01469           }
01470         }
01471       }
01472       break;
01473     
01474     default:
01475     {
01476       B = zero;
01477       QDPIO::cout << "BAD DEFAULT CASE HIT" << endl;
01478     }
01479     }
01480   
01481 
01482     END_CODE();
01483   }
01484 
01485   //! Returns the appropriate clover coefficient for indices mu and nu
01486   Real BAGELCloverTerm::getCloverCoeff(int mu, int nu) const 
01487   { 
01488     START_CODE();
01489 
01490     if( param.anisoParam.anisoP ) 
01491     { 
01492       if (mu==param.anisoParam.t_dir || nu == param.anisoParam.t_dir) { 
01493         return param.clovCoeffT;
01494       }
01495       else { 
01496         // Otherwise return the spatial coeff
01497         return param.clovCoeffR;
01498       }
01499     }
01500     else { 
01501       // If there is no anisotropy just return the spatial one, it will
01502       // be the same as the temporal one
01503       return param.clovCoeffR; 
01504     } 
01505     
01506     END_CODE();
01507   }
01508 
01509 }
01510 

Generated on Sun Nov 22 04:28:57 2009 for CHROMA by  doxygen 1.4.7