clover_term_ssed.cc

Go to the documentation of this file.
00001 // $Id: clover_term_ssed.cc,v 1.5 2009/02/06 15:53:33 bjoo Exp $
00002 /*! \file
00003  *  \brief Clover term linear operator
00004  *
00005  *  This particular implementation is specific to a scalar-like
00006  *  architecture
00007  */
00008 
00009 #include "chromabase.h"
00010 #include "actions/ferm/linop/clover_term_ssed.h"
00011 #include "meas/glue/mesfield.h"
00012 
00013 
00014 namespace Chroma 
00015 { 
00016   //extern void ssed_clover_apply(REAL64* diag, REAL64* offd, REAL64* psiptr, REAL64* chiptr, int n_sites);
00017 
00018   // Empty constructor. Must use create later
00019   SSEDCloverTerm::SSEDCloverTerm() {
00020     bool retry = false;
00021 
00022     // Allocate the arrays, with proper alignment
00023 
00024     try { 
00025       tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::FAST );
00026     }
00027     catch( std::bad_alloc ) { 
00028       retry = true;
00029     }
00030 
00031     if( retry ) { 
00032       try { 
00033         tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::DEFAULT );
00034       }
00035       catch( std::bad_alloc ) { 
00036         QDPIO::cerr << "Failed to allocate the tri_diag" << endl << flush ;
00037         QDP_abort(1);
00038       }
00039     }
00040 
00041 
00042     retry = false;
00043     try { 
00044       tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::FAST );
00045     }
00046     catch( std::bad_alloc ) { 
00047       retry = true;
00048     }
00049 
00050     if( retry ) { 
00051       try { 
00052         tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::DEFAULT );
00053       }
00054       catch( std::bad_alloc ) { 
00055         QDPIO::cerr << "Failed to allocate the tri_off_diag" << endl << flush ;
00056         QDP_abort(1);
00057       }
00058     }
00059 
00060 
00061   }
00062 
00063 
00064   SSEDCloverTerm::~SSEDCloverTerm() {
00065     if ( tri_diag != 0x0 ) { 
00066       QDP::Allocator::theQDPAllocator::Instance().free(tri_diag);
00067     }
00068     
00069     if ( tri_off_diag != 0x0 ) {        
00070       QDP::Allocator::theQDPAllocator::Instance().free(tri_off_diag);
00071     }   
00072   }
00073 
00074   //! Creation routine
00075   void SSEDCloverTerm::create(Handle< FermState<T,P,Q> > fs,
00076                                const CloverFermActParams& param_)
00077   {
00078     START_CODE();
00079     u = fs->getLinks();
00080     fbc = fs->getFermBC();
00081     param = param_;
00082 
00083     // Sanity check
00084     if (fbc.operator->() == 0)
00085     {
00086       QDPIO::cerr << "SSEDCloverTerm: error: fbc is null" << endl;
00087       QDP_abort(1);
00088     }
00089 
00090     {
00091       Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
00092       param.clovCoeffR *= Real(0.5) * ff;
00093       param.clovCoeffT *= Real(0.5);
00094     }
00095 
00096     //
00097     // Yuk. Some bits of knowledge of the dslash term are buried in the 
00098     // effective mass term. They show up here. If I wanted some more 
00099     // complicated dslash then this will have to be fixed/adjusted.
00100     //
00101     Real diag_mass;
00102     {
00103       Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
00104       diag_mass = 1 + (Nd-1)*ff + param.Mass;
00105     }
00106 
00107     /* Calculate F(mu,nu) */
00108     multi1d<LatticeColorMatrix> f;
00109     mesField(f, u);
00110     makeClov(f, diag_mass);
00111     
00112     choles_done.resize(rb.numSubsets());
00113     for(int i=0; i < rb.numSubsets(); i++) {
00114       choles_done[i] = false;
00115     }
00116 
00117 #if 0
00118     // Testing code
00119     LatticeFermion foo;
00120     LatticeFermion res1=zero;
00121     LatticeFermion res2=zero;
00122     gaussian(foo);
00123     apply(res1,foo,PLUS, 0);
00124     applySite(res2,foo,PLUS, 0);
00125 
00126     LatticeFermion diff=res1-res2;
00127     XMLFileWriter fred("fred");
00128     push(fred, "stuff");
00129     write(fred, "diff", diff);
00130     pop(fred);
00131 
00132     QDPIO::cout << "sqrt( norm2( diff))= " << sqrt(norm2(diff)) << endl << flush;
00133     QDP_abort(1);
00134 #endif
00135 
00136     END_CODE();
00137   }
00138 
00139     // Now copy
00140   void SSEDCloverTerm::create(Handle< FermState<T,P,Q> > fs,
00141                              const CloverFermActParams& param_,
00142                              const SSEDCloverTerm& from_)
00143   {
00144     START_CODE();
00145 
00146     const SSEDCloverTerm& from = dynamic_cast<const SSEDCloverTerm&>(from_);
00147     u = fs->getLinks();
00148     fbc = fs->getFermBC();
00149     param = param_;
00150     
00151     // Sanity check
00152     if (fbc.operator->() == 0) {
00153       QDPIO::cerr << "SSEDCloverTerm: error: fbc is null" << endl;
00154       QDP_abort(1);
00155     }
00156     
00157     {
00158       Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
00159       param.clovCoeffR *= Real(0.5) * ff;
00160       param.clovCoeffT *= Real(0.5);
00161     }
00162     
00163     //
00164     // Yuk. Some bits of knowledge of the dslash term are buried in the 
00165     // effective mass term. They show up here. If I wanted some more 
00166     // complicated dslash then this will have to be fixed/adjusted.
00167     //
00168     Real diag_mass;
00169     {
00170       Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
00171       diag_mass = 1 + (Nd-1)*ff + param.Mass;
00172     }
00173     
00174     
00175     /* Calculate F(mu,nu) */
00176     //multi1d<LatticeColorMatrix> f;
00177     //mesField(f, u);
00178     //makeClov(f, diag_mass);
00179     
00180     choles_done.resize(rb.numSubsets());
00181     for(int i=0; i < rb.numSubsets(); i++) {
00182       choles_done[i] = from.choles_done[i];
00183     }
00184     
00185     tr_log_diag_ = from.tr_log_diag_;
00186     
00187     // Should be allocated by constructor.
00188     // tri_diag.resize(from.tri_diag.size());
00189     // tri_off_diag.resize(from.tri_off_diag.size());
00190 
00191     // This I claim is so performance uncritical, that
00192     // for now I won't parallelize it...
00193     for(int site=0; site < Layout::sitesOnNode(); site++) {
00194       for(int block = 0; block < 2; block++) { 
00195         for(int comp=0; comp< 6; comp++) { 
00196           tri_diag[site][block][comp] = from.tri_diag[site][block][comp];
00197         }
00198         for(int comp=0; comp < 15; comp++) { 
00199           tri_off_diag[site][block][comp] = from.tri_off_diag[site][block][comp];
00200         }
00201       }
00202     }
00203                             
00204     END_CODE();
00205   }
00206 
00207 
00208   /*
00209    * MAKCLOV 
00210    *
00211    *  In this routine, MAKCLOV calculates
00212 
00213    *    1 - (1/4)*sigma(mu,nu) F(mu,nu)
00214 
00215    *  using F from mesfield
00216 
00217    *    F(mu,nu) =  (1/4) sum_p (1/2) [ U_p(x) - U^dag_p(x) ]
00218 
00219    *  using basis of SPPROD and stores in a lower triangular matrix
00220    *  (no diagonal) plus real diagonal
00221 
00222    *  where
00223    *    U_1 = u(x,mu)*u(x+mu,nu)*u_dag(x+nu,mu)*u_dag(x,nu)
00224    *    U_2 = u(x,nu)*u_dag(x-mu+nu,mu)*u_dag(x-mu,nu)*u(x-mu,mu)
00225    *    U_3 = u_dag(x-mu,mu)*u_dag(x-mu-nu,nu)*u(x-mu-nu,mu)*u(x-nu,nu)
00226    *    U_4 = u_dag(x-nu,nu)*u(x-nu,mu)*u(x-nu+mu,nu)*u_dag(x,mu)
00227 
00228    *  and
00229 
00230    *         | sigF(1)   sigF(3)     0         0     |
00231    *  sigF = | sigF(5)  -sigF(1)     0         0     |
00232    *         |   0         0      -sigF(0)  -sigF(2) |
00233    *         |   0         0      -sigF(4)   sigF(0) |
00234    *  where
00235    *    sigF(i) is a color matrix
00236 
00237    *  sigF(0) = i*(ClovT*E_z + ClovR*B_z)
00238    *          = i*(ClovT*F(3,2) + ClovR*F(1,0))
00239    *  sigF(1) = i*(ClovT*E_z - ClovR*B_z)
00240    *          = i*(ClovT*F(3,2) - ClovR*F(1,0))
00241    *  sigF(2) = i*(E_+ + B_+)
00242    *  sigF(3) = i*(E_+ - B_+)
00243    *  sigF(4) = i*(E_- + B_-)
00244    *  sigF(5) = i*(E_- - B_-)
00245    *  i*E_+ = (i*ClovT*E_x - ClovT*E_y)
00246    *        = (i*ClovT*F(3,0) - ClovT*F(3,1))
00247    *  i*E_- = (i*ClovT*E_x + ClovT*E_y)
00248    *        = (i*ClovT*F(3,0) + ClovT*F(3,1))
00249    *  i*B_+ = (i*ClovR*B_x - ClovR*B_y)
00250    *        = (i*ClovR*F(2,1) + ClovR*F(2,0))
00251    *  i*B_- = (i*ClovR*B_x + ClovR*B_y)
00252    *        = (i*ClovR*F(2,1) - ClovR*F(2,0))
00253 
00254    *  NOTE: I am using  i*F  of the usual F defined by UKQCD, Heatlie et.al.
00255 
00256    *  NOTE: the above definitions assume that the time direction, t_dir,
00257    *        is 3. In general F(k,j) is multiplied with ClovT if either
00258    *        k=t_dir or j=t_dir, and with ClovR otherwise.
00259 
00260    *+++
00261    *  Here are some notes on the origin of this routine. NOTE, ClovCoeff or u0
00262    *  are not actually used in MAKCLOV.
00263    *
00264    *  The clover mass term is suppose to act on a vector like
00265    *
00266    *  chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
00267 
00268    *  Definitions used here (NOTE: no "i")
00269    *   sigma(mu,nu) = gamma(mu)*gamma(nu) - gamma(nu)*gamma(mu)
00270    *                = 2*gamma(mu)*gamma(nu)   for mu != nu
00271    *       
00272    *   chi = sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi   for mu < nu
00273    *       = (1/2) * sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi   for mu != nu
00274    *       = (1/4) * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)*psi
00275    *
00276    *
00277    * chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
00278    *     = psi - (ClovCoeff/u0^3) * kappa * chi
00279    *     == psi - kappa * chi
00280    *
00281    *  We have absorbed ClovCoeff/u0^3 into kappa. A u0 was previously absorbed into kappa
00282    *  for compatibility to ancient conventions. 
00283    *---
00284 
00285    * Arguments:
00286    *  \param f         field strength tensor F(cb,mu,nu)        (Read)
00287    *  \param diag_mass effective mass term                      (Read)
00288    */
00289 
00290   namespace SSEDCloverEnv { 
00291     struct MakeClovArgs  {
00292       LatticeColorMatrix& f0;
00293       LatticeColorMatrix& f1;
00294       LatticeColorMatrix& f2;
00295       LatticeColorMatrix& f3;
00296       LatticeColorMatrix& f4;
00297       LatticeColorMatrix& f5;
00298       PrimitiveClovDiag* tri_diag;
00299       PrimitiveClovOffDiag* tri_off_diag;
00300       const Real& diag_mass;
00301     };
00302 
00303     inline
00304     void makeClovSiteLoop(int lo, int hi, int myId, MakeClovArgs* a)
00305     {
00306       LatticeColorMatrix& f0=a->f0;
00307       LatticeColorMatrix& f1=a->f1;
00308       LatticeColorMatrix& f2=a->f2;
00309       LatticeColorMatrix& f3=a->f3;
00310       LatticeColorMatrix& f4=a->f4;
00311       LatticeColorMatrix& f5=a->f5;
00312       PrimitiveClovDiag* tri_diag=a->tri_diag;
00313       PrimitiveClovOffDiag* tri_off_diag=a->tri_off_diag;
00314       const Real& diag_mass = a->diag_mass;
00315 
00316       for(int site = lo; site < hi; ++site) {
00317 
00318         for(int jj = 0; jj < 2; jj++) {
00319 
00320           for(int ii = 0; ii < 2*Nc; ii++) {
00321 
00322             tri_diag[site][jj][ii] = diag_mass.elem().elem().elem();
00323           }
00324         }
00325         
00326         
00327         /* The appropriate clover coeffients are already included in the
00328            field strengths F(mu,nu)! */
00329         
00330         RComplex<REAL> E_minus;
00331         RComplex<REAL> B_minus;
00332         RComplex<REAL> ctmp_0;
00333         RComplex<REAL> ctmp_1;
00334         RScalar<REAL> rtmp_0;
00335         RScalar<REAL> rtmp_1;
00336         
00337         for(int i = 0; i < Nc; ++i) {
00338 
00339           /*# diag_L(i,0) = 1 - i*diag(E_z - B_z) */
00340           /*#             = 1 - i*diag(F(3,2) - F(1,0)) */
00341           ctmp_0 = f5.elem(site).elem().elem(i,i);
00342           ctmp_0 -= f0.elem(site).elem().elem(i,i);
00343           rtmp_0 = imag(ctmp_0);
00344           tri_diag[site][0][i] += rtmp_0;
00345           
00346           /*# diag_L(i+Nc,0) = 1 + i*diag(E_z - B_z) */
00347           /*#                = 1 + i*diag(F(3,2) - F(1,0)) */
00348           tri_diag[site][0][i+Nc] -= rtmp_0;
00349           
00350           /*# diag_L(i,1) = 1 + i*diag(E_z + B_z) */
00351           /*#             = 1 + i*diag(F(3,2) + F(1,0)) */
00352           ctmp_1 = f5.elem(site).elem().elem(i,i);
00353           ctmp_1 += f0.elem(site).elem().elem(i,i);
00354           rtmp_1 = imag(ctmp_1);
00355           tri_diag[site][1][i] -= rtmp_1;
00356           
00357           /*# diag_L(i+Nc,1) = 1 - i*diag(E_z + B_z) */
00358           /*#                = 1 - i*diag(F(3,2) + F(1,0)) */
00359           tri_diag[site][1][i+Nc] += rtmp_1;
00360         }
00361         
00362         /*# Construct lower triangular portion */
00363         /*# Block diagonal terms */
00364         for(int i = 1; i < Nc; ++i) {
00365 
00366           for(int j = 0; j < i; ++j) {
00367             
00368             int elem_ij  = i*(i-1)/2 + j;
00369             int elem_tmp = (i+Nc)*(i+Nc-1)/2 + j+Nc;
00370             
00371             /*# L(i,j,0) = -i*(E_z - B_z)[i,j] */
00372             /*#          = -i*(F(3,2) - F(1,0)) */
00373             ctmp_0 = f0.elem(site).elem().elem(i,j);
00374             ctmp_0 -= f5.elem(site).elem().elem(i,j);
00375             tri_off_diag[site][0][elem_ij] = timesI(ctmp_0);
00376             
00377             /*# L(i+Nc,j+Nc,0) = +i*(E_z - B_z)[i,j] */
00378             /*#                = +i*(F(3,2) - F(1,0)) */
00379             tri_off_diag[site][0][elem_tmp] = -tri_off_diag[site][0][elem_ij];
00380             
00381             /*# L(i,j,1) = i*(E_z + B_z)[i,j] */
00382             /*#          = i*(F(3,2) + F(1,0)) */
00383             ctmp_1 = f5.elem(site).elem().elem(i,j);
00384             ctmp_1 += f0.elem(site).elem().elem(i,j);
00385             tri_off_diag[site][1][elem_ij] = timesI(ctmp_1);
00386             
00387             /*# L(i+Nc,j+Nc,1) = -i*(E_z + B_z)[i,j] */
00388             /*#                = -i*(F(3,2) + F(1,0)) */
00389             tri_off_diag[site][1][elem_tmp] = -tri_off_diag[site][1][elem_ij];
00390           }
00391         }
00392       
00393         /*# Off-diagonal */
00394         for(int i = 0; i < Nc; ++i) {
00395 
00396           for(int j = 0; j < Nc; ++j) {
00397 
00398             // Flipped index
00399             // by swapping i <-> j. In the past i would run slow
00400             // and now j runs slow
00401             int elem_ij  = (i+Nc)*(i+Nc-1)/2 + j;
00402             
00403             /*# i*E_- = (i*E_x + E_y) */
00404             /*#       = (i*F(3,0) + F(3,1)) */
00405             E_minus = timesI(f2.elem(site).elem().elem(i,j));
00406             E_minus += f4.elem(site).elem().elem(i,j);
00407             
00408             /*# i*B_- = (i*B_x + B_y) */
00409             /*#       = (i*F(2,1) - F(2,0)) */
00410             B_minus = timesI(f3.elem(site).elem().elem(i,j));
00411             B_minus -= f1.elem(site).elem().elem(i,j);
00412             
00413             /*# L(i+Nc,j,0) = -i*(E_- - B_-)  */
00414             tri_off_diag[site][0][elem_ij] = B_minus - E_minus;
00415             
00416             /*# L(i+Nc,j,1) = +i*(E_- + B_-)  */
00417             tri_off_diag[site][1][elem_ij] = E_minus + B_minus;
00418           }
00419         }
00420       } // End Site Loop
00421     } // End Function
00422   }; // End Namespace
00423 
00424 
00425   void SSEDCloverTerm::makeClov(const multi1d<LatticeColorMatrix>& f, const Real& diag_mass)
00426   {
00427     START_CODE();
00428 
00429     LatticeColorMatrix f0;
00430     LatticeColorMatrix f1;
00431     LatticeColorMatrix f2;
00432     LatticeColorMatrix f3;
00433     LatticeColorMatrix f4;
00434     LatticeColorMatrix f5;
00435     
00436     const int nodeSites = QDP::Layout::sitesOnNode();
00437 
00438     START_CODE();
00439   
00440     if ( Nd != 4 )
00441       QDP_error_exit("expecting Nd == 4", Nd);
00442   
00443     if ( Ns != 4 )
00444       QDP_error_exit("expecting Ns == 4", Ns);
00445   
00446     f0 = f[0] * getCloverCoeff(0,1);
00447     f1 = f[1] * getCloverCoeff(0,2);
00448     f2 = f[2] * getCloverCoeff(0,3);
00449     f3 = f[3] * getCloverCoeff(1,2);
00450     f4 = f[4] * getCloverCoeff(1,3);
00451     f5 = f[5] * getCloverCoeff(2,3);    
00452 
00453     SSEDCloverEnv::MakeClovArgs a={f0,f1,f2,f3,f4,f5,tri_diag,tri_off_diag, diag_mass};
00454     dispatch_to_threads(nodeSites, a, SSEDCloverEnv::makeClovSiteLoop);
00455 
00456     END_CODE();
00457   }
00458   
00459 
00460   //! Invert
00461   /*!
00462    * Computes the inverse of the term on cb using Cholesky
00463    */
00464   void SSEDCloverTerm::choles(int cb)
00465   {
00466     START_CODE();
00467 
00468     // When you are doing the cholesky - also fill out the trace_log_diag piece)
00469     //chlclovms(tr_log_diag_, cb);
00470     ldagdlinv(tr_log_diag_,cb);
00471     
00472     END_CODE();
00473   }
00474 
00475 
00476   //! Invert
00477   /*!
00478    * Computes the inverse of the term on cb using Cholesky
00479    *
00480    * \return logarithm of the determinant  
00481    */
00482   Double SSEDCloverTerm::cholesDet(int cb) const
00483   {
00484     START_CODE();
00485 
00486     if( choles_done[cb] == false ) 
00487     {
00488       QDPIO::cout << "Error: YOu have not done the Cholesky.on this operator on this subset" << endl;
00489       QDPIO::cout << "You sure you shouldn't be asking invclov?" << endl;
00490       QDP_abort(1);
00491     }
00492 LatticeReal ff=tr_log_diag_;
00493 
00494     if( param.sub_zero_usedP ) { 
00495         QDPIO::cout << "Subtracting "<< param.sub_zero<<endl;
00496         LatticeReal tmp;
00497         tmp[rb[cb]] = param.sub_zero;
00498         ff[rb[cb]] -= tmp;
00499     }
00500     END_CODE();
00501 
00502     return sum(ff, rb[cb]);
00503   }    
00504 
00505 
00506   namespace SSEDCloverEnv { 
00507     struct LDagDLInvArgs { 
00508       LatticeReal& tr_log_diag;
00509       PrimitiveClovDiag* tri_diag;
00510       PrimitiveClovOffDiag* tri_off_diag;
00511       int cb;
00512     };
00513 
00514     inline 
00515     void lDagDLInvSiteLoop(int lo, int hi, int myId, LDagDLInvArgs *a)
00516     {
00517       LatticeReal& tr_log_diag = a->tr_log_diag;
00518       PrimitiveClovDiag* tri_diag = a->tri_diag;
00519       PrimitiveClovOffDiag* tri_off_diag = a->tri_off_diag;
00520       int cb = a->cb;
00521 
00522       RScalar<REAL> zip=0;
00523       
00524       int N = 2*Nc;
00525 
00526       // Loop through the sites.
00527       for(int ssite=lo; ssite < hi; ++ssite)  {
00528 
00529         int site = rb[cb].siteTable()[ssite];
00530         
00531         int site_neg_logdet=0;
00532         // Loop through the blocks on the site.
00533         for(int block=0; block < 2; block++) { 
00534           
00535           // Triangular storage 
00536           // Big arrays to get good alignment...
00537           
00538           RScalar<REAL> inv_d[8] QDP_ALIGN16;
00539           RComplex<REAL> inv_offd[16] QDP_ALIGN16;
00540           RComplex<REAL> v[8] QDP_ALIGN16;
00541           RScalar<REAL>  diag_g[8] QDP_ALIGN16;
00542           // Algorithm 4.1.2 LDL^\dagger Decomposition
00543           // From Golub, van Loan 3rd ed, page 139
00544           for(int i=0; i < N; i++) { 
00545             inv_d[i] = tri_diag[site][block][i];
00546           }
00547           for(int i=0; i < 15; i++) { 
00548             inv_offd[i]  =tri_off_diag[site][block][i];
00549           }
00550           
00551           for(int j=0; j < N; ++j) { 
00552             
00553             // Compute v(0:j-1)
00554             //
00555             // for i=0:j-2
00556             //   v(i) = A(j,i) A(i,i)
00557             // end
00558             
00559             
00560             for(int i=0; i < j; i++) { 
00561               int elem_ji = j*(j-1)/2 + i;
00562               
00563               RComplex<REAL> A_ii = cmplx( inv_d[i], zip );
00564               v[i] = A_ii*adj(inv_offd[elem_ji]);
00565             }
00566             
00567             // v(j) = A(j,j) - A(j, 0:j-2) v(0:j-2)
00568             //                 ^ This is done with a loop over k ie:
00569             //
00570             // v(j) = A(j,j) - sum_k A*(j,k) v(k)     k=0...j-2
00571             //
00572             //      = A(j,j) - sum_k A*(j,k) A(j,k) A(k,k)
00573             //      = A(j,j) - sum_k | A(j,k) |^2 A(k,k)
00574             
00575             v[j] = cmplx(inv_d[j],zip);
00576             
00577             for(int k=0; k < j; k++) { 
00578               int elem_jk = j*(j-1)/2 + k;
00579               v[j] -= inv_offd[elem_jk]*v[k];
00580             }
00581             
00582             
00583             // At this point in time v[j] has to be real, since
00584             // A(j,j) is from diag ie real and all | A(j,k) |^2 is real
00585             // as is A(k,k)
00586             
00587             // A(j,j) is the diagonal element - so store it.
00588             inv_d[j] = real( v[j] );
00589             
00590             // Last line of algorithm:
00591             // A( j+1 : n, j) = ( A(j+1:n, j) - A(j+1:n, 1:j-1)v(1:k-1) ) / v(j)
00592             //
00593             // use k as first colon notation and l as second so
00594             // 
00595             // for k=j+1 < n-1
00596             //      A(k,j) = A(k,j) ;
00597             //      for l=0 < j-1
00598             //         A(k,j) -= A(k, l) v(l)
00599             //      end
00600             //      A(k,j) /= v(j);
00601             //
00602             for(int k=j+1; k < N; k++) { 
00603               int elem_kj = k*(k-1)/2 + j;
00604               for(int l=0; l < j; l++) { 
00605                 int elem_kl = k*(k-1)/2 + l;
00606                 inv_offd[elem_kj] -= inv_offd[elem_kl] * v[l];
00607               }
00608               inv_offd[elem_kj] /= v[j];
00609             }
00610           }
00611           
00612           // Now fix up the inverse
00613           RScalar<REAL> one;
00614           one.elem() = (REAL)1;
00615           
00616           for(int i=0; i < N; i++) { 
00617             diag_g[i] = one/inv_d[i];
00618             
00619             // Compute the trace log
00620             // NB we are always doing trace log | A | 
00621             // (because we are always working with actually A^\dagger A
00622             //  even in one flavour case where we square root)
00623             tr_log_diag.elem(site).elem().elem().elem() += log(fabs(inv_d[i].elem()));
00624             // However, it is worth counting just the no of negative logdets
00625             // on site
00626             if( inv_d[i].elem() < 0 ) { 
00627               site_neg_logdet++;
00628             }
00629           }
00630           // Now we need to invert the L D L^\dagger 
00631           // We can do this by solving:
00632           //
00633           //  L D L^\dagger M^{-1} = 1   
00634           //
00635           // This can be done by solving L D X = 1  (X = L^\dagger M^{-1})
00636           //
00637           // Then solving L^\dagger M^{-1} = X
00638           //
00639           // LD is lower diagonal and so X will also be lower diagonal.
00640           // LD X = 1 can be solved by forward substitution.
00641           //
00642           // Likewise L^\dagger is strictly upper triagonal and so
00643           // L^\dagger M^{-1} = X can be solved by forward substitution.
00644           RComplex<REAL> sum;
00645           for(int k = 0; k < N; ++k) {
00646 
00647             for(int i = 0; i < k; ++i) {
00648               zero_rep(v[i]);
00649             }
00650             
00651             /*# Forward substitution */
00652             
00653             // The first element is the inverse of the diagonal
00654             v[k] = cmplx(diag_g[k],zip);
00655             
00656             for(int i = k+1; i < N; ++i) {
00657               zero_rep(v[i]);
00658               
00659               for(int j = k; j < i; ++j) {
00660                 int elem_ij = i*(i-1)/2+j;      
00661                 
00662                 // subtract l_ij*d_j*x_{kj}
00663                 v[i] -= inv_offd[elem_ij] *inv_d[j]*v[j];
00664                 
00665               }
00666         
00667               // scale out by 1/d_i
00668               v[i] *= diag_g[i];
00669             }
00670             
00671             /*# Backward substitution */
00672             // V[N-1] remains unchanged
00673             // Start from V[N-2]
00674             
00675             for(int i = N-2; (int)i >= (int)k; --i) {
00676               for(int j = i+1; j < N; ++j) {
00677                 int elem_ji = j*(j-1)/2 + i;
00678                 // Subtract terms of typ (l_ji)*x_kj
00679                 v[i] -= adj(inv_offd[elem_ji]) * v[j];
00680               }
00681             }
00682             
00683             /*# Overwrite column k of invcl.offd */
00684             inv_d[k] = real(v[k]);
00685             for(int i = k+1; i < N; ++i) {
00686               
00687               int elem_ik = i*(i-1)/2+k;
00688               inv_offd[elem_ik] = v[i];
00689             }
00690           }
00691           
00692           
00693           // Overwrite original data
00694           for(int i=0; i < N; i++) { 
00695             tri_diag[site][block][i] = inv_d[i];
00696           }
00697           for(int i=0; i < 15; i++) { 
00698             tri_off_diag[site][block][i] = inv_offd[i];
00699           }
00700         }
00701         
00702         if( site_neg_logdet != 0 ) { 
00703           // Report if site had odd number of negative terms. (-ve def)
00704           std::cout << "WARNING: Odd number of negative terms in Clover DET (" 
00705                     << site_neg_logdet<< ") at site: " << site << endl;
00706         }
00707       } // End Site Loop
00708     } // End Function
00709   } // End Namespace
00710 
00711    /*! An LDL^\dag decomposition and inversion? */
00712   void SSEDCloverTerm::ldagdlinv(LatticeReal& tr_log_diag, int cb)
00713   {
00714     START_CODE();
00715 
00716     if ( 2*Nc < 3 )
00717       QDP_error_exit("Matrix is too small", Nc, Ns);
00718 
00719     // Zero trace log
00720     tr_log_diag = zero;
00721 
00722     SSEDCloverEnv::LDagDLInvArgs a = { tr_log_diag, tri_diag, tri_off_diag, cb };
00723     dispatch_to_threads(rb[cb].numSiteTable(), a, SSEDCloverEnv::lDagDLInvSiteLoop);
00724 
00725     
00726     // This comes from the days when we used to do Cholesky
00727     choles_done[cb] = true;
00728     END_CODE();
00729   }
00730 
00731 
00732   /**
00733    * Apply a dslash
00734    *
00735    * Performs the operation
00736    *
00737    *  chi <-   (L + D + L^dag) . psi
00738    *
00739    * where
00740    *   L       is a lower triangular matrix
00741    *   D       is the real diagonal. (stored together in type TRIANG)
00742    *
00743    * Arguments:
00744    * \param chi     result                                      (Write)
00745    * \param psi     source                                      (Read)
00746    * \param isign   D'^dag or D'  ( MINUS | PLUS ) resp.        (Read)
00747    * \param cb      Checkerboard of OUTPUT vector               (Read) 
00748    */
00749   extern void ssed_clover_apply(REAL64* diag, REAL64* offd, REAL64* psiptr, REAL64* chiptr, int n_sites);
00750 
00751   namespace SSEDCloverEnv{
00752     struct ApplyArgs { 
00753       LatticeFermion& chi;
00754       const LatticeFermion& psi;
00755       PrimitiveClovOffDiag* tri_off;
00756       PrimitiveClovDiag* tri_diag;
00757       int cb;
00758     };
00759 
00760     // Dispatch function for threading
00761     inline
00762     void orderedApplySiteLoop(int lo, int hi, int myID, 
00763                               ApplyArgs* a)
00764     {
00765       int n_4vec=hi-lo;
00766       int start=rb[ a->cb ].start()+lo;
00767       
00768       REAL64* chiptr = (REAL64 *)&( a->chi.elem(start).elem(0).elem(0).real());
00769       REAL64* psiptr = (REAL64 *)&( a->psi.elem(start).elem(0).elem(0).real());
00770       REAL64* offd = (REAL64 *)&(a->tri_off[start][0][0].real());
00771       REAL64* diag = (REAL64 *)&(a->tri_diag[start][0][0].elem());
00772       ssed_clover_apply(diag, offd, psiptr, chiptr, n_4vec);
00773     }
00774 
00775     inline 
00776     void unorderedApplySiteLoop(int lo, int hi, int myId, ApplyArgs* a)
00777     {
00778       int cb = a->cb;
00779       const multi1d<int>& tab = rb[cb].siteTable();
00780 
00781 
00782       for(int ssite=lo; ssite < hi; ++ssite) {
00783         
00784         int site = tab[ssite];
00785         unsigned long n_sites=1;
00786                 
00787         REAL64* chiptr = (REAL64 *)&( a->chi.elem(site).elem(0).elem(0).real());
00788         REAL64* psiptr = (REAL64 *)&(a->psi.elem(site).elem(0).elem(0).real());
00789         REAL64* offd = (REAL64 *)&(a->tri_off[site][0][0].real());
00790         REAL64* diag = (REAL64 *)&(a->tri_diag[site][0][0].elem());
00791         ssed_clover_apply(diag, offd, psiptr, chiptr, n_sites);
00792         
00793       }
00794     }
00795   }
00796 
00797   void SSEDCloverTerm::apply(LatticeFermion& chi, const LatticeFermion& psi, 
00798                             enum PlusMinus isign, int cb) const
00799   {
00800     START_CODE();
00801 
00802     if ( Ns != 4 )
00803       QDP_error_exit("code requires Ns == 4", Ns);
00804 
00805     int n = 2*Nc;
00806 
00807     SSEDCloverEnv::ApplyArgs a = {chi, psi, tri_off_diag, tri_diag,cb};
00808     
00809     if( rb[cb].hasOrderedRep() ) {
00810 
00811       dispatch_to_threads(rb[cb].siteTable().size(), a, 
00812                           SSEDCloverEnv::orderedApplySiteLoop);
00813 
00814       
00815     }
00816     else {
00817       dispatch_to_threads(rb[cb].siteTable().size(), a, 
00818                           SSEDCloverEnv::unorderedApplySiteLoop);
00819     }
00820 
00821     getFermBC().modifyF(chi, QDP::rb[cb]);
00822 
00823     END_CODE();
00824   }
00825 
00826   /**
00827    * Apply a dslash
00828    *
00829    * Performs the operation
00830    *
00831    *  chi <-   (L + D + L^dag) . psi
00832    *
00833    * where
00834    *   L       is a lower triangular matrix
00835    *   D       is the real diagonal. (stored together in type TRIANG)
00836    *
00837    * Arguments:
00838    * \param chi     result                                      (Write)
00839    * \param psi     source                                      (Read)
00840    * \param isign   D'^dag or D'  ( MINUS | PLUS ) resp.        (Read)
00841    * \param cb      Checkerboard of OUTPUT vector               (Read) 
00842    */
00843   void SSEDCloverTerm::applySite(LatticeFermion& chi, const LatticeFermion& psi, 
00844                             enum PlusMinus isign, int site) const
00845   {
00846     START_CODE();
00847 
00848     if ( Ns != 4 )
00849       QDP_error_exit("code requires Ns == 4", Ns);
00850 
00851     int n = 2*Nc;
00852 
00853     RComplex<REAL>* cchi = (RComplex<REAL>*)&(chi.elem(site).elem(0).elem(0));
00854     const RComplex<REAL>* ppsi = (const RComplex<REAL>*)&(psi.elem(site).elem(0).elem(0));
00855 
00856 
00857     cchi[ 0] = tri_diag[site][0][ 0]  * ppsi[ 0]
00858       +   conj(tri_off_diag[site][0][ 0]) * ppsi[ 1]
00859       +   conj(tri_off_diag[site][0][ 1]) * ppsi[ 2]
00860       +   conj(tri_off_diag[site][0][ 3]) * ppsi[ 3]
00861       +   conj(tri_off_diag[site][0][ 6]) * ppsi[ 4]
00862       +   conj(tri_off_diag[site][0][10]) * ppsi[ 5];
00863     
00864     cchi[ 1] = tri_diag[site][0][ 1]  * ppsi[ 1]
00865       +        tri_off_diag[site][0][ 0]  * ppsi[ 0]
00866       +   conj(tri_off_diag[site][0][ 2]) * ppsi[ 2]
00867       +   conj(tri_off_diag[site][0][ 4]) * ppsi[ 3]
00868       +   conj(tri_off_diag[site][0][ 7]) * ppsi[ 4]
00869       +   conj(tri_off_diag[site][0][11]) * ppsi[ 5];
00870     
00871     cchi[ 2] = tri_diag[site][0][ 2]  * ppsi[ 2]
00872       +        tri_off_diag[site][0][ 1]  * ppsi[ 0]
00873       +        tri_off_diag[site][0][ 2]  * ppsi[ 1]
00874       +   conj(tri_off_diag[site][0][ 5]) * ppsi[ 3]
00875       +   conj(tri_off_diag[site][0][ 8]) * ppsi[ 4]
00876       +   conj(tri_off_diag[site][0][12]) * ppsi[ 5];
00877     
00878     cchi[ 3] = tri_diag[site][0][ 3]  * ppsi[ 3]
00879       +        tri_off_diag[site][0][ 3]  * ppsi[ 0]
00880       +        tri_off_diag[site][0][ 4]  * ppsi[ 1]
00881       +        tri_off_diag[site][0][ 5]  * ppsi[ 2]
00882       +   conj(tri_off_diag[site][0][ 9]) * ppsi[ 4]
00883       +   conj(tri_off_diag[site][0][13]) * ppsi[ 5];
00884     
00885     cchi[ 4] = tri_diag[site][0][ 4]  * ppsi[ 4]
00886       +        tri_off_diag[site][0][ 6]  * ppsi[ 0]
00887       +        tri_off_diag[site][0][ 7]  * ppsi[ 1]
00888       +        tri_off_diag[site][0][ 8]  * ppsi[ 2]
00889       +        tri_off_diag[site][0][ 9]  * ppsi[ 3]
00890       +   conj(tri_off_diag[site][0][14]) * ppsi[ 5];
00891     
00892     cchi[ 5] = tri_diag[site][0][ 5]  * ppsi[ 5]
00893       +        tri_off_diag[site][0][10]  * ppsi[ 0]
00894       +        tri_off_diag[site][0][11]  * ppsi[ 1]
00895       +        tri_off_diag[site][0][12]  * ppsi[ 2]
00896       +        tri_off_diag[site][0][13]  * ppsi[ 3]
00897       +        tri_off_diag[site][0][14]  * ppsi[ 4];
00898     
00899     cchi[ 6] = tri_diag[site][1][ 0]  * ppsi[ 6]
00900       +   conj(tri_off_diag[site][1][ 0]) * ppsi[ 7]
00901       +   conj(tri_off_diag[site][1][ 1]) * ppsi[ 8]
00902       +   conj(tri_off_diag[site][1][ 3]) * ppsi[ 9]
00903       +   conj(tri_off_diag[site][1][ 6]) * ppsi[10]
00904       +   conj(tri_off_diag[site][1][10]) * ppsi[11];
00905     
00906     cchi[ 7] = tri_diag[site][1][ 1]  * ppsi[ 7]
00907       +        tri_off_diag[site][1][ 0]  * ppsi[ 6]
00908       +   conj(tri_off_diag[site][1][ 2]) * ppsi[ 8]
00909       +   conj(tri_off_diag[site][1][ 4]) * ppsi[ 9]
00910       +   conj(tri_off_diag[site][1][ 7]) * ppsi[10]
00911       +   conj(tri_off_diag[site][1][11]) * ppsi[11];
00912     
00913     cchi[ 8] = tri_diag[site][1][ 2]  * ppsi[ 8]
00914       +        tri_off_diag[site][1][ 1]  * ppsi[ 6]
00915       +        tri_off_diag[site][1][ 2]  * ppsi[ 7]
00916       +   conj(tri_off_diag[site][1][ 5]) * ppsi[ 9]
00917       +   conj(tri_off_diag[site][1][ 8]) * ppsi[10]
00918       +   conj(tri_off_diag[site][1][12]) * ppsi[11];
00919     
00920     cchi[ 9] = tri_diag[site][1][ 3]  * ppsi[ 9]
00921       +        tri_off_diag[site][1][ 3]  * ppsi[ 6]
00922       +        tri_off_diag[site][1][ 4]  * ppsi[ 7]
00923       +        tri_off_diag[site][1][ 5]  * ppsi[ 8]
00924       +   conj(tri_off_diag[site][1][ 9]) * ppsi[10]
00925       +   conj(tri_off_diag[site][1][13]) * ppsi[11];
00926     
00927     cchi[10] = tri_diag[site][1][ 4]  * ppsi[10]
00928       +        tri_off_diag[site][1][ 6]  * ppsi[ 6]
00929       +        tri_off_diag[site][1][ 7]  * ppsi[ 7]
00930       +        tri_off_diag[site][1][ 8]  * ppsi[ 8]
00931       +        tri_off_diag[site][1][ 9]  * ppsi[ 9]
00932       +   conj(tri_off_diag[site][1][14]) * ppsi[11];
00933     
00934     cchi[11] = tri_diag[site][1][ 5]  * ppsi[11]
00935       +        tri_off_diag[site][1][10]  * ppsi[ 6]
00936       +        tri_off_diag[site][1][11]  * ppsi[ 7]
00937       +        tri_off_diag[site][1][12]  * ppsi[ 8]
00938       +        tri_off_diag[site][1][13]  * ppsi[ 9]
00939       +        tri_off_diag[site][1][14]  * ppsi[10];
00940 
00941 
00942     END_CODE();
00943   }
00944 
00945 
00946 
00947 
00948   //! TRIACNTR 
00949   /*! 
00950    * \ingroup linop
00951    *
00952    *  Calculates
00953    *     Tr_D ( Gamma_mat L )
00954    *
00955    * This routine is specific to Wilson fermions!
00956    * 
00957    *  the trace over the Dirac indices for one of the 16 Gamma matrices
00958    *  and a hermitian color x spin matrix A, stored as a block diagonal
00959    *  complex lower triangular matrix L and a real diagonal diag_L.
00960 
00961    *  Here 0 <= mat <= 15 and
00962    *  if mat = mat_1 + mat_2 * 2 + mat_3 * 4 + mat_4 * 8
00963    *
00964    *  Gamma(mat) = gamma(1)^(mat_1) * gamma(2)^(mat_2) * gamma(3)^(mat_3)
00965    *             * gamma(4)^(mat_4)
00966    *
00967    *  Further, in basis for the Gamma matrices used, A is of the form
00968    *
00969    *      | A_0 |  0  |
00970    *  A = | --------- |
00971    *      |  0  | A_1 |
00972    *
00973    *
00974    * Arguments:
00975    *
00976    *  \param B         the resulting SU(N) color matrix   (Write) 
00977    *  \param clov      clover term                        (Read) 
00978    *  \param mat       label of the Gamma matrix          (Read)
00979    */
00980 
00981   namespace SSEDCloverEnv {
00982     struct TriaCntrArgs { 
00983       LatticeColorMatrix& B;
00984       PrimitiveClovDiag* tri_diag;
00985       PrimitiveClovOffDiag* tri_off;
00986       int mat;
00987       int cb;
00988     };
00989   
00990 
00991     inline 
00992     void triaCntrSiteLoop(int lo, int hi, int myId, TriaCntrArgs* a) 
00993     {
00994       PrimitiveClovDiag* tri_diag = a->tri_diag;
00995       PrimitiveClovOffDiag* tri_off_diag = a->tri_off;
00996       LatticeColorMatrix& B = a->B;
00997       int mat = a->mat;
00998       int cb = a->cb;
00999 
01000 
01001       for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) {
01002       
01003         int site = rb[cb].siteTable()[ssite];
01004         
01005         switch( mat ) {
01006 
01007         case 0:
01008           /*# gamma(   0)   1  0  0  0            # ( 0000 )  --> 0 */
01009           /*#               0  1  0  0 */
01010           /*#               0  0  1  0 */
01011           /*#               0  0  0  1 */
01012           /*# From diagonal part */
01013           {
01014             RComplex<REAL> lctmp0;
01015             RScalar<REAL> lr_zero0;
01016             RScalar<REAL> lrtmp0;
01017             
01018             lr_zero0 = 0;
01019             
01020             for(int i0 = 0; i0 < Nc; ++i0) {
01021               lrtmp0 = tri_diag[site][0][i0];
01022               lrtmp0 += tri_diag[site][0][i0+Nc];
01023               lrtmp0 += tri_diag[site][1][i0];
01024               lrtmp0 += tri_diag[site][1][i0+Nc];
01025               B.elem(site).elem().elem(i0,i0) = cmplx(lrtmp0,lr_zero0);
01026             }
01027             
01028             /*# From lower triangular portion */
01029             int elem_ij0 = 0;
01030             for(int i0 = 1; i0 < Nc; ++i0) {
01031               
01032               int elem_ijb0 = (i0+Nc)*(i0+Nc-1)/2 + Nc;
01033               
01034               for(int j0 = 0; j0 < i0; ++j0) {
01035                 
01036                 lctmp0 = tri_off_diag[site][0][elem_ij0];
01037                 lctmp0 += tri_off_diag[site][0][elem_ijb0];
01038                 lctmp0 += tri_off_diag[site][1][elem_ij0];
01039                 lctmp0 += tri_off_diag[site][1][elem_ijb0];
01040                 
01041                 B.elem(site).elem().elem(j0,i0) = lctmp0;
01042                 B.elem(site).elem().elem(i0,j0) = adj(lctmp0);
01043                 
01044                 
01045                 elem_ij0++;
01046                 elem_ijb0++;
01047               }
01048             }
01049           }
01050           break;
01051           
01052         case 3:
01053           /*# gamma(  12)  -i  0  0  0            # ( 0011 )  --> 3 */
01054           /*#               0  i  0  0 */
01055           /*#               0  0 -i  0 */
01056           /*#               0  0  0  i */
01057           /*# From diagonal part */
01058           {
01059             
01060             RComplex<REAL> lctmp3;
01061             RScalar<REAL> lr_zero3;
01062             RScalar<REAL> lrtmp3;
01063             
01064             lr_zero3 = 0;
01065             
01066             for(int i3 = 0; i3 < Nc; ++i3) {
01067               
01068               lrtmp3 = tri_diag[site][0][i3+Nc];
01069               lrtmp3 -= tri_diag[site][0][i3];
01070               lrtmp3 -= tri_diag[site][1][i3];
01071               lrtmp3 += tri_diag[site][1][i3+Nc];
01072               B.elem(site).elem().elem(i3,i3) = cmplx(lr_zero3,lrtmp3);
01073             }
01074             
01075             /*# From lower triangular portion */
01076             int elem_ij3 = 0;
01077             for(int i3 = 1; i3 < Nc; ++i3) {
01078               
01079               int elem_ijb3 = (i3+Nc)*(i3+Nc-1)/2 + Nc;
01080               
01081               for(int j3 = 0; j3 < i3; ++j3) {
01082                 
01083                 lctmp3 = tri_off_diag[site][0][elem_ijb3];
01084                 lctmp3 -= tri_off_diag[site][0][elem_ij3];
01085                 lctmp3 -= tri_off_diag[site][1][elem_ij3];
01086                 lctmp3 += tri_off_diag[site][1][elem_ijb3];
01087                 
01088                 B.elem(site).elem().elem(j3,i3) = timesI(adj(lctmp3));
01089                 B.elem(site).elem().elem(i3,j3) = timesI(lctmp3);
01090                 
01091                 elem_ij3++;
01092                 elem_ijb3++;
01093               }
01094             }
01095           }
01096           break;
01097           
01098         case 5:
01099           /*# gamma(  13)   0 -1  0  0            # ( 0101 )  --> 5 */
01100           /*#               1  0  0  0 */
01101           /*#               0  0  0 -1 */
01102           /*#               0  0  1  0 */
01103           {
01104             
01105             
01106             RComplex<REAL> lctmp5;
01107             RScalar<REAL> lrtmp5;
01108             
01109             for(int i5 = 0; i5 < Nc; ++i5) {
01110               
01111               int elem_ij5 = (i5+Nc)*(i5+Nc-1)/2;
01112               
01113               for(int j5 = 0; j5 < Nc; ++j5) {
01114                 
01115                 int elem_ji5 = (j5+Nc)*(j5+Nc-1)/2 + i5;
01116                 
01117                 
01118                 lctmp5 = adj(tri_off_diag[site][0][elem_ji5]);
01119                 lctmp5 -= tri_off_diag[site][0][elem_ij5];
01120                 lctmp5 += adj(tri_off_diag[site][1][elem_ji5]);
01121                 lctmp5 -= tri_off_diag[site][1][elem_ij5];
01122                 
01123                 
01124                 B.elem(site).elem().elem(i5,j5) = lctmp5;
01125                 
01126                 elem_ij5++;
01127               }
01128             }
01129           }
01130           break;
01131           
01132         case 6:
01133           /*# gamma(  23)   0 -i  0  0            # ( 0110 )  --> 6 */
01134           /*#              -i  0  0  0 */
01135           /*#               0  0  0 -i */
01136           /*#               0  0 -i  0 */
01137           {
01138             
01139             RComplex<REAL> lctmp6;
01140             RScalar<REAL> lrtmp6;
01141             
01142             for(int i6 = 0; i6 < Nc; ++i6) {
01143               
01144               int elem_ij6 = (i6+Nc)*(i6+Nc-1)/2;
01145               
01146               for(int j6 = 0; j6 < Nc; ++j6) {
01147                 
01148                 int elem_ji6 = (j6+Nc)*(j6+Nc-1)/2 + i6;
01149                 
01150                 lctmp6 = adj(tri_off_diag[site][0][elem_ji6]);
01151                 lctmp6 += tri_off_diag[site][0][elem_ij6];
01152                 lctmp6 += adj(tri_off_diag[site][1][elem_ji6]);
01153                 lctmp6 += tri_off_diag[site][1][elem_ij6];
01154                 
01155                 B.elem(site).elem().elem(i6,j6) = timesMinusI(lctmp6);
01156                 
01157                 elem_ij6++;
01158               }
01159             }
01160           }
01161           break;
01162           
01163         case 9:
01164           /*# gamma(  14)   0  i  0  0            # ( 1001 )  --> 9 */
01165           /*#               i  0  0  0 */
01166           /*#               0  0  0 -i */
01167           /*#               0  0 -i  0 */
01168           {
01169             RComplex<REAL> lctmp9;
01170             RScalar<REAL> lrtmp9;
01171             
01172             for(int i9 = 0; i9 < Nc; ++i9) {
01173               
01174               int elem_ij9 = (i9+Nc)*(i9+Nc-1)/2;
01175               
01176               for(int j9 = 0; j9 < Nc; ++j9) {
01177                 
01178                 int elem_ji9 = (j9+Nc)*(j9+Nc-1)/2 + i9;
01179                 
01180                 lctmp9 = adj(tri_off_diag[site][0][elem_ji9]);
01181                 lctmp9 += tri_off_diag[site][0][elem_ij9];
01182                 lctmp9 -= adj(tri_off_diag[site][1][elem_ji9]);
01183                 lctmp9 -= tri_off_diag[site][1][elem_ij9];
01184                 
01185                 B.elem(site).elem().elem(i9,j9) = timesI(lctmp9);
01186                 
01187                 elem_ij9++;
01188               }
01189             }
01190           }
01191           break;
01192           
01193         case 10:
01194         /*# gamma(  24)   0 -1  0  0            # ( 1010 )  --> 10 */
01195         /*#               1  0  0  0 */
01196         /*#               0  0  0  1 */
01197         /*#               0  0 -1  0 */
01198           {
01199             
01200             RComplex<REAL> lctmp10;
01201             RScalar<REAL> lrtmp10;
01202             
01203             for(int i10 = 0; i10 < Nc; ++i10) {
01204               
01205               int elem_ij10 = (i10+Nc)*(i10+Nc-1)/2; 
01206                 
01207               for(int j10 = 0; j10 < Nc; ++j10) {
01208                 
01209                 int elem_ji10 = (j10+Nc)*(j10+Nc-1)/2 + i10;
01210                 
01211                 lctmp10 = adj(tri_off_diag[site][0][elem_ji10]);
01212                 lctmp10 -= tri_off_diag[site][0][elem_ij10];
01213                 lctmp10 -= adj(tri_off_diag[site][1][elem_ji10]);
01214                 lctmp10 += tri_off_diag[site][1][elem_ij10];
01215                 
01216                 B.elem(site).elem().elem(i10,j10) = lctmp10;
01217                 
01218                 elem_ij10++;
01219               }
01220             }
01221           }
01222           
01223           break;
01224           
01225         case 12:
01226           /*# gamma(  34)   i  0  0  0            # ( 1100 )  --> 12 */
01227           /*#               0 -i  0  0 */
01228           /*#               0  0 -i  0 */
01229           /*#               0  0  0  i */
01230           /*# From diagonal part */
01231           {
01232           
01233           
01234             RComplex<REAL> lctmp12;
01235             RScalar<REAL> lr_zero12;
01236             RScalar<REAL> lrtmp12;
01237             
01238             lr_zero12 = 0;
01239             
01240             for(int i12 = 0; i12 < Nc; ++i12) {
01241               
01242               lrtmp12 = tri_diag[site][0][i12];
01243               lrtmp12 -= tri_diag[site][0][i12+Nc];
01244               lrtmp12 -= tri_diag[site][1][i12];
01245               lrtmp12 += tri_diag[site][1][i12+Nc];
01246               B.elem(site).elem().elem(i12,i12) = cmplx(lr_zero12,lrtmp12);
01247             }
01248             
01249             /*# From lower triangular portion */
01250             int elem_ij12 = 0;
01251             for(int i12 = 1; i12 < Nc; ++i12) {
01252               
01253               int elem_ijb12 = (i12+Nc)*(i12+Nc-1)/2 + Nc;
01254               
01255               for(int j12 = 0; j12 < i12; ++j12) {
01256                 
01257                 lctmp12 = tri_off_diag[site][0][elem_ij12];
01258                 lctmp12 -= tri_off_diag[site][0][elem_ijb12];
01259                 lctmp12 -= tri_off_diag[site][1][elem_ij12];
01260                 lctmp12 += tri_off_diag[site][1][elem_ijb12];
01261                 
01262                 B.elem(site).elem().elem(i12,j12) = timesI(lctmp12);
01263                 B.elem(site).elem().elem(j12,i12) = timesI(adj(lctmp12));
01264                 
01265                 elem_ij12++;
01266                 elem_ijb12++;
01267               }
01268             }
01269           }
01270           break;
01271         
01272         default:
01273           {
01274             B = zero;
01275             QDPIO::cout << "BAD DEFAULT CASE HIT" << endl;
01276           }
01277         } // End Switch
01278         
01279       } // End Site Loop.
01280     } // End Function
01281   } // End Namespace 
01282 
01283   void SSEDCloverTerm::triacntr(LatticeColorMatrix& B, int mat, int cb) const
01284   {
01285     START_CODE();
01286 
01287     B = zero;
01288 
01289     if ( mat < 0  ||  mat > 15 )
01290     {
01291       QDPIO::cerr << __func__ << ": Gamma out of range: mat = " << mat << endl;
01292       QDP_abort(1);
01293     }
01294     
01295     SSEDCloverEnv::TriaCntrArgs a = { B, tri_diag, tri_off_diag, mat, cb};
01296     dispatch_to_threads(rb[cb].numSiteTable(), a, 
01297                         SSEDCloverEnv::triaCntrSiteLoop);
01298   
01299 
01300     END_CODE();
01301   }
01302 
01303   //! Returns the appropriate clover coefficient for indices mu and nu
01304   Real SSEDCloverTerm::getCloverCoeff(int mu, int nu) const 
01305   { 
01306     START_CODE();
01307 
01308     if( param.anisoParam.anisoP ) 
01309     { 
01310       if (mu==param.anisoParam.t_dir || nu == param.anisoParam.t_dir) { 
01311         return param.clovCoeffT;
01312       }
01313       else { 
01314         // Otherwise return the spatial coeff
01315         return param.clovCoeffR;
01316       }
01317     }
01318     else { 
01319       // If there is no anisotropy just return the spatial one, it will
01320       // be the same as the temporal one
01321       return param.clovCoeffR; 
01322     } 
01323     
01324     END_CODE();
01325   }
01326 }
01327 
01328 
01329  
01330 

Generated on Sun Nov 22 04:28:58 2009 for CHROMA by  doxygen 1.4.7