00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "chromabase.h"
00010 #include "actions/ferm/linop/clover_term_ssed.h"
00011 #include "meas/glue/mesfield.h"
00012
00013
00014 namespace Chroma
00015 {
00016
00017
00018
00019 SSEDCloverTerm::SSEDCloverTerm() {
00020 bool retry = false;
00021
00022
00023
00024 try {
00025 tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::FAST );
00026 }
00027 catch( std::bad_alloc ) {
00028 retry = true;
00029 }
00030
00031 if( retry ) {
00032 try {
00033 tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::DEFAULT );
00034 }
00035 catch( std::bad_alloc ) {
00036 QDPIO::cerr << "Failed to allocate the tri_diag" << endl << flush ;
00037 QDP_abort(1);
00038 }
00039 }
00040
00041
00042 retry = false;
00043 try {
00044 tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::FAST );
00045 }
00046 catch( std::bad_alloc ) {
00047 retry = true;
00048 }
00049
00050 if( retry ) {
00051 try {
00052 tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::DEFAULT );
00053 }
00054 catch( std::bad_alloc ) {
00055 QDPIO::cerr << "Failed to allocate the tri_off_diag" << endl << flush ;
00056 QDP_abort(1);
00057 }
00058 }
00059
00060
00061 }
00062
00063
00064 SSEDCloverTerm::~SSEDCloverTerm() {
00065 if ( tri_diag != 0x0 ) {
00066 QDP::Allocator::theQDPAllocator::Instance().free(tri_diag);
00067 }
00068
00069 if ( tri_off_diag != 0x0 ) {
00070 QDP::Allocator::theQDPAllocator::Instance().free(tri_off_diag);
00071 }
00072 }
00073
00074
00075 void SSEDCloverTerm::create(Handle< FermState<T,P,Q> > fs,
00076 const CloverFermActParams& param_)
00077 {
00078 START_CODE();
00079 u = fs->getLinks();
00080 fbc = fs->getFermBC();
00081 param = param_;
00082
00083
00084 if (fbc.operator->() == 0)
00085 {
00086 QDPIO::cerr << "SSEDCloverTerm: error: fbc is null" << endl;
00087 QDP_abort(1);
00088 }
00089
00090 {
00091 Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
00092 param.clovCoeffR *= Real(0.5) * ff;
00093 param.clovCoeffT *= Real(0.5);
00094 }
00095
00096
00097
00098
00099
00100
00101 Real diag_mass;
00102 {
00103 Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
00104 diag_mass = 1 + (Nd-1)*ff + param.Mass;
00105 }
00106
00107
00108 multi1d<LatticeColorMatrix> f;
00109 mesField(f, u);
00110 makeClov(f, diag_mass);
00111
00112 choles_done.resize(rb.numSubsets());
00113 for(int i=0; i < rb.numSubsets(); i++) {
00114 choles_done[i] = false;
00115 }
00116
00117 #if 0
00118
00119 LatticeFermion foo;
00120 LatticeFermion res1=zero;
00121 LatticeFermion res2=zero;
00122 gaussian(foo);
00123 apply(res1,foo,PLUS, 0);
00124 applySite(res2,foo,PLUS, 0);
00125
00126 LatticeFermion diff=res1-res2;
00127 XMLFileWriter fred("fred");
00128 push(fred, "stuff");
00129 write(fred, "diff", diff);
00130 pop(fred);
00131
00132 QDPIO::cout << "sqrt( norm2( diff))= " << sqrt(norm2(diff)) << endl << flush;
00133 QDP_abort(1);
00134 #endif
00135
00136 END_CODE();
00137 }
00138
00139
00140 void SSEDCloverTerm::create(Handle< FermState<T,P,Q> > fs,
00141 const CloverFermActParams& param_,
00142 const SSEDCloverTerm& from_)
00143 {
00144 START_CODE();
00145
00146 const SSEDCloverTerm& from = dynamic_cast<const SSEDCloverTerm&>(from_);
00147 u = fs->getLinks();
00148 fbc = fs->getFermBC();
00149 param = param_;
00150
00151
00152 if (fbc.operator->() == 0) {
00153 QDPIO::cerr << "SSEDCloverTerm: error: fbc is null" << endl;
00154 QDP_abort(1);
00155 }
00156
00157 {
00158 Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
00159 param.clovCoeffR *= Real(0.5) * ff;
00160 param.clovCoeffT *= Real(0.5);
00161 }
00162
00163
00164
00165
00166
00167
00168 Real diag_mass;
00169 {
00170 Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
00171 diag_mass = 1 + (Nd-1)*ff + param.Mass;
00172 }
00173
00174
00175
00176
00177
00178
00179
00180 choles_done.resize(rb.numSubsets());
00181 for(int i=0; i < rb.numSubsets(); i++) {
00182 choles_done[i] = from.choles_done[i];
00183 }
00184
00185 tr_log_diag_ = from.tr_log_diag_;
00186
00187
00188
00189
00190
00191
00192
00193 for(int site=0; site < Layout::sitesOnNode(); site++) {
00194 for(int block = 0; block < 2; block++) {
00195 for(int comp=0; comp< 6; comp++) {
00196 tri_diag[site][block][comp] = from.tri_diag[site][block][comp];
00197 }
00198 for(int comp=0; comp < 15; comp++) {
00199 tri_off_diag[site][block][comp] = from.tri_off_diag[site][block][comp];
00200 }
00201 }
00202 }
00203
00204 END_CODE();
00205 }
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290 namespace SSEDCloverEnv {
00291 struct MakeClovArgs {
00292 LatticeColorMatrix& f0;
00293 LatticeColorMatrix& f1;
00294 LatticeColorMatrix& f2;
00295 LatticeColorMatrix& f3;
00296 LatticeColorMatrix& f4;
00297 LatticeColorMatrix& f5;
00298 PrimitiveClovDiag* tri_diag;
00299 PrimitiveClovOffDiag* tri_off_diag;
00300 const Real& diag_mass;
00301 };
00302
00303 inline
00304 void makeClovSiteLoop(int lo, int hi, int myId, MakeClovArgs* a)
00305 {
00306 LatticeColorMatrix& f0=a->f0;
00307 LatticeColorMatrix& f1=a->f1;
00308 LatticeColorMatrix& f2=a->f2;
00309 LatticeColorMatrix& f3=a->f3;
00310 LatticeColorMatrix& f4=a->f4;
00311 LatticeColorMatrix& f5=a->f5;
00312 PrimitiveClovDiag* tri_diag=a->tri_diag;
00313 PrimitiveClovOffDiag* tri_off_diag=a->tri_off_diag;
00314 const Real& diag_mass = a->diag_mass;
00315
00316 for(int site = lo; site < hi; ++site) {
00317
00318 for(int jj = 0; jj < 2; jj++) {
00319
00320 for(int ii = 0; ii < 2*Nc; ii++) {
00321
00322 tri_diag[site][jj][ii] = diag_mass.elem().elem().elem();
00323 }
00324 }
00325
00326
00327
00328
00329
00330 RComplex<REAL> E_minus;
00331 RComplex<REAL> B_minus;
00332 RComplex<REAL> ctmp_0;
00333 RComplex<REAL> ctmp_1;
00334 RScalar<REAL> rtmp_0;
00335 RScalar<REAL> rtmp_1;
00336
00337 for(int i = 0; i < Nc; ++i) {
00338
00339
00340
00341 ctmp_0 = f5.elem(site).elem().elem(i,i);
00342 ctmp_0 -= f0.elem(site).elem().elem(i,i);
00343 rtmp_0 = imag(ctmp_0);
00344 tri_diag[site][0][i] += rtmp_0;
00345
00346
00347
00348 tri_diag[site][0][i+Nc] -= rtmp_0;
00349
00350
00351
00352 ctmp_1 = f5.elem(site).elem().elem(i,i);
00353 ctmp_1 += f0.elem(site).elem().elem(i,i);
00354 rtmp_1 = imag(ctmp_1);
00355 tri_diag[site][1][i] -= rtmp_1;
00356
00357
00358
00359 tri_diag[site][1][i+Nc] += rtmp_1;
00360 }
00361
00362
00363
00364 for(int i = 1; i < Nc; ++i) {
00365
00366 for(int j = 0; j < i; ++j) {
00367
00368 int elem_ij = i*(i-1)/2 + j;
00369 int elem_tmp = (i+Nc)*(i+Nc-1)/2 + j+Nc;
00370
00371
00372
00373 ctmp_0 = f0.elem(site).elem().elem(i,j);
00374 ctmp_0 -= f5.elem(site).elem().elem(i,j);
00375 tri_off_diag[site][0][elem_ij] = timesI(ctmp_0);
00376
00377
00378
00379 tri_off_diag[site][0][elem_tmp] = -tri_off_diag[site][0][elem_ij];
00380
00381
00382
00383 ctmp_1 = f5.elem(site).elem().elem(i,j);
00384 ctmp_1 += f0.elem(site).elem().elem(i,j);
00385 tri_off_diag[site][1][elem_ij] = timesI(ctmp_1);
00386
00387
00388
00389 tri_off_diag[site][1][elem_tmp] = -tri_off_diag[site][1][elem_ij];
00390 }
00391 }
00392
00393
00394 for(int i = 0; i < Nc; ++i) {
00395
00396 for(int j = 0; j < Nc; ++j) {
00397
00398
00399
00400
00401 int elem_ij = (i+Nc)*(i+Nc-1)/2 + j;
00402
00403
00404
00405 E_minus = timesI(f2.elem(site).elem().elem(i,j));
00406 E_minus += f4.elem(site).elem().elem(i,j);
00407
00408
00409
00410 B_minus = timesI(f3.elem(site).elem().elem(i,j));
00411 B_minus -= f1.elem(site).elem().elem(i,j);
00412
00413
00414 tri_off_diag[site][0][elem_ij] = B_minus - E_minus;
00415
00416
00417 tri_off_diag[site][1][elem_ij] = E_minus + B_minus;
00418 }
00419 }
00420 }
00421 }
00422 };
00423
00424
00425 void SSEDCloverTerm::makeClov(const multi1d<LatticeColorMatrix>& f, const Real& diag_mass)
00426 {
00427 START_CODE();
00428
00429 LatticeColorMatrix f0;
00430 LatticeColorMatrix f1;
00431 LatticeColorMatrix f2;
00432 LatticeColorMatrix f3;
00433 LatticeColorMatrix f4;
00434 LatticeColorMatrix f5;
00435
00436 const int nodeSites = QDP::Layout::sitesOnNode();
00437
00438 START_CODE();
00439
00440 if ( Nd != 4 )
00441 QDP_error_exit("expecting Nd == 4", Nd);
00442
00443 if ( Ns != 4 )
00444 QDP_error_exit("expecting Ns == 4", Ns);
00445
00446 f0 = f[0] * getCloverCoeff(0,1);
00447 f1 = f[1] * getCloverCoeff(0,2);
00448 f2 = f[2] * getCloverCoeff(0,3);
00449 f3 = f[3] * getCloverCoeff(1,2);
00450 f4 = f[4] * getCloverCoeff(1,3);
00451 f5 = f[5] * getCloverCoeff(2,3);
00452
00453 SSEDCloverEnv::MakeClovArgs a={f0,f1,f2,f3,f4,f5,tri_diag,tri_off_diag, diag_mass};
00454 dispatch_to_threads(nodeSites, a, SSEDCloverEnv::makeClovSiteLoop);
00455
00456 END_CODE();
00457 }
00458
00459
00460
00461
00462
00463
00464 void SSEDCloverTerm::choles(int cb)
00465 {
00466 START_CODE();
00467
00468
00469
00470 ldagdlinv(tr_log_diag_,cb);
00471
00472 END_CODE();
00473 }
00474
00475
00476
00477
00478
00479
00480
00481
00482 Double SSEDCloverTerm::cholesDet(int cb) const
00483 {
00484 START_CODE();
00485
00486 if( choles_done[cb] == false )
00487 {
00488 QDPIO::cout << "Error: YOu have not done the Cholesky.on this operator on this subset" << endl;
00489 QDPIO::cout << "You sure you shouldn't be asking invclov?" << endl;
00490 QDP_abort(1);
00491 }
00492 LatticeReal ff=tr_log_diag_;
00493
00494 if( param.sub_zero_usedP ) {
00495 QDPIO::cout << "Subtracting "<< param.sub_zero<<endl;
00496 LatticeReal tmp;
00497 tmp[rb[cb]] = param.sub_zero;
00498 ff[rb[cb]] -= tmp;
00499 }
00500 END_CODE();
00501
00502 return sum(ff, rb[cb]);
00503 }
00504
00505
00506 namespace SSEDCloverEnv {
00507 struct LDagDLInvArgs {
00508 LatticeReal& tr_log_diag;
00509 PrimitiveClovDiag* tri_diag;
00510 PrimitiveClovOffDiag* tri_off_diag;
00511 int cb;
00512 };
00513
00514 inline
00515 void lDagDLInvSiteLoop(int lo, int hi, int myId, LDagDLInvArgs *a)
00516 {
00517 LatticeReal& tr_log_diag = a->tr_log_diag;
00518 PrimitiveClovDiag* tri_diag = a->tri_diag;
00519 PrimitiveClovOffDiag* tri_off_diag = a->tri_off_diag;
00520 int cb = a->cb;
00521
00522 RScalar<REAL> zip=0;
00523
00524 int N = 2*Nc;
00525
00526
00527 for(int ssite=lo; ssite < hi; ++ssite) {
00528
00529 int site = rb[cb].siteTable()[ssite];
00530
00531 int site_neg_logdet=0;
00532
00533 for(int block=0; block < 2; block++) {
00534
00535
00536
00537
00538 RScalar<REAL> inv_d[8] QDP_ALIGN16;
00539 RComplex<REAL> inv_offd[16] QDP_ALIGN16;
00540 RComplex<REAL> v[8] QDP_ALIGN16;
00541 RScalar<REAL> diag_g[8] QDP_ALIGN16;
00542
00543
00544 for(int i=0; i < N; i++) {
00545 inv_d[i] = tri_diag[site][block][i];
00546 }
00547 for(int i=0; i < 15; i++) {
00548 inv_offd[i] =tri_off_diag[site][block][i];
00549 }
00550
00551 for(int j=0; j < N; ++j) {
00552
00553
00554
00555
00556
00557
00558
00559
00560 for(int i=0; i < j; i++) {
00561 int elem_ji = j*(j-1)/2 + i;
00562
00563 RComplex<REAL> A_ii = cmplx( inv_d[i], zip );
00564 v[i] = A_ii*adj(inv_offd[elem_ji]);
00565 }
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575 v[j] = cmplx(inv_d[j],zip);
00576
00577 for(int k=0; k < j; k++) {
00578 int elem_jk = j*(j-1)/2 + k;
00579 v[j] -= inv_offd[elem_jk]*v[k];
00580 }
00581
00582
00583
00584
00585
00586
00587
00588 inv_d[j] = real( v[j] );
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602 for(int k=j+1; k < N; k++) {
00603 int elem_kj = k*(k-1)/2 + j;
00604 for(int l=0; l < j; l++) {
00605 int elem_kl = k*(k-1)/2 + l;
00606 inv_offd[elem_kj] -= inv_offd[elem_kl] * v[l];
00607 }
00608 inv_offd[elem_kj] /= v[j];
00609 }
00610 }
00611
00612
00613 RScalar<REAL> one;
00614 one.elem() = (REAL)1;
00615
00616 for(int i=0; i < N; i++) {
00617 diag_g[i] = one/inv_d[i];
00618
00619
00620
00621
00622
00623 tr_log_diag.elem(site).elem().elem().elem() += log(fabs(inv_d[i].elem()));
00624
00625
00626 if( inv_d[i].elem() < 0 ) {
00627 site_neg_logdet++;
00628 }
00629 }
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644 RComplex<REAL> sum;
00645 for(int k = 0; k < N; ++k) {
00646
00647 for(int i = 0; i < k; ++i) {
00648 zero_rep(v[i]);
00649 }
00650
00651
00652
00653
00654 v[k] = cmplx(diag_g[k],zip);
00655
00656 for(int i = k+1; i < N; ++i) {
00657 zero_rep(v[i]);
00658
00659 for(int j = k; j < i; ++j) {
00660 int elem_ij = i*(i-1)/2+j;
00661
00662
00663 v[i] -= inv_offd[elem_ij] *inv_d[j]*v[j];
00664
00665 }
00666
00667
00668 v[i] *= diag_g[i];
00669 }
00670
00671
00672
00673
00674
00675 for(int i = N-2; (int)i >= (int)k; --i) {
00676 for(int j = i+1; j < N; ++j) {
00677 int elem_ji = j*(j-1)/2 + i;
00678
00679 v[i] -= adj(inv_offd[elem_ji]) * v[j];
00680 }
00681 }
00682
00683
00684 inv_d[k] = real(v[k]);
00685 for(int i = k+1; i < N; ++i) {
00686
00687 int elem_ik = i*(i-1)/2+k;
00688 inv_offd[elem_ik] = v[i];
00689 }
00690 }
00691
00692
00693
00694 for(int i=0; i < N; i++) {
00695 tri_diag[site][block][i] = inv_d[i];
00696 }
00697 for(int i=0; i < 15; i++) {
00698 tri_off_diag[site][block][i] = inv_offd[i];
00699 }
00700 }
00701
00702 if( site_neg_logdet != 0 ) {
00703
00704 std::cout << "WARNING: Odd number of negative terms in Clover DET ("
00705 << site_neg_logdet<< ") at site: " << site << endl;
00706 }
00707 }
00708 }
00709 }
00710
00711
00712 void SSEDCloverTerm::ldagdlinv(LatticeReal& tr_log_diag, int cb)
00713 {
00714 START_CODE();
00715
00716 if ( 2*Nc < 3 )
00717 QDP_error_exit("Matrix is too small", Nc, Ns);
00718
00719
00720 tr_log_diag = zero;
00721
00722 SSEDCloverEnv::LDagDLInvArgs a = { tr_log_diag, tri_diag, tri_off_diag, cb };
00723 dispatch_to_threads(rb[cb].numSiteTable(), a, SSEDCloverEnv::lDagDLInvSiteLoop);
00724
00725
00726
00727 choles_done[cb] = true;
00728 END_CODE();
00729 }
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749 extern void ssed_clover_apply(REAL64* diag, REAL64* offd, REAL64* psiptr, REAL64* chiptr, int n_sites);
00750
00751 namespace SSEDCloverEnv{
00752 struct ApplyArgs {
00753 LatticeFermion& chi;
00754 const LatticeFermion& psi;
00755 PrimitiveClovOffDiag* tri_off;
00756 PrimitiveClovDiag* tri_diag;
00757 int cb;
00758 };
00759
00760
00761 inline
00762 void orderedApplySiteLoop(int lo, int hi, int myID,
00763 ApplyArgs* a)
00764 {
00765 int n_4vec=hi-lo;
00766 int start=rb[ a->cb ].start()+lo;
00767
00768 REAL64* chiptr = (REAL64 *)&( a->chi.elem(start).elem(0).elem(0).real());
00769 REAL64* psiptr = (REAL64 *)&( a->psi.elem(start).elem(0).elem(0).real());
00770 REAL64* offd = (REAL64 *)&(a->tri_off[start][0][0].real());
00771 REAL64* diag = (REAL64 *)&(a->tri_diag[start][0][0].elem());
00772 ssed_clover_apply(diag, offd, psiptr, chiptr, n_4vec);
00773 }
00774
00775 inline
00776 void unorderedApplySiteLoop(int lo, int hi, int myId, ApplyArgs* a)
00777 {
00778 int cb = a->cb;
00779 const multi1d<int>& tab = rb[cb].siteTable();
00780
00781
00782 for(int ssite=lo; ssite < hi; ++ssite) {
00783
00784 int site = tab[ssite];
00785 unsigned long n_sites=1;
00786
00787 REAL64* chiptr = (REAL64 *)&( a->chi.elem(site).elem(0).elem(0).real());
00788 REAL64* psiptr = (REAL64 *)&(a->psi.elem(site).elem(0).elem(0).real());
00789 REAL64* offd = (REAL64 *)&(a->tri_off[site][0][0].real());
00790 REAL64* diag = (REAL64 *)&(a->tri_diag[site][0][0].elem());
00791 ssed_clover_apply(diag, offd, psiptr, chiptr, n_sites);
00792
00793 }
00794 }
00795 }
00796
00797 void SSEDCloverTerm::apply(LatticeFermion& chi, const LatticeFermion& psi,
00798 enum PlusMinus isign, int cb) const
00799 {
00800 START_CODE();
00801
00802 if ( Ns != 4 )
00803 QDP_error_exit("code requires Ns == 4", Ns);
00804
00805 int n = 2*Nc;
00806
00807 SSEDCloverEnv::ApplyArgs a = {chi, psi, tri_off_diag, tri_diag,cb};
00808
00809 if( rb[cb].hasOrderedRep() ) {
00810
00811 dispatch_to_threads(rb[cb].siteTable().size(), a,
00812 SSEDCloverEnv::orderedApplySiteLoop);
00813
00814
00815 }
00816 else {
00817 dispatch_to_threads(rb[cb].siteTable().size(), a,
00818 SSEDCloverEnv::unorderedApplySiteLoop);
00819 }
00820
00821 getFermBC().modifyF(chi, QDP::rb[cb]);
00822
00823 END_CODE();
00824 }
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843 void SSEDCloverTerm::applySite(LatticeFermion& chi, const LatticeFermion& psi,
00844 enum PlusMinus isign, int site) const
00845 {
00846 START_CODE();
00847
00848 if ( Ns != 4 )
00849 QDP_error_exit("code requires Ns == 4", Ns);
00850
00851 int n = 2*Nc;
00852
00853 RComplex<REAL>* cchi = (RComplex<REAL>*)&(chi.elem(site).elem(0).elem(0));
00854 const RComplex<REAL>* ppsi = (const RComplex<REAL>*)&(psi.elem(site).elem(0).elem(0));
00855
00856
00857 cchi[ 0] = tri_diag[site][0][ 0] * ppsi[ 0]
00858 + conj(tri_off_diag[site][0][ 0]) * ppsi[ 1]
00859 + conj(tri_off_diag[site][0][ 1]) * ppsi[ 2]
00860 + conj(tri_off_diag[site][0][ 3]) * ppsi[ 3]
00861 + conj(tri_off_diag[site][0][ 6]) * ppsi[ 4]
00862 + conj(tri_off_diag[site][0][10]) * ppsi[ 5];
00863
00864 cchi[ 1] = tri_diag[site][0][ 1] * ppsi[ 1]
00865 + tri_off_diag[site][0][ 0] * ppsi[ 0]
00866 + conj(tri_off_diag[site][0][ 2]) * ppsi[ 2]
00867 + conj(tri_off_diag[site][0][ 4]) * ppsi[ 3]
00868 + conj(tri_off_diag[site][0][ 7]) * ppsi[ 4]
00869 + conj(tri_off_diag[site][0][11]) * ppsi[ 5];
00870
00871 cchi[ 2] = tri_diag[site][0][ 2] * ppsi[ 2]
00872 + tri_off_diag[site][0][ 1] * ppsi[ 0]
00873 + tri_off_diag[site][0][ 2] * ppsi[ 1]
00874 + conj(tri_off_diag[site][0][ 5]) * ppsi[ 3]
00875 + conj(tri_off_diag[site][0][ 8]) * ppsi[ 4]
00876 + conj(tri_off_diag[site][0][12]) * ppsi[ 5];
00877
00878 cchi[ 3] = tri_diag[site][0][ 3] * ppsi[ 3]
00879 + tri_off_diag[site][0][ 3] * ppsi[ 0]
00880 + tri_off_diag[site][0][ 4] * ppsi[ 1]
00881 + tri_off_diag[site][0][ 5] * ppsi[ 2]
00882 + conj(tri_off_diag[site][0][ 9]) * ppsi[ 4]
00883 + conj(tri_off_diag[site][0][13]) * ppsi[ 5];
00884
00885 cchi[ 4] = tri_diag[site][0][ 4] * ppsi[ 4]
00886 + tri_off_diag[site][0][ 6] * ppsi[ 0]
00887 + tri_off_diag[site][0][ 7] * ppsi[ 1]
00888 + tri_off_diag[site][0][ 8] * ppsi[ 2]
00889 + tri_off_diag[site][0][ 9] * ppsi[ 3]
00890 + conj(tri_off_diag[site][0][14]) * ppsi[ 5];
00891
00892 cchi[ 5] = tri_diag[site][0][ 5] * ppsi[ 5]
00893 + tri_off_diag[site][0][10] * ppsi[ 0]
00894 + tri_off_diag[site][0][11] * ppsi[ 1]
00895 + tri_off_diag[site][0][12] * ppsi[ 2]
00896 + tri_off_diag[site][0][13] * ppsi[ 3]
00897 + tri_off_diag[site][0][14] * ppsi[ 4];
00898
00899 cchi[ 6] = tri_diag[site][1][ 0] * ppsi[ 6]
00900 + conj(tri_off_diag[site][1][ 0]) * ppsi[ 7]
00901 + conj(tri_off_diag[site][1][ 1]) * ppsi[ 8]
00902 + conj(tri_off_diag[site][1][ 3]) * ppsi[ 9]
00903 + conj(tri_off_diag[site][1][ 6]) * ppsi[10]
00904 + conj(tri_off_diag[site][1][10]) * ppsi[11];
00905
00906 cchi[ 7] = tri_diag[site][1][ 1] * ppsi[ 7]
00907 + tri_off_diag[site][1][ 0] * ppsi[ 6]
00908 + conj(tri_off_diag[site][1][ 2]) * ppsi[ 8]
00909 + conj(tri_off_diag[site][1][ 4]) * ppsi[ 9]
00910 + conj(tri_off_diag[site][1][ 7]) * ppsi[10]
00911 + conj(tri_off_diag[site][1][11]) * ppsi[11];
00912
00913 cchi[ 8] = tri_diag[site][1][ 2] * ppsi[ 8]
00914 + tri_off_diag[site][1][ 1] * ppsi[ 6]
00915 + tri_off_diag[site][1][ 2] * ppsi[ 7]
00916 + conj(tri_off_diag[site][1][ 5]) * ppsi[ 9]
00917 + conj(tri_off_diag[site][1][ 8]) * ppsi[10]
00918 + conj(tri_off_diag[site][1][12]) * ppsi[11];
00919
00920 cchi[ 9] = tri_diag[site][1][ 3] * ppsi[ 9]
00921 + tri_off_diag[site][1][ 3] * ppsi[ 6]
00922 + tri_off_diag[site][1][ 4] * ppsi[ 7]
00923 + tri_off_diag[site][1][ 5] * ppsi[ 8]
00924 + conj(tri_off_diag[site][1][ 9]) * ppsi[10]
00925 + conj(tri_off_diag[site][1][13]) * ppsi[11];
00926
00927 cchi[10] = tri_diag[site][1][ 4] * ppsi[10]
00928 + tri_off_diag[site][1][ 6] * ppsi[ 6]
00929 + tri_off_diag[site][1][ 7] * ppsi[ 7]
00930 + tri_off_diag[site][1][ 8] * ppsi[ 8]
00931 + tri_off_diag[site][1][ 9] * ppsi[ 9]
00932 + conj(tri_off_diag[site][1][14]) * ppsi[11];
00933
00934 cchi[11] = tri_diag[site][1][ 5] * ppsi[11]
00935 + tri_off_diag[site][1][10] * ppsi[ 6]
00936 + tri_off_diag[site][1][11] * ppsi[ 7]
00937 + tri_off_diag[site][1][12] * ppsi[ 8]
00938 + tri_off_diag[site][1][13] * ppsi[ 9]
00939 + tri_off_diag[site][1][14] * ppsi[10];
00940
00941
00942 END_CODE();
00943 }
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981 namespace SSEDCloverEnv {
00982 struct TriaCntrArgs {
00983 LatticeColorMatrix& B;
00984 PrimitiveClovDiag* tri_diag;
00985 PrimitiveClovOffDiag* tri_off;
00986 int mat;
00987 int cb;
00988 };
00989
00990
00991 inline
00992 void triaCntrSiteLoop(int lo, int hi, int myId, TriaCntrArgs* a)
00993 {
00994 PrimitiveClovDiag* tri_diag = a->tri_diag;
00995 PrimitiveClovOffDiag* tri_off_diag = a->tri_off;
00996 LatticeColorMatrix& B = a->B;
00997 int mat = a->mat;
00998 int cb = a->cb;
00999
01000
01001 for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) {
01002
01003 int site = rb[cb].siteTable()[ssite];
01004
01005 switch( mat ) {
01006
01007 case 0:
01008
01009
01010
01011
01012
01013 {
01014 RComplex<REAL> lctmp0;
01015 RScalar<REAL> lr_zero0;
01016 RScalar<REAL> lrtmp0;
01017
01018 lr_zero0 = 0;
01019
01020 for(int i0 = 0; i0 < Nc; ++i0) {
01021 lrtmp0 = tri_diag[site][0][i0];
01022 lrtmp0 += tri_diag[site][0][i0+Nc];
01023 lrtmp0 += tri_diag[site][1][i0];
01024 lrtmp0 += tri_diag[site][1][i0+Nc];
01025 B.elem(site).elem().elem(i0,i0) = cmplx(lrtmp0,lr_zero0);
01026 }
01027
01028
01029 int elem_ij0 = 0;
01030 for(int i0 = 1; i0 < Nc; ++i0) {
01031
01032 int elem_ijb0 = (i0+Nc)*(i0+Nc-1)/2 + Nc;
01033
01034 for(int j0 = 0; j0 < i0; ++j0) {
01035
01036 lctmp0 = tri_off_diag[site][0][elem_ij0];
01037 lctmp0 += tri_off_diag[site][0][elem_ijb0];
01038 lctmp0 += tri_off_diag[site][1][elem_ij0];
01039 lctmp0 += tri_off_diag[site][1][elem_ijb0];
01040
01041 B.elem(site).elem().elem(j0,i0) = lctmp0;
01042 B.elem(site).elem().elem(i0,j0) = adj(lctmp0);
01043
01044
01045 elem_ij0++;
01046 elem_ijb0++;
01047 }
01048 }
01049 }
01050 break;
01051
01052 case 3:
01053
01054
01055
01056
01057
01058 {
01059
01060 RComplex<REAL> lctmp3;
01061 RScalar<REAL> lr_zero3;
01062 RScalar<REAL> lrtmp3;
01063
01064 lr_zero3 = 0;
01065
01066 for(int i3 = 0; i3 < Nc; ++i3) {
01067
01068 lrtmp3 = tri_diag[site][0][i3+Nc];
01069 lrtmp3 -= tri_diag[site][0][i3];
01070 lrtmp3 -= tri_diag[site][1][i3];
01071 lrtmp3 += tri_diag[site][1][i3+Nc];
01072 B.elem(site).elem().elem(i3,i3) = cmplx(lr_zero3,lrtmp3);
01073 }
01074
01075
01076 int elem_ij3 = 0;
01077 for(int i3 = 1; i3 < Nc; ++i3) {
01078
01079 int elem_ijb3 = (i3+Nc)*(i3+Nc-1)/2 + Nc;
01080
01081 for(int j3 = 0; j3 < i3; ++j3) {
01082
01083 lctmp3 = tri_off_diag[site][0][elem_ijb3];
01084 lctmp3 -= tri_off_diag[site][0][elem_ij3];
01085 lctmp3 -= tri_off_diag[site][1][elem_ij3];
01086 lctmp3 += tri_off_diag[site][1][elem_ijb3];
01087
01088 B.elem(site).elem().elem(j3,i3) = timesI(adj(lctmp3));
01089 B.elem(site).elem().elem(i3,j3) = timesI(lctmp3);
01090
01091 elem_ij3++;
01092 elem_ijb3++;
01093 }
01094 }
01095 }
01096 break;
01097
01098 case 5:
01099
01100
01101
01102
01103 {
01104
01105
01106 RComplex<REAL> lctmp5;
01107 RScalar<REAL> lrtmp5;
01108
01109 for(int i5 = 0; i5 < Nc; ++i5) {
01110
01111 int elem_ij5 = (i5+Nc)*(i5+Nc-1)/2;
01112
01113 for(int j5 = 0; j5 < Nc; ++j5) {
01114
01115 int elem_ji5 = (j5+Nc)*(j5+Nc-1)/2 + i5;
01116
01117
01118 lctmp5 = adj(tri_off_diag[site][0][elem_ji5]);
01119 lctmp5 -= tri_off_diag[site][0][elem_ij5];
01120 lctmp5 += adj(tri_off_diag[site][1][elem_ji5]);
01121 lctmp5 -= tri_off_diag[site][1][elem_ij5];
01122
01123
01124 B.elem(site).elem().elem(i5,j5) = lctmp5;
01125
01126 elem_ij5++;
01127 }
01128 }
01129 }
01130 break;
01131
01132 case 6:
01133
01134
01135
01136
01137 {
01138
01139 RComplex<REAL> lctmp6;
01140 RScalar<REAL> lrtmp6;
01141
01142 for(int i6 = 0; i6 < Nc; ++i6) {
01143
01144 int elem_ij6 = (i6+Nc)*(i6+Nc-1)/2;
01145
01146 for(int j6 = 0; j6 < Nc; ++j6) {
01147
01148 int elem_ji6 = (j6+Nc)*(j6+Nc-1)/2 + i6;
01149
01150 lctmp6 = adj(tri_off_diag[site][0][elem_ji6]);
01151 lctmp6 += tri_off_diag[site][0][elem_ij6];
01152 lctmp6 += adj(tri_off_diag[site][1][elem_ji6]);
01153 lctmp6 += tri_off_diag[site][1][elem_ij6];
01154
01155 B.elem(site).elem().elem(i6,j6) = timesMinusI(lctmp6);
01156
01157 elem_ij6++;
01158 }
01159 }
01160 }
01161 break;
01162
01163 case 9:
01164
01165
01166
01167
01168 {
01169 RComplex<REAL> lctmp9;
01170 RScalar<REAL> lrtmp9;
01171
01172 for(int i9 = 0; i9 < Nc; ++i9) {
01173
01174 int elem_ij9 = (i9+Nc)*(i9+Nc-1)/2;
01175
01176 for(int j9 = 0; j9 < Nc; ++j9) {
01177
01178 int elem_ji9 = (j9+Nc)*(j9+Nc-1)/2 + i9;
01179
01180 lctmp9 = adj(tri_off_diag[site][0][elem_ji9]);
01181 lctmp9 += tri_off_diag[site][0][elem_ij9];
01182 lctmp9 -= adj(tri_off_diag[site][1][elem_ji9]);
01183 lctmp9 -= tri_off_diag[site][1][elem_ij9];
01184
01185 B.elem(site).elem().elem(i9,j9) = timesI(lctmp9);
01186
01187 elem_ij9++;
01188 }
01189 }
01190 }
01191 break;
01192
01193 case 10:
01194
01195
01196
01197
01198 {
01199
01200 RComplex<REAL> lctmp10;
01201 RScalar<REAL> lrtmp10;
01202
01203 for(int i10 = 0; i10 < Nc; ++i10) {
01204
01205 int elem_ij10 = (i10+Nc)*(i10+Nc-1)/2;
01206
01207 for(int j10 = 0; j10 < Nc; ++j10) {
01208
01209 int elem_ji10 = (j10+Nc)*(j10+Nc-1)/2 + i10;
01210
01211 lctmp10 = adj(tri_off_diag[site][0][elem_ji10]);
01212 lctmp10 -= tri_off_diag[site][0][elem_ij10];
01213 lctmp10 -= adj(tri_off_diag[site][1][elem_ji10]);
01214 lctmp10 += tri_off_diag[site][1][elem_ij10];
01215
01216 B.elem(site).elem().elem(i10,j10) = lctmp10;
01217
01218 elem_ij10++;
01219 }
01220 }
01221 }
01222
01223 break;
01224
01225 case 12:
01226
01227
01228
01229
01230
01231 {
01232
01233
01234 RComplex<REAL> lctmp12;
01235 RScalar<REAL> lr_zero12;
01236 RScalar<REAL> lrtmp12;
01237
01238 lr_zero12 = 0;
01239
01240 for(int i12 = 0; i12 < Nc; ++i12) {
01241
01242 lrtmp12 = tri_diag[site][0][i12];
01243 lrtmp12 -= tri_diag[site][0][i12+Nc];
01244 lrtmp12 -= tri_diag[site][1][i12];
01245 lrtmp12 += tri_diag[site][1][i12+Nc];
01246 B.elem(site).elem().elem(i12,i12) = cmplx(lr_zero12,lrtmp12);
01247 }
01248
01249
01250 int elem_ij12 = 0;
01251 for(int i12 = 1; i12 < Nc; ++i12) {
01252
01253 int elem_ijb12 = (i12+Nc)*(i12+Nc-1)/2 + Nc;
01254
01255 for(int j12 = 0; j12 < i12; ++j12) {
01256
01257 lctmp12 = tri_off_diag[site][0][elem_ij12];
01258 lctmp12 -= tri_off_diag[site][0][elem_ijb12];
01259 lctmp12 -= tri_off_diag[site][1][elem_ij12];
01260 lctmp12 += tri_off_diag[site][1][elem_ijb12];
01261
01262 B.elem(site).elem().elem(i12,j12) = timesI(lctmp12);
01263 B.elem(site).elem().elem(j12,i12) = timesI(adj(lctmp12));
01264
01265 elem_ij12++;
01266 elem_ijb12++;
01267 }
01268 }
01269 }
01270 break;
01271
01272 default:
01273 {
01274 B = zero;
01275 QDPIO::cout << "BAD DEFAULT CASE HIT" << endl;
01276 }
01277 }
01278
01279 }
01280 }
01281 }
01282
01283 void SSEDCloverTerm::triacntr(LatticeColorMatrix& B, int mat, int cb) const
01284 {
01285 START_CODE();
01286
01287 B = zero;
01288
01289 if ( mat < 0 || mat > 15 )
01290 {
01291 QDPIO::cerr << __func__ << ": Gamma out of range: mat = " << mat << endl;
01292 QDP_abort(1);
01293 }
01294
01295 SSEDCloverEnv::TriaCntrArgs a = { B, tri_diag, tri_off_diag, mat, cb};
01296 dispatch_to_threads(rb[cb].numSiteTable(), a,
01297 SSEDCloverEnv::triaCntrSiteLoop);
01298
01299
01300 END_CODE();
01301 }
01302
01303
01304 Real SSEDCloverTerm::getCloverCoeff(int mu, int nu) const
01305 {
01306 START_CODE();
01307
01308 if( param.anisoParam.anisoP )
01309 {
01310 if (mu==param.anisoParam.t_dir || nu == param.anisoParam.t_dir) {
01311 return param.clovCoeffT;
01312 }
01313 else {
01314
01315 return param.clovCoeffR;
01316 }
01317 }
01318 else {
01319
01320
01321 return param.clovCoeffR;
01322 }
01323
01324 END_CODE();
01325 }
01326 }
01327
01328
01329
01330