/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int Lanczos_EigenValue(struct BindStruct *X) { fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueStart); FILE *fp; char sdt[D_FileNameMax],sdt_2[D_FileNameMax]; int stp, iproc; long int i,iv,i_max; unsigned long int i_max_tmp, sum_i_max; int k_exct,Target; int iconv=-1; double beta1,alpha1; //beta,alpha1 should be real double complex temp1,temp2; double complex cbeta1; double E[5],ebefor; int mythread; // for GC double dnorm; double complex cdnorm; long unsigned int u_long_i; dsfmt_t dsfmt; #ifdef lapack double **tmp_mat; double *tmp_E; int int_i,int_j,mfint[7]; #endif sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead); i_max=X->Check.idim_max; k_exct = X->Def.k_exct; if(initial_mode == 0){ sum_i_max = SumMPI_li(X->Check.idim_max); X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1; iv=X->Large.iv; fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v0[i]=0.0; v1[i]=0.0; } sum_i_max = 0; for (iproc = 0; iproc < nproc; iproc++) { i_max_tmp = BcastMPI_li(iproc, i_max); if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { if (myrank == iproc) { v1[iv - sum_i_max+1] = 1.0; if (X->Def.iInitialVecType == 0) { v1[iv - sum_i_max+1] += 1.0*I; v1[iv - sum_i_max+1] /= sqrt(2.0); } }/*if (myrank == iproc)*/ }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ sum_i_max += i_max_tmp; }/*for (iproc = 0; iproc < nproc; iproc++)*/ }/*if(initial_mode == 0)*/ else if(initial_mode==1){ iv = X->Def.initial_iv; fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ shared(v0, v1, iv, X, nthreads, myrank) firstprivate(i_max) { #pragma omp for for (i = 1; i <= i_max; i++) { v0[i] = 0.0; } /* Initialise MT */ #ifdef _OPENMP mythread = omp_get_thread_num(); #else mythread = 0; #endif u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank; dsfmt_init_gen_rand(&dsfmt, u_long_i); if (X->Def.iInitialVecType == 0) { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; } else { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); } }/*#pragma omp parallel*/ cdnorm=0.0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: cdnorm) for(i=1;i<=i_max;i++){ cdnorm += conj(v1[i])*v1[i]; } cdnorm = SumMPI_dc(cdnorm); dnorm=creal(cdnorm); dnorm=sqrt(dnorm); #pragma omp parallel for default(none) private(i) shared(v1) firstprivate(i_max, dnorm) for(i=1;i<=i_max;i++){ v1[i] = v1[i]/dnorm; } }/*else if(initial_mode==1)*/ //Eigenvalues by Lanczos method TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); mltply(X, v0, v1); stp=1; TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct) ;// alpha = v^{\dag}*H*v alpha[1]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i = 1; i <= i_max; i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[1]=beta1; ebefor=0; /* Set Maximum number of loop to the dimention of the Wavefunction */ i_max_tmp = SumMPI_li(i_max); if(i_max_tmp < X->Def.Lanczos_max){ X->Def.Lanczos_max = i_max_tmp; } if(i_max_tmp < X->Def.LanczosTarget){ X->Def.LanczosTarget = i_max_tmp; } if(i_max_tmp == 1){ E[1]=alpha[1]; vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; fprintf(stdoutMPI," LanczosStep E[1] \n"); fprintf(stdoutMPI," stp=%d %.10lf \n",stp,E[1]); } else{ #ifdef lapack fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); #else fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] \n"); #endif for(stp = 2; stp <= X->Def.Lanczos_max; stp++){ #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(i=1;i<=i_max;i++){ temp1 = v1[i]; temp2 = (v0[i]-alpha1*v1[i])/beta1; v0[i] = -beta1*temp1; v1[i] = temp2; } mltply(X, v0, v1); TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct); alpha[stp]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i=1;i<=i_max;i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[stp]=beta1; Target = X->Def.LanczosTarget; if(stp==2){ #ifdef lapack d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; tmp_mat[1][0] = beta[1]; tmp_mat[1][1] = alpha[2]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); #else bisec(alpha,beta,stp,E,4,eps_Bisec); #endif ebefor=E[Target]; childfopenMPI(sdt_2,"w", &fp); #ifdef lapack fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #else fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] \n"); fprintf(fp,"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #endif fclose(fp); } if(stp>2 && stp%2==0){ childfopenMPI(sdt_2,"a", &fp); #ifdef lapack d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; for(int_i=1;int_i<stp-1;int_i++){ tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i+1] = beta[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; } tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; E[0] = tmp_E[stp-1]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); #else bisec(alpha,beta,stp,E,4,eps_Bisec); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf \n",stp,E[1],E[2],E[3],E[4]); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4]); #endif fclose(fp); if(fabs((E[Target]-ebefor)/E[Target])<eps_Lanczos || fabs(beta[stp])<pow(10.0, -14)){ vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; break; } ebefor=E[Target]; } } } sprintf(sdt,cFileNameTimeKeep,X->Def.CDataFileHead); if(iconv!=0){ sprintf(sdt, cLogLanczos_EigenValueNotConverged); return -1; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueFinish, "a"); fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd); return 0; }
/** * @brief Calculate tridiagonal matrix components by Lanczos method * * @param _alpha * @param _beta * @param _v1 * @param Lanczos_step * * @return 0 */ int Lanczos_GetTridiagonalMatrixComponents( struct BindStruct *X, double *_alpha, double *_beta, double complex *tmp_v1, unsigned long int *liLanczos_step ) { FILE *fp; char sdt[D_FileNameMax]; int stp, iproc; long int i,iv,i_max; i_max=X->Check.idim_max; unsigned long int i_max_tmp, sum_i_max; int k_exct,Target; double beta1,alpha1; //beta,alpha1 should be real double complex temp1,temp2; double complex cbeta1; double complex *tmp_v0; c_malloc1(tmp_v0, i_max); sprintf(sdt, cFileNameLanczosStep, X->Def.CDataFileHead); /* Set Maximum number of loop to the dimention of the Wavefunction */ i_max_tmp = SumMPI_li(i_max); if(i_max_tmp < *liLanczos_step){ *liLanczos_step = i_max_tmp; } if(i_max_tmp < X->Def.LanczosTarget){ *liLanczos_step = i_max_tmp; } #pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v0[i]=0.0; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); mltply(X, v0, tmp_v1); stp=1; alpha1=creal(X->Large.prdct) ;// alpha = v^{\dag}*H*v alpha[1]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i = 1; i <= i_max; i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[1]=beta1; for(stp = 2; stp <= *liLanczos_step; stp++){ if(fabs(beta[stp-1])<pow(10.0, -14)){ *liLanczos_step=stp-1; break; } #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(i=1;i<=i_max;i++){ temp1 = v1[i]; temp2 = (v0[i]-alpha1*v1[i])/beta1; v0[i] = -beta1*temp1; v1[i] = temp2; } mltply(X, v0, v1); alpha1=creal(X->Large.prdct); alpha[stp]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i=1;i<=i_max;i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[stp]=beta1; } for(stp = 1; stp <= *liLanczos_step; stp++) { _alpha[stp] = alpha[stp]; _beta[stp]=beta[stp]; } return TRUE; }
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int expec_energy_flct(struct BindStruct *X){ long unsigned int i,j; long unsigned int irght,ilft,ihfbit; long unsigned int isite1; long unsigned int is1_up,is1_down; long unsigned int is1; double complex dam_pr,dam_pr1; long int num1_up, num1_down; long unsigned int ibit1; double tmp_num_up, tmp_num_down; double D,tmp_D,tmp_D2; double N,tmp_N,tmp_N2; double Sz,tmp_Sz, tmp_Sz2; double tmp_v02; long unsigned int i_max,tmp_list_1; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeper(X, cFileNameTimeKeep, cExpecStart, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecStart, "a", step_i); #endif break; case FullDiag: break; default: return -1; //break; } i_max=X->Check.idim_max; if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ return -1; } X->Large.i_max = i_max; X->Large.irght = irght; X->Large.ilft = ilft; X->Large.ihfbit = ihfbit; X->Large.mode = M_ENERGY; X->Phys.energy=0.0; dam_pr=0.0; // tentative doublon tmp_D = 0.0; tmp_D2 = 0.0; tmp_N = 0.0; tmp_N2 = 0.0; tmp_Sz = 0.0; tmp_Sz2 = 0.0; tmp_num_up = 0.0; tmp_num_down = 0.0; int nCalcFlct; if(X->Def.iCalcType == Lanczos){ nCalcFlct=4301; } else if (X->Def.iCalcType == TPQCalc){ nCalcFlct=3201; } else{//For FullDiag nCalcFlct=5301; } StartTimer(nCalcFlct); switch(X->Def.iCalcModel){ case HubbardGC: #pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2, tmp_num_up, tmp_num_down) default(none) shared(v0) \ firstprivate(i_max, num1_up, num1_down,X,myrank) private(j, tmp_v02,D,N,Sz,isite1,is1_up,is1_down,is1,ibit1) for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; D = 0.0; N = 0.0; Sz = 0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ if(isite1 > X->Def.Nsite){ is1_up = X->Def.Tpow[2 * isite1 - 2]; is1_down = X->Def.Tpow[2 * isite1 - 1]; is1 = is1_up+is1_down; ibit1 = (unsigned long int)myrank & is1; num1_up = (ibit1&is1_up) / is1_up; num1_down = (ibit1&is1_down) / is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; }else{ is1_up = X->Def.Tpow[2*isite1-2]; is1_down = X->Def.Tpow[2*isite1-1]; is1 = is1_up+is1_down; ibit1 = (j-1)&is1; num1_up = ((j-1)&is1_up)/is1_up; num1_down = ((j-1)&is1_down)/is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; } } tmp_D += tmp_v02*D; tmp_D2 += tmp_v02*D*D; tmp_N += tmp_v02*N; tmp_N2 += tmp_v02*N*N; tmp_Sz += tmp_v02*Sz; tmp_Sz2 += tmp_v02*Sz*Sz; } break; case KondoGC: case Hubbard: case Kondo: #pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2) default(none) shared(v0,list_1) \ firstprivate(i_max, num1_up, num1_down,X,myrank) private(j, tmp_v02,D,N,Sz,isite1,is1_up,is1_down,is1,ibit1,tmp_list_1) for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; D = 0.0; N = 0.0; Sz = 0.0; tmp_list_1 = list_1[j]; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ //printf("DEBUG: j=%d %d %d\n",j,isite1,myrank); if(isite1 > X->Def.Nsite){ is1_up = X->Def.Tpow[2 * isite1 - 2]; is1_down = X->Def.Tpow[2 * isite1 - 1]; is1 = is1_up+is1_down; ibit1 = (unsigned long int)myrank & is1; num1_up = (ibit1&is1_up) / is1_up; num1_down = (ibit1&is1_down) / is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; }else{ is1_up = X->Def.Tpow[2*isite1-2]; is1_down = X->Def.Tpow[2*isite1-1]; is1 = is1_up+is1_down; //ibit1 = tmp_list_1&is1; num1_up = (tmp_list_1&is1_up)/is1_up; num1_down = (tmp_list_1&is1_down)/is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; } } tmp_D += tmp_v02*D; tmp_D2 += tmp_v02*D*D; tmp_N += tmp_v02*N; tmp_N2 += tmp_v02*N*N; tmp_Sz += tmp_v02*Sz; tmp_Sz2 += tmp_v02*Sz*Sz; } break; case SpinGC: if(X->Def.iFlgGeneralSpin == FALSE) { #pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0) \ firstprivate(i_max,X,myrank) private(j,Sz, is1_up,ibit1,isite1,tmp_v02) for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; Sz = 0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ if(isite1 > X->Def.Nsite){ is1_up = X->Def.Tpow[isite1 - 1]; ibit1 = (unsigned long int)myrank& is1_up; if(ibit1==is1_up){ Sz += 1.0; }else{ Sz += -1.0; } }else{ is1_up=X->Def.Tpow[isite1-1]; ibit1=(j-1)&is1_up; if(ibit1==is1_up){ Sz += 1.0; }else{ Sz += -1.0; } } } tmp_Sz += Sz*tmp_v02; tmp_Sz2 += Sz*Sz*tmp_v02; } } else{//for generalspin for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; Sz = 0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ //prefactor 0.5 is added later. if(isite1 > X->Def.Nsite){ Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); }else{ Sz += GetLocal2Sz(isite1, j-1, X->Def.SiteToBit, X->Def.Tpow); } } tmp_Sz += Sz*tmp_v02; tmp_Sz2 += Sz*Sz*tmp_v02; } } break;/*case SpinGC*/ /* SpinGCBoost */ case Spin: break; default: return -1; } tmp_D = SumMPI_d(tmp_D); tmp_D2 = SumMPI_d(tmp_D2); tmp_N = SumMPI_d(tmp_N); tmp_N2 = SumMPI_d(tmp_N2); tmp_Sz = SumMPI_d(tmp_Sz); tmp_Sz2 = SumMPI_d(tmp_Sz2); // tmp_num_up = SumMPI_d(tmp_num_up); // tmp_num_down = SumMPI_d(tmp_num_down); switch(X->Def.iCalcModel){ case HubbardGC: case KondoGC: case Hubbard: case Kondo: X->Phys.doublon = tmp_D; X->Phys.doublon2 = tmp_D2; X->Phys.num = tmp_N; X->Phys.num2 = tmp_N2; X->Phys.Sz = tmp_Sz*0.5; X->Phys.Sz2 = tmp_Sz2*0.25; X->Phys.num_up = 0.5*(tmp_N+tmp_Sz); X->Phys.num_down = 0.5*(tmp_N-tmp_Sz); break; case SpinGC: X->Phys.doublon = 0.0; X->Phys.doublon2 = 0.0; X->Phys.num = X->Def.NsiteMPI; X->Phys.num2 = X->Def.NsiteMPI*X->Def.NsiteMPI; X->Phys.Sz = tmp_Sz*0.5; X->Phys.Sz2 = tmp_Sz2*0.25; X->Phys.num_up = 0.5*(X->Def.NsiteMPI+tmp_Sz); X->Phys.num_down = 0.5*(X->Def.NsiteMPI-tmp_Sz); break; case Spin: X->Phys.doublon = 0.0; X->Phys.doublon2 = 0.0; X->Phys.num_up = X->Def.Nup; X->Phys.num_down = X->Def.Ndown; X->Phys.num = (X->Def.Nup+X->Def.Ndown); X->Phys.num2 = (X->Def.Nup+X->Def.Ndown)*(X->Def.Nup+X->Def.Ndown); X->Phys.Sz = 0.5*(X->Def.Total2SzMPI); X->Phys.Sz2 = 0.25*pow((X->Def.Total2SzMPI),2); break; default: return -1; } StopTimer(nCalcFlct); #pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v1[i]=v0[i]; v0[i]=0.0+0.0*I; } int nCalcExpec; if(X->Def.iCalcType == Lanczos){ nCalcExpec=4302; } else if (X->Def.iCalcType == TPQCalc){ nCalcExpec=3202; } else{//For FullDiag nCalcExpec=5302; } StartTimer(nCalcExpec); mltply(X, v0, v1); // v0+=H*v1 StopTimer(nCalcExpec); /* switch -> SpinGCBoost */ dam_pr=0.0; dam_pr1=0.0; #pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) for(j=1;j<=i_max;j++){ dam_pr += conj(v1[j])*v0[j]; // E = <v1|H|v1>=<v1|v0> dam_pr1 += conj(v0[j])*v0[j]; // E^2 = <v1|H*H|v1>=<v0|v0> //v0[j]=v1[j]; v1-> orginal v0=H*v1 } dam_pr = SumMPI_dc(dam_pr); dam_pr1 = SumMPI_dc(dam_pr1); // fprintf(stdoutMPI, "Debug: ene=%lf, var=%lf\n", creal(dam_pr), creal(dam_pr1)); X->Phys.energy = dam_pr; X->Phys.var = dam_pr1; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeper(X, cFileNameTimeKeep, cExpecEnd, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); #endif break; default: break; } return 0; }
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int expec_energy(struct BindStruct *X){ long unsigned int i,j; long unsigned int irght,ilft,ihfbit; long unsigned int isite1; long unsigned int is1_up,is1_down; long unsigned int is1; double complex dam_pr,dam_pr1; long unsigned int num1_up, num1_down; long unsigned int ibit1; double tmp_num_up, tmp_num_down, tmp_doublon, tmp_num; double tmp_v02; long unsigned int i_max; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeper(X, cFileNameTimeKeep, cExpecStart, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecStart, "a", step_i); #endif break; case FullDiag: break; default: return -1; //break; } i_max=X->Check.idim_max; if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ return -1; } X->Large.i_max = i_max; X->Large.irght = irght; X->Large.ilft = ilft; X->Large.ihfbit = ihfbit; X->Large.mode = M_ENERGY; X->Phys.energy=0.0; dam_pr=0.0; // tentative doublon tmp_doublon=0.0; tmp_num_up=0.0; tmp_num_down=0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ if(isite1 > X->Def.Nsite){ switch(X->Def.iCalcModel){ case HubbardGC: case KondoGC: case Hubbard: case Kondo: is1_up = X->Def.Tpow[2 * isite1 - 2]; is1_down = X->Def.Tpow[2 * isite1 - 1]; is1 = is1_up+is1_down; ibit1 = (unsigned long int)myrank & is1; num1_up = (ibit1&is1_up) / is1_up; num1_down = (ibit1&is1_down) / is1_down; #pragma omp parallel for reduction(+:tmp_doublon, tmp_num_up, tmp_num_down) default(none) shared(v0) \ firstprivate(i_max, num1_up, num1_down) private(j, tmp_v02) for (j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; tmp_doublon += tmp_v02*num1_up*num1_down; tmp_num_up += tmp_v02*num1_up; tmp_num_up += tmp_v02*num1_down; } break; case SpinGC: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; ibit1 = (unsigned long int)myrank& is1_up; tmp_num=0; #pragma omp parallel for reduction(+:tmp_num)default(none) shared(v0) \ firstprivate(i_max) private(j) for (j = 1; j <= i_max; j++) tmp_num += conj(v0[j])*v0[j]; if(ibit1==is1_up){ tmp_num_up += tmp_num; } else{ tmp_num_down += tmp_num; } } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ break;/*case SpinGC*/ /* SpinGCBoost */ case Spin: break; default: return -1; } } else{ switch(X->Def.iCalcModel){ case HubbardGC: #pragma omp parallel for reduction(+:tmp_doublon, tmp_num_up, tmp_num_down) default(none) private(j, is1_up, is1_down, is1, ibit1,num1_up,num1_down, tmp_v02) shared(v0) firstprivate(i_max, X, isite1) for(j=1;j<=i_max;j++){ is1_up=X->Def.Tpow[2*isite1-2]; is1_down=X->Def.Tpow[2*isite1-1]; is1=is1_up+is1_down; ibit1=(j-1)&is1; num1_up = ((j-1)&is1_up)/is1_up; num1_down = ((j-1)&is1_down)/is1_down; tmp_v02 = conj(v0[j])*v0[j]; tmp_doublon += tmp_v02*num1_up*num1_down; tmp_num_up += tmp_v02*num1_up; tmp_num_down += tmp_v02*num1_down; } break; case Hubbard: case Kondo: case KondoGC: #pragma omp parallel for reduction(+:tmp_doublon, tmp_num_up, tmp_num_down) default(none) private(j, is1_up, is1_down, is1, ibit1,num1_up,num1_down, tmp_v02) shared(v0, list_1) firstprivate(i_max, X, isite1) for(j=1;j<=i_max;j++){ is1_up=X->Def.Tpow[2*isite1-2]; is1_down=X->Def.Tpow[2*isite1-1]; is1=is1_up+is1_down; ibit1=list_1[j]&is1; num1_up = (list_1[j]&is1_up)/is1_up; num1_down = (list_1[j]&is1_down)/is1_down; tmp_v02 = conj(v0[j])*v0[j]; tmp_doublon += tmp_v02*num1_up*num1_down; tmp_num_up += tmp_v02*num1_up; tmp_num_down += tmp_v02*num1_down; } break; case SpinGC: if(X->Def.iFlgGeneralSpin==FALSE){ is1_up=X->Def.Tpow[isite1-1]; #pragma omp parallel for reduction(+: tmp_num_up, tmp_num_down) default(none) private(j, ibit1,num1_up,num1_down, tmp_v02) shared(list_1, v0) firstprivate(i_max, X, isite1, is1_up) for(j=1;j<=i_max;j++){ ibit1=(j-1)&is1_up; tmp_v02 = conj(v0[j])*v0[j]; if(ibit1==is1_up){ tmp_num_up += tmp_v02; } else{ tmp_num_down +=tmp_v02; } } } break; /* SpinGCBoost */ case Spin: break; default: break; } } } tmp_doublon=SumMPI_d(tmp_doublon); tmp_num_up=SumMPI_d(tmp_num_up); tmp_num_down=SumMPI_d(tmp_num_down); tmp_num=SumMPI_d(tmp_num); switch(X->Def.iCalcModel){ case HubbardGC: case KondoGC: case Hubbard: case Kondo: X->Phys.doublon = tmp_doublon; X->Phys.num_up = tmp_num_up; X->Phys.num_down = tmp_num_down; X->Phys.num = tmp_num_up+tmp_num_down; break; case SpinGC: X->Phys.doublon = 0.0; X->Phys.num_up = tmp_num_up; X->Phys.num_down = tmp_num_down; X->Phys.num = tmp_num_up+tmp_num_down; break; case Spin: X->Phys.num_up = X->Def.Nup; X->Phys.num_down = X->Def.Ndown; X->Phys.num = X->Def.Nup+X->Def.Ndown;//canonical X->Phys.doublon = 0.0;// spin break; default: return -1; } #pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v1[i]=v0[i]; v0[i]=0.0+0.0*I; } mltply(X, v0, v1); // v0+=H*v1 /* switch -> SpinGCBoost */ dam_pr=0.0; dam_pr1=0.0; #pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) for(j=1;j<=i_max;j++){ dam_pr += conj(v1[j])*v0[j]; // E = <v1|H|v1>=<v1|v0> dam_pr1 += conj(v0[j])*v0[j]; // E^2 = <v1|H*H|v1>=<v0|v0> //v0[j]=v1[j]; v1-> orginal v0=H*v1 } dam_pr = SumMPI_dc(dam_pr); dam_pr1 = SumMPI_dc(dam_pr1); X->Phys.energy = dam_pr; X->Phys.var = dam_pr1; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeper(X, cFileNameTimeKeep, cExpecEnd, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); #endif break; default: break; } return 0; }
/** * * * @param X * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ void Lanczos_EigenVector(struct BindStruct *X){ printf("%s", cLogLanczos_EigenVectorStart); int i,j,i_max,iv; int k_exct; double beta1,alpha1,dnorm, dnorm_inv; double complex temp1,temp2; // for GC long unsigned int u_long_i; dsfmt_t dsfmt; k_exct = X->Def.k_exct; iv=X->Large.iv; i_max=X->Check.idim_max; //Eigenvectors by Lanczos method //initialization: initialization should be identical to that of Lanczos_EigenValue.c #pragma omp parallel for default(none) private(i) shared(v0, v1, vg) firstprivate(i_max) for(i=1;i<=i_max;i++){ v0[i]=0.0+0.0*I; v1[i]=0.0+0.0*I; vg[i]=0.0+0.0*I; } if(initial_mode == 0){ v1[iv]=1.0; vg[iv]=vec[k_exct][1]; }else if(initial_mode==1){ iv = X->Def.initial_iv; u_long_i = 123432 + abs(iv); dsfmt_init_gen_rand(&dsfmt, u_long_i); for(i = 1; i <= i_max; i++){ v1[i]=2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)+2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)*I; } dnorm=0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm) for(i=1;i<=i_max;i++){ dnorm += conj(v1[i])*v1[i]; } dnorm=sqrt(dnorm); dnorm_inv=1.0/dnorm; #pragma omp parallel for default(none) private(i) shared(v1,vg,vec,k_exct) firstprivate(i_max, dnorm_inv) for(i=1;i<=i_max;i++){ v1[i] = v1[i]*dnorm_inv; vg[i] = v1[i]*vec[k_exct][1]; } } mltply(X, v0, v1); alpha1=alpha[1]; beta1=beta[1]; #pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct) for(j=1;j<=i_max;j++){ vg[j]+=vec[k_exct][2]*(v0[j]-alpha1*v1[j])/beta1; } //iteration for(i=2;i<=X->Large.itr-1;i++){ #pragma omp parallel for default(none) private(j, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(j=1;j<=i_max;j++){ temp1=v1[j]; temp2=(v0[j]-alpha1*v1[j])/beta1; v0[j]=-beta1*temp1; v1[j]=temp2; } mltply(X, v0, v1); alpha1 = alpha[i]; beta1 = beta[i]; #pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct, i) for(j=1;j<=i_max;j++){ vg[j] += vec[k_exct][i+1]*(v0[j]-alpha1*v1[j])/beta1; } } #pragma omp parallel for default(none) private(j) shared(v0, vg) firstprivate(i_max) for(j=1;j<=i_max;j++){ v0[j] = vg[j]; } //normalization dnorm=0.0; #pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v0) firstprivate(i_max) for(j=1;j<=i_max;j++){ dnorm += conj(v0[j])*v0[j]; } dnorm=sqrt(dnorm); dnorm_inv=dnorm; #pragma omp parallel for default(none) private(j) shared(v0) firstprivate(i_max, dnorm_inv) for(j=1;j<=i_max;j++){ v0[j] = v0[j]*dnorm_inv; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorFinish, "a"); printf("%s", cLogLanczos_EigenVectorEnd); }