/** * @brief A main function to calculate spectrum by Lanczos * * @param[in,out] X CalcStruct list for getting and pushing calculation information * @retval 0 normally finished * @retval -1 unnormally finished * * @version 1.1 * @author Kazuyoshi Yoshimi (The University of Tokyo) * */ int CalcSpectrumByLanczos( struct EDMainCalStruct *X, double complex *tmp_v1, double dnorm, int Nomega, double complex *dcSpectrum, double complex *dcomega ) { char sdt[D_FileNameMax]; unsigned long int i, i_max; FILE *fp; int iret; unsigned long int liLanczosStp = X->Bind.Def.Lanczos_max; unsigned long int liLanczosStp_vec=0; if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) { fprintf(stdoutMPI, " Start: Input vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputSpectrumRecalcvecStart, "a"); StartTimer(6201); sprintf(sdt, cFileNameOutputRestartVec, X->Bind.Def.CDataFileHead, myrank); if (childfopenALL(sdt, "rb", &fp) != 0) { exitMPI(-1); } fread(&liLanczosStp_vec, sizeof(liLanczosStp_vec),1,fp); fread(&i_max, sizeof(long int), 1, fp); if(i_max != X->Bind.Check.idim_max){ fprintf(stderr, "Error: A size of Inputvector is incorrect.\n"); exitMPI(-1); } fread(v0, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fread(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); StopTimer(6201); fprintf(stdoutMPI, " End: Input vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputSpectrumRecalcvecEnd, "a"); } //Read diagonal components if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents || X->Bind.Def.iFlgCalcSpec ==RECALC_FROM_TMComponents_VEC|| X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) { StartTimer(6202); iret=ReadTMComponents(X, &dnorm, &liLanczosStp); if(!iret ==TRUE){ fprintf(stdoutMPI, " Error: Fail to read TMcomponents\n"); return FALSE; } if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents){ X->Bind.Def.Lanczos_restart=0; } else if(X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC|| X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC){ if(liLanczosStp_vec !=liLanczosStp){ fprintf(stdoutMPI, " Error: Input files for vector and TMcomponents are incoorect.\n"); fprintf(stdoutMPI, " Error: Input vector %ld th stps, TMcomponents %ld th stps.\n", liLanczosStp_vec, liLanczosStp); return FALSE; } X->Bind.Def.Lanczos_restart=liLanczosStp; liLanczosStp = liLanczosStp+X->Bind.Def.Lanczos_max; } StopTimer(6202); } // calculate ai, bi if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC ) { fprintf(stdoutMPI, " Start: Calculate tridiagonal matrix components.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_GetTridiagonalStart, "a"); // Functions in Lanczos_EigenValue StartTimer(6203); iret = Lanczos_GetTridiagonalMatrixComponents(&(X->Bind), alpha, beta, tmp_v1, &(liLanczosStp)); StopTimer(6203); if (iret != TRUE) { //Error Message will be added. return FALSE; } fprintf(stdoutMPI, " End: Calculate tridiagonal matrix components.\n\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_GetTridiagonalEnd, "a"); StartTimer(6204); OutputTMComponents(X, alpha,beta, dnorm, liLanczosStp); StopTimer(6204); }//X->Bind.Def.iFlgCalcSpec == RECALC_NOT || RECALC_FROM_TMComponents_VEC fprintf(stdoutMPI, " Start: Caclulate spectrum from tridiagonal matrix components.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumFromTridiagonalStart, "a"); StartTimer(6205); for( i = 0 ; i < Nomega; i++) { iret = GetSpectrumByTridiagonalMatrixComponents(alpha, beta, dnorm, dcomega[i], &dcSpectrum[i], liLanczosStp); if (iret != TRUE) { //ToDo: Error Message will be added. //ReAlloc alpha, beta and Set alpha_start and beta_start in Lanczos_EigenValue return FALSE; } dcSpectrum[i] = dnorm * dcSpectrum[i]; } StopTimer(6205); fprintf(stdoutMPI, " End: Caclulate spectrum from tridiagonal matrix components.\n\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumFromTridiagonalEnd, "a"); //output vectors for recalculation if(X->Bind.Def.iFlgCalcSpec==RECALC_OUTPUT_TMComponents_VEC || X->Bind.Def.iFlgCalcSpec==RECALC_INOUT_TMComponents_VEC){ StartTimer(6206); fprintf(stdoutMPI, " Start: Output vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_OutputSpectrumRecalcvecStart, "a"); sprintf(sdt, cFileNameOutputRestartVec, X->Bind.Def.CDataFileHead, myrank); if(childfopenALL(sdt, "wb", &fp)!=0){ exitMPI(-1); } fwrite(&liLanczosStp, sizeof(liLanczosStp),1,fp); fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max),1,fp); fwrite(v0, sizeof(complex double),X->Bind.Check.idim_max+1, fp); fwrite(v1, sizeof(complex double),X->Bind.Check.idim_max+1, fp); fclose(fp); fprintf(stdoutMPI, " End: Output vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_OutputSpectrumRecalcvecEnd, "a"); StopTimer(6206); } return TRUE; }
/** * @brief Calculate tridiagonal matrix components by Lanczos method * * @param _alpha * @param _beta * @param _v1 * @param Lanczos_step * * @return 0 */ int Lanczos_GetTridiagonalMatrixComponents( struct BindStruct *X, double *_alpha, double *_beta, double complex *tmp_v1, unsigned long int *liLanczos_step ) { FILE *fp; char sdt[D_FileNameMax]; int stp, iproc; long int i,iv,i_max; i_max=X->Check.idim_max; unsigned long int i_max_tmp, sum_i_max; int k_exct,Target; double beta1,alpha1; //beta,alpha1 should be real double complex temp1,temp2; double complex cbeta1; double complex *tmp_v0; c_malloc1(tmp_v0, i_max); sprintf(sdt, cFileNameLanczosStep, X->Def.CDataFileHead); /* Set Maximum number of loop to the dimention of the Wavefunction */ i_max_tmp = SumMPI_li(i_max); if(i_max_tmp < *liLanczos_step){ *liLanczos_step = i_max_tmp; } if(i_max_tmp < X->Def.LanczosTarget){ *liLanczos_step = i_max_tmp; } #pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v0[i]=0.0; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); mltply(X, v0, tmp_v1); stp=1; alpha1=creal(X->Large.prdct) ;// alpha = v^{\dag}*H*v alpha[1]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i = 1; i <= i_max; i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[1]=beta1; for(stp = 2; stp <= *liLanczos_step; stp++){ if(fabs(beta[stp-1])<pow(10.0, -14)){ *liLanczos_step=stp-1; break; } #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(i=1;i<=i_max;i++){ temp1 = v1[i]; temp2 = (v0[i]-alpha1*v1[i])/beta1; v0[i] = -beta1*temp1; v1[i] = temp2; } mltply(X, v0, v1); alpha1=creal(X->Large.prdct); alpha[stp]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i=1;i<=i_max;i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[stp]=beta1; } for(stp = 1; stp <= *liLanczos_step; stp++) { _alpha[stp] = alpha[stp]; _beta[stp]=beta[stp]; } return TRUE; }
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int Lanczos_EigenValue(struct BindStruct *X) { fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueStart); FILE *fp; char sdt[D_FileNameMax],sdt_2[D_FileNameMax]; int stp, iproc; long int i,iv,i_max; unsigned long int i_max_tmp, sum_i_max; int k_exct,Target; int iconv=-1; double beta1,alpha1; //beta,alpha1 should be real double complex temp1,temp2; double complex cbeta1; double E[5],ebefor; int mythread; // for GC double dnorm; double complex cdnorm; long unsigned int u_long_i; dsfmt_t dsfmt; #ifdef lapack double **tmp_mat; double *tmp_E; int int_i,int_j,mfint[7]; #endif sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead); i_max=X->Check.idim_max; k_exct = X->Def.k_exct; if(initial_mode == 0){ sum_i_max = SumMPI_li(X->Check.idim_max); X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1; iv=X->Large.iv; fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v0[i]=0.0; v1[i]=0.0; } sum_i_max = 0; for (iproc = 0; iproc < nproc; iproc++) { i_max_tmp = BcastMPI_li(iproc, i_max); if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { if (myrank == iproc) { v1[iv - sum_i_max+1] = 1.0; if (X->Def.iInitialVecType == 0) { v1[iv - sum_i_max+1] += 1.0*I; v1[iv - sum_i_max+1] /= sqrt(2.0); } }/*if (myrank == iproc)*/ }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ sum_i_max += i_max_tmp; }/*for (iproc = 0; iproc < nproc; iproc++)*/ }/*if(initial_mode == 0)*/ else if(initial_mode==1){ iv = X->Def.initial_iv; fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ shared(v0, v1, iv, X, nthreads, myrank) firstprivate(i_max) { #pragma omp for for (i = 1; i <= i_max; i++) { v0[i] = 0.0; } /* Initialise MT */ #ifdef _OPENMP mythread = omp_get_thread_num(); #else mythread = 0; #endif u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank; dsfmt_init_gen_rand(&dsfmt, u_long_i); if (X->Def.iInitialVecType == 0) { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; } else { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); } }/*#pragma omp parallel*/ cdnorm=0.0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: cdnorm) for(i=1;i<=i_max;i++){ cdnorm += conj(v1[i])*v1[i]; } cdnorm = SumMPI_dc(cdnorm); dnorm=creal(cdnorm); dnorm=sqrt(dnorm); #pragma omp parallel for default(none) private(i) shared(v1) firstprivate(i_max, dnorm) for(i=1;i<=i_max;i++){ v1[i] = v1[i]/dnorm; } }/*else if(initial_mode==1)*/ //Eigenvalues by Lanczos method TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); mltply(X, v0, v1); stp=1; TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct) ;// alpha = v^{\dag}*H*v alpha[1]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i = 1; i <= i_max; i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[1]=beta1; ebefor=0; /* Set Maximum number of loop to the dimention of the Wavefunction */ i_max_tmp = SumMPI_li(i_max); if(i_max_tmp < X->Def.Lanczos_max){ X->Def.Lanczos_max = i_max_tmp; } if(i_max_tmp < X->Def.LanczosTarget){ X->Def.LanczosTarget = i_max_tmp; } if(i_max_tmp == 1){ E[1]=alpha[1]; vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; fprintf(stdoutMPI," LanczosStep E[1] \n"); fprintf(stdoutMPI," stp=%d %.10lf \n",stp,E[1]); } else{ #ifdef lapack fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); #else fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] \n"); #endif for(stp = 2; stp <= X->Def.Lanczos_max; stp++){ #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(i=1;i<=i_max;i++){ temp1 = v1[i]; temp2 = (v0[i]-alpha1*v1[i])/beta1; v0[i] = -beta1*temp1; v1[i] = temp2; } mltply(X, v0, v1); TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct); alpha[stp]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i=1;i<=i_max;i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[stp]=beta1; Target = X->Def.LanczosTarget; if(stp==2){ #ifdef lapack d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; tmp_mat[1][0] = beta[1]; tmp_mat[1][1] = alpha[2]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); #else bisec(alpha,beta,stp,E,4,eps_Bisec); #endif ebefor=E[Target]; childfopenMPI(sdt_2,"w", &fp); #ifdef lapack fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #else fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] \n"); fprintf(fp,"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #endif fclose(fp); } if(stp>2 && stp%2==0){ childfopenMPI(sdt_2,"a", &fp); #ifdef lapack d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; for(int_i=1;int_i<stp-1;int_i++){ tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i+1] = beta[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; } tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; E[0] = tmp_E[stp-1]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); #else bisec(alpha,beta,stp,E,4,eps_Bisec); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf \n",stp,E[1],E[2],E[3],E[4]); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4]); #endif fclose(fp); if(fabs((E[Target]-ebefor)/E[Target])<eps_Lanczos || fabs(beta[stp])<pow(10.0, -14)){ vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; break; } ebefor=E[Target]; } } } sprintf(sdt,cFileNameTimeKeep,X->Def.CDataFileHead); if(iconv!=0){ sprintf(sdt, cLogLanczos_EigenValueNotConverged); return -1; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueFinish, "a"); fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd); return 0; }
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int expec_energy(struct BindStruct *X){ long unsigned int i,j; long unsigned int irght,ilft,ihfbit; long unsigned int isite1; long unsigned int is1_up,is1_down; long unsigned int is1; double complex dam_pr,dam_pr1; long unsigned int num1_up, num1_down; long unsigned int ibit1; double tmp_num_up, tmp_num_down, tmp_doublon, tmp_num; double tmp_v02; long unsigned int i_max; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeper(X, cFileNameTimeKeep, cExpecStart, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecStart, "a", step_i); #endif break; case FullDiag: break; default: return -1; //break; } i_max=X->Check.idim_max; if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ return -1; } X->Large.i_max = i_max; X->Large.irght = irght; X->Large.ilft = ilft; X->Large.ihfbit = ihfbit; X->Large.mode = M_ENERGY; X->Phys.energy=0.0; dam_pr=0.0; // tentative doublon tmp_doublon=0.0; tmp_num_up=0.0; tmp_num_down=0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ if(isite1 > X->Def.Nsite){ switch(X->Def.iCalcModel){ case HubbardGC: case KondoGC: case Hubbard: case Kondo: is1_up = X->Def.Tpow[2 * isite1 - 2]; is1_down = X->Def.Tpow[2 * isite1 - 1]; is1 = is1_up+is1_down; ibit1 = (unsigned long int)myrank & is1; num1_up = (ibit1&is1_up) / is1_up; num1_down = (ibit1&is1_down) / is1_down; #pragma omp parallel for reduction(+:tmp_doublon, tmp_num_up, tmp_num_down) default(none) shared(v0) \ firstprivate(i_max, num1_up, num1_down) private(j, tmp_v02) for (j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; tmp_doublon += tmp_v02*num1_up*num1_down; tmp_num_up += tmp_v02*num1_up; tmp_num_up += tmp_v02*num1_down; } break; case SpinGC: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; ibit1 = (unsigned long int)myrank& is1_up; tmp_num=0; #pragma omp parallel for reduction(+:tmp_num)default(none) shared(v0) \ firstprivate(i_max) private(j) for (j = 1; j <= i_max; j++) tmp_num += conj(v0[j])*v0[j]; if(ibit1==is1_up){ tmp_num_up += tmp_num; } else{ tmp_num_down += tmp_num; } } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ break;/*case SpinGC*/ /* SpinGCBoost */ case Spin: break; default: return -1; } } else{ switch(X->Def.iCalcModel){ case HubbardGC: #pragma omp parallel for reduction(+:tmp_doublon, tmp_num_up, tmp_num_down) default(none) private(j, is1_up, is1_down, is1, ibit1,num1_up,num1_down, tmp_v02) shared(v0) firstprivate(i_max, X, isite1) for(j=1;j<=i_max;j++){ is1_up=X->Def.Tpow[2*isite1-2]; is1_down=X->Def.Tpow[2*isite1-1]; is1=is1_up+is1_down; ibit1=(j-1)&is1; num1_up = ((j-1)&is1_up)/is1_up; num1_down = ((j-1)&is1_down)/is1_down; tmp_v02 = conj(v0[j])*v0[j]; tmp_doublon += tmp_v02*num1_up*num1_down; tmp_num_up += tmp_v02*num1_up; tmp_num_down += tmp_v02*num1_down; } break; case Hubbard: case Kondo: case KondoGC: #pragma omp parallel for reduction(+:tmp_doublon, tmp_num_up, tmp_num_down) default(none) private(j, is1_up, is1_down, is1, ibit1,num1_up,num1_down, tmp_v02) shared(v0, list_1) firstprivate(i_max, X, isite1) for(j=1;j<=i_max;j++){ is1_up=X->Def.Tpow[2*isite1-2]; is1_down=X->Def.Tpow[2*isite1-1]; is1=is1_up+is1_down; ibit1=list_1[j]&is1; num1_up = (list_1[j]&is1_up)/is1_up; num1_down = (list_1[j]&is1_down)/is1_down; tmp_v02 = conj(v0[j])*v0[j]; tmp_doublon += tmp_v02*num1_up*num1_down; tmp_num_up += tmp_v02*num1_up; tmp_num_down += tmp_v02*num1_down; } break; case SpinGC: if(X->Def.iFlgGeneralSpin==FALSE){ is1_up=X->Def.Tpow[isite1-1]; #pragma omp parallel for reduction(+: tmp_num_up, tmp_num_down) default(none) private(j, ibit1,num1_up,num1_down, tmp_v02) shared(list_1, v0) firstprivate(i_max, X, isite1, is1_up) for(j=1;j<=i_max;j++){ ibit1=(j-1)&is1_up; tmp_v02 = conj(v0[j])*v0[j]; if(ibit1==is1_up){ tmp_num_up += tmp_v02; } else{ tmp_num_down +=tmp_v02; } } } break; /* SpinGCBoost */ case Spin: break; default: break; } } } tmp_doublon=SumMPI_d(tmp_doublon); tmp_num_up=SumMPI_d(tmp_num_up); tmp_num_down=SumMPI_d(tmp_num_down); tmp_num=SumMPI_d(tmp_num); switch(X->Def.iCalcModel){ case HubbardGC: case KondoGC: case Hubbard: case Kondo: X->Phys.doublon = tmp_doublon; X->Phys.num_up = tmp_num_up; X->Phys.num_down = tmp_num_down; X->Phys.num = tmp_num_up+tmp_num_down; break; case SpinGC: X->Phys.doublon = 0.0; X->Phys.num_up = tmp_num_up; X->Phys.num_down = tmp_num_down; X->Phys.num = tmp_num_up+tmp_num_down; break; case Spin: X->Phys.num_up = X->Def.Nup; X->Phys.num_down = X->Def.Ndown; X->Phys.num = X->Def.Nup+X->Def.Ndown;//canonical X->Phys.doublon = 0.0;// spin break; default: return -1; } #pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v1[i]=v0[i]; v0[i]=0.0+0.0*I; } mltply(X, v0, v1); // v0+=H*v1 /* switch -> SpinGCBoost */ dam_pr=0.0; dam_pr1=0.0; #pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) for(j=1;j<=i_max;j++){ dam_pr += conj(v1[j])*v0[j]; // E = <v1|H|v1>=<v1|v0> dam_pr1 += conj(v0[j])*v0[j]; // E^2 = <v1|H*H|v1>=<v0|v0> //v0[j]=v1[j]; v1-> orginal v0=H*v1 } dam_pr = SumMPI_dc(dam_pr); dam_pr1 = SumMPI_dc(dam_pr1); X->Phys.energy = dam_pr; X->Phys.var = dam_pr1; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeper(X, cFileNameTimeKeep, cExpecEnd, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); #endif break; default: break; } return 0; }
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int expec_energy_flct(struct BindStruct *X){ long unsigned int i,j; long unsigned int irght,ilft,ihfbit; long unsigned int isite1; long unsigned int is1_up,is1_down; long unsigned int is1; double complex dam_pr,dam_pr1; long int num1_up, num1_down; long unsigned int ibit1; double tmp_num_up, tmp_num_down; double D,tmp_D,tmp_D2; double N,tmp_N,tmp_N2; double Sz,tmp_Sz, tmp_Sz2; double tmp_v02; long unsigned int i_max,tmp_list_1; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeper(X, cFileNameTimeKeep, cExpecStart, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecStart, "a", step_i); #endif break; case FullDiag: break; default: return -1; //break; } i_max=X->Check.idim_max; if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ return -1; } X->Large.i_max = i_max; X->Large.irght = irght; X->Large.ilft = ilft; X->Large.ihfbit = ihfbit; X->Large.mode = M_ENERGY; X->Phys.energy=0.0; dam_pr=0.0; // tentative doublon tmp_D = 0.0; tmp_D2 = 0.0; tmp_N = 0.0; tmp_N2 = 0.0; tmp_Sz = 0.0; tmp_Sz2 = 0.0; tmp_num_up = 0.0; tmp_num_down = 0.0; int nCalcFlct; if(X->Def.iCalcType == Lanczos){ nCalcFlct=4301; } else if (X->Def.iCalcType == TPQCalc){ nCalcFlct=3201; } else{//For FullDiag nCalcFlct=5301; } StartTimer(nCalcFlct); switch(X->Def.iCalcModel){ case HubbardGC: #pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2, tmp_num_up, tmp_num_down) default(none) shared(v0) \ firstprivate(i_max, num1_up, num1_down,X,myrank) private(j, tmp_v02,D,N,Sz,isite1,is1_up,is1_down,is1,ibit1) for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; D = 0.0; N = 0.0; Sz = 0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ if(isite1 > X->Def.Nsite){ is1_up = X->Def.Tpow[2 * isite1 - 2]; is1_down = X->Def.Tpow[2 * isite1 - 1]; is1 = is1_up+is1_down; ibit1 = (unsigned long int)myrank & is1; num1_up = (ibit1&is1_up) / is1_up; num1_down = (ibit1&is1_down) / is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; }else{ is1_up = X->Def.Tpow[2*isite1-2]; is1_down = X->Def.Tpow[2*isite1-1]; is1 = is1_up+is1_down; ibit1 = (j-1)&is1; num1_up = ((j-1)&is1_up)/is1_up; num1_down = ((j-1)&is1_down)/is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; } } tmp_D += tmp_v02*D; tmp_D2 += tmp_v02*D*D; tmp_N += tmp_v02*N; tmp_N2 += tmp_v02*N*N; tmp_Sz += tmp_v02*Sz; tmp_Sz2 += tmp_v02*Sz*Sz; } break; case KondoGC: case Hubbard: case Kondo: #pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2) default(none) shared(v0,list_1) \ firstprivate(i_max, num1_up, num1_down,X,myrank) private(j, tmp_v02,D,N,Sz,isite1,is1_up,is1_down,is1,ibit1,tmp_list_1) for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; D = 0.0; N = 0.0; Sz = 0.0; tmp_list_1 = list_1[j]; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ //printf("DEBUG: j=%d %d %d\n",j,isite1,myrank); if(isite1 > X->Def.Nsite){ is1_up = X->Def.Tpow[2 * isite1 - 2]; is1_down = X->Def.Tpow[2 * isite1 - 1]; is1 = is1_up+is1_down; ibit1 = (unsigned long int)myrank & is1; num1_up = (ibit1&is1_up) / is1_up; num1_down = (ibit1&is1_down) / is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; }else{ is1_up = X->Def.Tpow[2*isite1-2]; is1_down = X->Def.Tpow[2*isite1-1]; is1 = is1_up+is1_down; //ibit1 = tmp_list_1&is1; num1_up = (tmp_list_1&is1_up)/is1_up; num1_down = (tmp_list_1&is1_down)/is1_down; D += num1_up*num1_down; N += num1_up+num1_down; Sz += num1_up-num1_down; } } tmp_D += tmp_v02*D; tmp_D2 += tmp_v02*D*D; tmp_N += tmp_v02*N; tmp_N2 += tmp_v02*N*N; tmp_Sz += tmp_v02*Sz; tmp_Sz2 += tmp_v02*Sz*Sz; } break; case SpinGC: if(X->Def.iFlgGeneralSpin == FALSE) { #pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0) \ firstprivate(i_max,X,myrank) private(j,Sz, is1_up,ibit1,isite1,tmp_v02) for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; Sz = 0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ if(isite1 > X->Def.Nsite){ is1_up = X->Def.Tpow[isite1 - 1]; ibit1 = (unsigned long int)myrank& is1_up; if(ibit1==is1_up){ Sz += 1.0; }else{ Sz += -1.0; } }else{ is1_up=X->Def.Tpow[isite1-1]; ibit1=(j-1)&is1_up; if(ibit1==is1_up){ Sz += 1.0; }else{ Sz += -1.0; } } } tmp_Sz += Sz*tmp_v02; tmp_Sz2 += Sz*Sz*tmp_v02; } } else{//for generalspin for(j = 1; j <= i_max; j++){ tmp_v02 = conj(v0[j])*v0[j]; Sz = 0.0; for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ //prefactor 0.5 is added later. if(isite1 > X->Def.Nsite){ Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); }else{ Sz += GetLocal2Sz(isite1, j-1, X->Def.SiteToBit, X->Def.Tpow); } } tmp_Sz += Sz*tmp_v02; tmp_Sz2 += Sz*Sz*tmp_v02; } } break;/*case SpinGC*/ /* SpinGCBoost */ case Spin: break; default: return -1; } tmp_D = SumMPI_d(tmp_D); tmp_D2 = SumMPI_d(tmp_D2); tmp_N = SumMPI_d(tmp_N); tmp_N2 = SumMPI_d(tmp_N2); tmp_Sz = SumMPI_d(tmp_Sz); tmp_Sz2 = SumMPI_d(tmp_Sz2); // tmp_num_up = SumMPI_d(tmp_num_up); // tmp_num_down = SumMPI_d(tmp_num_down); switch(X->Def.iCalcModel){ case HubbardGC: case KondoGC: case Hubbard: case Kondo: X->Phys.doublon = tmp_D; X->Phys.doublon2 = tmp_D2; X->Phys.num = tmp_N; X->Phys.num2 = tmp_N2; X->Phys.Sz = tmp_Sz*0.5; X->Phys.Sz2 = tmp_Sz2*0.25; X->Phys.num_up = 0.5*(tmp_N+tmp_Sz); X->Phys.num_down = 0.5*(tmp_N-tmp_Sz); break; case SpinGC: X->Phys.doublon = 0.0; X->Phys.doublon2 = 0.0; X->Phys.num = X->Def.NsiteMPI; X->Phys.num2 = X->Def.NsiteMPI*X->Def.NsiteMPI; X->Phys.Sz = tmp_Sz*0.5; X->Phys.Sz2 = tmp_Sz2*0.25; X->Phys.num_up = 0.5*(X->Def.NsiteMPI+tmp_Sz); X->Phys.num_down = 0.5*(X->Def.NsiteMPI-tmp_Sz); break; case Spin: X->Phys.doublon = 0.0; X->Phys.doublon2 = 0.0; X->Phys.num_up = X->Def.Nup; X->Phys.num_down = X->Def.Ndown; X->Phys.num = (X->Def.Nup+X->Def.Ndown); X->Phys.num2 = (X->Def.Nup+X->Def.Ndown)*(X->Def.Nup+X->Def.Ndown); X->Phys.Sz = 0.5*(X->Def.Total2SzMPI); X->Phys.Sz2 = 0.25*pow((X->Def.Total2SzMPI),2); break; default: return -1; } StopTimer(nCalcFlct); #pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v1[i]=v0[i]; v0[i]=0.0+0.0*I; } int nCalcExpec; if(X->Def.iCalcType == Lanczos){ nCalcExpec=4302; } else if (X->Def.iCalcType == TPQCalc){ nCalcExpec=3202; } else{//For FullDiag nCalcExpec=5302; } StartTimer(nCalcExpec); mltply(X, v0, v1); // v0+=H*v1 StopTimer(nCalcExpec); /* switch -> SpinGCBoost */ dam_pr=0.0; dam_pr1=0.0; #pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) for(j=1;j<=i_max;j++){ dam_pr += conj(v1[j])*v0[j]; // E = <v1|H|v1>=<v1|v0> dam_pr1 += conj(v0[j])*v0[j]; // E^2 = <v1|H*H|v1>=<v0|v0> //v0[j]=v1[j]; v1-> orginal v0=H*v1 } dam_pr = SumMPI_dc(dam_pr); dam_pr1 = SumMPI_dc(dam_pr1); // fprintf(stdoutMPI, "Debug: ene=%lf, var=%lf\n", creal(dam_pr), creal(dam_pr1)); X->Phys.energy = dam_pr; X->Phys.var = dam_pr1; switch(X->Def.iCalcType){ case Lanczos: fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeper(X, cFileNameTimeKeep, cExpecEnd, "a"); break; case TPQCalc: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); #endif break; default: break; } return 0; }
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int diagonalcalc ( struct BindStruct *X ){ FILE *fp; long unsigned int i,j; long unsigned int isite1,isite2; long unsigned int spin; double tmp_V; /*[s] For InterAll*/ long unsigned int A_spin,B_spin; /*[e] For InterAll*/ long unsigned int i_max=X->Check.idim_max; printf("%s", cProStartCalcDiag); #pragma omp parallel for default(none) private(j) shared(list_Diagonal) firstprivate(i_max) for(j = 1;j <= i_max; j++){ list_Diagonal[j]=0.0; } if(X->Def.NCoulombIntra>0){ if(childfopen(cFileNameCheckCoulombIntra, "w", &fp)!=0){ return -1; } for(i = 0; i < X->Def.NCoulombIntra; i++){ isite1 = X->Def.CoulombIntra[i][0]+1; tmp_V = X->Def.ParaCoulombIntra[i]; fprintf(fp,"i=%ld isite1=%ld tmp_V=%lf \n",i,isite1,tmp_V); SetDiagonalCoulombIntra(isite1, tmp_V, X); } fclose(fp); } if(X->Def.EDNChemi>0){ if(childfopen(cFileNameCheckChemi,"w", &fp)!=0){ return -1; } for(i = 0; i < X->Def.EDNChemi; i++){ isite1 = X->Def.EDChemi[i]+1; spin = X->Def.EDSpinChemi[i]; tmp_V = -X->Def.EDParaChemi[i]; fprintf(fp,"i=%ld spin=%ld isite1=%ld tmp_V=%lf \n",i,spin,isite1,tmp_V); if(SetDiagonalChemi(isite1, tmp_V,spin, X) !=0){ return -1; } } fclose(fp); } if(X->Def.NCoulombInter>0){ if(childfopen(cFileNameCheckInterU,"w", &fp)!=0){ return -1; } for(i = 0; i < X->Def.NCoulombInter; i++){ isite1 = X->Def.CoulombInter[i][0]+1; isite2 = X->Def.CoulombInter[i][1]+1; tmp_V = X->Def.ParaCoulombInter[i]; fprintf(fp,"i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n",i,isite1,isite2,tmp_V); if(SetDiagonalCoulombInter(isite1, isite2, tmp_V, X) !=0){ return -1; } } fclose(fp); } if(X->Def.NHundCoupling>0){ if(childfopen(cFileNameCheckHund,"w", &fp) !=0){ return -1; } for(i = 0; i < X->Def.NHundCoupling; i++){ isite1 = X->Def.HundCoupling[i][0]+1; isite2 = X->Def.HundCoupling[i][1]+1; tmp_V = -X->Def.ParaHundCoupling[i]; if(SetDiagonalHund(isite1, isite2, tmp_V, X) !=0){ return -1; } fprintf(fp,"i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n",i,isite1,isite2,tmp_V); } fclose(fp); } if(X->Def.NInterAll_Diagonal>0){ if(childfopen(cFileNameCheckInterAll,"w", &fp) !=0){ return -1; } for(i = 0; i < X->Def.NInterAll_Diagonal; i++){ isite1=X->Def.InterAll_Diagonal[i][0]+1; A_spin=X->Def.InterAll_Diagonal[i][1]; isite2=X->Def.InterAll_Diagonal[i][2]+1; B_spin=X->Def.InterAll_Diagonal[i][3]; tmp_V = X->Def.ParaInterAll_Diagonal[i]; fprintf(fp,"i=%ld isite1=%ld A_spin=%ld isite2=%ld B_spin=%ld tmp_V=%lf \n", i, isite1, A_spin, isite2, B_spin, tmp_V); SetDiagonalInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X); } fclose(fp); } TimeKeeper(X, cFileNameTimeKeep, cDiagonalCalcFinish, "w"); printf("%s", cProEndCalcDiag); return 0; }
/** * @brief Parent function to calculate two-body green's functions * * @param X data list for calculation * @param vec eigenvectors * * @retval 0 normally finished * @retval -1 unnormally finished * @note the origin of function's name cisajscktalt comes from c=creation, i=ith site, s=spin, a=annihiration, j=jth site and so on. * * @version 0.2 * @details add function to treat the case of general spin * * @version 0.1 * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ int expec_cisajscktaltdc ( struct BindStruct *X, double complex *vec ) { FILE *fp; char sdt[D_FileNameMax]; long unsigned int i,j; long unsigned int irght,ilft,ihfbit; long unsigned int isite1,isite2,isite3,isite4; long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4; long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4; long unsigned int isA_up, isB_up; long unsigned int is1_up, is2_up; long unsigned int Asum,Bsum,Adiff,Bdiff; long unsigned int tmp_off=0; long unsigned int tmp_off_2=0; long unsigned int list1_off=0; int tmp_sgn, num1, num2; double complex tmp_V; double complex dam_pr; long int i_max; //For TPQ int step=0; int rand_i=0; //For Kond double complex dmv; if(X->Def.NCisAjtCkuAlvDC <1) return 0; i_max=X->Check.idim_max; X->Large.mode=M_CORR; tmp_V = 1.0+0.0*I; if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0) { return -1; } dam_pr=0.0; //Make File Name for output switch (X->Def.iCalcType) { case Lanczos: if(X->Def.St==0) { sprintf(sdt, cFileName2BGreen_Lanczos, X->Def.CDataFileHead); TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecTwoBodyGStart,"a"); fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGStart); } else if(X->Def.St==1) { sprintf(sdt, cFileName2BGreen_CG, X->Def.CDataFileHead); TimeKeeper(X, cFileNameTimeKeep, cCGExpecTwoBodyGStart,"a"); fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGStart); } break; case TPQCalc: step=X->Def.istep; rand_i=X->Def.irand; TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGStart, "a", rand_i, step); sprintf(sdt, cFileName2BGreen_TPQ, X->Def.CDataFileHead, rand_i, step); break; case FullDiag: sprintf(sdt, cFileName2BGreen_FullDiag, X->Def.CDataFileHead, X->Phys.eigen_num); break; } if(!childfopenMPI(sdt, "w", &fp)==0) { return -1; } switch(X->Def.iCalcModel) { case HubbardGC: for(i=0; i<X->Def.NCisAjtCkuAlvDC; i++) { org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; dam_pr=0.0; if(CheckPE(org_isite1-1, X)==TRUE || CheckPE(org_isite2-1, X)==TRUE || CheckPE(org_isite3-1, X)==TRUE || CheckPE(org_isite4-1, X)==TRUE) { isite1 = X->Def.OrgTpow[2*org_isite1-2+org_sigma1] ; isite2 = X->Def.OrgTpow[2*org_isite2-2+org_sigma2] ; isite3 = X->Def.OrgTpow[2*org_isite3-2+org_sigma3] ; isite4 = X->Def.OrgTpow[2*org_isite4-2+org_sigma4] ; if(isite1 == isite2 && isite3 == isite4) { dam_pr = X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, 1.0, X, vec, vec); } else if(isite1 == isite2 && isite3 != isite4) { dam_pr = X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, 1.0, X, vec, vec); } else if(isite1 != isite2 && isite3 == isite4) { dam_pr = X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, org_isite3-1, org_sigma3, 1.0, X, vec, vec); } else if(isite1 != isite2 && isite3 != isite4) { dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, 1.0, X, vec, vec); } }//InterPE else { child_general_int_GetInfo ( i, X, org_isite1, org_isite2, org_isite3, org_isite4, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V ); i_max = X->Large.i_max; isite1 = X->Large.is1_spin; isite2 = X->Large.is2_spin; Asum = X->Large.isA_spin; Adiff = X->Large.A_spin; isite3 = X->Large.is3_spin; isite4 = X->Large.is4_spin; Bsum = X->Large.isB_spin; Bdiff = X->Large.B_spin; if(isite1 == isite2 && isite3 == isite4) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr += GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, vec, vec, X, &tmp_off); } } else if(isite1 == isite2 && isite3 != isite4) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr += GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off); } } else if(isite1 != isite2 && isite3 == isite4) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr +=GC_child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, vec, vec, X, &tmp_off); } } else if(isite1 != isite2 && isite3 != isite4) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr +=GC_child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off_2); } } } dam_pr = SumMPI_dc(dam_pr); fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1, org_isite2-1,org_sigma2, org_isite3-1, org_sigma3, org_isite4-1,org_sigma4, creal(dam_pr), cimag(dam_pr)); }//Intra PE break; case KondoGC: case Hubbard: case Kondo: for(i=0; i<X->Def.NCisAjtCkuAlvDC; i++) { org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; tmp_V = 1.0; dam_pr=0.0; if(X->Def.iFlgSzConserved ==TRUE) { if(org_sigma1+org_sigma3 != org_sigma2+org_sigma4) { dam_pr=SumMPI_dc(dam_pr); fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, creal(dam_pr), cimag(dam_pr)); continue; } } if(CheckPE(org_isite1-1, X)==TRUE || CheckPE(org_isite2-1, X)==TRUE || CheckPE(org_isite3-1, X)==TRUE || CheckPE(org_isite4-1, X)==TRUE) { isite1 = X->Def.OrgTpow[2*org_isite1-2+org_sigma1] ; isite2 = X->Def.OrgTpow[2*org_isite2-2+org_sigma2] ; isite3 = X->Def.OrgTpow[2*org_isite3-2+org_sigma3] ; isite4 = X->Def.OrgTpow[2*org_isite4-2+org_sigma4] ; if(isite1 == isite2 && isite3 == isite4) { dam_pr = X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, 1.0, X, vec, vec); } else if(isite1 == isite2 && isite3 != isite4) { dam_pr = X_child_CisAisCjtAku_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, 1.0, X, vec, vec); } else if(isite1 != isite2 && isite3 == isite4) { dam_pr = X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, org_isite3-1, org_sigma3, 1.0, X, vec, vec); } else if(isite1 != isite2 && isite3 != isite4) { dam_pr = X_child_CisAjtCkuAlv_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, 1.0, X, vec, vec); } }//InterPE else { child_general_int_GetInfo( i, X, org_isite1, org_isite2, org_isite3, org_isite4, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V ); i_max = X->Large.i_max; isite1 = X->Large.is1_spin; isite2 = X->Large.is2_spin; Asum = X->Large.isA_spin; Adiff = X->Large.A_spin; isite3 = X->Large.is3_spin; isite4 = X->Large.is4_spin; Bsum = X->Large.isB_spin; Bdiff = X->Large.B_spin; tmp_V = 1.0; dam_pr = 0.0; if(isite1 == isite2 && isite3 == isite4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) for(j=1; j<=i_max; j++) { dam_pr += child_CisAisCisAis_element(j, isite1, isite3, tmp_V, vec, vec, X, &tmp_off); } } else if(isite1 == isite2 && isite3 != isite4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) for(j=1; j<=i_max; j++) { dam_pr += child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off); } } else if(isite1 != isite2 && isite3 == isite4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) for(j=1; j<=i_max; j++) { dam_pr +=child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, vec, vec, X, &tmp_off); } } else if(isite1 != isite2 && isite3 != isite4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) for(j=1; j<=i_max; j++) { dam_pr +=child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off_2); } } } dam_pr = SumMPI_dc(dam_pr); fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1, org_isite2-1,org_sigma2, org_isite3-1, org_sigma3, org_isite4-1,org_sigma4, creal(dam_pr), cimag(dam_pr)); } break; case Spin: if(X->Def.iFlgGeneralSpin==FALSE) { for(i=0; i<X->Def.NCisAjtCkuAlvDC; i++) { tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0) { //error message will be added fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); continue; } dam_pr = 0.0; if(org_isite1 >X->Def.Nsite && org_isite3>X->Def.Nsite) { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal is1_up = X->Def.Tpow[org_isite1 - 1]; is2_up = X->Def.Tpow[org_isite3 - 1]; num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); #pragma omp parallel for default(none) reduction (+:dam_pr) shared(vec) \ firstprivate(i_max, num1, num2, tmp_V) private(j) for (j = 1; j <= i_max; j++) { dam_pr += tmp_V*num1*num2*vec[j]*conj(vec[j]); } } else if(org_isite1==org_isite3 && org_sigma1==org_sigma4 && org_sigma2==org_sigma3) { is1_up = X->Def.Tpow[org_isite1 - 1]; num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); #pragma omp parallel for default(none) reduction (+:dam_pr) shared(vec) \ firstprivate(i_max, num1, num2, tmp_V) private(j) for (j = 1; j <= i_max; j++) { dam_pr += tmp_V*num1*vec[j]*conj(vec[j]); } } else if(org_sigma1==org_sigma4 && org_sigma2==org_sigma3) { //exchange dam_pr += X_child_general_int_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } else { // other process is not allowed // error message will be added } } else if(org_isite1 > X->Def.Nsite || org_isite3>X->Def.Nsite) { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal is1_up = X->Def.Tpow[org_isite1 - 1]; is2_up = X->Def.Tpow[org_isite3 - 1]; num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); dam_pr=0.0; #pragma omp parallel for default(none) reduction(+:dam_pr)shared(vec) \ firstprivate(i_max, tmp_V, is1_up, org_sigma1, X, num2) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = X_Spin_CisAis(j, X, is1_up, org_sigma1); dam_pr += tmp_V*num1*num2*conj(vec[j])*vec[j]; } } else if(org_sigma1==org_sigma4 && org_sigma2==org_sigma3) { //exchange dam_pr += X_child_general_int_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } else { // other process is not allowed // error message will be added dam_pr=0.0; } } else { isA_up = X->Def.Tpow[org_isite1-1]; isB_up = X->Def.Tpow[org_isite3-1]; if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_off_2, tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr +=child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, vec, vec, X); } } else if(org_isite1==org_isite3 && org_sigma1==org_sigma4 && org_sigma3==org_sigma2) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(i_max,X,isA_up, tmp_V) shared(vec, list_1) for(j=1; j<=i_max; j++) { dmv=X_CisAis(list_1[j], X, isA_up); dam_pr += vec[j]*tmp_V*dmv*conj(vec[j]); } } else if(org_sigma1==org_sigma4 && org_sigma2==org_sigma3) { // exchange dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { tmp_sgn = X_child_exchange_spin_element(j,X,isA_up,isB_up,org_sigma2,org_sigma4,&tmp_off); dmv = vec[j]*tmp_sgn; dam_pr += conj(vec[tmp_off])*dmv; } } else { // other process is not allowed // error message will be added dam_pr=0.0; } } dam_pr = SumMPI_dc(dam_pr); fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,creal(dam_pr),cimag(dam_pr)); } }//iFlgGeneralSpin = FALSE else { for(i=0; i<X->Def.NCisAjtCkuAlvDC; i++) { tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0) { fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); continue; } dam_pr = 0.0; if(org_isite1 >X->Def.Nsite && org_isite3>X->Def.Nsite) { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr=X_child_CisAisCjuAju_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { dam_pr=X_child_CisAitCjuAjv_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); } else { dam_pr=0.0; } } else if(org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr=X_child_CisAisCjuAju_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { dam_pr=X_child_CisAitCjuAjv_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); } else { dam_pr=0.0; } } else { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) shared(vec,list_1) for(j=1; j<=i_max; j++) { num1=BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { num1=BitCheckGeneral(list_1[j], org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { dam_pr += tmp_V*conj(vec[j])*vec[j]; } } } } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, list1_off, tmp_V) shared(vec, list_1) for(j=1; j<=i_max; j++) { num1 = num1*GetOffCompGeneralSpin(list_1[j], org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { num1 = GetOffCompGeneralSpin(tmp_off, org_isite1, org_sigma2, org_sigma1, &tmp_off_2, X->Def.SiteToBit, X->Def.Tpow); ConvertToList1GeneralSpin(tmp_off_2, X->Check.sdim, &list1_off); if(num1 != FALSE) { dam_pr += tmp_V*conj(vec[list1_off])*vec[j]; } } } } else { dam_pr=0.0; } } dam_pr = SumMPI_dc(dam_pr); fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4, creal(dam_pr),cimag(dam_pr)); } } break; case SpinGC: if(X->Def.iFlgGeneralSpin==FALSE) { for(i=0; i<X->Def.NCisAjtCkuAlvDC; i++) { tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0) { //error message will be added fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); continue; } dam_pr=0.0; if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite) { //org_isite3 >= org_isite1 > Nsite if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr += X_GC_child_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, vec, vec); } else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 ==org_sigma3) { //diagonal (for spin: cuadcdau=cuau) dam_pr += X_GC_child_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, vec, vec); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { dam_pr += X_GC_child_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { dam_pr += X_GC_child_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { dam_pr += X_GC_child_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } } else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite) { //org_isite3 > Nsite >= org_isite1 if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr += X_GC_child_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { dam_pr += X_GC_child_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { dam_pr += X_GC_child_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { dam_pr += X_GC_child_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } } else { if(org_isite1==org_isite2 && org_isite3==org_isite4) { isA_up = X->Def.Tpow[org_isite2-1]; isB_up = X->Def.Tpow[org_isite4-1]; if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr +=GC_child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, vec, vec, X); } } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr += GC_child_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, vec, vec, X, &tmp_off); } } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr += GC_child_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, vec, vec, X, &tmp_off); } } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_off_2,tmp_V) shared(vec) for(j=1; j<=i_max; j++) { dam_pr += GC_child_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, vec, vec, X, &tmp_off); } } } } dam_pr = SumMPI_dc(dam_pr); fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,creal(dam_pr),cimag(dam_pr)); } } else { for(i=0; i<X->Def.NCisAjtCkuAlvDC; i++) { tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0) { //error message will be added fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); continue; } dam_pr = 0.0; if(org_isite1 > X->Def.Nsite && org_isite3 > X->Def.Nsite) { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr=X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { dam_pr=X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { dam_pr=X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { dam_pr=X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); } } else if(org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal dam_pr=X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { dam_pr=X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { dam_pr=X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { dam_pr=X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); } } else { if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ) { //diagonal #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) shared(vec) for(j=1; j<=i_max; j++) { num1=BitCheckGeneral(j-1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { num1=BitCheckGeneral(j-1, org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { dam_pr += tmp_V*conj(vec[j])*vec[j]; } } } } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma3,org_sigma4, tmp_off, tmp_V) shared(vec) for(j=1; j<=i_max; j++) { num1 = GetOffCompGeneralSpin(j-1, org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { num1=BitCheckGeneral(tmp_off, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { dam_pr += tmp_V*conj(vec[tmp_off+1])*vec[j]; } } } } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma2, org_sigma3, tmp_off, tmp_V) shared(vec) for(j=1; j<=i_max; j++) { num1 = BitCheckGeneral(j-1, org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { num1 = GetOffCompGeneralSpin(j-1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { dam_pr += tmp_V*conj(vec[tmp_off+1])*vec[j]; } } } } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, tmp_V) shared(vec) for(j=1; j<=i_max; j++) { num1 = GetOffCompGeneralSpin(j-1, org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { num1 = GetOffCompGeneralSpin(tmp_off, org_isite1, org_sigma2, org_sigma1, &tmp_off_2, X->Def.SiteToBit, X->Def.Tpow); if(num1 != FALSE) { dam_pr += tmp_V*conj(vec[tmp_off_2+1])*vec[j]; } } } } } dam_pr = SumMPI_dc(dam_pr); fprintf(fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4, creal(dam_pr),cimag(dam_pr)); } } break; default: return -1; } fclose(fp); if(X->Def.iCalcType==Lanczos) { if(X->Def.St==0) { TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecTwoBodyGFinish,"a"); fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGFinish); } else if(X->Def.St==1) { TimeKeeper(X, cFileNameTimeKeep, cCGExpecTwoBodyGFinish,"a"); fprintf(stdoutMPI, "%s", cLogCGExpecTwoBodyGFinish); } } else if(X->Def.iCalcType==TPQCalc) { TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGFinish, "a", rand_i, step); } //[s] this part will be added /* For FullDiag, it is convinient to calculate the total spin for each vector. Such functions will be added if(X->Def.iCalcType==FullDiag){ if(X->Def.iCalcModel==Spin){ expec_cisajscktaltdc_alldiag_spin(X,vec); }else if(X->Def.iCalcModel==Hubbard || X->Def.iCalcModel==Kondo){ expec_cisajscktaltdc_alldiag(X,vec); }else{// X->Phys.s2=0.0; } } */ //[e] return 0; }
/** * * * @param X * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ void Lanczos_EigenVector(struct BindStruct *X){ printf("%s", cLogLanczos_EigenVectorStart); int i,j,i_max,iv; int k_exct; double beta1,alpha1,dnorm, dnorm_inv; double complex temp1,temp2; // for GC long unsigned int u_long_i; dsfmt_t dsfmt; k_exct = X->Def.k_exct; iv=X->Large.iv; i_max=X->Check.idim_max; //Eigenvectors by Lanczos method //initialization: initialization should be identical to that of Lanczos_EigenValue.c #pragma omp parallel for default(none) private(i) shared(v0, v1, vg) firstprivate(i_max) for(i=1;i<=i_max;i++){ v0[i]=0.0+0.0*I; v1[i]=0.0+0.0*I; vg[i]=0.0+0.0*I; } if(initial_mode == 0){ v1[iv]=1.0; vg[iv]=vec[k_exct][1]; }else if(initial_mode==1){ iv = X->Def.initial_iv; u_long_i = 123432 + abs(iv); dsfmt_init_gen_rand(&dsfmt, u_long_i); for(i = 1; i <= i_max; i++){ v1[i]=2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)+2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)*I; } dnorm=0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm) for(i=1;i<=i_max;i++){ dnorm += conj(v1[i])*v1[i]; } dnorm=sqrt(dnorm); dnorm_inv=1.0/dnorm; #pragma omp parallel for default(none) private(i) shared(v1,vg,vec,k_exct) firstprivate(i_max, dnorm_inv) for(i=1;i<=i_max;i++){ v1[i] = v1[i]*dnorm_inv; vg[i] = v1[i]*vec[k_exct][1]; } } mltply(X, v0, v1); alpha1=alpha[1]; beta1=beta[1]; #pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct) for(j=1;j<=i_max;j++){ vg[j]+=vec[k_exct][2]*(v0[j]-alpha1*v1[j])/beta1; } //iteration for(i=2;i<=X->Large.itr-1;i++){ #pragma omp parallel for default(none) private(j, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(j=1;j<=i_max;j++){ temp1=v1[j]; temp2=(v0[j]-alpha1*v1[j])/beta1; v0[j]=-beta1*temp1; v1[j]=temp2; } mltply(X, v0, v1); alpha1 = alpha[i]; beta1 = beta[i]; #pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct, i) for(j=1;j<=i_max;j++){ vg[j] += vec[k_exct][i+1]*(v0[j]-alpha1*v1[j])/beta1; } } #pragma omp parallel for default(none) private(j) shared(v0, vg) firstprivate(i_max) for(j=1;j<=i_max;j++){ v0[j] = vg[j]; } //normalization dnorm=0.0; #pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v0) firstprivate(i_max) for(j=1;j<=i_max;j++){ dnorm += conj(v0[j])*v0[j]; } dnorm=sqrt(dnorm); dnorm_inv=dnorm; #pragma omp parallel for default(none) private(j) shared(v0) firstprivate(i_max, dnorm_inv) for(j=1;j<=i_max;j++){ v0[j] = v0[j]*dnorm_inv; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorFinish, "a"); printf("%s", cLogLanczos_EigenVectorEnd); }