void CObjects::DumpIndex(void) { CChars sz; sz.Init("-------------------------- Indices -------------------------- \n"); PrintMemory(&sz); sz.Append("------------------------------------------------------------ \n"); sz.Dump(); sz.Kill(); }
void CObjects::ValidateEmpty(void) { OIndex iNumIndexed; iNumIndexed = mcMemory.NumIndexed(); if (iNumIndexed != 0) { CChars sz; sz.Init("\n"); sz.Append("Memory not empty. "); sz.Append(iNumIndexed); sz.Append(" objects are still indexed.\n"); PrintMemory(&sz); gcLogger.Error(sz.Text()); sz.Kill(); } }
void Hamiltonian_Band_NC(int Host_ID1, double *****RH, double *****IH, dcomplex **H, int *MP, double k1, double k2, double k3) { static int firsttime=1; int i,j,k,wanA,wanB,tnoA,tnoB,Anum,Bnum; int NUM,MA_AN,GA_AN,LB_AN,GB_AN; int l1,l2,l3,Rn,n2; double *tmp_array1,*tmp_array2; double **H11r,**H11i; double **H22r,**H22i; double **H12r,**H12i; double kRn,si,co,h; int ID,myid,numprocs,tag=999; MPI_Status stat; MPI_Request request; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); MPI_Barrier(mpi_comm_level1); /* set MP */ Anum = 1; for (i=1; i<=atomnum; i++) { MP[i] = Anum; wanA = WhatSpecies[i]; tnoA = Spe_Total_CNO[wanA]; Anum = Anum + tnoA; } NUM = Anum - 1; n2 = NUM + 2; /******************************************* allocation of H11r, H11i, H22r, H22i, H12r, H12i *******************************************/ H11r = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++) { H11r[i] = (double*)malloc(sizeof(double)*n2); } H11i = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++) { H11i[i] = (double*)malloc(sizeof(double)*n2); } H22r = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++) { H22r[i] = (double*)malloc(sizeof(double)*n2); } H22i = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++) { H22i[i] = (double*)malloc(sizeof(double)*n2); } H12r = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++) { H12r[i] = (double*)malloc(sizeof(double)*n2); } H12i = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++) { H12i[i] = (double*)malloc(sizeof(double)*n2); } /**************************************************** PrintMemory ****************************************************/ if (firsttime) { PrintMemory("Hamiltonian_Band: H11r",sizeof(double)*n2*n2,NULL); PrintMemory("Hamiltonian_Band: H11i",sizeof(double)*n2*n2,NULL); PrintMemory("Hamiltonian_Band: H22r",sizeof(double)*n2*n2,NULL); PrintMemory("Hamiltonian_Band: H22i",sizeof(double)*n2*n2,NULL); PrintMemory("Hamiltonian_Band: H12r",sizeof(double)*n2*n2,NULL); PrintMemory("Hamiltonian_Band: H12i",sizeof(double)*n2*n2,NULL); } /* for PrintMemory */ firsttime=0; /**************************************************** set Hamiltonian ****************************************************/ H[0][0].r = 2.0*NUM; for (i=1; i<=NUM; i++) { for (j=1; j<=NUM; j++) { H11r[i][j] = 0.0; H11i[i][j] = 0.0; H22r[i][j] = 0.0; H22i[i][j] = 0.0; H12r[i][j] = 0.0; H12i[i][j] = 0.0; } } for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) { GA_AN = M2G[MA_AN]; wanA = WhatSpecies[GA_AN]; tnoA = Spe_Total_CNO[wanA]; Anum = MP[GA_AN]; for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++) { GB_AN = natn[GA_AN][LB_AN]; Rn = ncn[GA_AN][LB_AN]; wanB = WhatSpecies[GB_AN]; tnoB = Spe_Total_CNO[wanB]; /* kRn = k1*( (double)atv_ijk[Rn][1] + Cell_Gxyz[GB_AN][1] - Cell_Gxyz[GA_AN][1] ) + k2*( (double)atv_ijk[Rn][2] + Cell_Gxyz[GB_AN][2] - Cell_Gxyz[GA_AN][2] ) + k3*( (double)atv_ijk[Rn][3] + Cell_Gxyz[GB_AN][3] - Cell_Gxyz[GA_AN][3] ); */ l1 = atv_ijk[Rn][1]; l2 = atv_ijk[Rn][2]; l3 = atv_ijk[Rn][3]; kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3; si = sin(2.0*PI*kRn); co = cos(2.0*PI*kRn); Bnum = MP[GB_AN]; /* non-spin-orbit coupling and non-LDA+U */ if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0) { for (i=0; i<=(tnoA-1); i++) { for (j=0; j<=(tnoB-1); j++) { H11r[Anum+i][Bnum+j] += co*RH[0][MA_AN][LB_AN][i][j]; H11i[Anum+i][Bnum+j] += si*RH[0][MA_AN][LB_AN][i][j]; H22r[Anum+i][Bnum+j] += co*RH[1][MA_AN][LB_AN][i][j]; H22i[Anum+i][Bnum+j] += si*RH[1][MA_AN][LB_AN][i][j]; H12r[Anum+i][Bnum+j] += co*RH[2][MA_AN][LB_AN][i][j] - si*RH[3][MA_AN][LB_AN][i][j]; H12i[Anum+i][Bnum+j] += si*RH[2][MA_AN][LB_AN][i][j] + co*RH[3][MA_AN][LB_AN][i][j]; } } } /* spin-orbit coupling or LDA+U */ else { for (i=0; i<=(tnoA-1); i++) { for (j=0; j<=(tnoB-1); j++) { H11r[Anum+i][Bnum+j] += co*RH[0][MA_AN][LB_AN][i][j] - si*IH[0][MA_AN][LB_AN][i][j]; H11i[Anum+i][Bnum+j] += si*RH[0][MA_AN][LB_AN][i][j] + co*IH[0][MA_AN][LB_AN][i][j]; H22r[Anum+i][Bnum+j] += co*RH[1][MA_AN][LB_AN][i][j] - si*IH[1][MA_AN][LB_AN][i][j]; H22i[Anum+i][Bnum+j] += si*RH[1][MA_AN][LB_AN][i][j] + co*IH[1][MA_AN][LB_AN][i][j]; H12r[Anum+i][Bnum+j] += co*RH[2][MA_AN][LB_AN][i][j] - si*(RH[3][MA_AN][LB_AN][i][j] + IH[2][MA_AN][LB_AN][i][j]); H12i[Anum+i][Bnum+j] += si*RH[2][MA_AN][LB_AN][i][j] + co*(RH[3][MA_AN][LB_AN][i][j] + IH[2][MA_AN][LB_AN][i][j]); } } } } } /****************************************************** MPI: H11r, H11i H22r, H22i H12r, H12i ******************************************************/ tmp_array1 = (double*)malloc(sizeof(double)*6*n2*List_YOUSO[7]); tmp_array2 = (double*)malloc(sizeof(double)*6*n2*List_YOUSO[7]); if (myid!=Host_ID1) { for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) { GA_AN = M2G[MA_AN]; wanA = WhatSpecies[GA_AN]; tnoA = Spe_Total_CNO[wanA]; Anum = MP[GA_AN]; k = 0; for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { tmp_array1[k] = H11r[Anum+i][j]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { tmp_array1[k] = H11i[Anum+i][j]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { tmp_array1[k] = H22r[Anum+i][j]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { tmp_array1[k] = H22i[Anum+i][j]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { tmp_array1[k] = H12r[Anum+i][j]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { tmp_array1[k] = H12i[Anum+i][j]; k++; } } tag = 999; MPI_Isend(&tmp_array1[0], 6*tnoA*n2, MPI_DOUBLE, Host_ID1, tag, mpi_comm_level1, &request); MPI_Wait(&request,&stat); } } else { for (GA_AN=1; GA_AN<=atomnum; GA_AN++) { wanA = WhatSpecies[GA_AN]; tnoA = Spe_Total_CNO[wanA]; Anum = MP[GA_AN]; ID = G2ID[GA_AN]; if (ID!=Host_ID1) { tag = 999; MPI_Recv(&tmp_array2[0], 6*tnoA*n2, MPI_DOUBLE, ID, tag, mpi_comm_level1, &stat); k = 0; for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { H11r[Anum+i][j] = tmp_array2[k]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { H11i[Anum+i][j] = tmp_array2[k]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { H22r[Anum+i][j] = tmp_array2[k]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { H22i[Anum+i][j] = tmp_array2[k]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { H12r[Anum+i][j] = tmp_array2[k]; k++; } } for (i=0; i<tnoA; i++) { for (j=0; j<n2; j++) { H12i[Anum+i][j] = tmp_array2[k]; k++; } } } } } free(tmp_array1); free(tmp_array2); /****************************************************** the full complex matrix of H ******************************************************/ for (i=1; i<=NUM; i++) { for (j=1; j<=NUM; j++) { H[i ][j ].r = H11r[i][j]; H[i ][j ].i = H11i[i][j]; H[i+NUM][j+NUM].r = H22r[i][j]; H[i+NUM][j+NUM].i = H22i[i][j]; H[i ][j+NUM].r = H12r[i][j]; H[i ][j+NUM].i = H12i[i][j]; H[j+NUM][i ].r = H[i][j+NUM].r; H[j+NUM][i ].i = -H[i][j+NUM].i; } } /**************************************************** free arrays ****************************************************/ for (i=0; i<n2; i++) { free(H11r[i]); } free(H11r); for (i=0; i<n2; i++) { free(H11i[i]); } free(H11i); for (i=0; i<n2; i++) { free(H22r[i]); } free(H22r); for (i=0; i<n2; i++) { free(H22i[i]); } free(H22i); for (i=0; i<n2; i++) { free(H12r[i]); } free(H12r); for (i=0; i<n2; i++) { free(H12i[i]); } free(H12i); }
void Taylor(int Taylor_switch, double ****OLP, double ****IOLP) { static int firsttime=1; static int ct_AN,fan,san,can,ig,ian,j,jan,m,n,order; static int k,kg,kan,kl,l,jg,wan,tno0,tno1,i,ino1,p,q,jl; static int h_AN,Gh_AN,Hwan,Cwan,size_O,size_OP; static double dum,sum,dum1,dum2; static double ****O; static double ****OP; static double ****ON; /**************************************************** allocation of arrays: static double O[atomnum+1] [FNAN+1] [Spe_Total_NO[Cwan]] [Spe_Total_NO[Hwan]] static double OP[atomnum+1] [FNAN+SNAN+1] [Spe_Total_NO[Cwan]] [Spe_Total_NO[Hwan]] static double ON[atomnum+1] [FNAN+SNAN+1] [Spe_Total_NO[Cwan]] [Spe_Total_NO[Hwan]] ****************************************************/ /* O */ FNAN[0] = 0; SNAN[0] = 0; size_O = 0; O = (double****)malloc(sizeof(double***)*(atomnum+1)); for (ct_AN=0; ct_AN<=atomnum; ct_AN++){ if (ct_AN==0){ tno0 = 1; } else{ Cwan = WhatSpecies[ct_AN]; tno0 = Spe_Total_NO[Cwan]; } O[ct_AN] = (double***)malloc(sizeof(double**)*(FNAN[ct_AN]+1)); for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){ if (ct_AN==0){ tno1 = 1; } else{ Gh_AN = natn[ct_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno1 = Spe_Total_NO[Hwan]; } O[ct_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0); for (i=0; i<tno0; i++){ O[ct_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1); } size_O += tno0*tno1; } } /* OP */ size_OP = 0; OP = (double****)malloc(sizeof(double***)*(atomnum+1)); for (ct_AN=0; ct_AN<=atomnum; ct_AN++){ if (ct_AN==0){ tno0 = 1; } else{ Cwan = WhatSpecies[ct_AN]; tno0 = Spe_Total_NO[Cwan]; } OP[ct_AN] = (double***)malloc(sizeof(double**)*(FNAN[ct_AN]+SNAN[ct_AN]+1)); for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){ if (ct_AN==0){ tno1 = 1; } else{ Gh_AN = natn[ct_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno1 = Spe_Total_NO[Hwan]; } OP[ct_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0); for (i=0; i<tno0; i++){ OP[ct_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1); } size_OP += tno0*tno1; } } /* ON */ ON = (double****)malloc(sizeof(double***)*(atomnum+1)); for (ct_AN=0; ct_AN<=atomnum; ct_AN++){ if (ct_AN==0){ tno0 = 1; } else{ Cwan = WhatSpecies[ct_AN]; tno0 = Spe_Total_NO[Cwan]; } ON[ct_AN] = (double***)malloc(sizeof(double**)*(FNAN[ct_AN]+SNAN[ct_AN]+1)); for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){ if (ct_AN==0){ tno1 = 1; } else{ Gh_AN = natn[ct_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno1 = Spe_Total_NO[Hwan]; } ON[ct_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0); for (i=0; i<tno0; i++){ ON[ct_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1); } } } /* PrintMemory */ if (firsttime) { PrintMemory("Taylor: O",sizeof(O),NULL); PrintMemory("Taylor: OP",sizeof(OP),NULL); PrintMemory("Taylor: ON",sizeof(ON),NULL); firsttime=0; } /**************************************************** Initializing of IOLP, OP and O matrices ****************************************************/ for (ct_AN=1; ct_AN<=atomnum; ct_AN++){ fan = FNAN[ct_AN]; san = SNAN[ct_AN]; can = fan + san; ig = natn[ct_AN][0]; ian = Spe_Total_NO[WhatSpecies[ig]]; for (j=0; j<=can; j++){ jg = natn[ct_AN][j]; jan = Spe_Total_NO[WhatSpecies[jg]]; if (j<=fan){ for (m=0; m<=(ian-1); m++){ for (n=0; n<=(jan-1); n++){ if (j==0 && m==n){ IOLP[ct_AN][j][m][n] = 1.0; O[ct_AN][j][m][n] = 0.0; OP[ct_AN][j][m][n] = 0.0; } else { IOLP[ct_AN][j][m][n] = -OLP[ct_AN][j][m][n]; O[ct_AN][j][m][n] = OLP[ct_AN][j][m][n]; OP[ct_AN][j][m][n] = OLP[ct_AN][j][m][n]; } } } } else { for (m=0; m<=(ian-1); m++){ for (n=0; n<=(jan-1); n++){ IOLP[ct_AN][j][m][n] = 0.0; O[ct_AN][j][m][n] = 0.0; OP[ct_AN][j][m][n] = 0.0; } } } } } /**************************************************** Iteration ****************************************************/ for (order=2; order<=Taylor_switch; order++){ /**************************************************** O*OP -> ON ****************************************************/ for (ct_AN=1; ct_AN<=atomnum; ct_AN++){ fan = FNAN[ct_AN]; san = SNAN[ct_AN]; can = fan + san; ig = natn[ct_AN][0]; ian = Spe_Total_NO[WhatSpecies[ig]]; for (j=0; j<=can; j++){ jg = natn[ct_AN][j]; jan = Spe_Total_NO[WhatSpecies[jg]]; for (k=0; k<=fan; k++){ kg = natn[ct_AN][k]; jl = RMI2[ct_AN][k][j]; kan = Spe_Total_NO[WhatSpecies[kg]]; kl = RMI2[ct_AN][j][k]; for (m=0; m<=(ian-1); m++){ for (n=0; n<=(jan-1); n++){ sum = 0.0; for (l=0; l<=(kan-1); l++){ sum = sum + O[ct_AN][k][m][l]*OP[kg][jl][l][n]; } if (k==0){ ON[ct_AN][j][m][n] = sum; } else { ON[ct_AN][j][m][n] = ON[ct_AN][j][m][n] + sum; } } } } } } /**************************************************** Averaging of ON ****************************************************/ if (order%2==0){ for (ct_AN=1; ct_AN<=atomnum; ct_AN++){ wan = WhatSpecies[ct_AN]; tno1 = Spe_Total_NO[wan] - 1; for (i=1; i<=(FNAN[ct_AN]+SNAN[ct_AN]); i++){ ig = natn[ct_AN][i]; ino1 = Spe_Total_NO[WhatSpecies[ig]] - 1; k = RMI2[ct_AN][i][0]; for (p=0; p<=tno1; p++){ for (q=0; q<=ino1; q++){ dum1 = ON[ct_AN][i][p][q]; dum2 = ON[ig][k][q][p]; dum = 0.50*(dum1 + dum2); ON[ct_AN][i][p][q] = dum; ON[ig][k][q][p] = dum; } } } } } /**************************************************** ON -> OP ****************************************************/ for (ct_AN=1; ct_AN<=atomnum; ct_AN++){ fan = FNAN[ct_AN]; san = SNAN[ct_AN]; can = fan + san; ig = natn[ct_AN][0]; ian = Spe_Total_NO[WhatSpecies[ig]]; for (j=0; j<=can; j++){ jg = natn[ct_AN][j]; jan = Spe_Total_NO[WhatSpecies[jg]]; for (m=0; m<=(ian-1); m++){ for (n=0; n<=(jan-1); n++){ OP[ct_AN][j][m][n] = ON[ct_AN][j][m][n]; } } } } /**************************************************** IOLP = IOLP +- ON ****************************************************/ for (ct_AN=1; ct_AN<=atomnum; ct_AN++){ fan = FNAN[ct_AN]; san = SNAN[ct_AN]; can = fan + san; ig = natn[ct_AN][0]; ian = Spe_Total_NO[WhatSpecies[ig]]; for (j=0; j<=can; j++){ jg = natn[ct_AN][j]; jan = Spe_Total_NO[WhatSpecies[jg]]; for (m=0; m<=(ian-1); m++){ for (n=0; n<=(jan-1); n++){ if ((order%2)==0) { IOLP[ct_AN][j][m][n] = IOLP[ct_AN][j][m][n] + ON[ct_AN][j][m][n]; } else { IOLP[ct_AN][j][m][n] = IOLP[ct_AN][j][m][n] - ON[ct_AN][j][m][n]; } } } } } } /**************************************************** freeing of arrays: static double O[atomnum+1] [FNAN+1] [Spe_Total_NO[Cwan]] [Spe_Total_NO[Hwan]] static double OP[atomnum+1] [FNAN+SNAN+1] [Spe_Total_NO[Cwan]] [Spe_Total_NO[Hwan]] static double ON[atomnum+1] [FNAN+SNAN+1] [Spe_Total_NO[Cwan]] [Spe_Total_NO[Hwan]] ****************************************************/ /* O */ FNAN[0] = 0; SNAN[0] = 0; for (ct_AN=0; ct_AN<=atomnum; ct_AN++){ if (ct_AN==0){ tno0 = 1; } else{ Cwan = WhatSpecies[ct_AN]; tno0 = Spe_Total_NO[Cwan]; } for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){ if (ct_AN==0){ tno1 = 1; } else{ Gh_AN = natn[ct_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno1 = Spe_Total_NO[Hwan]; } for (i=0; i<tno0; i++){ free(O[ct_AN][h_AN][i]); } free(O[ct_AN][h_AN]); } free(O[ct_AN]); } free(O); /* OP */ for (ct_AN=0; ct_AN<=atomnum; ct_AN++){ if (ct_AN==0){ tno0 = 1; } else{ Cwan = WhatSpecies[ct_AN]; tno0 = Spe_Total_NO[Cwan]; } for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){ if (ct_AN==0){ tno1 = 1; } else{ Gh_AN = natn[ct_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno1 = Spe_Total_NO[Hwan]; } for (i=0; i<tno0; i++){ free(OP[ct_AN][h_AN][i]); } free(OP[ct_AN][h_AN]); } free(OP[ct_AN]); } free(OP); /* ON */ for (ct_AN=0; ct_AN<=atomnum; ct_AN++){ if (ct_AN==0){ tno0 = 1; } else{ Cwan = WhatSpecies[ct_AN]; tno0 = Spe_Total_NO[Cwan]; } for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){ if (ct_AN==0){ tno1 = 1; } else{ Gh_AN = natn[ct_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno1 = Spe_Total_NO[Hwan]; } for (i=0; i<tno0; i++){ free(ON[ct_AN][h_AN][i]); } free(ON[ct_AN][h_AN]); } free(ON[ct_AN]); } free(ON); }
void Initial_CntCoes2(double *****nh, double *****OLP) { static int firsttime=1; int i,j,l,n,n2,i1,j1; int wan; int po; int Second_switch; double time0; int Mc_AN,Gc_AN,wanA; int q,q0,al0,al1,pmax; int Mul0,Mul1,deg_on,deg_num; int al,p,L0,M0,p0,Np; int ig,im,jg,ian,jan,kl,m,Gi; int mu,nu,Anum,Bnum,NUM,maxp; int h_AN,Gh_AN,Hwan,tno1,tno2,Cwan,spin; double Beta0,scaleF,maxc; double *ko,*C0,*koSys; double **S,**Hks,**D,*abs_sum,*M1,**C,**B; int *jun,*ponu; double tmp0,tmp1,Max0,rc1,fugou,MaxV; double sum,TZ; double Num_State,x,FermiF,Dnum; double LChemP_MAX,LChemP_MIN,LChemP; double TStime,TEtime; double *tmp_array; double *tmp_array2; int *MP,*dege; int **tmp_index; int ***tmp_index1; int ***tmp_index2; double *Tmp_CntCoes; double **Check_ko; double *Weight_ko; double ***CntCoes_Spe; double ***My_CntCoes_Spe; double **InProd; int *Snd_CntCoes_Size; int *Rcv_CntCoes_Size; int *Snd_H_Size,*Rcv_H_Size; int *Snd_S_Size,*Rcv_S_Size; int size1,size2,num; int numprocs,myid,tag=999,ID,IDS,IDR; MPI_Status stat; MPI_Request request; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); dtime(&TStime); /**************************************************** allocation of arrays: int MP[List_YOUSO[8]]; int tmp_index[List_YOUSO[25]+1] [2*(List_YOUSO[25]+1)+1]; int tmp_index1[List_YOUSO[25]+1] [List_YOUSO[24]] [2*(List_YOUSO[25]+1)+1]; int tmp_index2[List_YOUSO[25]+1] [List_YOUSO[24]] [2*(List_YOUSO[25]+1)+1]; double Tmp_CntCoes[List_YOUSO[24]] double Check_ko[List_YOUSO[25]+1] [2*(List_YOUSO[25]+1)+1]; double Weight_ko[List_YOUSO[7]]; ****************************************************/ MP = (int*)malloc(sizeof(int)*List_YOUSO[8]); tmp_index = (int**)malloc(sizeof(int*)*(List_YOUSO[25]+1)); for (i=0; i<(List_YOUSO[25]+1); i++){ tmp_index[i] = (int*)malloc(sizeof(int)*(2*(List_YOUSO[25]+1)+1)); } tmp_index1 = (int***)malloc(sizeof(int**)*(List_YOUSO[25]+1)); for (i=0; i<(List_YOUSO[25]+1); i++){ tmp_index1[i] = (int**)malloc(sizeof(int*)*List_YOUSO[24]); for (j=0; j<List_YOUSO[24]; j++){ tmp_index1[i][j] = (int*)malloc(sizeof(int)*(2*(List_YOUSO[25]+1)+1)); } } tmp_index2 = (int***)malloc(sizeof(int**)*(List_YOUSO[25]+1)); for (i=0; i<(List_YOUSO[25]+1); i++){ tmp_index2[i] = (int**)malloc(sizeof(int*)*List_YOUSO[24]); for (j=0; j<List_YOUSO[24]; j++){ tmp_index2[i][j] = (int*)malloc(sizeof(int)*(2*(List_YOUSO[25]+1)+1)); } } Tmp_CntCoes = (double*)malloc(sizeof(double)*List_YOUSO[24]); Check_ko = (double**)malloc(sizeof(double*)*(List_YOUSO[25]+1)); for (i=0; i<(List_YOUSO[25]+1); i++){ Check_ko[i] = (double*)malloc(sizeof(double)*(2*(List_YOUSO[25]+1)+1)); } Weight_ko = (double*)malloc(sizeof(double)*List_YOUSO[7]); Snd_CntCoes_Size = (int*)malloc(sizeof(int)*Num_Procs); Rcv_CntCoes_Size = (int*)malloc(sizeof(int)*Num_Procs); Snd_H_Size = (int*)malloc(sizeof(int)*Num_Procs); Rcv_H_Size = (int*)malloc(sizeof(int)*Num_Procs); Snd_S_Size = (int*)malloc(sizeof(int)*Num_Procs); Rcv_S_Size = (int*)malloc(sizeof(int)*Num_Procs); /* PrintMemory */ if (firsttime) { PrintMemory("Initial_CntCoes: tmp_index",sizeof(int)*(List_YOUSO[25]+1)* (2*(List_YOUSO[25]+1)+1),NULL); PrintMemory("Initial_CntCoes: tmp_index1",sizeof(int)*(List_YOUSO[25]+1)* List_YOUSO[24]*(2*(List_YOUSO[25]+1)+1) ,NULL); PrintMemory("Initial_CntCoes: tmp_index2",sizeof(int)*(List_YOUSO[25]+1)* List_YOUSO[24]*(2*(List_YOUSO[25]+1)+1) ,NULL); PrintMemory("Initial_CntCoes: Check_ko",sizeof(double)*(List_YOUSO[25]+1)* (2*(List_YOUSO[25]+1)+1),NULL); firsttime=0; } /**************************************************** MPI: nh(=H) ****************************************************/ /*********************************** set data size ************************************/ for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ tag = 999; /* find data size to send block data */ if (F_Snd_Num[IDS]!=0){ size1 = 0; for (spin=0; spin<=SpinP_switch; spin++){ for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; Cwan = WhatSpecies[Gc_AN]; tno1 = Spe_Total_NO[Cwan]; for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ Gh_AN = natn[Gc_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno2 = Spe_Total_NO[Hwan]; size1 += tno1*tno2; } } } Snd_H_Size[IDS] = size1; MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request); } else{ Snd_H_Size[IDS] = 0; } /* receiving of size of data */ if (F_Rcv_Num[IDR]!=0){ MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat); Rcv_H_Size[IDR] = size2; } else{ Rcv_H_Size[IDR] = 0; } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } else{ Snd_H_Size[IDS] = 0; Rcv_H_Size[IDR] = 0; } } /*********************************** data transfer ************************************/ tag = 999; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /***************************** sending of data *****************************/ if (F_Snd_Num[IDS]!=0){ size1 = Snd_H_Size[IDS]; /* allocation of array */ tmp_array = (double*)malloc(sizeof(double)*size1); /* multidimentional array to vector array */ num = 0; for (spin=0; spin<=SpinP_switch; spin++){ for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; Cwan = WhatSpecies[Gc_AN]; tno1 = Spe_Total_NO[Cwan]; for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ Gh_AN = natn[Gc_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno2 = Spe_Total_NO[Hwan]; for (i=0; i<tno1; i++){ for (j=0; j<tno2; j++){ tmp_array[num] = nh[spin][Mc_AN][h_AN][i][j]; num++; } } } } } MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /***************************** receiving of block data *****************************/ if (F_Rcv_Num[IDR]!=0){ size2 = Rcv_H_Size[IDR]; /* allocation of array */ tmp_array2 = (double*)malloc(sizeof(double)*size2); MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); num = 0; for (spin=0; spin<=SpinP_switch; spin++){ Mc_AN = S_TopMAN[IDR] - 1; /* S_TopMAN should be used. */ for (n=0; n<F_Rcv_Num[IDR]; n++){ Mc_AN++; Gc_AN = Rcv_GAN[IDR][n]; Cwan = WhatSpecies[Gc_AN]; tno1 = Spe_Total_NO[Cwan]; for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ Gh_AN = natn[Gc_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno2 = Spe_Total_NO[Hwan]; for (i=0; i<tno1; i++){ for (j=0; j<tno2; j++){ nh[spin][Mc_AN][h_AN][i][j] = tmp_array2[num]; num++; } } } } } /* freeing of array */ free(tmp_array2); } if (F_Snd_Num[IDS]!=0){ MPI_Wait(&request,&stat); free(tmp_array); /* freeing of array */ } } } /**************************************************** MPI: OLP[0] ****************************************************/ /*********************************** set data size ************************************/ for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ tag = 999; /* find data size to send block data */ if (F_Snd_Num[IDS]!=0){ size1 = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; Cwan = WhatSpecies[Gc_AN]; tno1 = Spe_Total_NO[Cwan]; for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ Gh_AN = natn[Gc_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno2 = Spe_Total_NO[Hwan]; size1 += tno1*tno2; } } Snd_S_Size[IDS] = size1; MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request); } else{ Snd_S_Size[IDS] = 0; } /* receiving of size of data */ if (F_Rcv_Num[IDR]!=0){ MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat); Rcv_S_Size[IDR] = size2; } else{ Rcv_S_Size[IDR] = 0; } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } else{ Snd_S_Size[IDS] = 0; Rcv_S_Size[IDR] = 0; } } /*********************************** data transfer ************************************/ tag = 999; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /***************************** sending of data *****************************/ if (F_Snd_Num[IDS]!=0){ size1 = Snd_S_Size[IDS]; /* allocation of array */ tmp_array = (double*)malloc(sizeof(double)*size1); /* multidimentional array to vector array */ num = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; Cwan = WhatSpecies[Gc_AN]; tno1 = Spe_Total_NO[Cwan]; for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ Gh_AN = natn[Gc_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno2 = Spe_Total_NO[Hwan]; for (i=0; i<tno1; i++){ for (j=0; j<tno2; j++){ tmp_array[num] = OLP[0][Mc_AN][h_AN][i][j]; num++; } } } } MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /***************************** receiving of block data *****************************/ if (F_Rcv_Num[IDR]!=0){ size2 = Rcv_S_Size[IDR]; /* allocation of array */ tmp_array2 = (double*)malloc(sizeof(double)*size2); MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); num = 0; Mc_AN = S_TopMAN[IDR] - 1; /* S_TopMAN should be used. */ for (n=0; n<F_Rcv_Num[IDR]; n++){ Mc_AN++; Gc_AN = Rcv_GAN[IDR][n]; Cwan = WhatSpecies[Gc_AN]; tno1 = Spe_Total_NO[Cwan]; for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ Gh_AN = natn[Gc_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; tno2 = Spe_Total_NO[Hwan]; for (i=0; i<tno1; i++){ for (j=0; j<tno2; j++){ OLP[0][Mc_AN][h_AN][i][j] = tmp_array2[num]; num++; } } } } /* freeing of array */ free(tmp_array2); } if (F_Snd_Num[IDS]!=0){ MPI_Wait(&request,&stat); free(tmp_array); /* freeing of array */ } } } /**************************************************** set of "initial" coefficients of the contraction ****************************************************/ Second_switch = 1; Simple_InitCnt[0] = 0; Simple_InitCnt[1] = 0; Simple_InitCnt[2] = 0; Simple_InitCnt[3] = 0; Simple_InitCnt[4] = 0; Simple_InitCnt[5] = 0; for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; wan = WhatSpecies[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ CntCoes[Mc_AN][al][p] = 0.0; } } al = -1; for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){ for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){ for (M0=0; M0<=2*L0; M0++){ al++; CntCoes[Mc_AN][al][Mul0] = 1.0; } } } } if (SICnt_switch==2) goto Simple_Init; /**************************************************** Setting of Hamiltonian and overlap matrices MP indicates the starting position of atom i in arraies H and S ****************************************************/ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; wan = WhatSpecies[Gc_AN]; if ( FNAN[Gc_AN]<30 ) scaleF = 1.6; else if ( FNAN[Gc_AN]<40 ) scaleF = 1.4; else if ( FNAN[Gc_AN]<50 ) scaleF = 1.2; else scaleF = 1.0; rc1 = scaleF*Spe_Atom_Cut1[wan]; /*********************************************** MP indicates the starting position of atom i in arraies H and S ***********************************************/ Anum = 1; TZ = 0.0; for (i=0; i<=FNAN[Gc_AN]; i++){ if (Dis[Gc_AN][i]<=rc1){ MP[i] = Anum; Gi = natn[Gc_AN][i]; wanA = WhatSpecies[Gi]; Anum = Anum + Spe_Total_NO[wanA]; TZ = TZ + Spe_Core_Charge[wanA]; } } NUM = Anum - 1; /**************************************************** allocation ****************************************************/ n2 = NUM + 3; koSys = (double*)malloc(sizeof(double)*n2); ko = (double*)malloc(sizeof(double)*n2); abs_sum = (double*)malloc(sizeof(double)*n2); M1 = (double*)malloc(sizeof(double)*n2); dege = (int*)malloc(sizeof(int)*n2); C0 = (double*)malloc(sizeof(double)*n2); S = (double**)malloc(sizeof(double*)*n2); Hks = (double**)malloc(sizeof(double*)*n2); D = (double**)malloc(sizeof(double*)*n2); C = (double**)malloc(sizeof(double*)*n2); B = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++){ S[i] = (double*)malloc(sizeof(double)*n2); Hks[i] = (double*)malloc(sizeof(double)*n2); D[i] = (double*)malloc(sizeof(double)*n2); C[i] = (double*)malloc(sizeof(double)*n2); B[i] = (double*)malloc(sizeof(double)*n2); } jun = (int*)malloc(sizeof(int)*n2); ponu = (int*)malloc(sizeof(int)*n2); InProd = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++){ InProd[i] = (double*)malloc(sizeof(double)*n2); } /**************************************************** calc ****************************************************/ if (3<=level_stdout){ printf("<Initial_CntCoes> Mc_AN=%2d Gc_AN=%2d NUM=%2d\n",Mc_AN,Gc_AN,NUM); } for (i=0; i<=FNAN[Gc_AN]; i++){ if (Dis[Gc_AN][i]<=rc1){ ig = natn[Gc_AN][i]; im = S_G2M[ig]; /* S_G2M must be used. */ ian = Spe_Total_NO[WhatSpecies[ig]]; Anum = MP[i]; for (j=0; j<=FNAN[Gc_AN]; j++){ if (Dis[Gc_AN][j]<=rc1){ kl = RMI1[Mc_AN][i][j]; jg = natn[Gc_AN][j]; jan = Spe_Total_NO[WhatSpecies[jg]]; Bnum = MP[j]; if (0<=kl){ for (m=0; m<ian; m++){ for (n=0; n<jan; n++){ S[Anum+m][Bnum+n] = OLP[0][im][kl][m][n]; if (SpinP_switch==0) Hks[Anum+m][Bnum+n] = nh[0][im][kl][m][n]; else Hks[Anum+m][Bnum+n] = 0.5*(nh[0][im][kl][m][n] + nh[1][im][kl][m][n]); } } } else{ for (m=0; m<ian; m++){ for (n=0; n<jan; n++){ S[Anum+m][Bnum+n] = 0.0; Hks[Anum+m][Bnum+n] = 0.0; } } } } } } } if (3<=level_stdout){ printf("\n"); printf("overlap matrix Gc_AN=%2d\n",Gc_AN); for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ printf("%6.3f ",S[i1][j1]); } printf("\n"); } printf("\n"); printf("hamiltonian matrix Gc_AN=%2d\n",Gc_AN); for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ printf("%6.3f ",Hks[i1][j1]); } printf("\n"); } printf("\n"); } /*********************************************** Solve the generalized eigenvalue problem ***********************************************/ Eigen_lapack(S,koSys,NUM,NUM); /*********************************************** Searching of negative eigenvalues ************************************************/ for (l=1; l<=NUM; l++){ if (koSys[l]<0.0){ koSys[l] = 1.0e-7; if (3<=level_stdout){ printf("<Init_CntCoes> Negative EV of OLP %2d %15.12f\n",l,koSys[l]); } } } for (l=1; l<=NUM; l++){ M1[l] = 1.0/sqrt(koSys[l]); } for (i1=1; i1<=NUM; i1++){ for (j1=i1+1; j1<=NUM; j1++){ tmp0 = S[i1][j1]; tmp1 = S[j1][i1]; S[j1][i1] = tmp0; S[i1][j1] = tmp1; } } for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ sum = 0.0; tmp0 = M1[j1]; for (l=1; l<=NUM; l++){ sum = sum + Hks[i1][l]*S[j1][l]*tmp0; } C[j1][i1] = sum; } } for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ sum = 0.0; tmp0 = M1[i1]; for (l=1; l<=NUM; l++){ sum = sum + tmp0*S[i1][l]*C[j1][l]; } B[i1][j1] = sum; } } for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ D[i1][j1] = B[i1][j1]; } } Eigen_lapack(D,koSys,NUM,NUM); /*********************************************** transformation to the original eigenvectors. NOTE 244P ***********************************************/ for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ C[i1][j1] = 0.0; } } for (i1=1; i1<=NUM; i1++){ for (j1=i1+1; j1<=NUM; j1++){ tmp0 = S[i1][j1]; tmp1 = S[j1][i1]; S[j1][i1] = tmp0; S[i1][j1] = tmp1; tmp0 = D[i1][j1]; tmp1 = D[j1][i1]; D[j1][i1] = tmp0; D[i1][j1] = tmp1; } } for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ sum = 0.0; for (l=1; l<=NUM; l++){ sum = sum + S[i1][l]*M1[l]*D[j1][l]; } C[i1][j1] = sum; } } /**************************************************** searching of a local chemical potential ****************************************************/ po = 0; LChemP_MAX = 10.0; LChemP_MIN =-10.0; Beta0 = 1.0/(kB*1500.0/eV2Hartree); do{ LChemP = 0.50*(LChemP_MAX + LChemP_MIN); Num_State = 0.0; for (i1=1; i1<=NUM; i1++){ x = (koSys[i1] - LChemP)*Beta0; if (x<=-30.0) x = -30.0; if (30.0<=x) x = 30.0; FermiF = 2.0/(1.0 + exp(x)); Num_State = Num_State + FermiF; } Dnum = TZ - Num_State; if (0.0<=Dnum) LChemP_MIN = LChemP; else LChemP_MAX = LChemP; if (fabs(Dnum)<0.000000000001) po = 1; } while (po==0); if (3<=level_stdout){ for (i1=1; i1<=NUM; i1++){ x = (koSys[i1] - LChemP)*Beta0; if (x<=-30.0) x = -30.0; if (30.0<=x) x = 30.0; FermiF = 1.0/(1.0 + exp(x)); printf("<Init_CntCoes> %2d eigenvalue=%15.12f FermiF=%15.12f\n", i1,koSys[i1],FermiF); } } if (3<=level_stdout){ printf("\n"); printf("first C Gc_AN=%2d\n",Gc_AN); for (i1=1; i1<=NUM; i1++){ for (j1=1; j1<=NUM; j1++){ printf("%10.6f ",C[i1][j1]); } printf("\n"); } } if (3<=level_stdout){ printf("<Init_CntCoes> LChemP=%15.12f\n",LChemP); } /************************************************ maximize the "overlap" between wave functions and contracted basis functions ************************************************/ /* make a table function converting [L0][Mul0][M0] to "al" for primitive orbitals */ al = -1; for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){ for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ for (M0=0; M0<=2*L0; M0++){ al++; tmp_index1[L0][Mul0][M0] = al; } } } /* make a table function converting [L0][Mul0][M0] to "al" for contracted orbitals */ al = -1; for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){ for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){ for (M0=0; M0<=2*L0; M0++){ al++; tmp_index2[L0][Mul0][M0] = al; } } } /* loop for L0 */ for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){ for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){ Hks[Mul0+1][Mul1+1] = 0.0; } } for (M0=0; M0<=2*L0; M0++){ for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ i = tmp_index1[L0][Mul0][M0]; for (mu=1; mu<=NUM; mu++){ InProd[mu][Mul0] = C[MP[0]+i][mu]; } /* mu */ } /* Mul0 */ for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){ sum = 0.0; for (mu=1; mu<=NUM; mu++){ x = (koSys[mu] - LChemP)*Beta0; if (x<=-30.0) x = -30.0; if (30.0<=x) x = 30.0; FermiF = 1.0/(1.0 + exp(x)); sum += FermiF*InProd[mu][Mul0]*InProd[mu][Mul1]; } Hks[Mul0+1][Mul1+1] -= sum; } /* for calculation of a single atom */ tmp0 = (double)(Spe_Num_Basis[wan][L0]-Mul0); Hks[Mul0+1][Mul0+1] += -1.0e-9*tmp0*tmp0; } } /* M0 */ /* M0 = 0; for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ i = tmp_index1[L0][Mul0][M0]; for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){ j = tmp_index1[L0][Mul1][M0]; S[Mul0+1][Mul1+1] = OLP[0][Mc_AN][0][i][j]; } } */ for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){ S[Mul0+1][Mul1+1] = 0.0; } S[Mul0+1][Mul0+1] = 1.0; } if (3<=level_stdout){ printf("<Hks Gc_AN=%2d L0=%2d>\n",Gc_AN,L0); for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){ printf("%15.10f ",Hks[Mul0+1][Mul1+1]); } printf("\n"); } } if (3<=level_stdout){ printf("<S Gc_AN=%2d L0=%2d>\n",Gc_AN,L0); for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){ printf("%15.10f ",S[Mul0+1][Mul1+1]); } printf("\n"); } } /* diagonalization */ Np = Spe_Num_Basis[wan][L0]; Eigen_lapack(S,ko,Np,Np); for (l=1; l<=Np; l++){ M1[l] = 1.0/sqrt(ko[l]); } for (i1=1; i1<=Np; i1++){ for (j1=1; j1<=Np; j1++){ sum = 0.0; for (l=1; l<=Np; l++){ sum = sum + Hks[i1][l]*S[l][j1]*M1[j1]; } C[i1][j1] = sum; } } for (i1=1; i1<=Np; i1++){ for (j1=1; j1<=Np; j1++){ sum = 0.0; for (l=1; l<=Np; l++){ sum = sum + M1[i1]*S[l][i1]*C[l][j1]; } B[i1][j1] = sum; } } for (i1=1; i1<=Np; i1++){ for (j1=1; j1<=Np; j1++){ D[i1][j1] = B[i1][j1]; } } Eigen_lapack(D,ko,Np,Np); /* transformation to the original eigenvectors */ for (i1=1; i1<=Np; i1++){ for (j1=1; j1<=Np; j1++){ C[i1][j1] = 0.0; } } for (i1=1; i1<=Np; i1++){ for (j1=1; j1<=Np; j1++){ sum = 0.0; for (l=1; l<=Np; l++){ sum = sum + S[i1][l]*M1[l]*D[l][j1]; } C[i1][j1] = sum; } } if (3<=level_stdout){ printf("<Eigenvalues Gc_AN=%2d L0=%2d>\n",Gc_AN,L0); for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ printf("Mul=%2d ko=%15.12f\n",Mul0,ko[Mul0+1]); } printf("<C Gc_AN=%2d L0=%2d>\n",Gc_AN,L0); for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){ for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){ printf("%15.10f ",C[Mul0+1][Mul1+1]); } printf("\n"); } } /* set up contraction coefficients */ for (M0=0; M0<=2*L0; M0++){ for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){ al = tmp_index2[L0][Mul0][M0]; /* if (SCnt_switch==1) */ if ( SCnt_switch==1 && Mul0==(Spe_Num_CBasis[wan][L0]-1) ){ for (p=0; p<Spe_Num_Basis[wan][L0]; p++){ CntCoes[Mc_AN][al][p] = C[p+1][1]; } } else { for (p=0; p<Spe_Num_Basis[wan][L0]; p++){ CntCoes[Mc_AN][al][p] = C[p+1][Mul0+1];; } } maxc = -1.0e+10; for (p=0; p<Spe_Num_Basis[wan][L0]; p++){ if (maxc<fabs(CntCoes[Mc_AN][al][p])){ maxc = fabs(CntCoes[Mc_AN][al][p]); maxp = p; } } tmp0 = sgn(CntCoes[Mc_AN][al][maxp]); for (p=0; p<Spe_Num_Basis[wan][L0]; p++){ CntCoes[Mc_AN][al][p] *= tmp0; } } } } /* L0 */ if (3<=level_stdout){ for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ printf("A Init_CntCoes Mc_AN=%2d Gc_AN=%2d al=%2d p=%2d %15.12f\n", Mc_AN,Gc_AN,al,p,CntCoes[Mc_AN][al][p]); } } } /**************************************************** free arrays ****************************************************/ for (i=0; i<n2; i++){ free(InProd[i]); } free(InProd); free(koSys); free(ko); free(abs_sum); free(M1); free(dege); free(jun); free(ponu); free(C0); for (i=0; i<n2; i++){ free(S[i]); free(Hks[i]); free(D[i]); free(C[i]); free(B[i]); } free(S); free(Hks); free(D); free(C); free(B); } /* Mc_AN */ /************************************************************* in case of optimization of only the last orbital in each L *************************************************************/ if (SCnt_switch==1){ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; wan = WhatSpecies[Gc_AN]; al = -1; for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){ for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){ for (M0=0; M0<=2*L0; M0++){ al++; if ( Mul0!=(Spe_Num_CBasis[wan][L0]-1) ){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ CntCoes[Mc_AN][al][p] = 0.0; } CntCoes[Mc_AN][al][Mul0] = 1.0; } } } } } } /**************************************************** average contraction coefficients ****************************************************/ if (ACnt_switch==1){ /* allocation */ My_CntCoes_Spe = (double***)malloc(sizeof(double**)*(SpeciesNum+1)); for (i=0; i<=SpeciesNum; i++){ My_CntCoes_Spe[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]); for (j=0; j<List_YOUSO[7]; j++){ My_CntCoes_Spe[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]); } } CntCoes_Spe = (double***)malloc(sizeof(double**)*(SpeciesNum+1)); for (i=0; i<=SpeciesNum; i++){ CntCoes_Spe[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]); for (j=0; j<List_YOUSO[7]; j++){ CntCoes_Spe[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]); } } /* initialize */ for (wan=0; wan<SpeciesNum; wan++){ for (i=0; i<List_YOUSO[7]; i++){ for (j=0; j<List_YOUSO[24]; j++){ My_CntCoes_Spe[wan][i][j] = 0.0; } } } /* local sum in a proccessor */ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; wan = WhatSpecies[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ My_CntCoes_Spe[wan][al][p] += CntCoes[Mc_AN][al][p]; } } } /* global sum by MPI */ for (wan=0; wan<SpeciesNum; wan++){ for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ MPI_Allreduce(&My_CntCoes_Spe[wan][al][p], &CntCoes_Spe[wan][al][p], 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1); } } } /* copy CntCoes_Spe to CntCoes_Species */ for (wan=0; wan<SpeciesNum; wan++){ for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ CntCoes_Species[wan][al][p] = CntCoes_Spe[wan][al][p]; } } } /* CntCoes_Spe to CntCoes */ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; wan = WhatSpecies[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ CntCoes[Mc_AN][al][p] = CntCoes_Spe[wan][al][p]; } } } /* free */ for (i=0; i<=SpeciesNum; i++){ for (j=0; j<List_YOUSO[7]; j++){ free(My_CntCoes_Spe[i][j]); } free(My_CntCoes_Spe[i]); } free(My_CntCoes_Spe); for (i=0; i<=SpeciesNum; i++){ for (j=0; j<List_YOUSO[7]; j++){ free(CntCoes_Spe[i][j]); } free(CntCoes_Spe[i]); } free(CntCoes_Spe); } /********************************************** transformation of optimized orbitals by an extended Gauss elimination and the Gram-Schmidt orthogonalization ***********************************************/ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; wan = WhatSpecies[Gc_AN]; al = -1; for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){ for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){ for (M0=0; M0<=2*L0; M0++){ al++; tmp_index2[L0][Mul0][M0] = al; } } } for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){ for (M0=0; M0<=2*L0; M0++){ /********************************************** extended Gauss elimination ***********************************************/ for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){ al0 = tmp_index2[L0][Mul0][M0]; for (Mul1=0; Mul1<Spe_Num_CBasis[wan][L0]; Mul1++){ al1 = tmp_index2[L0][Mul1][M0]; if (Mul1!=Mul0){ tmp0 = CntCoes[Mc_AN][al0][Mul0]; tmp1 = CntCoes[Mc_AN][al1][Mul0]; for (p=0; p<Spe_Specified_Num[wan][al0]; p++){ CntCoes[Mc_AN][al1][p] -= CntCoes[Mc_AN][al0][p]/tmp0*tmp1; } } } } /********************************************** orthonormalization of initial contraction coefficients ***********************************************/ for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){ al0 = tmp_index2[L0][Mul0][M0]; /* x - sum_i <x|e_i>e_i */ for (p=0; p<Spe_Specified_Num[wan][al0]; p++){ Tmp_CntCoes[p] = 0.0; } for (Mul1=0; Mul1<Mul0; Mul1++){ al1 = tmp_index2[L0][Mul1][M0]; sum = 0.0; for (p=0; p<Spe_Specified_Num[wan][al0]; p++){ sum = sum + CntCoes[Mc_AN][al0][p]*CntCoes[Mc_AN][al1][p]; } for (p=0; p<Spe_Specified_Num[wan][al0]; p++){ Tmp_CntCoes[p] = Tmp_CntCoes[p] + sum*CntCoes[Mc_AN][al1][p]; } } for (p=0; p<Spe_Specified_Num[wan][al0]; p++){ CntCoes[Mc_AN][al0][p] = CntCoes[Mc_AN][al0][p] - Tmp_CntCoes[p]; } /* Normalize */ sum = 0.0; Max0 = -100.0; pmax = 0; for (p=0; p<Spe_Specified_Num[wan][al0]; p++){ sum = sum + CntCoes[Mc_AN][al0][p]*CntCoes[Mc_AN][al0][p]; if (Max0<fabs(CntCoes[Mc_AN][al0][p])){ Max0 = fabs(CntCoes[Mc_AN][al0][p]); pmax = p; } } if (fabs(sum)<1.0e-11) tmp0 = 0.0; else tmp0 = 1.0/sqrt(sum); tmp1 = sgn(CntCoes[Mc_AN][al0][pmax]); for (p=0; p<Spe_Specified_Num[wan][al0]; p++){ CntCoes[Mc_AN][al0][p] = tmp0*tmp1*CntCoes[Mc_AN][al0][p]; } } } } } /* Mc_AN */ /**************************************************** Normalization ****************************************************/ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; wan = WhatSpecies[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ sum = 0.0; for (p=0; p<Spe_Specified_Num[wan][al]; p++){ p0 = Spe_Trans_Orbital[wan][al][p]; for (q=0; q<Spe_Specified_Num[wan][al]; q++){ q0 = Spe_Trans_Orbital[wan][al][q]; tmp0 = CntCoes[Mc_AN][al][p]*CntCoes[Mc_AN][al][q]; sum = sum + tmp0*OLP[0][Mc_AN][0][p0][q0]; } } tmp0 = 1.0/sqrt(sum); for (p=0; p<Spe_Specified_Num[wan][al]; p++){ CntCoes[Mc_AN][al][p] = CntCoes[Mc_AN][al][p]*tmp0; } } if (3<=level_stdout){ for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ printf("B Init_CntCoes Mc_AN=%2d Gc_AN=%2d al=%2d p=%2d %15.12f\n", Mc_AN,Gc_AN,al,p,CntCoes[Mc_AN][al][p]); } } } } /* Mc_AN */ Simple_Init: /**************************************************** MPI: CntCoes_Species ****************************************************/ for (wan=0; wan<SpeciesNum; wan++){ Gc_AN = 1; po = 0; do { wanA = WhatSpecies[Gc_AN]; if (wan==wanA){ ID = G2ID[Gc_AN]; Mc_AN = F_G2M[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ if (ID==myid) tmp0 = CntCoes[Mc_AN][al][p]; MPI_Bcast(&tmp0, 1, MPI_DOUBLE, ID, mpi_comm_level1); CntCoes_Species[wan][al][p] = tmp0; } } po = 1; } Gc_AN++; } while (po==0 && Gc_AN<=atomnum); } /**************************************************** MPI: CntCoes ****************************************************/ /*********************************** set data size ************************************/ for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ tag = 999; /* find data size to send block data */ if (F_Snd_Num[IDS]!=0){ size1 = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; wan = WhatSpecies[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ size1 += Spe_Specified_Num[wan][al]; } } Snd_CntCoes_Size[IDS] = size1; MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request); } else{ Snd_CntCoes_Size[IDS] = 0; } /* receiving of size of data */ if (F_Rcv_Num[IDR]!=0){ MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat); Rcv_CntCoes_Size[IDR] = size2; } else{ Rcv_CntCoes_Size[IDR] = 0; } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } else { Snd_CntCoes_Size[myid] = 0; Rcv_CntCoes_Size[myid] = 0; } } /*********************************** data transfer ************************************/ tag = 999; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /***************************** sending of data *****************************/ if (F_Snd_Num[IDS]!=0){ size1 = Snd_CntCoes_Size[IDS]; /* allocation of array */ tmp_array = (double*)malloc(sizeof(double)*size1); /* multidimentional array to vector array */ num = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; wan = WhatSpecies[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ tmp_array[num] = CntCoes[Mc_AN][al][p]; num++; } } } MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /***************************** receiving of block data *****************************/ if (F_Rcv_Num[IDR]!=0){ size2 = Rcv_CntCoes_Size[IDR]; /* allocation of array */ tmp_array2 = (double*)malloc(sizeof(double)*size2); MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); num = 0; Mc_AN = F_TopMAN[IDR] - 1; for (n=0; n<F_Rcv_Num[IDR]; n++){ Mc_AN++; Gc_AN = Rcv_GAN[IDR][n]; wan = WhatSpecies[Gc_AN]; for (al=0; al<Spe_Total_CNO[wan]; al++){ for (p=0; p<Spe_Specified_Num[wan][al]; p++){ CntCoes[Mc_AN][al][p] = tmp_array2[num]; num++; } } } /* freeing of array */ free(tmp_array2); } if (F_Snd_Num[IDS]!=0){ MPI_Wait(&request,&stat); free(tmp_array); /* freeing of array */ } } } /**************************************************** freeing of arrays: int MP[List_YOUSO[8]]; int tmp_index[List_YOUSO[25]+1] [2*(List_YOUSO[25]+1)+1]; int tmp_index1[List_YOUSO[25]+1] [List_YOUSO[24]] [2*(List_YOUSO[25]+1)+1]; int tmp_index2[List_YOUSO[25]+1] [List_YOUSO[24]] [2*(List_YOUSO[25]+1)+1]; double Tmp_CntCoes[List_YOUSO[24]] double Check_ko[List_YOUSO[25]+1] [2*(List_YOUSO[25]+1)+1]; double Weight_ko[List_YOUSO[7]]; ****************************************************/ free(MP); for (i=0; i<(List_YOUSO[25]+1); i++){ free(tmp_index[i]); } free(tmp_index); for (i=0; i<(List_YOUSO[25]+1); i++){ for (j=0; j<List_YOUSO[24]; j++){ free(tmp_index1[i][j]); } free(tmp_index1[i]); } free(tmp_index1); for (i=0; i<(List_YOUSO[25]+1); i++){ for (j=0; j<List_YOUSO[24]; j++){ free(tmp_index2[i][j]); } free(tmp_index2[i]); } free(tmp_index2); free(Tmp_CntCoes); for (i=0; i<(List_YOUSO[25]+1); i++){ free(Check_ko[i]); } free(Check_ko); free(Weight_ko); free(Snd_CntCoes_Size); free(Rcv_CntCoes_Size); free(Snd_H_Size); free(Rcv_H_Size); free(Snd_S_Size); free(Rcv_S_Size); /* for elapsed time */ dtime(&TEtime); time0 = TEtime - TStime; }
/** Determine grid sizes. If this is the first time this routine is called * or the grid sizes have changed (re)allocate the grid and set up the * grid pointers. */ int PairList::SetupGrids(Vec3 const& recipLengths) { int offsetX = cellOffset_; int offsetY = cellOffset_; int offsetZ = cellOffset_; double dc1 = cutList_ / (double)offsetX; double dc2 = cutList_ / (double)offsetY; double dc3 = cutList_ / (double)offsetZ; nGridX_ = std::max(1, (int)(recipLengths[0] / dc1)); // nucgrd1 nGridY_ = std::max(1, (int)(recipLengths[1] / dc2)); nGridZ_ = std::max(1, (int)(recipLengths[2] / dc3)); // Check if grid (re)allocation needs to happen if ( nGridX_ == nGridX_0_ && nGridY_ == nGridY_0_ && nGridZ_ == nGridZ_0_ ) return 0; if (nGridX_0_ != -1) // -1 is the initial allocation mprintf("Warning: Unit cell size has changed so much that grid must be recalculated.\n" "Warning: Old sizes= {%i, %i, %i} New sizes= {%i, %i, %i}\n", nGridX_0_, nGridY_0_, nGridZ_0_, nGridX_, nGridY_, nGridZ_); nGridX_0_ = nGridX_; nGridY_0_ = nGridY_; nGridZ_0_ = nGridZ_; // TODO Add non-periodic case // Check short range cutoff dc1 = recipLengths[0] / (double)nGridX_; dc2 = recipLengths[1] / (double)nGridY_; dc3 = recipLengths[2] / (double)nGridZ_; double cut = (double)offsetX * dc1; if (offsetY*dc2 < cut) cut = (double)offsetY*dc2; if (offsetZ*dc3 < cut) cut = (double)offsetZ*dc3; //if(nogrdptrs)cut=cutlist // Allocation int nGridMax = nGridX_ * nGridY_ * nGridZ_; cells_.clear(); cells_.resize( nGridMax ); if (debug_ > 0) { mprintf("DEBUG: Number of grids per unit cell in each dimension: %i %i %i\n", nGridX_, nGridY_, nGridZ_); //mprintf("Unit cell edge lengths in each dimension: %9.3f %9.3f %9.3f\n",); mprintf("DEBUG: Distance between parallel faces of unit cell: %9.3f %9.3f %9.3f\n", recipLengths[0], recipLengths[1], recipLengths[2]); mprintf("DEBUG: Distance between faces of short range grid subcells: %9.3f %9.3f %9.3f\n", dc1, dc2, dc3); mprintf("DEBUG: Resulting cutoff from subcell neighborhoods is %f\n", cut); mprintf("%zu total grid cells\n", cells_.size()); } if (cut < cutList_) { mprinterr("Error: Resulting cutoff %f too small for lower limit %f\n", cut, cutList_); return 1; } // NOTE: myindex are for parallelization later (maybe). int myindexlo = 0; int myindexhi = (int)cells_.size(); CalcGridPointers(myindexlo, myindexhi); PrintMemory(); return 0; }
int run_main(int argc, char *argv[], int numprocs0, int myid0) { int MD_iter,i,j,po,ip; char fileE[YOUSO10] = ".ene"; char fileDRC[YOUSO10] = ".md"; char fileMemory[YOUSO10]; char fileRestart[YOUSO10]; char operate[200]; double TStime,TEtime; /* for idle CPUs */ int tag; int complete; MPI_Request request; MPI_Status status; /* for measuring elapsed time */ dtime(&TStime); /* allocation of CompTime */ CompTime = (double**)malloc(sizeof(double*)*numprocs0); for (i=0; i<numprocs0; i++){ CompTime[i] = (double*)malloc(sizeof(double)*30); for (j=0; j<30; j++) CompTime[i][j] = 0.0; } if (myid0==Host_ID){ printf("\n*******************************************************\n"); printf("*******************************************************\n"); printf(" Welcome to OpenMX Ver. %s \n",Version_OpenMX); printf(" Copyright (C), 2002-2009, T.Ozaki \n"); printf(" OpenMX comes with ABSOLUTELY NO WARRANTY. \n"); printf(" This is free software, and you are welcome to \n"); printf(" redistribute it under the constitution of the GNU-GPL.\n"); printf("*******************************************************\n"); printf("*******************************************************\n\n"); } Init_List_YOUSO(); remake_headfile = 0; ScaleSize = 1.2; /**************************************************** Read the input file ****************************************************/ init_alloc_first(); CompTime[myid0][1] = readfile(argv); MPI_Barrier(MPI_COMM_WORLD1); /* initialize PrintMemory routine */ sprintf(fileMemory,"%s%s.memory%i",filepath,filename,myid0); PrintMemory(fileMemory,0,"init"); PrintMemory_Fix(); /* initialize */ init(); fnjoint(filepath,filename,fileE); fnjoint(filepath,filename,fileDRC); /**************************************************** SCF-DFT calculations and MD and geometrical optimization. ****************************************************/ MD_iter = 1; do { CompTime[myid0][2] += truncation(MD_iter,1); if (ML_flag==1 && myid0==Host_ID) Get_VSZ(MD_iter); if (Solver==4) { TRAN_Calc_GridBound( mpi_comm_level1, atomnum, WhatSpecies, Spe_Atom_Cut1, Ngrid1, Grid_Origin, Gxyz, tv, gtv, rgtv, Left_tv, Right_tv ); /* output: TRAN_region[], TRAN_grid_bound */ } CompTime[myid0][3] += DFT(MD_iter,(MD_iter-1)%orbitalOpt_per_MDIter+1); if (myid0==Host_ID) iterout(MD_iter,MD_TimeStep*MD_iter,fileE,fileDRC); if (ML_flag==0) CompTime[myid0][4] += MD_pac(MD_iter,argv[1]); MD_iter++; } while(MD_Opt_OK==0 && MD_iter<=MD_IterNumber); if ( TRAN_output_hks ) { /* left is dummy */ TRAN_RestartFile(mpi_comm_level1, "write","left",filepath,TRAN_hksoutfilename); } /**************************************************** calculate Voronoi charge ****************************************************/ if (Voronoi_Charge_flag==1) Voronoi_Charge(); /**************************************************** making of a file *.frac for the fractional coordinates ****************************************************/ Make_FracCoord(argv[1]); /**************************************************** generate Wannier functions added by Hongming Weng ****************************************************/ /* hmweng */ if(Wannier_Func_Calc){ if (myid0==Host_ID) printf("Calling Generate_Wannier...\n");fflush(0); Generate_Wannier(argv[1]); } /**************************************************** Making of output files ****************************************************/ CompTime[myid0][20] = OutData(argv[1]); /**************************************************** write connectivity, Hamiltonian, overlap, density matrices, and etc. to a file, filename.scfout ****************************************************/ if (HS_fileout==1) SCF2File("write",argv[1]); /* elapsed time */ dtime(&TEtime); CompTime[myid0][0] = TEtime - TStime; Output_CompTime(); for (i=0; i<numprocs0; i++){ free(CompTime[i]); } free(CompTime); /* merge log files */ Merge_LogFile(argv[1]); /* free arrays */ Free_Arrays(0); /* print memory */ PrintMemory("total",0,"sum"); /**************************************************** reconstruct the original MPI group ****************************************************/ { int *new_ranks; MPI_Group new_group,old_group; new_ranks = (int*)malloc(sizeof(int)*numprocs0); for (i=0; i<numprocs0; i++) { new_ranks[i]=i; /* a new group is made of original rank=0:Pnum[k]-1 */ } MPI_Comm_group(MPI_COMM_WORLD1, &old_group); /* define a new group */ MPI_Group_incl(old_group,numprocs0,new_ranks,&new_group); MPI_Comm_create(MPI_COMM_WORLD1,new_group,&mpi_comm_level1); MPI_Group_free(&new_group); free(new_ranks); /* never forget cleaning! */ } MPI_Barrier(MPI_COMM_WORLD1); if (myid0==Host_ID){ printf("\nThe calculation was normally finished.\n"); } return 0; }
void Set_XC_Grid(int XC_P_switch, int XC_switch) { /**************************************************** XC_P_switch: 0 \epsilon_XC (XC energy density) 1 \mu_XC (XC potential) 2 \epsilon_XC - \mu_XC ****************************************************/ int MN,MN1,MN2,i,j,k,ri,ri1,ri2; int i1,i2,j1,j2,k1,k2,n,nmax; double den_min=1.0e-14; double Ec_unif[1],Vc_unif[2],Exc[2],Vxc[2]; double Ex_unif[1],Vx_unif[2],tot_den; double ED[2],GDENS[3][2]; double DEXDD[2],DECDD[2]; double DEXDGD[3][2],DECDGD[3][2]; double ***dEXC_dGD,***dDen_Grid; double up_x_a,up_x_b,up_x_c; double up_y_a,up_y_b,up_y_c; double up_z_a,up_z_b,up_z_c; double dn_x_a,dn_x_b,dn_x_c; double dn_y_a,dn_y_b,dn_y_c; double dn_z_a,dn_z_b,dn_z_c; double up_a,up_b,up_c; double dn_a,dn_b,dn_c; double tmp0,tmp1; double cot,sit,sip,cop,phi,theta; double detA,igtv[4][4]; int numprocs,myid; /* for OpenMP */ int OMPID,Nthrds,Nprocs; /**************************************************** when GGA, allocation double dEXC_dGD[2][3][My_NumGrid1] double dDen_Grid[2][3][My_NumGrid1] ****************************************************/ /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); if (XC_switch==4){ dDen_Grid = (double***)malloc(sizeof(double**)*2); for (k=0; k<=1; k++){ dDen_Grid[k] = (double**)malloc(sizeof(double*)*3); for (i=0; i<3; i++){ dDen_Grid[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1); for (j=0; j<My_NumGrid1; j++) dDen_Grid[k][i][j] = 0.0; } } if (XC_P_switch!=0){ dEXC_dGD = (double***)malloc(sizeof(double**)*2); for (k=0; k<=1; k++){ dEXC_dGD[k] = (double**)malloc(sizeof(double*)*3); for (i=0; i<3; i++){ dEXC_dGD[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1); for (j=0; j<My_NumGrid1; j++) dEXC_dGD[k][i][j] = 0.0; } } } /* PrintMemory */ PrintMemory("Set_XC_Grid: dDen_Grid", sizeof(double)*6*My_NumGrid1, NULL); PrintMemory("Set_XC_Grid: dEXC_dGD", sizeof(double)*6*My_NumGrid1, NULL); /**************************************************** calculate dDen_Grid ****************************************************/ detA = gtv[1][1]*gtv[2][2]*gtv[3][3] + gtv[1][2]*gtv[2][3]*gtv[3][1] + gtv[1][3]*gtv[2][1]*gtv[3][2] - gtv[1][3]*gtv[2][2]*gtv[3][1] - gtv[1][2]*gtv[2][1]*gtv[3][3] - gtv[1][1]*gtv[2][3]*gtv[3][2]; igtv[1][1] = (gtv[2][2]*gtv[3][3] - gtv[2][3]*gtv[3][2])/detA; igtv[2][1] = -(gtv[2][1]*gtv[3][3] - gtv[2][3]*gtv[3][1])/detA; igtv[3][1] = (gtv[2][1]*gtv[3][2] - gtv[2][2]*gtv[3][1])/detA; igtv[1][2] = -(gtv[1][2]*gtv[3][3] - gtv[1][3]*gtv[3][2])/detA; igtv[2][2] = (gtv[1][1]*gtv[3][3] - gtv[1][3]*gtv[3][1])/detA; igtv[3][2] = -(gtv[1][1]*gtv[3][2] - gtv[1][2]*gtv[3][1])/detA; igtv[1][3] = (gtv[1][2]*gtv[2][3] - gtv[1][3]*gtv[2][2])/detA; igtv[2][3] = -(gtv[1][1]*gtv[2][3] - gtv[1][3]*gtv[2][1])/detA; igtv[3][3] = (gtv[1][1]*gtv[2][2] - gtv[1][2]*gtv[2][1])/detA; #pragma omp parallel shared(igtv,dDen_Grid,PCCDensity_Grid,PCC_switch,Density_Grid,den_min,My_Cell0,My_Cell1,Ngrid3,Ngrid2,Num_Cells0) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_a,dn_a,up_b,dn_b,up_c,dn_c) { OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); nmax = Num_Cells0*Ngrid2*Ngrid3; for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){ i = n/(Ngrid2*Ngrid3); j = (n-i*Ngrid2*Ngrid3)/Ngrid3; k = n - i*Ngrid2*Ngrid3 - j*Ngrid3; ri = My_Cell1[i]; /* find ri1, ri2, i1, and i2 */ if (ri==0){ ri1 = Ngrid1 - 1; ri2 = 1; i1 = My_Cell0[ri1]; i2 = My_Cell0[ri2]; } else if (ri==(Ngrid1-1)){ ri1 = Ngrid1 - 2; ri2 = 0; i1 = My_Cell0[ri1]; i2 = My_Cell0[ri2]; } else{ ri1 = ri - 1; ri2 = ri + 1; i1 = My_Cell0[ri1]; i2 = My_Cell0[ri2]; } /* because we have +-1 buffer cells. */ if (i1!=-1 && i2!=-1){ /* find j1 and j2 */ if (j==0){ j1 = Ngrid2 - 1; j2 = 1; } else if (j==(Ngrid2-1)){ j1 = Ngrid2 - 2; j2 = 0; } else{ j1 = j - 1; j2 = j + 1; } /* find k1 and k2 */ if (k==0){ k1 = Ngrid3 - 1; k2 = 1; } else if (k==(Ngrid3-1)){ k1 = Ngrid3 - 2; k2 = 0; } else{ k1 = k - 1; k2 = k + 1; } /* set MN */ MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k; /* set dDen_Grid */ if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){ /* a-axis */ MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k; MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k; if (PCC_switch==0) { up_a = Density_Grid[0][MN2] - Density_Grid[0][MN1]; dn_a = Density_Grid[1][MN2] - Density_Grid[1][MN1]; } else if (PCC_switch==1) { up_a = Density_Grid[0][MN2] + PCCDensity_Grid[MN2] - Density_Grid[0][MN1] - PCCDensity_Grid[MN1]; dn_a = Density_Grid[1][MN2] + PCCDensity_Grid[MN2] - Density_Grid[1][MN1] - PCCDensity_Grid[MN1]; } /* b-axis */ MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k; MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k; if (PCC_switch==0) { up_b = Density_Grid[0][MN2] - Density_Grid[0][MN1]; dn_b = Density_Grid[1][MN2] - Density_Grid[1][MN1]; } else if (PCC_switch==1) { up_b = Density_Grid[0][MN2] + PCCDensity_Grid[MN2] - Density_Grid[0][MN1] - PCCDensity_Grid[MN1]; dn_b = Density_Grid[1][MN2] + PCCDensity_Grid[MN2] - Density_Grid[1][MN1] - PCCDensity_Grid[MN1]; } /* c-axis */ MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1; MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2; if (PCC_switch==0) { up_c = Density_Grid[0][MN2] - Density_Grid[0][MN1]; dn_c = Density_Grid[1][MN2] - Density_Grid[1][MN1]; } else if (PCC_switch==1) { up_c = Density_Grid[0][MN2] + PCCDensity_Grid[MN2] - Density_Grid[0][MN1] - PCCDensity_Grid[MN1]; dn_c = Density_Grid[1][MN2] + PCCDensity_Grid[MN2] - Density_Grid[1][MN1] - PCCDensity_Grid[MN1]; } /* up */ dDen_Grid[0][0][MN] = 0.5*(igtv[1][1]*up_a + igtv[1][2]*up_b + igtv[1][3]*up_c); dDen_Grid[0][1][MN] = 0.5*(igtv[2][1]*up_a + igtv[2][2]*up_b + igtv[2][3]*up_c); dDen_Grid[0][2][MN] = 0.5*(igtv[3][1]*up_a + igtv[3][2]*up_b + igtv[3][3]*up_c); /* down */ dDen_Grid[1][0][MN] = 0.5*(igtv[1][1]*dn_a + igtv[1][2]*dn_b + igtv[1][3]*dn_c); dDen_Grid[1][1][MN] = 0.5*(igtv[2][1]*dn_a + igtv[2][2]*dn_b + igtv[2][3]*dn_c); dDen_Grid[1][2][MN] = 0.5*(igtv[3][1]*dn_a + igtv[3][2]*dn_b + igtv[3][3]*dn_c); } else{ dDen_Grid[0][0][MN] = 0.0; dDen_Grid[0][1][MN] = 0.0; dDen_Grid[0][2][MN] = 0.0; dDen_Grid[1][0][MN] = 0.0; dDen_Grid[1][1][MN] = 0.0; dDen_Grid[1][2][MN] = 0.0; } } /* if (i1!=-1 && i2!=-1) */ } /* n */ #pragma omp flush(dDen_Grid) } /* #pragma omp parallel */ } /* if (XC_switch==4) */ /**************************************************** loop MN ****************************************************/ #pragma omp parallel shared(dDen_Grid,dEXC_dGD,den_min,Vxc_Grid,My_NumGrid1,XC_P_switch,XC_switch,Density_Grid,PCC_switch,PCCDensity_Grid) private(OMPID,Nthrds,Nprocs,MN,tot_den,tmp0,ED,Exc,Ec_unif,Vc_unif,Vxc,Ex_unif,Vx_unif,GDENS,DEXDD,DECDD,DEXDGD,DECDGD) { OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){ switch(XC_switch){ /****************************************************************** LDA (Ceperly-Alder) constructed by Ceperly and Alder, ref. D. M. Ceperley, Phys. Rev. B18, 3126 (1978) D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980) and parametrized by Perdew and Zunger. ref. J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981) ******************************************************************/ case 1: tot_den = Density_Grid[0][MN] + Density_Grid[1][MN]; /* partial core correction */ if (PCC_switch==1) { tot_den += PCCDensity_Grid[MN]*2.0; } tmp0 = XC_Ceperly_Alder(tot_den,XC_P_switch); Vxc_Grid[0][MN] = tmp0; Vxc_Grid[1][MN] = tmp0; break; /****************************************************************** LSDA-CA (Ceperly-Alder) constructed by Ceperly and Alder, ref. D. M. Ceperley, Phys. Rev. B18, 3126 (1978) D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980) and parametrized by Perdew and Zunger. ref. J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981) ******************************************************************/ case 2: ED[0] = Density_Grid[0][MN]; ED[1] = Density_Grid[1][MN]; /* partial core correction */ if (PCC_switch==1) { ED[0] += PCCDensity_Grid[MN]; ED[1] += PCCDensity_Grid[MN]; } XC_CA_LSDA(ED[0], ED[1], Exc, XC_P_switch); Vxc_Grid[0][MN] = Exc[0]; Vxc_Grid[1][MN] = Exc[1]; break; /****************************************************************** LSDA-PW (PW91) used as Grad\rho = 0 in their GGA formalism ref. J.P.Perdew and Yue Wang, Phys. Rev. B45, 13244 (1992) ******************************************************************/ case 3: ED[0] = Density_Grid[0][MN]; ED[1] = Density_Grid[1][MN]; /* partial core correction */ if (PCC_switch==1) { ED[0] += PCCDensity_Grid[MN]; ED[1] += PCCDensity_Grid[MN]; } if ((ED[0]+ED[1])<den_min){ Vxc_Grid[0][MN] = 0.0; Vxc_Grid[1][MN] = 0.0; } else{ if (XC_P_switch==0){ XC_PW91C(ED,Ec_unif,Vc_unif); Vxc[0] = Vc_unif[0]; Vxc[1] = Vc_unif[1]; Exc[0] = Ec_unif[0]; XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif); Vxc[0] = Vxc[0] + Vx_unif[0]; Exc[1] = 2.0*ED[0]*Ex_unif[0]; XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif); Vxc[1] += Vx_unif[0]; Exc[1] += 2.0*ED[1]*Ex_unif[0]; Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]); Vxc_Grid[0][MN] = Exc[0] + Exc[1]; Vxc_Grid[1][MN] = Exc[0] + Exc[1]; } else if (XC_P_switch==1){ XC_PW91C(ED,Ec_unif,Vc_unif); Vxc_Grid[0][MN] = Vc_unif[0]; Vxc_Grid[1][MN] = Vc_unif[1]; XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif); Vxc_Grid[0][MN] = Vxc_Grid[0][MN] + Vx_unif[0]; XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif); Vxc_Grid[1][MN] = Vxc_Grid[1][MN] + Vx_unif[0]; } else if (XC_P_switch==2){ XC_PW91C(ED,Ec_unif,Vc_unif); Vxc[0] = Vc_unif[0]; Vxc[1] = Vc_unif[1]; Exc[0] = Ec_unif[0]; XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif); Vxc[0] = Vxc[0] + Vx_unif[0]; Exc[1] = 2.0*ED[0]*Ex_unif[0]; XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif); Vxc[1] += Vx_unif[0]; Exc[1] += 2.0*ED[1]*Ex_unif[0]; Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]); Vxc_Grid[0][MN] = Exc[0] + Exc[1] - Vxc[0]; Vxc_Grid[1][MN] = Exc[0] + Exc[1] - Vxc[1]; } } break; /****************************************************************** GGA-PBE ref. J. P. Perdew, K. Burke, and M. Ernzerhof, Phys. Rev. Lett. 77, 3865 (1996). ******************************************************************/ case 4: /**************************************************** ED[0] density of up spin: n_up ED[1] density of down spin: n_down GDENS[0][0] derivative (x) of density of up spin GDENS[1][0] derivative (y) of density of up spin GDENS[2][0] derivative (z) of density of up spin GDENS[0][1] derivative (x) of density of down spin GDENS[1][1] derivative (y) of density of down spin GDENS[2][1] derivative (z) of density of down spin DEXDD[0] d(fx)/d(n_up) DEXDD[1] d(fx)/d(n_down) DECDD[0] d(fc)/d(n_up) DECDD[1] d(fc)/d(n_down) n'_up_x = d(n_up)/d(x) n'_up_y = d(n_up)/d(y) n'_up_z = d(n_up)/d(z) n'_down_x = d(n_down)/d(x) n'_down_y = d(n_down)/d(y) n'_down_z = d(n_down)/d(z) DEXDGD[0][0] d(fx)/d(n'_up_x) DEXDGD[1][0] d(fx)/d(n'_up_y) DEXDGD[2][0] d(fx)/d(n'_up_z) DEXDGD[0][1] d(fx)/d(n'_down_x) DEXDGD[1][1] d(fx)/d(n'_down_y) DEXDGD[2][1] d(fx)/d(n'_down_z) DECDGD[0][0] d(fc)/d(n'_up_x) DECDGD[1][0] d(fc)/d(n'_up_y) DECDGD[2][0] d(fc)/d(n'_up_z) DECDGD[0][1] d(fc)/d(n'_down_x) DECDGD[1][1] d(fc)/d(n'_down_y) DECDGD[2][1] d(fc)/d(n'_down_z) ****************************************************/ ED[0] = Density_Grid[0][MN]; ED[1] = Density_Grid[1][MN]; if ((ED[0]+ED[1])<den_min){ Vxc_Grid[0][MN] = 0.0; Vxc_Grid[1][MN] = 0.0; /* later add its derivatives */ if (XC_P_switch!=0){ dEXC_dGD[0][0][MN] = 0.0; dEXC_dGD[0][1][MN] = 0.0; dEXC_dGD[0][2][MN] = 0.0; dEXC_dGD[1][0][MN] = 0.0; dEXC_dGD[1][1][MN] = 0.0; dEXC_dGD[1][2][MN] = 0.0; } } else{ GDENS[0][0] = dDen_Grid[0][0][MN]; GDENS[1][0] = dDen_Grid[0][1][MN]; GDENS[2][0] = dDen_Grid[0][2][MN]; GDENS[0][1] = dDen_Grid[1][0][MN]; GDENS[1][1] = dDen_Grid[1][1][MN]; GDENS[2][1] = dDen_Grid[1][2][MN]; if (PCC_switch==1) { ED[0] += PCCDensity_Grid[MN]; ED[1] += PCCDensity_Grid[MN]; } XC_PBE(ED, GDENS, Exc, DEXDD, DECDD, DEXDGD, DECDGD); /* XC energy density */ if (XC_P_switch==0){ Vxc_Grid[0][MN] = Exc[0] + Exc[1]; Vxc_Grid[1][MN] = Exc[0] + Exc[1]; } /* XC potential */ else if (XC_P_switch==1){ Vxc_Grid[0][MN] = DEXDD[0] + DECDD[0]; Vxc_Grid[1][MN] = DEXDD[1] + DECDD[1]; } /* XC energy density - XC potential */ else if (XC_P_switch==2){ Vxc_Grid[0][MN] = Exc[0] + Exc[1] - DEXDD[0] - DECDD[0]; Vxc_Grid[1][MN] = Exc[0] + Exc[1] - DEXDD[1] - DECDD[1]; } /* later add its derivatives */ if (XC_P_switch!=0){ dEXC_dGD[0][0][MN] = DEXDGD[0][0] + DECDGD[0][0]; dEXC_dGD[0][1][MN] = DEXDGD[1][0] + DECDGD[1][0]; dEXC_dGD[0][2][MN] = DEXDGD[2][0] + DECDGD[2][0]; dEXC_dGD[1][0][MN] = DEXDGD[0][1] + DECDGD[0][1]; dEXC_dGD[1][1][MN] = DEXDGD[1][1] + DECDGD[1][1]; dEXC_dGD[1][2][MN] = DEXDGD[2][1] + DECDGD[2][1]; } } break; } /* switch(XC_switch) */ } /* MN */ #pragma omp flush(dEXC_dGD) } /* #pragma omp parallel */ /**************************************************** calculate the second part of XC potential when GGA and XC_P_switch!=0 ****************************************************/ if (XC_switch==4 && XC_P_switch!=0){ #pragma omp parallel shared(XC_P_switch,Vxc_Grid,igtv,dEXC_dGD,Density_Grid,den_min,My_Cell0,My_Cell1,Num_Cells0,Ngrid2,Ngrid3) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_x_a,up_y_a,up_z_a,dn_x_a,dn_y_a,dn_z_a,up_x_b,up_y_b,up_z_b,dn_x_b,dn_y_b,dn_z_b,up_x_c,up_y_c,up_z_c,dn_x_c,dn_y_c,dn_z_c,tmp0,tmp1) { OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); nmax = Num_Cells0*Ngrid2*Ngrid3; for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){ i = n/(Ngrid2*Ngrid3); j = (n-i*Ngrid2*Ngrid3)/Ngrid3; k = n - i*Ngrid2*Ngrid3 - j*Ngrid3; ri = My_Cell1[i]; /* find ri1, ri2, i1, and i2 */ if (ri==0){ ri1 = Ngrid1 - 1; ri2 = 1; i1 = My_Cell0[ri1]; i2 = My_Cell0[ri2]; } else if (ri==(Ngrid1-1)){ ri1 = Ngrid1 - 2; ri2 = 0; i1 = My_Cell0[ri1]; i2 = My_Cell0[ri2]; } else{ ri1 = ri - 1; ri2 = ri + 1; i1 = My_Cell0[ri1]; i2 = My_Cell0[ri2]; } if (i1!=-1 && i2!=-1){ /* find j1 and j2 */ if (j==0){ j1 = Ngrid2 - 1; j2 = 1; } else if (j==(Ngrid2-1)){ j1 = Ngrid2 - 2; j2 = 0; } else{ j1 = j - 1; j2 = j + 1; } /* find k1 and k2 */ if (k==0){ k1 = Ngrid3 - 1; k2 = 1; } else if (k==(Ngrid3-1)){ k1 = Ngrid3 - 2; k2 = 0; } else{ k1 = k - 1; k2 = k + 1; } /* set MN */ MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k; /* set Vxc_Grid */ if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){ /* a-axis */ MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k; MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k; up_x_a = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1]; up_y_a = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1]; up_z_a = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1]; dn_x_a = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1]; dn_y_a = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1]; dn_z_a = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1]; /* b-axis */ MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k; MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k; up_x_b = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1]; up_y_b = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1]; up_z_b = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1]; dn_x_b = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1]; dn_y_b = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1]; dn_z_b = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1]; /* c-axis */ MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1; MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2; up_x_c = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1]; up_y_c = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1]; up_z_c = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1]; dn_x_c = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1]; dn_y_c = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1]; dn_z_c = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1]; /* up */ tmp0 = igtv[1][1]*up_x_a + igtv[1][2]*up_x_b + igtv[1][3]*up_x_c + igtv[2][1]*up_y_a + igtv[2][2]*up_y_b + igtv[2][3]*up_y_c + igtv[3][1]*up_z_a + igtv[3][2]*up_z_b + igtv[3][3]*up_z_c; tmp0 = 0.5*tmp0; /* down */ tmp1 = igtv[1][1]*dn_x_a + igtv[1][2]*dn_x_b + igtv[1][3]*dn_x_c + igtv[2][1]*dn_y_a + igtv[2][2]*dn_y_b + igtv[2][3]*dn_y_c + igtv[3][1]*dn_z_a + igtv[3][2]*dn_z_b + igtv[3][3]*dn_z_c; tmp1 = 0.5*tmp1; /* XC potential */ if (XC_P_switch==1){ Vxc_Grid[0][MN] -= tmp0; Vxc_Grid[1][MN] -= tmp1; } /* XC energy density - XC potential */ else if (XC_P_switch==2){ Vxc_Grid[0][MN] += tmp0; Vxc_Grid[1][MN] += tmp1; } } } } #pragma omp flush(Vxc_Grid) } /* #pragma omp parallel */ } /* if (XC_switch==4 && XC_P_switch!=0) */ /**************************************************** In case of non-collinear spin DFT ****************************************************/ if (SpinP_switch==3 && XC_P_switch!=2){ #pragma omp parallel shared(Density_Grid,Vxc_Grid,My_NumGrid1) private(OMPID,Nthrds,Nprocs,MN,tmp0,tmp1,theta,phi,sit,cot,sip,cop) { OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){ tmp0 = 0.5*(Vxc_Grid[0][MN] + Vxc_Grid[1][MN]); tmp1 = 0.5*(Vxc_Grid[0][MN] - Vxc_Grid[1][MN]); theta = Density_Grid[2][MN]; phi = Density_Grid[3][MN]; sit = sin(theta); cot = cos(theta); sip = sin(phi); cop = cos(phi); Vxc_Grid[0][MN] = tmp0 + cot*tmp1; /* Re Vxc11 */ Vxc_Grid[1][MN] = tmp0 - cot*tmp1; /* Re Vxc22 */ Vxc_Grid[2][MN] = tmp1*sit*cop; /* Re Vxc12 */ Vxc_Grid[3][MN] = -tmp1*sit*sip; /* Im Vxc12 */ } #pragma omp flush(Vxc_Grid) } /* #pragma omp parallel */ } /* { int hN1,hN2,hN3,i; double Re11,Re22,Re12,Im12; hN1 = Ngrid1/2; hN2 = Ngrid2/2; hN3 = Ngrid3/2; for (i=0; i<Num_Cells0; i++){ MN = i*Ngrid2*Ngrid3 + hN2*Ngrid3 + hN3; Re11 = Vxc_Grid[0][MN]; Re22 = Vxc_Grid[1][MN]; Re12 = Vxc_Grid[2][MN]; Im12 = Vxc_Grid[3][MN]; printf("MN=%4d %15.12f %15.12f %15.12f %15.12f\n", MN,Re11,Re22,Re12,Im12); } } MPI_Finalize(); exit(0); */ /**************************************************** In case of GGA, free arrays double dEXC_dGD[2][3][My_NumGrid1] double dDen_Grid[2][3][My_NumGrid1] ****************************************************/ if (XC_switch==4){ for (k=0; k<=1; k++){ for (i=0; i<3; i++){ free(dDen_Grid[k][i]); } free(dDen_Grid[k]); } free(dDen_Grid); if (XC_P_switch!=0){ for (k=0; k<=1; k++){ for (i=0; i<3; i++){ free(dEXC_dGD[k][i]); } free(dEXC_dGD[k]); } free(dEXC_dGD); } } }
void Overlap_Band(int Host_ID1, double ****OLP, dcomplex **S, int *MP, double k1, double k2, double k3) { static int firsttime=1; int i,j,k,wanA,wanB,tnoA,tnoB,Anum,Bnum; int NUM,MA_AN,GA_AN,LB_AN,GB_AN; int l1,l2,l3,Rn,n2; double **S1,**S2; double *tmp_array1,*tmp_array2; double kRn,si,co,s; int ID,myid,numprocs,tag=999; MPI_Status stat; MPI_Request request; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); MPI_Barrier(mpi_comm_level1); /* set MP */ Anum = 1; for (i=1; i<=atomnum; i++){ MP[i] = Anum; wanA = WhatSpecies[i]; tnoA = Spe_Total_CNO[wanA]; Anum = Anum + tnoA; } NUM = Anum - 1; /**************************************************** Allocation ****************************************************/ n2 = NUM + 2; S1 = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++){ S1[i] = (double*)malloc(sizeof(double)*n2); } if (firsttime) PrintMemory("Overlap_Band: S1",sizeof(double)*n2*n2,NULL); S2 = (double**)malloc(sizeof(double*)*n2); for (i=0; i<n2; i++){ S2[i] = (double*)malloc(sizeof(double)*n2); } if (firsttime) PrintMemory("Overlap_Band: S2",sizeof(double)*n2*n2,NULL); /* for PrintMemory */ firsttime=0; /**************************************************** set overlap ****************************************************/ S[0][0].r = NUM; for (i=1; i<=NUM; i++){ for (j=1; j<=NUM; j++){ S1[i][j] = 0.0; S2[i][j] = 0.0; } } for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){ GA_AN = M2G[MA_AN]; wanA = WhatSpecies[GA_AN]; tnoA = Spe_Total_CNO[wanA]; Anum = MP[GA_AN]; for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){ GB_AN = natn[GA_AN][LB_AN]; Rn = ncn[GA_AN][LB_AN]; wanB = WhatSpecies[GB_AN]; tnoB = Spe_Total_CNO[wanB]; /* kRn = k1*( (double)atv_ijk[Rn][1] + Cell_Gxyz[GB_AN][1] - Cell_Gxyz[GA_AN][1] ) + k2*( (double)atv_ijk[Rn][2] + Cell_Gxyz[GB_AN][2] - Cell_Gxyz[GA_AN][2] ) + k3*( (double)atv_ijk[Rn][3] + Cell_Gxyz[GB_AN][3] - Cell_Gxyz[GA_AN][3] ); */ l1 = atv_ijk[Rn][1]; l2 = atv_ijk[Rn][2]; l3 = atv_ijk[Rn][3]; kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3; si = sin(2.0*PI*kRn); co = cos(2.0*PI*kRn); Bnum = MP[GB_AN]; for (i=0; i<tnoA; i++){ for (j=0; j<tnoB; j++){ s = OLP[MA_AN][LB_AN][i][j]; S1[Anum+i][Bnum+j] += s*co; S2[Anum+i][Bnum+j] += s*si; } } } } /****************************************************** MPI: S1 and S2 ******************************************************/ tmp_array1 = (double*)malloc(sizeof(double)*2*n2*List_YOUSO[7]); tmp_array2 = (double*)malloc(sizeof(double)*2*n2*List_YOUSO[7]); /* S1 and S2 */ if (myid!=Host_ID1){ for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){ GA_AN = M2G[MA_AN]; wanA = WhatSpecies[GA_AN]; tnoA = Spe_Total_CNO[wanA]; Anum = MP[GA_AN]; k = 0; for (i=0; i<tnoA; i++){ for (j=0; j<n2; j++){ tmp_array1[k] = S1[Anum+i][j]; k++; } } for (i=0; i<tnoA; i++){ for (j=0; j<n2; j++){ tmp_array1[k] = S2[Anum+i][j]; k++; } } tag = 999; MPI_Isend(&tmp_array1[0], 2*tnoA*n2, MPI_DOUBLE, Host_ID1, tag, mpi_comm_level1, &request); MPI_Wait(&request,&stat); } } else{ for (GA_AN=1; GA_AN<=atomnum; GA_AN++){ wanA = WhatSpecies[GA_AN]; tnoA = Spe_Total_CNO[wanA]; Anum = MP[GA_AN]; ID = G2ID[GA_AN]; if (ID!=Host_ID1){ tag = 999; MPI_Recv(&tmp_array2[0], 2*tnoA*n2, MPI_DOUBLE, ID, tag, mpi_comm_level1, &stat); k = 0; for (i=0; i<tnoA; i++){ for (j=0; j<n2; j++){ S1[Anum+i][j] = tmp_array2[k]; k++; } } for (i=0; i<tnoA; i++){ for (j=0; j<n2; j++){ S2[Anum+i][j] = tmp_array2[k]; k++; } } } } } free(tmp_array1); free(tmp_array2); /****************************************************** Make the full complex matrix of S ******************************************************/ for (i=1; i<=NUM; i++){ for (j=1; j<=NUM; j++){ S[i][j].r = S1[i][j]; S[i][j].i = S2[i][j]; } } /**************************************************** free arrays ****************************************************/ for (i=0; i<n2; i++){ free(S2[i]); } free(S2); for (i=0; i<n2; i++){ free(S1[i]); } free(S1); }
double Set_Aden_Grid(int init_density) { /**************************************************** Densities by the atomic superposition densities on grids ****************************************************/ static int firsttime=1; int i,k,MN,ct_AN,Gc_AN,Mc_AN,top_num; int Rn,Cwan,Nc,GNc,GRc,n,size1,size2; int My_Max,Max_Size; int size_AtomDen_Grid; double time0; double x,y,z,DenA,DenPCC,dDenA,dDenPCC; double tmp0,dx,dy,dz,r,rmin=10e-14; double Nele,Nu,Nd,M,ocupcy_u,ocupcy_d; double rho,mag,magx,magy,magz,theta,phi; double TStime,TEtime; double Cxyz[4]; double S_coordinate[3]; double *tmp_array; double *tmp_array2; double **AtomDen_Grid; double **PCCDen_Grid; double *AtomDen2_Grid; double *PCCDen2_Grid; int *Snd_Size,*Rcv_Size; int numprocs,myid,tag=999,ID,IDS,IDR; double Stime_atom, Etime_atom; double Nup,Ndown,sit,cot,sip,cop; dcomplex U[2][2]; MPI_Status stat; MPI_Request request; /* for OpenMP */ int OMPID,Nthrds,Nprocs; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); dtime(&TStime); /**************************************************** allocation of arrays: int Snd_Size[numprocs] int Rcv_Size[numprocs] double AtomDen_Grid[Matomnum+MatomnumF+1] [GridN_Atom[Gc_AN]] double PCCDen_Grid[Matomnum+MatomnumF+1] [GridN_Atom[Gc_AN]] ****************************************************/ Snd_Size = (int*)malloc(sizeof(int)*numprocs); Rcv_Size = (int*)malloc(sizeof(int)*numprocs); size_AtomDen_Grid = Matomnum+MatomnumF+1; AtomDen_Grid = (double**)malloc(sizeof(double*)*(Matomnum+MatomnumF+1)); AtomDen_Grid[0] = (double*)malloc(sizeof(double)*1); for (Mc_AN=1; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){ Gc_AN = F_M2G[Mc_AN]; AtomDen_Grid[Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]); size_AtomDen_Grid += GridN_Atom[Gc_AN]; } PCCDen_Grid = (double**)malloc(sizeof(double*)*(Matomnum+MatomnumF+1)); PCCDen_Grid[0] = (double*)malloc(sizeof(double)*1); for (Mc_AN=1; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){ Gc_AN = F_M2G[Mc_AN]; PCCDen_Grid[Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]); } /* PrintMemory */ if (firsttime==1){ PrintMemory("Set_Aden_Grid: AtomDen_Grid", sizeof(double)*size_AtomDen_Grid, NULL); PrintMemory("Set_Aden_Grid: PCCDen_Grid", sizeof(double)*size_AtomDen_Grid, NULL); firsttime = 0; } /****************************************************** setting of AtomDen_Grid ******************************************************/ /************************************** for spin non-collinear 1. set rho, mx, my, mz 2. calculate theta and phi 3. n_up = (rho+m)/2 n_dn = (rho-m)/2 **************************************/ if (SpinP_switch==3){ for (MN=0; MN<My_NumGrid1; MN++){ Density_Grid[0][MN] = 0.0; Density_Grid[1][MN] = 0.0; Density_Grid[2][MN] = 0.0; Density_Grid[3][MN] = 0.0; ADensity_Grid[MN] = 0.0; } } /* spin collinear */ else{ for (MN=0; MN<My_NumGrid1; MN++){ Density_Grid[0][MN] = 0.0; Density_Grid[1][MN] = 0.0; ADensity_Grid[MN] = 0.0; } } /* PCC */ if (PCC_switch==1) { for (MN=0; MN<My_NumGrid1; MN++){ PCCDensity_Grid[MN] = 0.0; } } for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ dtime(&Stime_atom); Gc_AN = M2G[Mc_AN]; Cwan = WhatSpecies[Gc_AN]; #pragma omp parallel shared(PCCDen_Grid,PCC_switch,AtomDen_Grid,Cwan,Gxyz,atv,Mc_AN,CellListAtom,GridListAtom,GridN_Atom,Gc_AN) private(OMPID,Nthrds,Nprocs,Nc,GNc,GRc,Cxyz,dx,dy,dz,r,DenA) { /* get info. on OpenMP */ OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); for (Nc=OMPID*GridN_Atom[Gc_AN]/Nthrds; Nc<(OMPID+1)*GridN_Atom[Gc_AN]/Nthrds; Nc++){ GNc = GridListAtom[Mc_AN][Nc]; GRc = CellListAtom[Mc_AN][Nc]; Get_Grid_XYZ(GNc,Cxyz); dx = Cxyz[1] + atv[GRc][1] - Gxyz[Gc_AN][1]; dy = Cxyz[2] + atv[GRc][2] - Gxyz[Gc_AN][2]; dz = Cxyz[3] + atv[GRc][3] - Gxyz[Gc_AN][3]; r = sqrt(dx*dx + dy*dy + dz*dz); /* AtomicDenF */ DenA = AtomicDenF(Cwan,r); AtomDen_Grid[Mc_AN][Nc] = DenA; /* partial core correction */ if (PCC_switch==1) { PCCDen_Grid[Mc_AN][Nc] = AtomicPCCF(Cwan,r); } } } /* #pragma omp parallel */ dtime(&Etime_atom); time_per_atom[Gc_AN] += Etime_atom - Stime_atom; } /****************************************************** MPI: AtomDen_Grid ******************************************************/ /* find data size for sending and recieving */ tag = 999; My_Max = -10000; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /* sending size */ if (F_Snd_Num[IDS]!=0){ /* find data size */ size1 = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Gc_AN = Snd_GAN[IDS][n]; size1 += GridN_Atom[Gc_AN]; } Snd_Size[IDS] = size1; MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request); } else{ Snd_Size[IDS] = 0; } /* receiving size */ if (F_Rcv_Num[IDR]!=0){ MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat); Rcv_Size[IDR] = size2; } else{ Rcv_Size[IDR] = 0; } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } else{ Snd_Size[IDS] = 0; Rcv_Size[IDR] = 0; } if (My_Max<Snd_Size[IDS]) My_Max = Snd_Size[IDS]; if (My_Max<Rcv_Size[IDR]) My_Max = Rcv_Size[IDR]; } MPI_Allreduce(&My_Max, &Max_Size, 1, MPI_INT, MPI_MAX, mpi_comm_level1); tmp_array = (double*)malloc(sizeof(double)*Max_Size); tmp_array2 = (double*)malloc(sizeof(double)*Max_Size); /* send and recieve AtomDen_Grid */ tag = 999; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /***************************** sending of data *****************************/ if (F_Snd_Num[IDS]!=0){ /* find data size */ size1 = Snd_Size[IDS]; /* multidimentional array to vector array */ k = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; for (i=0; i<GridN_Atom[Gc_AN]; i++){ tmp_array[k] = AtomDen_Grid[Mc_AN][i]; k++; } } /* MPI_Isend */ MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /***************************** receiving of block data *****************************/ if (F_Rcv_Num[IDR]!=0){ /* find data size */ size2 = Rcv_Size[IDR]; /* MPI_Recv */ MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); k = 0; Mc_AN = F_TopMAN[IDR] - 1; for (n=0; n<F_Rcv_Num[IDR]; n++){ Mc_AN++; Gc_AN = Rcv_GAN[IDR][n]; for (i=0; i<GridN_Atom[Gc_AN]; i++){ AtomDen_Grid[Mc_AN][i] = tmp_array2[k]; k++; } } } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } } /****************************************************** MPI: PCCDen_Grid ******************************************************/ /* send and receive PCCDen_Grid */ tag = 999; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /***************************** sending of data *****************************/ if (F_Snd_Num[IDS]!=0){ /* find data size */ size1 = Snd_Size[IDS]; /* multidimentional array to vector array */ k = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; for (i=0; i<GridN_Atom[Gc_AN]; i++){ tmp_array[k] = PCCDen_Grid[Mc_AN][i]; k++; } } /* MPI_Isend */ MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /***************************** receiving of block data *****************************/ if (F_Rcv_Num[IDR]!=0){ /* find data size */ size2 = Rcv_Size[IDR]; /* MPI_Recv */ MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); k = 0; Mc_AN = F_TopMAN[IDR] - 1; for (n=0; n<F_Rcv_Num[IDR]; n++){ Mc_AN++; Gc_AN = Rcv_GAN[IDR][n]; for (i=0; i<GridN_Atom[Gc_AN]; i++){ PCCDen_Grid[Mc_AN][i] = tmp_array2[k]; k++; } } } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } } /**************************************************** freeing of arrays: tmp_array tmp_array2 ****************************************************/ free(tmp_array); free(tmp_array2); /****************************************************** superposition of atomic densities ******************************************************/ for (Mc_AN=1; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){ dtime(&Stime_atom); Gc_AN = F_M2G[Mc_AN]; Cwan = WhatSpecies[Gc_AN]; Nele = fabs(InitN_USpin[Gc_AN] + InitN_DSpin[Gc_AN]); Nu = InitN_USpin[Gc_AN]; Nd = InitN_DSpin[Gc_AN]; if (1.0e-15<Nele){ ocupcy_u = Nu/Nele; ocupcy_d = Nd/Nele; } else{ ocupcy_u = 0.0; ocupcy_d = 0.0; } if (2<=level_stdout){ printf(" Mc_AN=%3d Gc_AN=%3d ocupcy_u=%15.12f ocupcy_d=%15.12f\n", Mc_AN,Gc_AN,ocupcy_u,ocupcy_d); } for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){ MN = MGridListAtom[Mc_AN][Nc]; if (0<=MN){ DenA = AtomDen_Grid[Mc_AN][Nc]; /* spin collinear */ if ( init_density==1 && SpinP_switch==1 ){ Density_Grid[0][MN] += ocupcy_u*DenA; Density_Grid[1][MN] += ocupcy_d*DenA; } /* spin non-collinear */ else if ( init_density==1 && SpinP_switch==3 ){ theta = Angle0_Spin[Gc_AN]; phi = Angle1_Spin[Gc_AN]; rho = DenA; mag = (ocupcy_u - ocupcy_d)*DenA; magx = mag*sin(theta)*cos(phi); magy = mag*sin(theta)*sin(phi); magz = mag*cos(theta); Density_Grid[0][MN] += rho; Density_Grid[1][MN] += magx; Density_Grid[2][MN] += magy; Density_Grid[3][MN] += magz; } else if (init_density==1){ Density_Grid[0][MN] += 0.5*DenA; } else{ ADensity_Grid[MN] += DenA; } /* partial core correction */ if (PCC_switch==1) { DenPCC = PCCDen_Grid[Mc_AN][Nc]; /* later add this in Set_XC_Grid */ PCCDensity_Grid[MN] += 0.5*DenPCC; } } } dtime(&Etime_atom); time_per_atom[Gc_AN] += Etime_atom - Stime_atom; } /****************************************************** atomic densities (AtomDen2_Grid) in terms of FNAN2 ******************************************************/ AtomDen2_Grid = (double*)malloc(sizeof(double)*FNAN2_Grid); /* MPI */ tag = 999; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /***************************** sending of data *****************************/ if (Num_Snd_FNAN2_Grid[IDS]!=0){ tmp_array = (double*)malloc(sizeof(double)*Num_Snd_FNAN2_Grid[IDS]); /* vector array */ for (i=0; i<Num_Snd_FNAN2_Grid[IDS]; i++){ Gc_AN = Snd_FNAN2_At[IDS][i]; Mc_AN = F_G2M[Gc_AN]; Nc = Snd_FNAN2_Nc[IDS][i]; tmp_array[i] = AtomDen_Grid[Mc_AN][Nc]; } /* MPI_Isend */ MPI_Isend(&tmp_array[0], Num_Snd_FNAN2_Grid[IDS], MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /***************************** receiving of data *****************************/ if (Num_Rcv_FNAN2_Grid[IDR]!=0){ top_num = TopMAN2_Grid[IDR]; /* MPI_Recv */ MPI_Recv(&AtomDen2_Grid[top_num], Num_Rcv_FNAN2_Grid[IDR], MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); } if (Num_Snd_FNAN2_Grid[IDS]!=0){ MPI_Wait(&request,&stat); free(tmp_array); } } } /* Density_Grid += AtomDen2_Grid */ for (i=0; i<FNAN2_Grid; i++){ DenA = AtomDen2_Grid[i]; MN = Rcv_FNAN2_MN[i]; Gc_AN = Rcv_FNAN2_GA[i]; Nele = InitN_USpin[Gc_AN] + InitN_DSpin[Gc_AN]; Nu = InitN_USpin[Gc_AN]; Nd = InitN_DSpin[Gc_AN]; if (1.0e-13<Nele){ ocupcy_u = Nu/Nele; ocupcy_d = Nd/Nele; } else{ ocupcy_u = 0.0; ocupcy_d = 0.0; } /* spin collinear */ if ( init_density==1 && SpinP_switch==1 ){ Density_Grid[0][MN] += ocupcy_u*DenA; Density_Grid[1][MN] += ocupcy_d*DenA; } /* spin non-collinear */ else if ( init_density==1 && SpinP_switch==3 ){ theta = Angle0_Spin[Gc_AN]; phi = Angle1_Spin[Gc_AN]; rho = DenA; mag = (ocupcy_u - ocupcy_d)*DenA; magx = mag*sin(theta)*cos(phi); magy = mag*sin(theta)*sin(phi); magz = mag*cos(theta); Density_Grid[0][MN] += rho; Density_Grid[1][MN] += magx; Density_Grid[2][MN] += magy; Density_Grid[3][MN] += magz; } else if (init_density==1){ Density_Grid[0][MN] += 0.5*DenA; } } /****************************************************** pcc densities (PCCDen2_Grid) in terms of FNAN2 ******************************************************/ PCCDen2_Grid = (double*)malloc(sizeof(double)*FNAN2_Grid); if (PCC_switch==1) { /* MPI */ tag = 999; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /***************************** sending of data *****************************/ if (Num_Snd_FNAN2_Grid[IDS]!=0){ tmp_array = (double*)malloc(sizeof(double)*Num_Snd_FNAN2_Grid[IDS]); /* vector array */ for (i=0; i<Num_Snd_FNAN2_Grid[IDS]; i++){ Gc_AN = Snd_FNAN2_At[IDS][i]; Mc_AN = F_G2M[Gc_AN]; Nc = Snd_FNAN2_Nc[IDS][i]; tmp_array[i] = PCCDen_Grid[Mc_AN][Nc]; } /* MPI_Isend */ MPI_Isend(&tmp_array[0], Num_Snd_FNAN2_Grid[IDS], MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /***************************** receiving of data *****************************/ if (Num_Rcv_FNAN2_Grid[IDR]!=0){ top_num = TopMAN2_Grid[IDR]; /* MPI_Recv */ MPI_Recv(&PCCDen2_Grid[top_num], Num_Rcv_FNAN2_Grid[IDR], MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); } if (Num_Snd_FNAN2_Grid[IDS]!=0){ MPI_Wait(&request,&stat); free(tmp_array); } } } /* PCCDensity_Grid += PCCDen2_Grid */ for (i=0; i<FNAN2_Grid; i++){ MN = Rcv_FNAN2_MN[i]; PCCDensity_Grid[MN] += 0.5*PCCDen2_Grid[i]; } } /**************************************************** initialize diagonal and off-diagonal densities in case of spin non-collinear DFT ****************************************************/ if (init_density==1 && SpinP_switch==3){ for (MN=0; MN<My_NumGrid1; MN++){ rho = Density_Grid[0][MN]; magx = Density_Grid[1][MN]; magy = Density_Grid[2][MN]; magz = Density_Grid[3][MN]; Density_Grid[0][MN] = 0.5*(rho + magz); Density_Grid[1][MN] = 0.5*(rho - magz); Density_Grid[2][MN] = 0.5*magx; Density_Grid[3][MN] =-0.5*magy; } } /****************************************************** Density_Grid to ADensity_Grid ******************************************************/ if ( init_density==1 && (SpinP_switch==1 || SpinP_switch==3) ){ for (MN=0; MN<My_NumGrid1; MN++){ ADensity_Grid[MN] = Density_Grid[0][MN] + Density_Grid[1][MN]; } } else if (init_density==1){ for (MN=0; MN<My_NumGrid1; MN++){ ADensity_Grid[MN] = 2.0*Density_Grid[0][MN]; } } /**************************************************** in case of non-spin polarization up-spin to down-spin ****************************************************/ if (init_density==1 && SpinP_switch==0){ for (MN=0; MN<My_NumGrid1; MN++){ Density_Grid[1][MN] = Density_Grid[0][MN]; } } /**************************************************** freeing of arrays: Snd_Size Rcv_Size AtomDen_Grid PCCDen_Grid ****************************************************/ free(Snd_Size); free(Rcv_Size); for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){ free(AtomDen_Grid[Mc_AN]); } free(AtomDen_Grid); for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){ free(PCCDen_Grid[Mc_AN]); } free(PCCDen_Grid); free(AtomDen2_Grid); free(PCCDen2_Grid); /* elapsed time */ dtime(&TEtime); time0 = TEtime - TStime; return time0; }
void Make_VNA_Grid() { static int firsttime=1; unsigned long long int n2D,N2D,GNc,GN; int i,Mc_AN,Gc_AN,BN,CN,LN,GRc,N3[4]; int AN,Nc,MN,Cwan,NN_S,NN_R; int size_AtomVNA_Grid; int size_AtomVNA_Snd_Grid_A2B; int size_AtomVNA_Rcv_Grid_A2B; double Cxyz[4]; double r,dx,dy,dz; double **AtomVNA_Grid; double **AtomVNA_Snd_Grid_A2B; double **AtomVNA_Rcv_Grid_A2B; double Stime_atom, Etime_atom; int numprocs,myid,tag=999,ID,IDS,IDR; int OMPID,Nthrds,Nprocs; MPI_Status stat; MPI_Request request; MPI_Status *stat_send; MPI_Status *stat_recv; MPI_Request *request_send; MPI_Request *request_recv; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); /* allocation of arrays */ size_AtomVNA_Grid = 1; AtomVNA_Grid = (double**)malloc(sizeof(double*)*(Matomnum+1)); AtomVNA_Grid[0] = (double*)malloc(sizeof(double)*1); for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = F_M2G[Mc_AN]; AtomVNA_Grid[Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]); size_AtomVNA_Grid += GridN_Atom[Gc_AN]; } size_AtomVNA_Snd_Grid_A2B = 0; AtomVNA_Snd_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs); for (ID=0; ID<numprocs; ID++){ AtomVNA_Snd_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Snd_Grid_A2B[ID]); size_AtomVNA_Snd_Grid_A2B += Num_Snd_Grid_A2B[ID]; } size_AtomVNA_Rcv_Grid_A2B = 0; AtomVNA_Rcv_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs); for (ID=0; ID<numprocs; ID++){ AtomVNA_Rcv_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Rcv_Grid_A2B[ID]); size_AtomVNA_Rcv_Grid_A2B += Num_Rcv_Grid_A2B[ID]; } /* PrintMemory */ if (firsttime) { PrintMemory("Set_Vpot: AtomVNA_Grid",sizeof(double)*size_AtomVNA_Grid,NULL); PrintMemory("Set_Vpot: AtomVNA_Snd_Grid_A2B",sizeof(double)*size_AtomVNA_Snd_Grid_A2B,NULL); PrintMemory("Set_Vpot: AtomVNA_Rcv_Grid_A2B",sizeof(double)*size_AtomVNA_Rcv_Grid_A2B,NULL); } /* calculation of AtomVNA_Grid */ for (MN=0; MN<My_NumGridC; MN++) VNA_Grid[MN] = 0.0; for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ dtime(&Stime_atom); Gc_AN = M2G[Mc_AN]; Cwan = WhatSpecies[Gc_AN]; #pragma omp parallel shared(AtomVNA_Grid,GridN_Atom,atv,Gxyz,Gc_AN,Cwan,Mc_AN,GridListAtom,CellListAtom) private(OMPID,Nthrds,Nprocs,Nc,GNc,GRc,Cxyz,dx,dy,dz,r) { OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); for (Nc=OMPID*GridN_Atom[Gc_AN]/Nthrds; Nc<(OMPID+1)*GridN_Atom[Gc_AN]/Nthrds; Nc++){ GNc = GridListAtom[Mc_AN][Nc]; GRc = CellListAtom[Mc_AN][Nc]; Get_Grid_XYZ(GNc,Cxyz); dx = Cxyz[1] + atv[GRc][1] - Gxyz[Gc_AN][1]; dy = Cxyz[2] + atv[GRc][2] - Gxyz[Gc_AN][2]; dz = Cxyz[3] + atv[GRc][3] - Gxyz[Gc_AN][3]; r = sqrt(dx*dx + dy*dy + dz*dz); AtomVNA_Grid[Mc_AN][Nc] = VNAF(Cwan,r); } #pragma omp flush(AtomVNA_Grid) } /* #pragma omp parallel */ dtime(&Etime_atom); time_per_atom[Gc_AN] += Etime_atom - Stime_atom; } /* Mc_AN */ /****************************************************** MPI communication from the partitions A to B ******************************************************/ /* copy AtomVNA_Grid to AtomVNA_Snd_Grid_A2B */ for (ID=0; ID<numprocs; ID++) Num_Snd_Grid_A2B[ID] = 0; N2D = Ngrid1*Ngrid2; for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; for (AN=0; AN<GridN_Atom[Gc_AN]; AN++){ GN = GridListAtom[Mc_AN][AN]; GN2N(GN,N3); n2D = N3[1]*Ngrid2 + N3[2]; ID = (int)(n2D*(unsigned long long int)numprocs/N2D); AtomVNA_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]] = AtomVNA_Grid[Mc_AN][AN]; Num_Snd_Grid_A2B[ID]++; } } /* MPI: A to B */ request_send = malloc(sizeof(MPI_Request)*NN_A2B_S); request_recv = malloc(sizeof(MPI_Request)*NN_A2B_R); stat_send = malloc(sizeof(MPI_Status)*NN_A2B_S); stat_recv = malloc(sizeof(MPI_Status)*NN_A2B_R); NN_S = 0; NN_R = 0; tag = 999; for (ID=1; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (Num_Snd_Grid_A2B[IDS]!=0){ MPI_Isend(&AtomVNA_Snd_Grid_A2B[IDS][0], Num_Snd_Grid_A2B[IDS], MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]); NN_S++; } if (Num_Rcv_Grid_A2B[IDR]!=0){ MPI_Irecv( &AtomVNA_Rcv_Grid_A2B[IDR][0], Num_Rcv_Grid_A2B[IDR], MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]); NN_R++; } } if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send); if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv); free(request_send); free(request_recv); free(stat_send); free(stat_recv); /* for myid */ for (i=0; i<Num_Rcv_Grid_A2B[myid]; i++){ AtomVNA_Rcv_Grid_A2B[myid][i] = AtomVNA_Snd_Grid_A2B[myid][i]; } /****************************************************** superposition of VNA in the partition B ******************************************************/ /* initialize VNA_Grid_B */ for (BN=0; BN<My_NumGridB_AB; BN++) VNA_Grid_B[BN] = 0.0; /* superposition of VNA */ for (ID=0; ID<numprocs; ID++){ for (LN=0; LN<Num_Rcv_Grid_A2B[ID]; LN++){ BN = Index_Rcv_Grid_A2B[ID][3*LN+0]; VNA_Grid_B[BN] += AtomVNA_Rcv_Grid_A2B[ID][LN]; } /* LN */ } /* ID */ /****************************************************** MPI: from the partitions B to C ******************************************************/ Data_Grid_Copy_B2C_1( VNA_Grid_B, VNA_Grid ); /* freeing of arrays */ for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){ free(AtomVNA_Grid[Mc_AN]); } free(AtomVNA_Grid); for (ID=0; ID<numprocs; ID++){ free(AtomVNA_Snd_Grid_A2B[ID]); } free(AtomVNA_Snd_Grid_A2B); for (ID=0; ID<numprocs; ID++){ free(AtomVNA_Rcv_Grid_A2B[ID]); } free(AtomVNA_Rcv_Grid_A2B); }
int main(void){ ListOfProcesses *LReady; ListOfProcesses *LBlock; ListOfProcesses *LFinished; Memory *Mmemory; FILE *pEntry; FILE *pOut; int i; int counter = 0; int retRunning; int TamReady = 0; tempoTotal = 0; pOut = fopen("OutputFIRST.txt", "w"); if (pOut == NULL){ printf("Erro ao tentar abrir o arquivo!\n"); exit(1); } pEntry = fopen("teste.txt", "r"); if (pEntry == NULL){ printf("Erro ao tentar abrir o arquivo!\n"); exit(1); } LReady = FileReader(pEntry, &TamReady); Mmemory = MemoryCreator(); LBlock = BlockCreator(TamReady); LFinished = FinishedCreator(TamReady); Mmemory->LMemory->quantity = 0; for (i = 0; i < LReady->quantity; i++){ PrintProcess(&(LReady->proc[i]), pOut); } while ((Mmemory->LMemory->quantity + LReady->quantity + LBlock->quantity) != 0){ while ((CheckMemory(Mmemory) >= LReady->proc[0].Memory) && (LReady->proc[0].order != -1)){ ReadyToMemory(LReady, Mmemory); OrganizeList(LReady); fprintf(pOut, "\n-----------------------Mapa de Alocação de Memória-----------------------\n"); PrintMemory(Mmemory, pOut); fprintf(pOut, "-----------------------Fim Mapa de Alocação de Memória-----------------------\n"); } counter++; retRunning = RunningProcess(Mmemory, LBlock, pOut); BlockToReady(LReady, LBlock); ProcessToEverywhere(Mmemory, LBlock, LReady, LFinished, retRunning); PrintMemory(Mmemory, pOut); fprintf(pOut, "\n-----------------------Processos na Fila de Prontos-----------------------\n"); PrintListOfProcesses(LReady, pOut); fprintf(pOut, "\n-----------------------Processos em IO-----------------------\n"); PrintListOfProcesses(LBlock, pOut); fprintf(pOut, "\n-----------------------Processos Já Terminados-----------------------\n"); PrintListOfProcesses(LFinished, pOut); if ((Mmemory->LMemory->quantity == 0) && (LReady->quantity == 0) && (LBlock->quantity != 0)){ for (i = 0; i < LBlock->quantity; i++){ while (LBlock->proc[i].TimeIO[0] != 0){ ExecutaBloqueados(LBlock); Mmemory->LMemory->quantity = 1; } LBlock->proc[i].order = -1; LBlock->quantity = LBlock->quantity - 1; } } } printf("Tempo Decorrido: %ds\n", tempoTotal); fprintf(pOut, "Tempo Total Decorrido: %d\n", tempoTotal); printf("Counter: %d\n", counter); fprintf(pOut, "Numero de iterações do while: %d\n", counter); free(LReady->proc); free(LBlock->proc); free(LFinished->proc); free(LReady); free(LBlock); free(LFinished); free(Mmemory->LMemory->proc); free(Mmemory->LMemory); free(Mmemory); fclose(pEntry); fclose(pOut); return 0; }
void CCommandProcessor::ParseCommand ( const char * const cmdBegin, const uint64_t currentTime ) { const char * const cmdEnd = SkipCharsNotInSet( cmdBegin, SPACE_AND_TAB ); assert( cmdBegin != cmdEnd ); const char * const paramBegin = SkipCharsInSet( cmdEnd, SPACE_AND_TAB ); bool extraParamsFound = false; if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_QUESTION_MARK, true, false, &extraParamsFound ) || IsCmd( cmdBegin, cmdEnd, CMDNAME_HELP, false, false, &extraParamsFound ) ) { PrintStr( "This console is similar to the Bus Pirate console." EOL ); PrintStr( "Commands longer than 1 character are case insensitive." EOL ); PrintStr( "WARNING: If a command takes too long to run, the watchdog may reset the board." EOL ); PrintStr( "Commands are:" EOL ); Printf( " %s, %s: Show this help text." EOL, CMDNAME_QUESTION_MARK, CMDNAME_HELP ); Printf( " %s: Show version information." EOL, CMDNAME_I ); Printf( " %s: Test USB transfer speed." EOL, CMDNAME_USBSPEEDTEST ); Printf( " %s: Show JTAG pin status (read as inputs)." EOL, CMDNAME_JTAGPINS ); Printf( " %s: Test JTAG shift speed. WARNING: Do NOT connect any JTAG device." EOL, CMDNAME_JTAGSHIFTSPEEDTEST ); Printf( " %s: Exercises malloc()." EOL, CMDNAME_MALLOCTEST ); Printf( " %s: Exercises C++ exceptions." EOL, CMDNAME_CPP_EXCEPTION_TEST ); Printf( " %s: Shows memory usage." EOL, CMDNAME_MEMORY_USAGE ); Printf( " %s" EOL, CMDNAME_CPU_LOAD ); Printf( " %s" EOL, CMDNAME_UPTIME ); Printf( " %s" EOL, CMDNAME_RESET ); Printf( " %s" EOL, CMDNAME_RESET_CAUSE ); Printf( " %s <addr> <byte count>" EOL, CMDNAME_PRINT_MEMORY ); Printf( " %s <milliseconds>" EOL, CMDNAME_BUSY_WAIT ); Printf( " %s <command|protocol>" EOL, CMDNAME_SIMULATE_ERROR ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_I, true, false, &extraParamsFound ) ) { #ifndef NDEBUG const char buildType[] = "Debug build"; #else const char buildType[] = "Release build"; #endif Printf( "JtagDue %s" EOL, PACKAGE_VERSION ); Printf( "%s, compiler version %s" EOL, buildType, __VERSION__ ); Printf( "Watchdog %s" EOL, ENABLE_WDT ? "enabled" : "disabled" ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_RESET, false, false, &extraParamsFound ) ) { // This message does not reach the other side, we would need to add some delay. // UsbPrint( txBuffer, "Resetting the board..." EOL ); __disable_irq(); // Note that this message always goes to the serial port console, // even if the user is connected over USB. It might be possible to send // it over USB and then wait for the outgoing buffer to be empty. SerialSyncWriteStr( "Resetting the board..." EOL ); SerialWaitForDataSent(); ResetBoard( ENABLE_WDT ); assert( false ); // We should never reach this point. return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_CPU_LOAD, false, false, &extraParamsFound ) ) { if ( ENABLE_CPU_SLEEP ) PrintStr( "CPU load statistics not available." EOL ); else DisplayCpuLoad(); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_UPTIME, false, false, &extraParamsFound ) ) { char buffer[ CONVERT_TO_DEC_BUF_SIZE ]; Printf( "Uptime: %s seconds." EOL, convert_unsigned_to_dec_th( GetUptime() / 1000, buffer, ',' ) ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_RESET_CAUSE, false, false, &extraParamsFound ) ) { DisplayResetCause(); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_PRINT_MEMORY, false, true, &extraParamsFound ) ) { PrintMemory( paramBegin ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_BUSY_WAIT, false, true, &extraParamsFound ) ) { BusyWait( paramBegin ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_USBSPEEDTEST, false, true, &extraParamsFound ) ) { ProcessUsbSpeedTestCmd( paramBegin, currentTime ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_JTAGPINS, false, false, &extraParamsFound ) ) { PrintJtagPinStatus(); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_JTAGSHIFTSPEEDTEST, false, false, &extraParamsFound ) ) { if ( !IsNativeUsbPort() ) throw std::runtime_error( "This command is only available on the 'Native' USB port." ); // Fill the Rx buffer with some test data. assert( m_rxBuffer != NULL ); m_rxBuffer->Reset(); for ( uint32_t i = 0; !m_rxBuffer->IsFull(); ++i ) { m_rxBuffer->WriteElem( CUsbRxBuffer::ElemType( i ) ); } // If the mode is set to MODE_HIZ, you cannot see the generated signal with the oscilloscope. // Note also that the built-in pull-ups on the Atmel ATSAM3X8 are too weak (between 50 and 100 KOhm, // yields too slow a rising time) to be of any use. const bool oldPullUps = GetJtagPullups(); SetJtagPullups( false ); const JtagPinModeEnum oldMode = GetJtagPinMode(); SetJtagPinMode ( MODE_JTAG ); // Each JTAG transfer needs 2 bits in the Rx buffer, TMS and TDI, // but produces only 1 bit, TDO. const uint32_t jtagByteCount = m_rxBuffer->GetElemCount() / 2; const uint16_t bitCount = jtagByteCount * 8; // Shift all JTAG data through several times. const uint64_t startTime = GetUptime(); const uint32_t iterCount = 50; for ( uint32_t i = 0; i < iterCount; ++i ) { // We hope that this will not clear the buffer contents. assert( m_rxBuffer != NULL ); assert( m_txBuffer != NULL ); m_rxBuffer->Reset(); m_rxBuffer->CommitWrittenElements( jtagByteCount * 2 ); m_txBuffer->Reset(); ShiftJtagData( m_rxBuffer, m_txBuffer, bitCount ); assert( m_txBuffer->GetElemCount() == jtagByteCount ); } const uint64_t finishTime = GetUptime(); const uint32_t elapsedTime = uint32_t( finishTime - startTime ); m_rxBuffer->Reset(); m_txBuffer->Reset(); const unsigned kBitsPerSec = unsigned( uint64_t(bitCount) * iterCount * 1000 / elapsedTime / 1024 ); SetJtagPinMode( oldMode ); SetJtagPullups( oldPullUps ); // I am getting 221 KiB/s with GCC 4.7.3 and optimisation level "-O3". Printf( EOL "Finished JTAG shift speed test, throughput %u Kbits/s (%u KiB/s)." EOL, kBitsPerSec, kBitsPerSec / 8 ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_MALLOCTEST, false, false, &extraParamsFound ) ) { PrintStr( "Allocalling memory..." EOL ); volatile uint32_t * const volatile mallocTest = (volatile uint32_t *) malloc(123); *mallocTest = 123; PrintStr( "Releasing memory..." EOL ); free( const_cast< uint32_t * >( mallocTest ) ); PrintStr( "Test finished." EOL ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_CPP_EXCEPTION_TEST, false, false, &extraParamsFound ) ) { try { PrintStr( "Throwing integer exception..." EOL ); throw 123; PrintStr( "Throw did not work." EOL ); assert( false ); } catch ( ... ) { PrintStr( "Caught integer exception." EOL ); } PrintStr( "Test finished." EOL ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_SIMULATE_ERROR, false, true, &extraParamsFound ) ) { SimulateError( paramBegin ); return; } if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_MEMORY_USAGE, false, false, &extraParamsFound ) ) { const unsigned heapSize = unsigned( GetHeapEndAddr() - uintptr_t( &_end ) ); Printf( "Partitions: malloc heap: %u bytes, free: %u bytes, stack: %u bytes." EOL, heapSize, GetStackStartAddr() - GetHeapEndAddr(), STACK_SIZE ); Printf( "Used stack (estimated): %u from %u bytes." EOL, unsigned( GetStackSizeUsageEstimate() ), STACK_SIZE ); const struct mallinfo mi = mallinfo(); const unsigned heapSizeAccordingToNewlib = unsigned( mi.arena ); Printf( "Heap: %u allocated from %u bytes." EOL, unsigned( mi.uordblks ), unsigned( mi.arena ) ); assert( heapSize == heapSizeAccordingToNewlib ); UNUSED_IN_RELEASE( heapSizeAccordingToNewlib ); return; } if ( extraParamsFound ) Printf( "Command \"%.*s\" does not take any parameters." EOL, cmdEnd - cmdBegin, cmdBegin ); else Printf( "Unknown command \"%.*s\"." EOL, cmdEnd - cmdBegin, cmdBegin ); }
void Data_Grid_Copy_B2C_1(double *data_B, double *data_C) { static int firsttime=1; int CN,BN,LN,spin,i,gp,NN_S,NN_R; double *Work_Array_Snd_Grid_B2C; double *Work_Array_Rcv_Grid_B2C; int numprocs,myid,tag=999,ID,IDS,IDR; MPI_Status stat; MPI_Request request; MPI_Status *stat_send; MPI_Status *stat_recv; MPI_Request *request_send; MPI_Request *request_recv; MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); /* allocation of arrays */ Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]); Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]); if (firsttime==1){ PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Snd_Grid_B2C", sizeof(double)*GP_B2C_S[NN_B2C_S], NULL); PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Rcv_Grid_B2C", sizeof(double)*GP_B2C_R[NN_B2C_R], NULL); firsttime = 0; } /****************************************************** MPI: from the partitions B to C ******************************************************/ request_send = malloc(sizeof(MPI_Request)*NN_B2C_S); request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R); stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S); stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R); NN_S = 0; NN_R = 0; /* MPI_Irecv */ for (ID=0; ID<NN_B2C_R; ID++){ IDR = ID_NN_B2C_R[ID]; gp = GP_B2C_R[ID]; if (IDR!=myid){ MPI_Irecv( &Work_Array_Rcv_Grid_B2C[gp], Num_Rcv_Grid_B2C[IDR], MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]); NN_R++; } } /* MPI_Isend */ for (ID=0; ID<NN_B2C_S; ID++){ IDS = ID_NN_B2C_S[ID]; gp = GP_B2C_S[ID]; /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */ for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){ BN = Index_Snd_Grid_B2C[IDS][LN]; Work_Array_Snd_Grid_B2C[gp+LN] = data_B[BN]; } if (IDS!=myid){ MPI_Isend( &Work_Array_Snd_Grid_B2C[gp], Num_Snd_Grid_B2C[IDS], MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]); NN_S++; } } /* MPI_Waitall */ if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send); if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv); free(request_send); free(request_recv); free(stat_send); free(stat_recv); /* copy Work_Array_Rcv_Grid_B2C to data_C */ for (ID=0; ID<NN_B2C_R; ID++){ IDR = ID_NN_B2C_R[ID]; if (IDR==myid){ gp = GP_B2C_S[ID]; for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){ CN = Index_Rcv_Grid_B2C[IDR][LN]; data_C[CN] = Work_Array_Snd_Grid_B2C[gp+LN]; } } else{ gp = GP_B2C_R[ID]; for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){ CN = Index_Rcv_Grid_B2C[IDR][LN]; data_C[CN] = Work_Array_Rcv_Grid_B2C[gp+LN]; } } } /* freeing of arrays */ free(Work_Array_Snd_Grid_B2C); free(Work_Array_Rcv_Grid_B2C); }
void Data_Grid_Copy_B2C_2(double **data_B, double **data_C) { static int firsttime=1; int CN,BN,LN,spin,i,gp,NN_S,NN_R; double *Work_Array_Snd_Grid_B2C; double *Work_Array_Rcv_Grid_B2C; int numprocs,myid,tag=999,ID,IDS,IDR; MPI_Status stat; MPI_Request request; MPI_Status *stat_send; MPI_Status *stat_recv; MPI_Request *request_send; MPI_Request *request_recv; MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); /* allocation of arrays */ Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1)); Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1)); if (firsttime==1){ PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Snd_Grid_B2C", sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1), NULL); PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Rcv_Grid_B2C", sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1), NULL); firsttime = 0; } /****************************************************** MPI: from the partitions B to C ******************************************************/ request_send = malloc(sizeof(MPI_Request)*NN_B2C_S); request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R); stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S); stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R); NN_S = 0; NN_R = 0; /* MPI_Irecv */ for (ID=0; ID<NN_B2C_R; ID++){ IDR = ID_NN_B2C_R[ID]; gp = GP_B2C_R[ID]; if (IDR!=myid){ MPI_Irecv( &Work_Array_Rcv_Grid_B2C[(SpinP_switch+1)*gp], Num_Rcv_Grid_B2C[IDR]*(SpinP_switch+1), MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]); NN_R++; } } /* MPI_Isend */ for (ID=0; ID<NN_B2C_S; ID++){ IDS = ID_NN_B2C_S[ID]; gp = GP_B2C_S[ID]; /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */ for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){ BN = Index_Snd_Grid_B2C[IDS][LN]; if (SpinP_switch==0){ Work_Array_Snd_Grid_B2C[gp+LN] = data_B[0][BN]; } else if (SpinP_switch==1){ Work_Array_Snd_Grid_B2C[2*gp+2*LN+0] = data_B[0][BN]; Work_Array_Snd_Grid_B2C[2*gp+2*LN+1] = data_B[1][BN]; } else if (SpinP_switch==3){ Work_Array_Snd_Grid_B2C[4*gp+4*LN+0] = data_B[0][BN]; Work_Array_Snd_Grid_B2C[4*gp+4*LN+1] = data_B[1][BN]; Work_Array_Snd_Grid_B2C[4*gp+4*LN+2] = data_B[2][BN]; Work_Array_Snd_Grid_B2C[4*gp+4*LN+3] = data_B[3][BN]; } } /* LN */ if (IDS!=myid){ MPI_Isend( &Work_Array_Snd_Grid_B2C[(SpinP_switch+1)*gp], Num_Snd_Grid_B2C[IDS]*(SpinP_switch+1), MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]); NN_S++; } } /* MPI_Waitall */ if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send); if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv); free(request_send); free(request_recv); free(stat_send); free(stat_recv); /* copy Work_Array_Rcv_Grid_B2C to data_C */ for (ID=0; ID<NN_B2C_R; ID++){ IDR = ID_NN_B2C_R[ID]; if (IDR==myid){ gp = GP_B2C_S[ID]; for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){ CN = Index_Rcv_Grid_B2C[IDR][LN]; if (SpinP_switch==0){ data_C[0][CN] = Work_Array_Snd_Grid_B2C[gp+LN]; } else if (SpinP_switch==1){ data_C[0][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+0]; data_C[1][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+1]; } else if (SpinP_switch==3){ data_C[0][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+0]; data_C[1][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+1]; data_C[2][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+2]; data_C[3][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+3]; } } /* LN */ } else { gp = GP_B2C_R[ID]; for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){ CN = Index_Rcv_Grid_B2C[IDR][LN]; if (SpinP_switch==0){ data_C[0][CN] = Work_Array_Rcv_Grid_B2C[gp+LN]; } else if (SpinP_switch==1){ data_C[0][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+0]; data_C[1][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+1]; } else if (SpinP_switch==3){ data_C[0][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+0]; data_C[1][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+1]; data_C[2][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+2]; data_C[3][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+3]; } } } } /* if (SpinP_switch==0), copy data_B[0] to data_B[1] copy data_C[0] to data_C[1] */ if (SpinP_switch==0){ for (BN=0; BN<My_NumGridB_AB; BN++){ data_B[1][BN] = data_B[0][BN]; } for (CN=0; CN<My_NumGridC; CN++){ data_C[1][CN] = data_C[0][CN]; } } /* freeing of arrays */ free(Work_Array_Snd_Grid_B2C); free(Work_Array_Rcv_Grid_B2C); }
double Set_Density_Grid(int Cnt_kind, int Calc_CntOrbital_ON, double *****CDM) { static int firsttime=1; int al,L0,Mul0,M0,p,size1,size2; int Gc_AN,Mc_AN,Mh_AN,LN,AN,BN,CN; int n1,n2,n3,k1,k2,k3,N3[4]; int Cwan,NO0,NO1,Rn,N,Hwan,i,j,k,n; int NN_S,NN_R; unsigned long long int N2D,n2D,GN; int Max_Size,My_Max; int size_Tmp_Den_Grid; int size_Den_Snd_Grid_A2B; int size_Den_Rcv_Grid_A2B; int h_AN,Gh_AN,Rnh,spin,Nc,GRc,Nh,Nog; int Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3; double threshold; double tmp0,tmp1,sk1,sk2,sk3,tot_den,sum; double tmp0_0,tmp0_1,tmp0_2,tmp0_3; double sum_0,sum_1,sum_2,sum_3; double d1,d2,d3,cop,sip,sit,cot; double x,y,z,Cxyz[4]; double TStime,TEtime; double ***Tmp_Den_Grid; double **Den_Snd_Grid_A2B; double **Den_Rcv_Grid_A2B; double *tmp_array; double *tmp_array2; double *orbs0,*orbs1; double *orbs0_0,*orbs0_1,*orbs0_2,*orbs0_3; double *orbs1_0,*orbs1_1,*orbs1_2,*orbs1_3; double ***tmp_CDM; int *Snd_Size,*Rcv_Size; int numprocs,myid,tag=999,ID,IDS,IDR; double Stime_atom, Etime_atom; double time0,time1,time2; MPI_Status stat; MPI_Request request; MPI_Status *stat_send; MPI_Status *stat_recv; MPI_Request *request_send; MPI_Request *request_recv; /* for OpenMP */ int OMPID,Nthrds; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); dtime(&TStime); /* allocation of arrays */ size_Tmp_Den_Grid = 0; Tmp_Den_Grid = (double***)malloc(sizeof(double**)*(SpinP_switch+1)); for (i=0; i<(SpinP_switch+1); i++){ Tmp_Den_Grid[i] = (double**)malloc(sizeof(double*)*(Matomnum+1)); Tmp_Den_Grid[i][0] = (double*)malloc(sizeof(double)*1); for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = F_M2G[Mc_AN]; Tmp_Den_Grid[i][Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]); /* AITUNE */ for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){ Tmp_Den_Grid[i][Mc_AN][Nc] = 0.0; } size_Tmp_Den_Grid += GridN_Atom[Gc_AN]; } } size_Den_Snd_Grid_A2B = 0; Den_Snd_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs); for (ID=0; ID<numprocs; ID++){ Den_Snd_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Snd_Grid_A2B[ID]*(SpinP_switch+1)); size_Den_Snd_Grid_A2B += Num_Snd_Grid_A2B[ID]*(SpinP_switch+1); } size_Den_Rcv_Grid_A2B = 0; Den_Rcv_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs); for (ID=0; ID<numprocs; ID++){ Den_Rcv_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1)); size_Den_Rcv_Grid_A2B += Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1); } /* PrintMemory */ if (firsttime==1){ PrintMemory("Set_Density_Grid: AtomDen_Grid", sizeof(double)*size_Tmp_Den_Grid, NULL); PrintMemory("Set_Density_Grid: Den_Snd_Grid_A2B",sizeof(double)*size_Den_Snd_Grid_A2B, NULL); PrintMemory("Set_Density_Grid: Den_Rcv_Grid_A2B",sizeof(double)*size_Den_Rcv_Grid_A2B, NULL); firsttime = 0; } /**************************************************** when orbital optimization ****************************************************/ if (Calc_CntOrbital_ON==1 && Cnt_kind==0 && Cnt_switch==1){ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ dtime(&Stime_atom); /* COrbs_Grid */ Gc_AN = M2G[Mc_AN]; Cwan = WhatSpecies[Gc_AN]; NO0 = Spe_Total_CNO[Cwan]; for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){ al = -1; for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){ for (Mul0=0; Mul0<Spe_Num_CBasis[Cwan][L0]; Mul0++){ for (M0=0; M0<=2*L0; M0++){ al++; tmp0 = 0.0; for (p=0; p<Spe_Specified_Num[Cwan][al]; p++){ j = Spe_Trans_Orbital[Cwan][al][p]; tmp0 += CntCoes[Mc_AN][al][p]*Orbs_Grid[Mc_AN][Nc][j];/* AITUNE */ } COrbs_Grid[Mc_AN][al][Nc] = (Type_Orbs_Grid)tmp0; } } } } dtime(&Etime_atom); time_per_atom[Gc_AN] += Etime_atom - Stime_atom; } /********************************************** MPI: COrbs_Grid ***********************************************/ /* allocation of arrays */ Snd_Size = (int*)malloc(sizeof(int)*numprocs); Rcv_Size = (int*)malloc(sizeof(int)*numprocs); /* find data size for sending and receiving */ My_Max = -10000; for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /* sending size */ if (F_Snd_Num[IDS]!=0){ /* find data size */ size1 = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Gc_AN = Snd_GAN[IDS][n]; Cwan = WhatSpecies[Gc_AN]; size1 += GridN_Atom[Gc_AN]*Spe_Total_CNO[Cwan]; } Snd_Size[IDS] = size1; MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request); } else{ Snd_Size[IDS] = 0; } /* receiving size */ if (F_Rcv_Num[IDR]!=0){ MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat); Rcv_Size[IDR] = size2; } else{ Rcv_Size[IDR] = 0; } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } else{ Snd_Size[IDS] = 0; Rcv_Size[IDR] = 0; } if (My_Max<Snd_Size[IDS]) My_Max = Snd_Size[IDS]; if (My_Max<Rcv_Size[IDR]) My_Max = Rcv_Size[IDR]; } MPI_Allreduce(&My_Max, &Max_Size, 1, MPI_INT, MPI_MAX, mpi_comm_level1); /* allocation of arrays */ tmp_array = (double*)malloc(sizeof(double)*Max_Size); tmp_array2 = (double*)malloc(sizeof(double)*Max_Size); /* send and receive COrbs_Grid */ for (ID=0; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (ID!=0){ /* sending of data */ if (F_Snd_Num[IDS]!=0){ /* find data size */ size1 = Snd_Size[IDS]; /* multidimentional array to vector array */ k = 0; for (n=0; n<F_Snd_Num[IDS]; n++){ Mc_AN = Snd_MAN[IDS][n]; Gc_AN = Snd_GAN[IDS][n]; Cwan = WhatSpecies[Gc_AN]; NO0 = Spe_Total_CNO[Cwan]; for (i=0; i<NO0; i++){ for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){ tmp_array[k] = COrbs_Grid[Mc_AN][i][Nc]; k++; } } } /* MPI_Isend */ MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request); } /* receiving of block data */ if (F_Rcv_Num[IDR]!=0){ /* find data size */ size2 = Rcv_Size[IDR]; /* MPI_Recv */ MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat); k = 0; Mc_AN = F_TopMAN[IDR] - 1; for (n=0; n<F_Rcv_Num[IDR]; n++){ Mc_AN++; Gc_AN = Rcv_GAN[IDR][n]; Cwan = WhatSpecies[Gc_AN]; NO0 = Spe_Total_CNO[Cwan]; for (i=0; i<NO0; i++){ for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){ COrbs_Grid[Mc_AN][i][Nc] = tmp_array2[k]; k++; } } } } if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat); } } /* freeing of arrays */ free(tmp_array); free(tmp_array2); free(Snd_Size); free(Rcv_Size); } /********************************************** calculate Tmp_Den_Grid ***********************************************/ dtime(&time1); /* AITUNE ========================== */ int OneD_Nloop = 0; int ai_MaxNc = 0; for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ int Gc_AN = M2G[Mc_AN]; for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ OneD_Nloop++; if(ai_MaxNc < GridN_Atom[Gc_AN]) {ai_MaxNc = GridN_Atom[Gc_AN];} } } /* ai_MaxNc is maximum of GridN_Atom[] */ int gNthrds; #pragma omp parallel { gNthrds = omp_get_num_threads(); } double*** ai_tmpDG_all = (double***)malloc(sizeof(double**)*gNthrds); /* ========================== AITUNE */ #pragma omp parallel shared(myid,G2ID,Orbs_Grid_FNAN,List_YOUSO,time_per_atom,Tmp_Den_Grid,Orbs_Grid,COrbs_Grid,Cnt_switch,Cnt_kind,GListTAtoms2,GListTAtoms1,NumOLG,CDM,SpinP_switch,WhatSpecies,ncn,F_G2M,natn,Spe_Total_CNO,M2G) private(OMPID,Nthrds,Mc_AN,h_AN,Stime_atom,Etime_atom,Gc_AN,Cwan,NO0,Gh_AN,Mh_AN,Rnh,Hwan,NO1,spin,i,j,tmp_CDM,Nog,Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3,orbs0_0,orbs0_1,orbs0_2,orbs0_3,orbs1_0,orbs1_1,orbs1_2,orbs1_3,sum_0,sum_1,sum_2,sum_3,tmp0_0,tmp0_1,tmp0_2,tmp0_3,Nc,Nh,orbs0,orbs1,sum,tmp0) { orbs0 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs1 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs0_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs0_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs0_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs0_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs1_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs1_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs1_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]); orbs1_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]); tmp_CDM = (double***)malloc(sizeof(double**)*(SpinP_switch+1)); for (i=0; i<(SpinP_switch+1); i++){ tmp_CDM[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]); for (j=0; j<List_YOUSO[7]; j++){ tmp_CDM[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]); } } /* get info. on OpenMP */ OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); /* AITUNE ========================== */ double *ai_tmpDGs[4]; { int spin; for (spin=0; spin<=SpinP_switch; spin++){ ai_tmpDGs[spin] = (double*)malloc(sizeof(double)* ai_MaxNc); } } ai_tmpDG_all[OMPID] = ai_tmpDGs; /* ==================================== AITUNE */ /* for (Mc_AN=(OMPID+1); Mc_AN<=Matomnum; Mc_AN+=Nthrds){ AITUNE */ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ dtime(&Stime_atom); /* set data on Mc_AN */ Gc_AN = M2G[Mc_AN]; Cwan = WhatSpecies[Gc_AN]; NO0 = Spe_Total_CNO[Cwan]; int spin; for (spin=0; spin<=SpinP_switch; spin++){ int Nc; for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){ ai_tmpDGs[spin][Nc] = 0.0; } } for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){ /* set data on h_AN */ Gh_AN = natn[Gc_AN][h_AN]; Mh_AN = F_G2M[Gh_AN]; Rnh = ncn[Gc_AN][h_AN]; Hwan = WhatSpecies[Gh_AN]; NO1 = Spe_Total_CNO[Hwan]; /* store CDM into tmp_CDM */ for (spin=0; spin<=SpinP_switch; spin++){ for (i=0; i<NO0; i++){ for (j=0; j<NO1; j++){ tmp_CDM[spin][i][j] = CDM[spin][Mc_AN][h_AN][i][j]; } } } /* summation of non-zero elements */ /* for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]; Nog++){ */ #pragma omp for for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]-3; Nog+=4){ Nc_0 = GListTAtoms1[Mc_AN][h_AN][Nog]; Nc_1 = GListTAtoms1[Mc_AN][h_AN][Nog+1]; Nc_2 = GListTAtoms1[Mc_AN][h_AN][Nog+2]; Nc_3 = GListTAtoms1[Mc_AN][h_AN][Nog+3]; Nh_0 = GListTAtoms2[Mc_AN][h_AN][Nog]; Nh_1 = GListTAtoms2[Mc_AN][h_AN][Nog+1]; Nh_2 = GListTAtoms2[Mc_AN][h_AN][Nog+2]; Nh_3 = GListTAtoms2[Mc_AN][h_AN][Nog+3]; /* Now under the orbital optimization */ if (Cnt_kind==0 && Cnt_switch==1){ for (i=0; i<NO0; i++){ orbs0_0[i] = COrbs_Grid[Mc_AN][i][Nc_0]; orbs0_1[i] = COrbs_Grid[Mc_AN][i][Nc_1]; orbs0_2[i] = COrbs_Grid[Mc_AN][i][Nc_2]; orbs0_3[i] = COrbs_Grid[Mc_AN][i][Nc_3]; } for (j=0; j<NO1; j++){ orbs1_0[j] = COrbs_Grid[Mh_AN][j][Nh_0]; orbs1_1[j] = COrbs_Grid[Mh_AN][j][Nh_1]; orbs1_2[j] = COrbs_Grid[Mh_AN][j][Nh_2]; orbs1_3[j] = COrbs_Grid[Mh_AN][j][Nh_3]; } } /* else if ! "now under the orbital optimization" */ else{ for (i=0; i<NO0; i++){ orbs0_0[i] = Orbs_Grid[Mc_AN][Nc_0][i]; orbs0_1[i] = Orbs_Grid[Mc_AN][Nc_1][i]; orbs0_2[i] = Orbs_Grid[Mc_AN][Nc_2][i]; orbs0_3[i] = Orbs_Grid[Mc_AN][Nc_3][i]; } if (G2ID[Gh_AN]==myid){ for (j=0; j<NO1; j++){ orbs1_0[j] = Orbs_Grid[Mh_AN][Nh_0][j]; orbs1_1[j] = Orbs_Grid[Mh_AN][Nh_1][j]; orbs1_2[j] = Orbs_Grid[Mh_AN][Nh_2][j]; orbs1_3[j] = Orbs_Grid[Mh_AN][Nh_3][j]; } } else{ for (j=0; j<NO1; j++){ orbs1_0[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog ][j]; orbs1_1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+1][j]; orbs1_2[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+2][j]; orbs1_3[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+3][j]; } } } for (spin=0; spin<=SpinP_switch; spin++){ /* Tmp_Den_Grid */ sum_0 = 0.0; sum_1 = 0.0; sum_2 = 0.0; sum_3 = 0.0; for (i=0; i<NO0; i++){ tmp0_0 = 0.0; tmp0_1 = 0.0; tmp0_2 = 0.0; tmp0_3 = 0.0; for (j=0; j<NO1; j++){ tmp0_0 += orbs1_0[j]*tmp_CDM[spin][i][j]; tmp0_1 += orbs1_1[j]*tmp_CDM[spin][i][j]; tmp0_2 += orbs1_2[j]*tmp_CDM[spin][i][j]; tmp0_3 += orbs1_3[j]*tmp_CDM[spin][i][j]; } sum_0 += orbs0_0[i]*tmp0_0; sum_1 += orbs0_1[i]*tmp0_1; sum_2 += orbs0_2[i]*tmp0_2; sum_3 += orbs0_3[i]*tmp0_3; } ai_tmpDGs[spin][Nc_0] += sum_0; ai_tmpDGs[spin][Nc_1] += sum_1; ai_tmpDGs[spin][Nc_2] += sum_2; ai_tmpDGs[spin][Nc_3] += sum_3; /* Tmp_Den_Grid[spin][Mc_AN][Nc_0] += sum_0; Tmp_Den_Grid[spin][Mc_AN][Nc_1] += sum_1; Tmp_Den_Grid[spin][Mc_AN][Nc_2] += sum_2; Tmp_Den_Grid[spin][Mc_AN][Nc_3] += sum_3; */ } /* spin */ } /* Nog */ #pragma omp for for (Nog = NumOLG[Mc_AN][h_AN] - (NumOLG[Mc_AN][h_AN] % 4); Nog<NumOLG[Mc_AN][h_AN]; Nog++){ /*for (; Nog<NumOLG[Mc_AN][h_AN]; Nog++){*/ Nc = GListTAtoms1[Mc_AN][h_AN][Nog]; Nh = GListTAtoms2[Mc_AN][h_AN][Nog]; if (Cnt_kind==0 && Cnt_switch==1){ for (i=0; i<NO0; i++){ orbs0[i] = COrbs_Grid[Mc_AN][i][Nc]; } for (j=0; j<NO1; j++){ orbs1[j] = COrbs_Grid[Mh_AN][j][Nh]; } } else{ for (i=0; i<NO0; i++){ orbs0[i] = Orbs_Grid[Mc_AN][Nc][i]; } if (G2ID[Gh_AN]==myid){ for (j=0; j<NO1; j++){ orbs1[j] = Orbs_Grid[Mh_AN][Nh][j]; } } else{ for (j=0; j<NO1; j++){ orbs1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog][j]; } } } for (spin=0; spin<=SpinP_switch; spin++){ sum = 0.0; for (i=0; i<NO0; i++){ tmp0 = 0.0; for (j=0; j<NO1; j++){ tmp0 += orbs1[j]*tmp_CDM[spin][i][j]; } sum += orbs0[i]*tmp0; } ai_tmpDGs[spin][Nc] += sum; /*Tmp_Den_Grid[spin][Mc_AN][Nc] += sum;*/ } } /* Nog */ } /* h_AN */ /* AITUNE merge temporary buffer for all omp threads */ for (spin=0; spin<=SpinP_switch; spin++){ int Nc; #pragma omp for for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){ double sum = 0.0; int th; for(th = 0; th < Nthrds; th++){ sum += ai_tmpDG_all[th][spin][Nc]; } Tmp_Den_Grid[spin][Mc_AN][Nc] += sum; } } dtime(&Etime_atom); time_per_atom[Gc_AN] += Etime_atom - Stime_atom; } /* Mc_AN */ /* freeing of arrays */ free(orbs0); free(orbs1); free(orbs0_0); free(orbs0_1); free(orbs0_2); free(orbs0_3); free(orbs1_0); free(orbs1_1); free(orbs1_2); free(orbs1_3); for (i=0; i<(SpinP_switch+1); i++){ for (j=0; j<List_YOUSO[7]; j++){ free(tmp_CDM[i][j]); } free(tmp_CDM[i]); free(ai_tmpDGs[i]); /* AITUNE */ } free(tmp_CDM); #pragma omp flush(Tmp_Den_Grid) } /* #pragma omp parallel */ free(ai_tmpDG_all); dtime(&time2); if(myid==0 && measure_time){ printf("Time for Part1=%18.5f\n",(time2-time1));fflush(stdout); } /****************************************************** MPI communication from the partitions A to B ******************************************************/ /* copy Tmp_Den_Grid to Den_Snd_Grid_A2B */ for (ID=0; ID<numprocs; ID++) Num_Snd_Grid_A2B[ID] = 0; N2D = Ngrid1*Ngrid2; for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; for (AN=0; AN<GridN_Atom[Gc_AN]; AN++){ GN = GridListAtom[Mc_AN][AN]; GN2N(GN,N3); n2D = N3[1]*Ngrid2 + N3[2]; ID = (int)(n2D*(unsigned long long int)numprocs/N2D); if (SpinP_switch==0){ Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]] = Tmp_Den_Grid[0][Mc_AN][AN]; } else if (SpinP_switch==1){ Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+0] = Tmp_Den_Grid[0][Mc_AN][AN]; Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+1] = Tmp_Den_Grid[1][Mc_AN][AN]; } else if (SpinP_switch==3){ Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+0] = Tmp_Den_Grid[0][Mc_AN][AN]; Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+1] = Tmp_Den_Grid[1][Mc_AN][AN]; Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+2] = Tmp_Den_Grid[2][Mc_AN][AN]; Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+3] = Tmp_Den_Grid[3][Mc_AN][AN]; } Num_Snd_Grid_A2B[ID]++; } } /* MPI: A to B */ request_send = malloc(sizeof(MPI_Request)*NN_A2B_S); request_recv = malloc(sizeof(MPI_Request)*NN_A2B_R); stat_send = malloc(sizeof(MPI_Status)*NN_A2B_S); stat_recv = malloc(sizeof(MPI_Status)*NN_A2B_R); NN_S = 0; NN_R = 0; tag = 999; for (ID=1; ID<numprocs; ID++){ IDS = (myid + ID) % numprocs; IDR = (myid - ID + numprocs) % numprocs; if (Num_Snd_Grid_A2B[IDS]!=0){ MPI_Isend( &Den_Snd_Grid_A2B[IDS][0], Num_Snd_Grid_A2B[IDS]*(SpinP_switch+1), MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]); NN_S++; } if (Num_Rcv_Grid_A2B[IDR]!=0){ MPI_Irecv( &Den_Rcv_Grid_A2B[IDR][0], Num_Rcv_Grid_A2B[IDR]*(SpinP_switch+1), MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]); NN_R++; } } if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send); if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv); free(request_send); free(request_recv); free(stat_send); free(stat_recv); /* for myid */ for (i=0; i<Num_Rcv_Grid_A2B[myid]*(SpinP_switch+1); i++){ Den_Rcv_Grid_A2B[myid][i] = Den_Snd_Grid_A2B[myid][i]; } /****************************************************** superposition of rho_i to calculate charge density in the partition B. ******************************************************/ /* initialize arrays */ for (spin=0; spin<(SpinP_switch+1); spin++){ for (BN=0; BN<My_NumGridB_AB; BN++){ Density_Grid_B[spin][BN] = 0.0; } } /* superposition of densities rho_i */ for (ID=0; ID<numprocs; ID++){ for (LN=0; LN<Num_Rcv_Grid_A2B[ID]; LN++){ BN = Index_Rcv_Grid_A2B[ID][3*LN+0]; Gc_AN = Index_Rcv_Grid_A2B[ID][3*LN+1]; GRc = Index_Rcv_Grid_A2B[ID][3*LN+2]; if (Solver!=4 || (Solver==4 && atv_ijk[GRc][1]==0 )){ /* spin collinear non-polarization */ if ( SpinP_switch==0 ){ Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN]; } /* spin collinear polarization */ else if ( SpinP_switch==1 ){ Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*2 ]; Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*2+1]; } /* spin non-collinear */ else if ( SpinP_switch==3 ){ Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*4 ]; Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*4+1]; Density_Grid_B[2][BN] += Den_Rcv_Grid_A2B[ID][LN*4+2]; Density_Grid_B[3][BN] += Den_Rcv_Grid_A2B[ID][LN*4+3]; } } /* if (Solve!=4.....) */ } /* AN */ } /* ID */ /**************************************************** Conjugate complex of Density_Grid[3][MN] due to difference in the definition between density matrix and charge density ****************************************************/ if (SpinP_switch==3){ for (BN=0; BN<My_NumGridB_AB; BN++){ Density_Grid_B[3][BN] = -Density_Grid_B[3][BN]; } } /****************************************************** MPI: from the partitions B to D ******************************************************/ Density_Grid_Copy_B2D(); /* freeing of arrays */ for (i=0; i<(SpinP_switch+1); i++){ for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){ free(Tmp_Den_Grid[i][Mc_AN]); } free(Tmp_Den_Grid[i]); } free(Tmp_Den_Grid); for (ID=0; ID<numprocs; ID++){ free(Den_Snd_Grid_A2B[ID]); } free(Den_Snd_Grid_A2B); for (ID=0; ID<numprocs; ID++){ free(Den_Rcv_Grid_A2B[ID]); } free(Den_Rcv_Grid_A2B); /* elapsed time */ dtime(&TEtime); time0 = TEtime - TStime; if(myid==0 && measure_time) printf("time0=%18.5f\n",time0); return time0; }