void CObjects::DumpIndex(void)
{
	CChars				sz;

	sz.Init("-------------------------- Indices -------------------------- \n");
	PrintMemory(&sz);
	sz.Append("------------------------------------------------------------ \n");
	sz.Dump();
	sz.Kill();
}
void CObjects::ValidateEmpty(void)
{
	OIndex	iNumIndexed;

	iNumIndexed = mcMemory.NumIndexed();
	if (iNumIndexed != 0)
	{
		CChars				sz;

		sz.Init("\n");
		sz.Append("Memory not empty.  ");
		sz.Append(iNumIndexed);
		sz.Append(" objects are still indexed.\n");
		PrintMemory(&sz);
		gcLogger.Error(sz.Text());
		sz.Kill();
	}
}
Example #3
0
void Hamiltonian_Band_NC(int Host_ID1,
                         double *****RH, double *****IH,
                         dcomplex **H, int *MP,
                         double k1, double k2, double k3)
{
    static int firsttime=1;
    int i,j,k,wanA,wanB,tnoA,tnoB,Anum,Bnum;
    int NUM,MA_AN,GA_AN,LB_AN,GB_AN;
    int l1,l2,l3,Rn,n2;
    double *tmp_array1,*tmp_array2;
    double **H11r,**H11i;
    double **H22r,**H22i;
    double **H12r,**H12i;
    double kRn,si,co,h;
    int ID,myid,numprocs,tag=999;

    MPI_Status stat;
    MPI_Request request;

    /* MPI */
    MPI_Comm_size(mpi_comm_level1,&numprocs);
    MPI_Comm_rank(mpi_comm_level1,&myid);
    MPI_Barrier(mpi_comm_level1);

    /* set MP */
    Anum = 1;
    for (i=1; i<=atomnum; i++) {
        MP[i] = Anum;
        wanA = WhatSpecies[i];
        tnoA = Spe_Total_CNO[wanA];
        Anum = Anum + tnoA;
    }
    NUM = Anum - 1;
    n2 = NUM + 2;

    /*******************************************
     allocation of H11r, H11i,
                   H22r, H22i,
                   H12r, H12i
    *******************************************/

    H11r = (double**)malloc(sizeof(double*)*n2);
    for (i=0; i<n2; i++) {
        H11r[i] = (double*)malloc(sizeof(double)*n2);
    }

    H11i = (double**)malloc(sizeof(double*)*n2);
    for (i=0; i<n2; i++) {
        H11i[i] = (double*)malloc(sizeof(double)*n2);
    }

    H22r = (double**)malloc(sizeof(double*)*n2);
    for (i=0; i<n2; i++) {
        H22r[i] = (double*)malloc(sizeof(double)*n2);
    }

    H22i = (double**)malloc(sizeof(double*)*n2);
    for (i=0; i<n2; i++) {
        H22i[i] = (double*)malloc(sizeof(double)*n2);
    }

    H12r = (double**)malloc(sizeof(double*)*n2);
    for (i=0; i<n2; i++) {
        H12r[i] = (double*)malloc(sizeof(double)*n2);
    }

    H12i = (double**)malloc(sizeof(double*)*n2);
    for (i=0; i<n2; i++) {
        H12i[i] = (double*)malloc(sizeof(double)*n2);
    }

    /****************************************************
     PrintMemory
    ****************************************************/

    if (firsttime) {
        PrintMemory("Hamiltonian_Band: H11r",sizeof(double)*n2*n2,NULL);
        PrintMemory("Hamiltonian_Band: H11i",sizeof(double)*n2*n2,NULL);
        PrintMemory("Hamiltonian_Band: H22r",sizeof(double)*n2*n2,NULL);
        PrintMemory("Hamiltonian_Band: H22i",sizeof(double)*n2*n2,NULL);
        PrintMemory("Hamiltonian_Band: H12r",sizeof(double)*n2*n2,NULL);
        PrintMemory("Hamiltonian_Band: H12i",sizeof(double)*n2*n2,NULL);
    }

    /* for PrintMemory */
    firsttime=0;

    /****************************************************
                      set Hamiltonian
    ****************************************************/

    H[0][0].r = 2.0*NUM;
    for (i=1; i<=NUM; i++) {
        for (j=1; j<=NUM; j++) {
            H11r[i][j] = 0.0;
            H11i[i][j] = 0.0;
            H22r[i][j] = 0.0;
            H22i[i][j] = 0.0;
            H12r[i][j] = 0.0;
            H12i[i][j] = 0.0;
        }
    }

    for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
        GA_AN = M2G[MA_AN];
        wanA = WhatSpecies[GA_AN];
        tnoA = Spe_Total_CNO[wanA];
        Anum = MP[GA_AN];

        for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++) {
            GB_AN = natn[GA_AN][LB_AN];
            Rn = ncn[GA_AN][LB_AN];
            wanB = WhatSpecies[GB_AN];
            tnoB = Spe_Total_CNO[wanB];

            /*
            kRn = k1*( (double)atv_ijk[Rn][1] + Cell_Gxyz[GB_AN][1] - Cell_Gxyz[GA_AN][1] )
                + k2*( (double)atv_ijk[Rn][2] + Cell_Gxyz[GB_AN][2] - Cell_Gxyz[GA_AN][2] )
                + k3*( (double)atv_ijk[Rn][3] + Cell_Gxyz[GB_AN][3] - Cell_Gxyz[GA_AN][3] );
            */

            l1 = atv_ijk[Rn][1];
            l2 = atv_ijk[Rn][2];
            l3 = atv_ijk[Rn][3];
            kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;

            si = sin(2.0*PI*kRn);
            co = cos(2.0*PI*kRn);
            Bnum = MP[GB_AN];

            /* non-spin-orbit coupling and non-LDA+U */
            if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
                    && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0) {

                for (i=0; i<=(tnoA-1); i++) {
                    for (j=0; j<=(tnoB-1); j++) {
                        H11r[Anum+i][Bnum+j] += co*RH[0][MA_AN][LB_AN][i][j];
                        H11i[Anum+i][Bnum+j] += si*RH[0][MA_AN][LB_AN][i][j];
                        H22r[Anum+i][Bnum+j] += co*RH[1][MA_AN][LB_AN][i][j];
                        H22i[Anum+i][Bnum+j] += si*RH[1][MA_AN][LB_AN][i][j];
                        H12r[Anum+i][Bnum+j] += co*RH[2][MA_AN][LB_AN][i][j] - si*RH[3][MA_AN][LB_AN][i][j];
                        H12i[Anum+i][Bnum+j] += si*RH[2][MA_AN][LB_AN][i][j] + co*RH[3][MA_AN][LB_AN][i][j];
                    }
                }
            }

            /* spin-orbit coupling or LDA+U */
            else {
                for (i=0; i<=(tnoA-1); i++) {
                    for (j=0; j<=(tnoB-1); j++) {
                        H11r[Anum+i][Bnum+j] += co*RH[0][MA_AN][LB_AN][i][j] - si*IH[0][MA_AN][LB_AN][i][j];
                        H11i[Anum+i][Bnum+j] += si*RH[0][MA_AN][LB_AN][i][j] + co*IH[0][MA_AN][LB_AN][i][j];
                        H22r[Anum+i][Bnum+j] += co*RH[1][MA_AN][LB_AN][i][j] - si*IH[1][MA_AN][LB_AN][i][j];
                        H22i[Anum+i][Bnum+j] += si*RH[1][MA_AN][LB_AN][i][j] + co*IH[1][MA_AN][LB_AN][i][j];
                        H12r[Anum+i][Bnum+j] += co*RH[2][MA_AN][LB_AN][i][j] - si*(RH[3][MA_AN][LB_AN][i][j]
                                                + IH[2][MA_AN][LB_AN][i][j]);
                        H12i[Anum+i][Bnum+j] += si*RH[2][MA_AN][LB_AN][i][j] + co*(RH[3][MA_AN][LB_AN][i][j]
                                                + IH[2][MA_AN][LB_AN][i][j]);
                    }
                }
            }

        }
    }

    /******************************************************
      MPI: H11r, H11i
           H22r, H22i
           H12r, H12i
    ******************************************************/

    tmp_array1 = (double*)malloc(sizeof(double)*6*n2*List_YOUSO[7]);
    tmp_array2 = (double*)malloc(sizeof(double)*6*n2*List_YOUSO[7]);

    if (myid!=Host_ID1) {

        for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
            GA_AN = M2G[MA_AN];
            wanA = WhatSpecies[GA_AN];
            tnoA = Spe_Total_CNO[wanA];
            Anum = MP[GA_AN];

            k = 0;

            for (i=0; i<tnoA; i++) {
                for (j=0; j<n2; j++) {
                    tmp_array1[k] = H11r[Anum+i][j];
                    k++;
                }
            }

            for (i=0; i<tnoA; i++) {
                for (j=0; j<n2; j++) {
                    tmp_array1[k] = H11i[Anum+i][j];
                    k++;
                }
            }

            for (i=0; i<tnoA; i++) {
                for (j=0; j<n2; j++) {
                    tmp_array1[k] = H22r[Anum+i][j];
                    k++;
                }
            }

            for (i=0; i<tnoA; i++) {
                for (j=0; j<n2; j++) {
                    tmp_array1[k] = H22i[Anum+i][j];
                    k++;
                }
            }

            for (i=0; i<tnoA; i++) {
                for (j=0; j<n2; j++) {
                    tmp_array1[k] = H12r[Anum+i][j];
                    k++;
                }
            }

            for (i=0; i<tnoA; i++) {
                for (j=0; j<n2; j++) {
                    tmp_array1[k] = H12i[Anum+i][j];
                    k++;
                }
            }

            tag = 999;
            MPI_Isend(&tmp_array1[0], 6*tnoA*n2, MPI_DOUBLE, Host_ID1,
                      tag, mpi_comm_level1, &request);
            MPI_Wait(&request,&stat);

        }

    }

    else {

        for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
            wanA = WhatSpecies[GA_AN];
            tnoA = Spe_Total_CNO[wanA];
            Anum = MP[GA_AN];
            ID = G2ID[GA_AN];
            if (ID!=Host_ID1) {

                tag = 999;
                MPI_Recv(&tmp_array2[0], 6*tnoA*n2, MPI_DOUBLE, ID, tag, mpi_comm_level1, &stat);

                k = 0;

                for (i=0; i<tnoA; i++) {
                    for (j=0; j<n2; j++) {
                        H11r[Anum+i][j] = tmp_array2[k];
                        k++;
                    }
                }

                for (i=0; i<tnoA; i++) {
                    for (j=0; j<n2; j++) {
                        H11i[Anum+i][j] = tmp_array2[k];
                        k++;
                    }
                }

                for (i=0; i<tnoA; i++) {
                    for (j=0; j<n2; j++) {
                        H22r[Anum+i][j] = tmp_array2[k];
                        k++;
                    }
                }

                for (i=0; i<tnoA; i++) {
                    for (j=0; j<n2; j++) {
                        H22i[Anum+i][j] = tmp_array2[k];
                        k++;
                    }
                }

                for (i=0; i<tnoA; i++) {
                    for (j=0; j<n2; j++) {
                        H12r[Anum+i][j] = tmp_array2[k];
                        k++;
                    }
                }

                for (i=0; i<tnoA; i++) {
                    for (j=0; j<n2; j++) {
                        H12i[Anum+i][j] = tmp_array2[k];
                        k++;
                    }
                }

            }
        }

    }

    free(tmp_array1);
    free(tmp_array2);

    /******************************************************
      the full complex matrix of H
    ******************************************************/

    for (i=1; i<=NUM; i++) {
        for (j=1; j<=NUM; j++) {
            H[i    ][j    ].r =  H11r[i][j];
            H[i    ][j    ].i =  H11i[i][j];
            H[i+NUM][j+NUM].r =  H22r[i][j];
            H[i+NUM][j+NUM].i =  H22i[i][j];
            H[i    ][j+NUM].r =  H12r[i][j];
            H[i    ][j+NUM].i =  H12i[i][j];
            H[j+NUM][i    ].r =  H[i][j+NUM].r;
            H[j+NUM][i    ].i = -H[i][j+NUM].i;
        }
    }

    /****************************************************
                         free arrays
    ****************************************************/

    for (i=0; i<n2; i++) {
        free(H11r[i]);
    }
    free(H11r);

    for (i=0; i<n2; i++) {
        free(H11i[i]);
    }
    free(H11i);

    for (i=0; i<n2; i++) {
        free(H22r[i]);
    }
    free(H22r);

    for (i=0; i<n2; i++) {
        free(H22i[i]);
    }
    free(H22i);

    for (i=0; i<n2; i++) {
        free(H12r[i]);
    }
    free(H12r);

    for (i=0; i<n2; i++) {
        free(H12i[i]);
    }
    free(H12i);

}
Example #4
0
void Taylor(int Taylor_switch, double ****OLP, double ****IOLP)
{
  static int firsttime=1;
  static int ct_AN,fan,san,can,ig,ian,j,jan,m,n,order;
  static int k,kg,kan,kl,l,jg,wan,tno0,tno1,i,ino1,p,q,jl;
  static int h_AN,Gh_AN,Hwan,Cwan,size_O,size_OP;

  static double dum,sum,dum1,dum2;
  static double ****O;
  static double ****OP;
  static double ****ON;

  /****************************************************
    allocation of arrays:

    static double O[atomnum+1]
                   [FNAN+1]
                   [Spe_Total_NO[Cwan]]
                   [Spe_Total_NO[Hwan]] 

    static double OP[atomnum+1]
                    [FNAN+SNAN+1]
                    [Spe_Total_NO[Cwan]]
                    [Spe_Total_NO[Hwan]] 

    static double ON[atomnum+1]
                    [FNAN+SNAN+1]
                    [Spe_Total_NO[Cwan]]
                    [Spe_Total_NO[Hwan]] 
  ****************************************************/

  /* O */

  FNAN[0] = 0;
  SNAN[0] = 0;
  size_O = 0;

  O = (double****)malloc(sizeof(double***)*(atomnum+1)); 
  for (ct_AN=0; ct_AN<=atomnum; ct_AN++){

    if (ct_AN==0){
      tno0 = 1;
    }
    else{
      Cwan = WhatSpecies[ct_AN];
      tno0 = Spe_Total_NO[Cwan];  
    }    

    O[ct_AN] = (double***)malloc(sizeof(double**)*(FNAN[ct_AN]+1)); 
    for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){

      if (ct_AN==0){
	tno1 = 1;  
      }
      else{
	Gh_AN = natn[ct_AN][h_AN];
	Hwan = WhatSpecies[Gh_AN];
	tno1 = Spe_Total_NO[Hwan];
      } 

      O[ct_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0); 
      for (i=0; i<tno0; i++){
	O[ct_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1); 
      }
      size_O += tno0*tno1;
    }
  }

  /* OP */

  size_OP = 0;
  OP = (double****)malloc(sizeof(double***)*(atomnum+1)); 
  for (ct_AN=0; ct_AN<=atomnum; ct_AN++){

    if (ct_AN==0){
      tno0 = 1;
    }
    else{
      Cwan = WhatSpecies[ct_AN];
      tno0 = Spe_Total_NO[Cwan];  
    }    

    OP[ct_AN] = (double***)malloc(sizeof(double**)*(FNAN[ct_AN]+SNAN[ct_AN]+1)); 
    for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){

      if (ct_AN==0){
	tno1 = 1;  
      }
      else{
	Gh_AN = natn[ct_AN][h_AN];
	Hwan = WhatSpecies[Gh_AN];
	tno1 = Spe_Total_NO[Hwan];
      } 

      OP[ct_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0); 
      for (i=0; i<tno0; i++){
	OP[ct_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1); 
      }
      size_OP += tno0*tno1;
    }
  }

  /* ON */

  ON = (double****)malloc(sizeof(double***)*(atomnum+1)); 
  for (ct_AN=0; ct_AN<=atomnum; ct_AN++){

    if (ct_AN==0){
      tno0 = 1;
    }
    else{
      Cwan = WhatSpecies[ct_AN];
      tno0 = Spe_Total_NO[Cwan];  
    }    

    ON[ct_AN] = (double***)malloc(sizeof(double**)*(FNAN[ct_AN]+SNAN[ct_AN]+1)); 
    for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){

      if (ct_AN==0){
	tno1 = 1;  
      }
      else{
	Gh_AN = natn[ct_AN][h_AN];
	Hwan = WhatSpecies[Gh_AN];
	tno1 = Spe_Total_NO[Hwan];
      } 

      ON[ct_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0); 
      for (i=0; i<tno0; i++){
	ON[ct_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1); 
      }
    }
  }


  /* PrintMemory */
  if (firsttime) {
    PrintMemory("Taylor: O",sizeof(O),NULL);
    PrintMemory("Taylor: OP",sizeof(OP),NULL);
    PrintMemory("Taylor: ON",sizeof(ON),NULL);
    firsttime=0;
  }

  /****************************************************
        Initializing of IOLP, OP and O matrices
  ****************************************************/

  for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
    fan = FNAN[ct_AN];
    san = SNAN[ct_AN];
    can = fan + san;
    ig = natn[ct_AN][0];
    ian = Spe_Total_NO[WhatSpecies[ig]];
    for (j=0; j<=can; j++){
      jg = natn[ct_AN][j];
      jan = Spe_Total_NO[WhatSpecies[jg]];

      if (j<=fan){
        for (m=0; m<=(ian-1); m++){
          for (n=0; n<=(jan-1); n++){
            if (j==0 && m==n){
              IOLP[ct_AN][j][m][n] = 1.0;
              O[ct_AN][j][m][n] = 0.0;
              OP[ct_AN][j][m][n] = 0.0;
            }
            else {
              IOLP[ct_AN][j][m][n] = -OLP[ct_AN][j][m][n];
              O[ct_AN][j][m][n] = OLP[ct_AN][j][m][n];
              OP[ct_AN][j][m][n] = OLP[ct_AN][j][m][n];
            }
          }
        }
      }
      else {
        for (m=0; m<=(ian-1); m++){
          for (n=0; n<=(jan-1); n++){
            IOLP[ct_AN][j][m][n] = 0.0;
            O[ct_AN][j][m][n] = 0.0;
            OP[ct_AN][j][m][n] = 0.0;
          }
        }
      }
    }
  }

  /****************************************************
                      Iteration
  ****************************************************/

  for (order=2; order<=Taylor_switch; order++){

    /****************************************************
                         O*OP -> ON
    ****************************************************/

    for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
      fan = FNAN[ct_AN];
      san = SNAN[ct_AN];
      can = fan + san;
      ig = natn[ct_AN][0];
      ian = Spe_Total_NO[WhatSpecies[ig]];
      for (j=0; j<=can; j++){
        jg = natn[ct_AN][j];
        jan = Spe_Total_NO[WhatSpecies[jg]];
        for (k=0; k<=fan; k++){
          kg = natn[ct_AN][k];
          jl = RMI2[ct_AN][k][j];
          kan = Spe_Total_NO[WhatSpecies[kg]];
          kl = RMI2[ct_AN][j][k];
          for (m=0; m<=(ian-1); m++){
            for (n=0; n<=(jan-1); n++){
              sum = 0.0;
              for (l=0; l<=(kan-1); l++){
                sum = sum + O[ct_AN][k][m][l]*OP[kg][jl][l][n];
              }
              if (k==0){
                ON[ct_AN][j][m][n] = sum;
              }
              else {
                ON[ct_AN][j][m][n] = ON[ct_AN][j][m][n] + sum;
              } 
            }
          }          
        }
      }
    }

    /****************************************************
                       Averaging of ON
    ****************************************************/

    if (order%2==0){
      for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
        wan = WhatSpecies[ct_AN];
        tno1 = Spe_Total_NO[wan] - 1;
        for (i=1; i<=(FNAN[ct_AN]+SNAN[ct_AN]); i++){
          ig = natn[ct_AN][i];
          ino1 = Spe_Total_NO[WhatSpecies[ig]] - 1;
          k = RMI2[ct_AN][i][0];
          for (p=0; p<=tno1; p++){
	    for (q=0; q<=ino1; q++){
	      dum1 = ON[ct_AN][i][p][q];
	      dum2 = ON[ig][k][q][p];
	      dum = 0.50*(dum1 + dum2);
	      ON[ct_AN][i][p][q] = dum;
	      ON[ig][k][q][p] = dum;
	    }
          }
        } 
      }
    }

    /****************************************************
                         ON -> OP
    ****************************************************/

    for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
      fan = FNAN[ct_AN];
      san = SNAN[ct_AN];
      can = fan + san;
      ig = natn[ct_AN][0];
      ian = Spe_Total_NO[WhatSpecies[ig]];
      for (j=0; j<=can; j++){
        jg = natn[ct_AN][j];
        jan = Spe_Total_NO[WhatSpecies[jg]];
        for (m=0; m<=(ian-1); m++){
          for (n=0; n<=(jan-1); n++){
            OP[ct_AN][j][m][n] = ON[ct_AN][j][m][n];
          }
        }
      }
    }

    /****************************************************
                  IOLP = IOLP +- ON
    ****************************************************/

    for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
      fan = FNAN[ct_AN];
      san = SNAN[ct_AN];
      can = fan + san;
      ig = natn[ct_AN][0];
      ian = Spe_Total_NO[WhatSpecies[ig]];
      for (j=0; j<=can; j++){
        jg = natn[ct_AN][j];
        jan = Spe_Total_NO[WhatSpecies[jg]];
        for (m=0; m<=(ian-1); m++){
          for (n=0; n<=(jan-1); n++){
            if ((order%2)==0) {
              IOLP[ct_AN][j][m][n] = IOLP[ct_AN][j][m][n]
                                    + ON[ct_AN][j][m][n];
            }
            else {
              IOLP[ct_AN][j][m][n] = IOLP[ct_AN][j][m][n]
                                    - ON[ct_AN][j][m][n];
            } 
          }
        }
      }
    }
  }


  /****************************************************
    freeing of arrays:

    static double O[atomnum+1]
                   [FNAN+1]
                   [Spe_Total_NO[Cwan]]
                   [Spe_Total_NO[Hwan]] 

    static double OP[atomnum+1]
                    [FNAN+SNAN+1]
                    [Spe_Total_NO[Cwan]]
                    [Spe_Total_NO[Hwan]] 

    static double ON[atomnum+1]
                    [FNAN+SNAN+1]
                    [Spe_Total_NO[Cwan]]
                    [Spe_Total_NO[Hwan]] 
  ****************************************************/

  /* O */

  FNAN[0] = 0;
  SNAN[0] = 0;

  for (ct_AN=0; ct_AN<=atomnum; ct_AN++){

    if (ct_AN==0){
      tno0 = 1;
    }
    else{
      Cwan = WhatSpecies[ct_AN];
      tno0 = Spe_Total_NO[Cwan];  
    }    

    for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){

      if (ct_AN==0){
	tno1 = 1;  
      }
      else{
	Gh_AN = natn[ct_AN][h_AN];
	Hwan = WhatSpecies[Gh_AN];
	tno1 = Spe_Total_NO[Hwan];
      } 

      for (i=0; i<tno0; i++){
	free(O[ct_AN][h_AN][i]);
      }
      free(O[ct_AN][h_AN]);
    }
    free(O[ct_AN]);
  }
  free(O);

  /* OP */

  for (ct_AN=0; ct_AN<=atomnum; ct_AN++){

    if (ct_AN==0){
      tno0 = 1;
    }
    else{
      Cwan = WhatSpecies[ct_AN];
      tno0 = Spe_Total_NO[Cwan];  
    }    

    for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){

      if (ct_AN==0){
	tno1 = 1;  
      }
      else{
	Gh_AN = natn[ct_AN][h_AN];
	Hwan = WhatSpecies[Gh_AN];
	tno1 = Spe_Total_NO[Hwan];
      } 

      for (i=0; i<tno0; i++){
	free(OP[ct_AN][h_AN][i]);
      }
      free(OP[ct_AN][h_AN]);
    }
    free(OP[ct_AN]);
  }
  free(OP);

  /* ON */

  for (ct_AN=0; ct_AN<=atomnum; ct_AN++){

    if (ct_AN==0){
      tno0 = 1;
    }
    else{
      Cwan = WhatSpecies[ct_AN];
      tno0 = Spe_Total_NO[Cwan];  
    }    

    for (h_AN=0; h_AN<=(FNAN[ct_AN]+SNAN[ct_AN]); h_AN++){

      if (ct_AN==0){
	tno1 = 1;  
      }
      else{
	Gh_AN = natn[ct_AN][h_AN];
	Hwan = WhatSpecies[Gh_AN];
	tno1 = Spe_Total_NO[Hwan];
      } 

      for (i=0; i<tno0; i++){
	free(ON[ct_AN][h_AN][i]);
      }
      free(ON[ct_AN][h_AN]);
    }
    free(ON[ct_AN]);
  }
  free(ON);

}
Example #5
0
void Initial_CntCoes2(double *****nh, double *****OLP)
{
  static int firsttime=1;
  int i,j,l,n,n2,i1,j1;
  int wan;
  int po;
  int Second_switch;
  double time0;
  int Mc_AN,Gc_AN,wanA;
  int q,q0,al0,al1,pmax;
  int Mul0,Mul1,deg_on,deg_num;
  int al,p,L0,M0,p0,Np;
  int ig,im,jg,ian,jan,kl,m,Gi;
  int mu,nu,Anum,Bnum,NUM,maxp;
  int h_AN,Gh_AN,Hwan,tno1,tno2,Cwan,spin;
  double Beta0,scaleF,maxc;
  double *ko,*C0,*koSys;
  double **S,**Hks,**D,*abs_sum,*M1,**C,**B;
  int *jun,*ponu;
  double tmp0,tmp1,Max0,rc1,fugou,MaxV;
  double sum,TZ;
  double Num_State,x,FermiF,Dnum;
  double LChemP_MAX,LChemP_MIN,LChemP;
  double TStime,TEtime;

  double *tmp_array;
  double *tmp_array2;
  int *MP,*dege;
  int **tmp_index;
  int ***tmp_index1;
  int ***tmp_index2;
  double *Tmp_CntCoes;
  double **Check_ko;
  double *Weight_ko;
  double ***CntCoes_Spe;
  double ***My_CntCoes_Spe;
  double **InProd;
  int *Snd_CntCoes_Size;
  int *Rcv_CntCoes_Size;
  int *Snd_H_Size,*Rcv_H_Size;
  int *Snd_S_Size,*Rcv_S_Size;
  int size1,size2,num;
  int numprocs,myid,tag=999,ID,IDS,IDR;

  MPI_Status stat;
  MPI_Request request;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  dtime(&TStime);

  /****************************************************
    allocation of arrays:

     int MP[List_YOUSO[8]];

     int tmp_index[List_YOUSO[25]+1]
                         [2*(List_YOUSO[25]+1)+1];
     int tmp_index1[List_YOUSO[25]+1]
                          [List_YOUSO[24]]
                          [2*(List_YOUSO[25]+1)+1];

     int tmp_index2[List_YOUSO[25]+1]
                          [List_YOUSO[24]]
                          [2*(List_YOUSO[25]+1)+1];
 
     double Tmp_CntCoes[List_YOUSO[24]] 

     double Check_ko[List_YOUSO[25]+1]
                           [2*(List_YOUSO[25]+1)+1];

     double Weight_ko[List_YOUSO[7]];

  ****************************************************/

  MP = (int*)malloc(sizeof(int)*List_YOUSO[8]);
  
  tmp_index = (int**)malloc(sizeof(int*)*(List_YOUSO[25]+1)); 
  for (i=0; i<(List_YOUSO[25]+1); i++){
    tmp_index[i] = (int*)malloc(sizeof(int)*(2*(List_YOUSO[25]+1)+1)); 
  }

  tmp_index1 = (int***)malloc(sizeof(int**)*(List_YOUSO[25]+1)); 
  for (i=0; i<(List_YOUSO[25]+1); i++){
    tmp_index1[i] = (int**)malloc(sizeof(int*)*List_YOUSO[24]); 
    for (j=0; j<List_YOUSO[24]; j++){
      tmp_index1[i][j] = (int*)malloc(sizeof(int)*(2*(List_YOUSO[25]+1)+1)); 
    }
  }

  tmp_index2 = (int***)malloc(sizeof(int**)*(List_YOUSO[25]+1)); 
  for (i=0; i<(List_YOUSO[25]+1); i++){
    tmp_index2[i] = (int**)malloc(sizeof(int*)*List_YOUSO[24]); 
    for (j=0; j<List_YOUSO[24]; j++){
      tmp_index2[i][j] = (int*)malloc(sizeof(int)*(2*(List_YOUSO[25]+1)+1)); 
    }
  }

  Tmp_CntCoes = (double*)malloc(sizeof(double)*List_YOUSO[24]); 

  Check_ko = (double**)malloc(sizeof(double*)*(List_YOUSO[25]+1)); 
  for (i=0; i<(List_YOUSO[25]+1); i++){
    Check_ko[i] = (double*)malloc(sizeof(double)*(2*(List_YOUSO[25]+1)+1)); 
  }

  Weight_ko = (double*)malloc(sizeof(double)*List_YOUSO[7]);

  Snd_CntCoes_Size = (int*)malloc(sizeof(int)*Num_Procs);
  Rcv_CntCoes_Size = (int*)malloc(sizeof(int)*Num_Procs);
  Snd_H_Size = (int*)malloc(sizeof(int)*Num_Procs);
  Rcv_H_Size = (int*)malloc(sizeof(int)*Num_Procs);
  Snd_S_Size = (int*)malloc(sizeof(int)*Num_Procs);
  Rcv_S_Size = (int*)malloc(sizeof(int)*Num_Procs);

  /* PrintMemory */

  if (firsttime) {
    PrintMemory("Initial_CntCoes: tmp_index",sizeof(int)*(List_YOUSO[25]+1)*
                                            (2*(List_YOUSO[25]+1)+1),NULL);
    PrintMemory("Initial_CntCoes: tmp_index1",sizeof(int)*(List_YOUSO[25]+1)*
                             List_YOUSO[24]*(2*(List_YOUSO[25]+1)+1) ,NULL);
    PrintMemory("Initial_CntCoes: tmp_index2",sizeof(int)*(List_YOUSO[25]+1)*
                             List_YOUSO[24]*(2*(List_YOUSO[25]+1)+1) ,NULL);
    PrintMemory("Initial_CntCoes: Check_ko",sizeof(double)*(List_YOUSO[25]+1)*
                                            (2*(List_YOUSO[25]+1)+1),NULL);
    firsttime=0;
  }

  /****************************************************
    MPI:

    nh(=H)
  ****************************************************/

  /***********************************
             set data size
  ************************************/

  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){
      tag = 999;

      /* find data size to send block data */
      if (F_Snd_Num[IDS]!=0){

        size1 = 0;
        for (spin=0; spin<=SpinP_switch; spin++){
          for (n=0; n<F_Snd_Num[IDS]; n++){
            Mc_AN = Snd_MAN[IDS][n];
            Gc_AN = Snd_GAN[IDS][n];
            Cwan = WhatSpecies[Gc_AN]; 
            tno1 = Spe_Total_NO[Cwan];
            for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
              Gh_AN = natn[Gc_AN][h_AN];        
              Hwan = WhatSpecies[Gh_AN];
              tno2 = Spe_Total_NO[Hwan];
              size1 += tno1*tno2; 
	    }
          }
	}
 
        Snd_H_Size[IDS] = size1;
        MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
      }
      else{
        Snd_H_Size[IDS] = 0;
      }

      /* receiving of size of data */

      if (F_Rcv_Num[IDR]!=0){
        MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
        Rcv_H_Size[IDR] = size2;
      }
      else{
        Rcv_H_Size[IDR] = 0;
      }

      if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);

    }
    else{
      Snd_H_Size[IDS] = 0;
      Rcv_H_Size[IDR] = 0;
    }
  }

  /***********************************
             data transfer
  ************************************/

  tag = 999;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){

      /*****************************
              sending of data 
      *****************************/

      if (F_Snd_Num[IDS]!=0){

        size1 = Snd_H_Size[IDS];

        /* allocation of array */

        tmp_array = (double*)malloc(sizeof(double)*size1);

        /* multidimentional array to vector array */

        num = 0;
        for (spin=0; spin<=SpinP_switch; spin++){
          for (n=0; n<F_Snd_Num[IDS]; n++){
            Mc_AN = Snd_MAN[IDS][n];
            Gc_AN = Snd_GAN[IDS][n];
            Cwan = WhatSpecies[Gc_AN]; 
            tno1 = Spe_Total_NO[Cwan];
            for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
              Gh_AN = natn[Gc_AN][h_AN];
              Hwan = WhatSpecies[Gh_AN];
              tno2 = Spe_Total_NO[Hwan];
              for (i=0; i<tno1; i++){
                for (j=0; j<tno2; j++){
                  tmp_array[num] = nh[spin][Mc_AN][h_AN][i][j];
                  num++;
                } 
              } 
	    }
          }
	}

        MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);

      }

      /*****************************
         receiving of block data
      *****************************/

      if (F_Rcv_Num[IDR]!=0){
        
        size2 = Rcv_H_Size[IDR];
        
        /* allocation of array */
        tmp_array2 = (double*)malloc(sizeof(double)*size2);
        
        MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
        
        num = 0;
        for (spin=0; spin<=SpinP_switch; spin++){
          Mc_AN = S_TopMAN[IDR] - 1;  /* S_TopMAN should be used. */
          for (n=0; n<F_Rcv_Num[IDR]; n++){
            Mc_AN++;
            Gc_AN = Rcv_GAN[IDR][n];
            Cwan = WhatSpecies[Gc_AN]; 
            tno1 = Spe_Total_NO[Cwan];

            for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
              Gh_AN = natn[Gc_AN][h_AN];        
              Hwan = WhatSpecies[Gh_AN];
              tno2 = Spe_Total_NO[Hwan];

              for (i=0; i<tno1; i++){
                for (j=0; j<tno2; j++){
                  nh[spin][Mc_AN][h_AN][i][j] = tmp_array2[num];
                  num++;
		}
	      }
	    }
	  }        
	}

        /* freeing of array */
        free(tmp_array2);

      }

      if (F_Snd_Num[IDS]!=0){
        MPI_Wait(&request,&stat);
        free(tmp_array);  /* freeing of array */
      } 
    }
  }

  /****************************************************
    MPI:

    OLP[0]
  ****************************************************/

  /***********************************
             set data size
  ************************************/

  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){
      tag = 999;

      /* find data size to send block data */
      if (F_Snd_Num[IDS]!=0){

        size1 = 0;
        for (n=0; n<F_Snd_Num[IDS]; n++){
          Mc_AN = Snd_MAN[IDS][n];
          Gc_AN = Snd_GAN[IDS][n];
          Cwan = WhatSpecies[Gc_AN]; 
          tno1 = Spe_Total_NO[Cwan];
          for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
            Gh_AN = natn[Gc_AN][h_AN];        
            Hwan = WhatSpecies[Gh_AN];
            tno2 = Spe_Total_NO[Hwan];
            size1 += tno1*tno2; 
	  }
        }
 
        Snd_S_Size[IDS] = size1;
        MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
      }
      else{
        Snd_S_Size[IDS] = 0;
      }

      /* receiving of size of data */

      if (F_Rcv_Num[IDR]!=0){
        MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
        Rcv_S_Size[IDR] = size2;
      }
      else{
        Rcv_S_Size[IDR] = 0;
      }

      if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);

    }
    else{
      Snd_S_Size[IDS] = 0;
      Rcv_S_Size[IDR] = 0;
    }
  }

  /***********************************
             data transfer
  ************************************/

  tag = 999;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){

      /*****************************
              sending of data 
      *****************************/

      if (F_Snd_Num[IDS]!=0){

        size1 = Snd_S_Size[IDS];

        /* allocation of array */

        tmp_array = (double*)malloc(sizeof(double)*size1);

        /* multidimentional array to vector array */

        num = 0;

        for (n=0; n<F_Snd_Num[IDS]; n++){
          Mc_AN = Snd_MAN[IDS][n];
          Gc_AN = Snd_GAN[IDS][n];
          Cwan = WhatSpecies[Gc_AN]; 
          tno1 = Spe_Total_NO[Cwan];
          for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
            Gh_AN = natn[Gc_AN][h_AN];        
            Hwan = WhatSpecies[Gh_AN];
            tno2 = Spe_Total_NO[Hwan];
            for (i=0; i<tno1; i++){
              for (j=0; j<tno2; j++){
                tmp_array[num] = OLP[0][Mc_AN][h_AN][i][j];
                num++;
              } 
            } 
	  }
        }

        MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
      }

      /*****************************
         receiving of block data
      *****************************/

      if (F_Rcv_Num[IDR]!=0){
        
        size2 = Rcv_S_Size[IDR];
        
        /* allocation of array */
        tmp_array2 = (double*)malloc(sizeof(double)*size2);
        
        MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
        
        num = 0;
        Mc_AN = S_TopMAN[IDR] - 1; /* S_TopMAN should be used. */
        for (n=0; n<F_Rcv_Num[IDR]; n++){
          Mc_AN++;
          Gc_AN = Rcv_GAN[IDR][n];
          Cwan = WhatSpecies[Gc_AN]; 
          tno1 = Spe_Total_NO[Cwan];

          for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
            Gh_AN = natn[Gc_AN][h_AN];        
            Hwan = WhatSpecies[Gh_AN];
            tno2 = Spe_Total_NO[Hwan];
            for (i=0; i<tno1; i++){
              for (j=0; j<tno2; j++){
                OLP[0][Mc_AN][h_AN][i][j] = tmp_array2[num];
                num++;
 	      }
	    }
	  }
	}        

        /* freeing of array */
        free(tmp_array2);

      }

      if (F_Snd_Num[IDS]!=0){
        MPI_Wait(&request,&stat);
        free(tmp_array); /* freeing of array */
      } 
    }
  }

  /****************************************************
     set of "initial" coefficients of the contraction
  ****************************************************/

  Second_switch = 1;

  Simple_InitCnt[0] = 0;
  Simple_InitCnt[1] = 0;
  Simple_InitCnt[2] = 0;
  Simple_InitCnt[3] = 0;
  Simple_InitCnt[4] = 0;
  Simple_InitCnt[5] = 0;

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];    
    wan = WhatSpecies[Gc_AN];

    for (al=0; al<Spe_Total_CNO[wan]; al++){
      for (p=0; p<Spe_Specified_Num[wan][al]; p++){
        CntCoes[Mc_AN][al][p] = 0.0;
      }
    }

    al = -1;
    for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){
      for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){
        for (M0=0; M0<=2*L0; M0++){
          al++;
          CntCoes[Mc_AN][al][Mul0] = 1.0;
	}
      }
    }
  }

  if (SICnt_switch==2) goto Simple_Init;

  /****************************************************
      Setting of Hamiltonian and overlap matrices

         MP indicates the starting position of
              atom i in arraies H and S
  ****************************************************/

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
    Gc_AN = M2G[Mc_AN];    
    wan = WhatSpecies[Gc_AN];

    if      ( FNAN[Gc_AN]<30 )  scaleF = 1.6;
    else if ( FNAN[Gc_AN]<40 )  scaleF = 1.4; 
    else if ( FNAN[Gc_AN]<50 )  scaleF = 1.2; 
    else                        scaleF = 1.0; 

    rc1 = scaleF*Spe_Atom_Cut1[wan];

    /***********************************************
         MP indicates the starting position of
              atom i in arraies H and S
    ***********************************************/
    
    Anum = 1;
    TZ = 0.0;
    for (i=0; i<=FNAN[Gc_AN]; i++){
      if (Dis[Gc_AN][i]<=rc1){
        MP[i] = Anum;
        Gi = natn[Gc_AN][i];
        wanA = WhatSpecies[Gi];
        Anum = Anum + Spe_Total_NO[wanA];
        TZ = TZ + Spe_Core_Charge[wanA];
      }
    }
    NUM = Anum - 1;

    /****************************************************
                       allocation
    ****************************************************/

    n2 = NUM + 3;

    koSys   = (double*)malloc(sizeof(double)*n2);
    ko      = (double*)malloc(sizeof(double)*n2);
    abs_sum = (double*)malloc(sizeof(double)*n2);
    M1      = (double*)malloc(sizeof(double)*n2);
    dege    = (int*)malloc(sizeof(int)*n2);
    C0      = (double*)malloc(sizeof(double)*n2);

    S   = (double**)malloc(sizeof(double*)*n2);
    Hks = (double**)malloc(sizeof(double*)*n2);
    D   = (double**)malloc(sizeof(double*)*n2);
    C   = (double**)malloc(sizeof(double*)*n2);
    B   = (double**)malloc(sizeof(double*)*n2);

    for (i=0; i<n2; i++){
      S[i]   = (double*)malloc(sizeof(double)*n2);
      Hks[i] = (double*)malloc(sizeof(double)*n2);
      D[i]   = (double*)malloc(sizeof(double)*n2);
      C[i]   = (double*)malloc(sizeof(double)*n2);
      B[i]   = (double*)malloc(sizeof(double)*n2);
    }

    jun  = (int*)malloc(sizeof(int)*n2);
    ponu = (int*)malloc(sizeof(int)*n2);

    InProd = (double**)malloc(sizeof(double*)*n2);
    for (i=0; i<n2; i++){
      InProd[i] = (double*)malloc(sizeof(double)*n2);
    }

    /****************************************************
                           calc
    ****************************************************/

    if (3<=level_stdout){
      printf("<Initial_CntCoes> Mc_AN=%2d Gc_AN=%2d  NUM=%2d\n",Mc_AN,Gc_AN,NUM);
    }
    
    for (i=0; i<=FNAN[Gc_AN]; i++){

      if (Dis[Gc_AN][i]<=rc1){

	ig = natn[Gc_AN][i];
        im = S_G2M[ig];  /* S_G2M must be used. */

	ian = Spe_Total_NO[WhatSpecies[ig]];
	Anum = MP[i];

	for (j=0; j<=FNAN[Gc_AN]; j++){

	  if (Dis[Gc_AN][j]<=rc1){

	    kl = RMI1[Mc_AN][i][j];
	    jg = natn[Gc_AN][j];
	    jan = Spe_Total_NO[WhatSpecies[jg]];
	    Bnum = MP[j];

	    if (0<=kl){
	      for (m=0; m<ian; m++){
		for (n=0; n<jan; n++){

		  S[Anum+m][Bnum+n] = OLP[0][im][kl][m][n];

		  if (SpinP_switch==0)
		    Hks[Anum+m][Bnum+n] = nh[0][im][kl][m][n];
		  else 
		    Hks[Anum+m][Bnum+n] = 0.5*(nh[0][im][kl][m][n]
				             + nh[1][im][kl][m][n]);
		}
	      }
	    }

	    else{
	      for (m=0; m<ian; m++){
		for (n=0; n<jan; n++){
		  S[Anum+m][Bnum+n] = 0.0;
		  Hks[Anum+m][Bnum+n] = 0.0;
		}
	      }
	    }
	  }
	}
      }
    }

    if (3<=level_stdout){
      printf("\n");
      printf("overlap matrix Gc_AN=%2d\n",Gc_AN);
      for (i1=1; i1<=NUM; i1++){
        for (j1=1; j1<=NUM; j1++){
          printf("%6.3f ",S[i1][j1]); 
        }
        printf("\n");
      }

      printf("\n");
      printf("hamiltonian matrix Gc_AN=%2d\n",Gc_AN);
      for (i1=1; i1<=NUM; i1++){
        for (j1=1; j1<=NUM; j1++){
          printf("%6.3f ",Hks[i1][j1]); 
        }
        printf("\n");
      }
      printf("\n");

    }

    /***********************************************
       Solve the generalized eigenvalue problem
    ***********************************************/

    Eigen_lapack(S,koSys,NUM,NUM);
  
    /***********************************************
           Searching of negative eigenvalues
    ************************************************/

    for (l=1; l<=NUM; l++){
      if (koSys[l]<0.0){

        koSys[l] = 1.0e-7;

        if (3<=level_stdout){
          printf("<Init_CntCoes>  Negative EV of OLP %2d %15.12f\n",l,koSys[l]);
	}
      }
    }
    for (l=1; l<=NUM; l++){
      M1[l] = 1.0/sqrt(koSys[l]);
    }

    for (i1=1; i1<=NUM; i1++){
      for (j1=i1+1; j1<=NUM; j1++){

        tmp0 = S[i1][j1];
        tmp1 = S[j1][i1];

        S[j1][i1] = tmp0;
        S[i1][j1] = tmp1; 
      }
    }
    
    for (i1=1; i1<=NUM; i1++){
      for (j1=1; j1<=NUM; j1++){

        sum = 0.0;
        tmp0 = M1[j1]; 
 
        for (l=1; l<=NUM; l++){
	  sum = sum + Hks[i1][l]*S[j1][l]*tmp0;
        }
        C[j1][i1] = sum;
      }
    }

    for (i1=1; i1<=NUM; i1++){
      for (j1=1; j1<=NUM; j1++){

        sum = 0.0;
        tmp0 = M1[i1]; 

        for (l=1; l<=NUM; l++){
	  sum = sum + tmp0*S[i1][l]*C[j1][l];
        }
        B[i1][j1] = sum;
      }
    }

    for (i1=1; i1<=NUM; i1++){
      for (j1=1; j1<=NUM; j1++){
        D[i1][j1] = B[i1][j1];       
      }
    }

    Eigen_lapack(D,koSys,NUM,NUM);

    /***********************************************
      transformation to the original eigenvectors.
                      NOTE 244P
    ***********************************************/

    for (i1=1; i1<=NUM; i1++){
      for (j1=1; j1<=NUM; j1++){
        C[i1][j1] = 0.0;
      }
    }

    for (i1=1; i1<=NUM; i1++){
      for (j1=i1+1; j1<=NUM; j1++){

        tmp0 = S[i1][j1];
        tmp1 = S[j1][i1];

        S[j1][i1] = tmp0;
        S[i1][j1] = tmp1; 

        tmp0 = D[i1][j1];
        tmp1 = D[j1][i1];

        D[j1][i1] = tmp0;
        D[i1][j1] = tmp1; 

      }
    }

    for (i1=1; i1<=NUM; i1++){
      for (j1=1; j1<=NUM; j1++){

        sum = 0.0;

        for (l=1; l<=NUM; l++){
          sum = sum + S[i1][l]*M1[l]*D[j1][l];
        }

        C[i1][j1] = sum;
      }
    }

    /****************************************************
           searching of a local chemical potential
    ****************************************************/

    po = 0;
    LChemP_MAX = 10.0;  
    LChemP_MIN =-10.0;

    Beta0 = 1.0/(kB*1500.0/eV2Hartree);

    do{
      LChemP = 0.50*(LChemP_MAX + LChemP_MIN);
      Num_State = 0.0;
      for (i1=1; i1<=NUM; i1++){
        x = (koSys[i1] - LChemP)*Beta0;
        if (x<=-30.0) x = -30.0;
        if (30.0<=x)  x = 30.0;
        FermiF = 2.0/(1.0 + exp(x)); 
        Num_State = Num_State + FermiF;
      }
      Dnum = TZ - Num_State;
      if (0.0<=Dnum) LChemP_MIN = LChemP;
      else           LChemP_MAX = LChemP;
      if (fabs(Dnum)<0.000000000001) po = 1;
    }
    while (po==0); 

    if (3<=level_stdout){
      for (i1=1; i1<=NUM; i1++){
        x = (koSys[i1] - LChemP)*Beta0;
        if (x<=-30.0) x = -30.0;
        if (30.0<=x)  x = 30.0;
        FermiF = 1.0/(1.0 + exp(x)); 
        printf("<Init_CntCoes>  %2d  eigenvalue=%15.12f  FermiF=%15.12f\n",
                i1,koSys[i1],FermiF);
      }
    }

    if (3<=level_stdout){
      printf("\n");
      printf("first C Gc_AN=%2d\n",Gc_AN);
      for (i1=1; i1<=NUM; i1++){
        for (j1=1; j1<=NUM; j1++){
          printf("%10.6f ",C[i1][j1]); 
        }
        printf("\n");
      }
    }
    
    if (3<=level_stdout){
      printf("<Init_CntCoes>  LChemP=%15.12f\n",LChemP);
    }
    
    /************************************************
       maximize the "overlap" between wave functions 
       and contracted basis functions              
    ************************************************/

    /* make a table function converting [L0][Mul0][M0] to "al" for primitive orbitals */

    al = -1;
    for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){
      for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
        for (M0=0; M0<=2*L0; M0++){
          al++;
	  tmp_index1[L0][Mul0][M0] = al;
        }
      }
    }

    /* make a table function converting [L0][Mul0][M0] to "al" for contracted orbitals */

    al = -1;
    for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){
      for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){
	for (M0=0; M0<=2*L0; M0++){
	  al++;
	  tmp_index2[L0][Mul0][M0] = al;
	}
      }
    }

    /* loop for L0 */
     
    for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){

      for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
        for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){
          Hks[Mul0+1][Mul1+1] = 0.0;
        }
      }

      for (M0=0; M0<=2*L0; M0++){

	for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){

          i = tmp_index1[L0][Mul0][M0]; 

	  for (mu=1; mu<=NUM; mu++){
            InProd[mu][Mul0] = C[MP[0]+i][mu];
	  } /* mu */
	} /* Mul0 */

	for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
	  for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){

            sum = 0.0;
	    for (mu=1; mu<=NUM; mu++){

	      x = (koSys[mu] - LChemP)*Beta0;
	      if (x<=-30.0) x = -30.0;
	      if (30.0<=x)  x = 30.0;
	      FermiF = 1.0/(1.0 + exp(x)); 

              sum += FermiF*InProd[mu][Mul0]*InProd[mu][Mul1];   
	    }

            Hks[Mul0+1][Mul1+1] -= sum; 
  	  }

          /* for calculation of a single atom */

          tmp0 = (double)(Spe_Num_Basis[wan][L0]-Mul0); 
          Hks[Mul0+1][Mul0+1] += -1.0e-9*tmp0*tmp0;

	}

      } /* M0 */

      /*
      M0 = 0; 
      for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
        i = tmp_index1[L0][Mul0][M0]; 
	for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){
          j = tmp_index1[L0][Mul1][M0]; 
          S[Mul0+1][Mul1+1] = OLP[0][Mc_AN][0][i][j];
	}
      }
      */

      for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
	for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){
	  S[Mul0+1][Mul1+1] = 0.0;
	}
        S[Mul0+1][Mul0+1] = 1.0;
      }


      if (3<=level_stdout){
	printf("<Hks Gc_AN=%2d L0=%2d>\n",Gc_AN,L0);
	for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
	  for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){
	    printf("%15.10f ",Hks[Mul0+1][Mul1+1]);
	  }
	  printf("\n");
	}
      }

      if (3<=level_stdout){
	printf("<S Gc_AN=%2d L0=%2d>\n",Gc_AN,L0);
	for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
	  for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){
	    printf("%15.10f ",S[Mul0+1][Mul1+1]);
	  }
	  printf("\n");
	}
      }

      /* diagonalization */

      Np = Spe_Num_Basis[wan][L0];

      Eigen_lapack(S,ko,Np,Np);

      for (l=1; l<=Np; l++){
        M1[l] = 1.0/sqrt(ko[l]);
      }

      for (i1=1; i1<=Np; i1++){
	for (j1=1; j1<=Np; j1++){
	  sum = 0.0;
	  for (l=1; l<=Np; l++){
	    sum = sum + Hks[i1][l]*S[l][j1]*M1[j1]; 
	  }
	  C[i1][j1] = sum;
	}
      }

      for (i1=1; i1<=Np; i1++){
	for (j1=1; j1<=Np; j1++){
	  sum = 0.0;
	  for (l=1; l<=Np; l++){
	    sum = sum + M1[i1]*S[l][i1]*C[l][j1];
	  }
	  B[i1][j1] = sum;
	}
      }

      for (i1=1; i1<=Np; i1++){
	for (j1=1; j1<=Np; j1++){
	  D[i1][j1] = B[i1][j1];
	}
      }

      Eigen_lapack(D,ko,Np,Np);

      /* transformation to the original eigenvectors */
 
      for (i1=1; i1<=Np; i1++){
	for (j1=1; j1<=Np; j1++){
	  C[i1][j1] = 0.0;
	}
      }

      for (i1=1; i1<=Np; i1++){
	for (j1=1; j1<=Np; j1++){
	  sum = 0.0;
	  for (l=1; l<=Np; l++){
	    sum = sum + S[i1][l]*M1[l]*D[l][j1];
	  }
	  C[i1][j1] = sum;
	}
      }

      if (3<=level_stdout){
	printf("<Eigenvalues Gc_AN=%2d L0=%2d>\n",Gc_AN,L0);
	for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
	  printf("Mul=%2d ko=%15.12f\n",Mul0,ko[Mul0+1]);
	}

	printf("<C Gc_AN=%2d L0=%2d>\n",Gc_AN,L0);
	for (Mul0=0; Mul0<Spe_Num_Basis[wan][L0]; Mul0++){
	  for (Mul1=0; Mul1<Spe_Num_Basis[wan][L0]; Mul1++){
	    printf("%15.10f ",C[Mul0+1][Mul1+1]);
	  }
	  printf("\n");
	}
      }

      /* set up contraction coefficients */
     
      for (M0=0; M0<=2*L0; M0++){
	for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){

	  al = tmp_index2[L0][Mul0][M0];

          /* if (SCnt_switch==1) */
          if ( SCnt_switch==1 && Mul0==(Spe_Num_CBasis[wan][L0]-1) ){
	    for (p=0; p<Spe_Num_Basis[wan][L0]; p++){
	      CntCoes[Mc_AN][al][p] = C[p+1][1];
  	    }
	  }
 
          else {
	    for (p=0; p<Spe_Num_Basis[wan][L0]; p++){
	      CntCoes[Mc_AN][al][p] = C[p+1][Mul0+1];;
	    }
	  }

          maxc = -1.0e+10;  
	  for (p=0; p<Spe_Num_Basis[wan][L0]; p++){
	    if (maxc<fabs(CntCoes[Mc_AN][al][p])){
              maxc = fabs(CntCoes[Mc_AN][al][p]); 
              maxp = p;
	    }
	  }

          tmp0 = sgn(CntCoes[Mc_AN][al][maxp]);
	  for (p=0; p<Spe_Num_Basis[wan][L0]; p++){
	    CntCoes[Mc_AN][al][p] *= tmp0;
	  }

	}
      }

    } /* L0 */    

    if (3<=level_stdout){
      for (al=0; al<Spe_Total_CNO[wan]; al++){
        for (p=0; p<Spe_Specified_Num[wan][al]; p++){
          printf("A Init_CntCoes Mc_AN=%2d Gc_AN=%2d al=%2d p=%2d  %15.12f\n",
                  Mc_AN,Gc_AN,al,p,CntCoes[Mc_AN][al][p]);
        }
      }
    }

    /****************************************************
                        free arrays
    ****************************************************/

    for (i=0; i<n2; i++){
      free(InProd[i]);
    }
    free(InProd);

    free(koSys);
    free(ko);
    free(abs_sum);
    free(M1);
    free(dege);
    free(jun);
    free(ponu);
    free(C0);

    for (i=0; i<n2; i++){
      free(S[i]);
      free(Hks[i]);
      free(D[i]);
      free(C[i]);
      free(B[i]);
    }
    free(S);
    free(Hks);
    free(D);
    free(C);
    free(B);

  } /* Mc_AN */ 

  /*************************************************************
    in case of optimization of only the last orbital in each L
  *************************************************************/

  if (SCnt_switch==1){

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

      Gc_AN = M2G[Mc_AN];    
      wan = WhatSpecies[Gc_AN];

      al = -1;
      for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){
	for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){
	  for (M0=0; M0<=2*L0; M0++){

	    al++;

            if ( Mul0!=(Spe_Num_CBasis[wan][L0]-1) ){

	      for (p=0; p<Spe_Specified_Num[wan][al]; p++){
		CntCoes[Mc_AN][al][p] = 0.0;
	      }

  	      CntCoes[Mc_AN][al][Mul0] = 1.0;
            }

	  }
	}
      }
    }
  }

  /****************************************************
            average contraction coefficients
  ****************************************************/

  if (ACnt_switch==1){

    /* allocation */
    My_CntCoes_Spe = (double***)malloc(sizeof(double**)*(SpeciesNum+1));
    for (i=0; i<=SpeciesNum; i++){
      My_CntCoes_Spe[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
      for (j=0; j<List_YOUSO[7]; j++){
        My_CntCoes_Spe[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]);
      }
    }

    CntCoes_Spe = (double***)malloc(sizeof(double**)*(SpeciesNum+1));
    for (i=0; i<=SpeciesNum; i++){
      CntCoes_Spe[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
      for (j=0; j<List_YOUSO[7]; j++){
        CntCoes_Spe[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]);
      }
    }

    /* initialize */
    for (wan=0; wan<SpeciesNum; wan++){
      for (i=0; i<List_YOUSO[7]; i++){
        for (j=0; j<List_YOUSO[24]; j++){
          My_CntCoes_Spe[wan][i][j] = 0.0;
	}
      }
    }

    /* local sum in a proccessor */
    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];    
      wan = WhatSpecies[Gc_AN];
      for (al=0; al<Spe_Total_CNO[wan]; al++){
        for (p=0; p<Spe_Specified_Num[wan][al]; p++){
          My_CntCoes_Spe[wan][al][p] += CntCoes[Mc_AN][al][p];
        }        
      }
    }

    /* global sum by MPI */
    for (wan=0; wan<SpeciesNum; wan++){
      for (al=0; al<Spe_Total_CNO[wan]; al++){
        for (p=0; p<Spe_Specified_Num[wan][al]; p++){
          MPI_Allreduce(&My_CntCoes_Spe[wan][al][p], &CntCoes_Spe[wan][al][p],
                         1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
	}
      }
    }    

    /* copy CntCoes_Spe to CntCoes_Species */
    for (wan=0; wan<SpeciesNum; wan++){
      for (al=0; al<Spe_Total_CNO[wan]; al++){
        for (p=0; p<Spe_Specified_Num[wan][al]; p++){
          CntCoes_Species[wan][al][p] = CntCoes_Spe[wan][al][p];
	}
      }
    }    

    /* CntCoes_Spe to CntCoes */
    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];    
      wan = WhatSpecies[Gc_AN];
      for (al=0; al<Spe_Total_CNO[wan]; al++){
        for (p=0; p<Spe_Specified_Num[wan][al]; p++){
          CntCoes[Mc_AN][al][p] = CntCoes_Spe[wan][al][p];
        }        
      }
    }

    /* free */
    for (i=0; i<=SpeciesNum; i++){
      for (j=0; j<List_YOUSO[7]; j++){
        free(My_CntCoes_Spe[i][j]);
      }
      free(My_CntCoes_Spe[i]);
    }
    free(My_CntCoes_Spe);

    for (i=0; i<=SpeciesNum; i++){
      for (j=0; j<List_YOUSO[7]; j++){
        free(CntCoes_Spe[i][j]);
      }
      free(CntCoes_Spe[i]);
    }
    free(CntCoes_Spe);

  }

  /**********************************************
    transformation of optimized orbitals by 
    an extended Gauss elimination and 
    the Gram-Schmidt orthogonalization
  ***********************************************/

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];    
    wan = WhatSpecies[Gc_AN];

    al = -1;
    for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){
      for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){
	for (M0=0; M0<=2*L0; M0++){
	  al++;
	  tmp_index2[L0][Mul0][M0] = al;
	}
      }
    }

    for (L0=0; L0<=Spe_MaxL_Basis[wan]; L0++){
      for (M0=0; M0<=2*L0; M0++){

	/**********************************************
                  extended Gauss elimination
	***********************************************/

	for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){
	  al0 = tmp_index2[L0][Mul0][M0]; 
	  for (Mul1=0; Mul1<Spe_Num_CBasis[wan][L0]; Mul1++){
	    al1 = tmp_index2[L0][Mul1][M0];

	    if (Mul1!=Mul0){

	      tmp0 = CntCoes[Mc_AN][al0][Mul0]; 
	      tmp1 = CntCoes[Mc_AN][al1][Mul0]; 

	      for (p=0; p<Spe_Specified_Num[wan][al0]; p++){
		CntCoes[Mc_AN][al1][p] -= CntCoes[Mc_AN][al0][p]/tmp0*tmp1;
	      }
	    }

	  }
	}

	/**********************************************
           orthonormalization of initial contraction 
           coefficients
        ***********************************************/

	for (Mul0=0; Mul0<Spe_Num_CBasis[wan][L0]; Mul0++){
	  al0 = tmp_index2[L0][Mul0][M0]; 

	  /* x - sum_i <x|e_i>e_i */

	  for (p=0; p<Spe_Specified_Num[wan][al0]; p++){
	    Tmp_CntCoes[p] = 0.0;
	  }
         
	  for (Mul1=0; Mul1<Mul0; Mul1++){
	    al1 = tmp_index2[L0][Mul1][M0];

	    sum = 0.0;
	    for (p=0; p<Spe_Specified_Num[wan][al0]; p++){
	      sum = sum + CntCoes[Mc_AN][al0][p]*CntCoes[Mc_AN][al1][p];
	    }

	    for (p=0; p<Spe_Specified_Num[wan][al0]; p++){
	      Tmp_CntCoes[p] = Tmp_CntCoes[p] + sum*CntCoes[Mc_AN][al1][p];
	    }
	  }

	  for (p=0; p<Spe_Specified_Num[wan][al0]; p++){
	    CntCoes[Mc_AN][al0][p] = CntCoes[Mc_AN][al0][p] - Tmp_CntCoes[p];
	  }

	  /* Normalize */

	  sum = 0.0;
	  Max0 = -100.0;
	  pmax = 0;
	  for (p=0; p<Spe_Specified_Num[wan][al0]; p++){
	    sum = sum + CntCoes[Mc_AN][al0][p]*CntCoes[Mc_AN][al0][p];
	    if (Max0<fabs(CntCoes[Mc_AN][al0][p])){
	      Max0 = fabs(CntCoes[Mc_AN][al0][p]);
	      pmax = p;
	    }
	  }

	  if (fabs(sum)<1.0e-11)
	    tmp0 = 0.0;
	  else 
	    tmp0 = 1.0/sqrt(sum); 

	  tmp1 = sgn(CntCoes[Mc_AN][al0][pmax]);
            
	  for (p=0; p<Spe_Specified_Num[wan][al0]; p++){
	    CntCoes[Mc_AN][al0][p] = tmp0*tmp1*CntCoes[Mc_AN][al0][p];
	  }

	}
      }
    }

  } /* Mc_AN */

  /****************************************************
                     Normalization
  ****************************************************/

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
    Gc_AN = M2G[Mc_AN];    
    wan = WhatSpecies[Gc_AN];
    for (al=0; al<Spe_Total_CNO[wan]; al++){

      sum = 0.0;
      for (p=0; p<Spe_Specified_Num[wan][al]; p++){
	p0 = Spe_Trans_Orbital[wan][al][p];

	for (q=0; q<Spe_Specified_Num[wan][al]; q++){
          q0 = Spe_Trans_Orbital[wan][al][q];

          tmp0 = CntCoes[Mc_AN][al][p]*CntCoes[Mc_AN][al][q];
          sum = sum + tmp0*OLP[0][Mc_AN][0][p0][q0]; 
        }
      }

      tmp0 = 1.0/sqrt(sum);
      for (p=0; p<Spe_Specified_Num[wan][al]; p++){
        CntCoes[Mc_AN][al][p] = CntCoes[Mc_AN][al][p]*tmp0;
      } 
    }

    if (3<=level_stdout){
      for (al=0; al<Spe_Total_CNO[wan]; al++){
        for (p=0; p<Spe_Specified_Num[wan][al]; p++){
          printf("B Init_CntCoes Mc_AN=%2d Gc_AN=%2d al=%2d p=%2d  %15.12f\n",
                  Mc_AN,Gc_AN,al,p,CntCoes[Mc_AN][al][p]);
        }
      }
    }

  } /* Mc_AN */

 Simple_Init:

  /****************************************************
    MPI:

    CntCoes_Species
  ****************************************************/

  for (wan=0; wan<SpeciesNum; wan++){

    Gc_AN = 1;
    po = 0;

    do {

      wanA = WhatSpecies[Gc_AN];

      if (wan==wanA){

        ID = G2ID[Gc_AN];
        Mc_AN = F_G2M[Gc_AN]; 

        for (al=0; al<Spe_Total_CNO[wan]; al++){
          for (p=0; p<Spe_Specified_Num[wan][al]; p++){

            if (ID==myid) tmp0 = CntCoes[Mc_AN][al][p];

            MPI_Bcast(&tmp0, 1, MPI_DOUBLE, ID, mpi_comm_level1);
            CntCoes_Species[wan][al][p] = tmp0; 
	  }
	}

        po = 1;
      }

      Gc_AN++;

    } while (po==0 && Gc_AN<=atomnum);
  }

  /****************************************************
    MPI:

    CntCoes
  ****************************************************/

  /***********************************
             set data size
  ************************************/

  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){
      tag = 999;

      /* find data size to send block data */
      if (F_Snd_Num[IDS]!=0){

        size1 = 0;
        for (n=0; n<F_Snd_Num[IDS]; n++){
          Mc_AN = Snd_MAN[IDS][n];
          Gc_AN = Snd_GAN[IDS][n];
          wan = WhatSpecies[Gc_AN]; 
          for (al=0; al<Spe_Total_CNO[wan]; al++){
            size1 += Spe_Specified_Num[wan][al];
	  }
	}

        Snd_CntCoes_Size[IDS] = size1;
        MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
      }
      else{
        Snd_CntCoes_Size[IDS] = 0;
      }

      /* receiving of size of data */

      if (F_Rcv_Num[IDR]!=0){
        MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
        Rcv_CntCoes_Size[IDR] = size2;
      }
      else{
        Rcv_CntCoes_Size[IDR] = 0;
      }
    
      if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);

    }
    else {
      Snd_CntCoes_Size[myid] = 0;
      Rcv_CntCoes_Size[myid] = 0;
    }
  }

  /***********************************
             data transfer
  ************************************/

  tag = 999;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){

      /*****************************
              sending of data 
      *****************************/

      if (F_Snd_Num[IDS]!=0){

        size1 = Snd_CntCoes_Size[IDS];

        /* allocation of array */
        tmp_array = (double*)malloc(sizeof(double)*size1);

        /* multidimentional array to vector array */

        num = 0;
        for (n=0; n<F_Snd_Num[IDS]; n++){
          Mc_AN = Snd_MAN[IDS][n];
          Gc_AN = Snd_GAN[IDS][n];
          wan = WhatSpecies[Gc_AN]; 
          for (al=0; al<Spe_Total_CNO[wan]; al++){
            for (p=0; p<Spe_Specified_Num[wan][al]; p++){
              tmp_array[num] = CntCoes[Mc_AN][al][p];
              num++;
  	    }
	  }
        }

        MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
      }

      /*****************************
         receiving of block data
      *****************************/

      if (F_Rcv_Num[IDR]!=0){

        size2 = Rcv_CntCoes_Size[IDR];

        /* allocation of array */
        tmp_array2 = (double*)malloc(sizeof(double)*size2);

        MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);

        num = 0;
        Mc_AN = F_TopMAN[IDR] - 1;
        for (n=0; n<F_Rcv_Num[IDR]; n++){
          Mc_AN++;
          Gc_AN = Rcv_GAN[IDR][n];
          wan = WhatSpecies[Gc_AN];
          for (al=0; al<Spe_Total_CNO[wan]; al++){
            for (p=0; p<Spe_Specified_Num[wan][al]; p++){
              CntCoes[Mc_AN][al][p] = tmp_array2[num];
              num++;
	    }
  	  }
        }

        /* freeing of array */
        free(tmp_array2);
      }

      if (F_Snd_Num[IDS]!=0){
        MPI_Wait(&request,&stat);
        free(tmp_array); /* freeing of array */
      }
    }
  }

  /****************************************************
    freeing of arrays:

     int MP[List_YOUSO[8]];

     int tmp_index[List_YOUSO[25]+1]
                         [2*(List_YOUSO[25]+1)+1];
     int tmp_index1[List_YOUSO[25]+1]
                          [List_YOUSO[24]]
                          [2*(List_YOUSO[25]+1)+1];
     int tmp_index2[List_YOUSO[25]+1]
                          [List_YOUSO[24]]
                          [2*(List_YOUSO[25]+1)+1];
 
     double Tmp_CntCoes[List_YOUSO[24]] 

     double Check_ko[List_YOUSO[25]+1]
                           [2*(List_YOUSO[25]+1)+1];

     double Weight_ko[List_YOUSO[7]];
  ****************************************************/

  free(MP);
  
  for (i=0; i<(List_YOUSO[25]+1); i++){
    free(tmp_index[i]);
  }
  free(tmp_index);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      free(tmp_index1[i][j]);
    }
    free(tmp_index1[i]);
  }
  free(tmp_index1);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      free(tmp_index2[i][j]);
    }
    free(tmp_index2[i]);
  }
  free(tmp_index2);

  free(Tmp_CntCoes);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    free(Check_ko[i]);
  }
  free(Check_ko);

  free(Weight_ko);

  free(Snd_CntCoes_Size);
  free(Rcv_CntCoes_Size);

  free(Snd_H_Size);
  free(Rcv_H_Size);

  free(Snd_S_Size);
  free(Rcv_S_Size);

  /* for elapsed time */
  dtime(&TEtime);
  time0 = TEtime - TStime;
}
Example #6
0
/** Determine grid sizes. If this is the first time this routine is called
  * or the grid sizes have changed (re)allocate the grid and set up the
  * grid pointers.
  */
int PairList::SetupGrids(Vec3 const& recipLengths) {
  int offsetX = cellOffset_;
  int offsetY = cellOffset_;
  int offsetZ = cellOffset_;

  double dc1 = cutList_ / (double)offsetX;
  double dc2 = cutList_ / (double)offsetY;
  double dc3 = cutList_ / (double)offsetZ;

  nGridX_ = std::max(1, (int)(recipLengths[0] / dc1)); // nucgrd1
  nGridY_ = std::max(1, (int)(recipLengths[1] / dc2));
  nGridZ_ = std::max(1, (int)(recipLengths[2] / dc3));

  // Check if grid (re)allocation needs to happen
  if ( nGridX_ == nGridX_0_ &&
       nGridY_ == nGridY_0_ &&
       nGridZ_ == nGridZ_0_ )
    return 0;
  if (nGridX_0_ != -1) // -1 is the initial allocation
    mprintf("Warning: Unit cell size has changed so much that grid must be recalculated.\n"
            "Warning: Old sizes= {%i, %i, %i}  New sizes= {%i, %i, %i}\n",
            nGridX_0_, nGridY_0_, nGridZ_0_, nGridX_, nGridY_, nGridZ_);
  nGridX_0_ = nGridX_;
  nGridY_0_ = nGridY_;
  nGridZ_0_ = nGridZ_;

  // TODO Add non-periodic case
  // Check short range cutoff
  dc1 = recipLengths[0] / (double)nGridX_;
  dc2 = recipLengths[1] / (double)nGridY_;
  dc3 = recipLengths[2] / (double)nGridZ_;
  double cut = (double)offsetX * dc1;
  if (offsetY*dc2 < cut)
    cut = (double)offsetY*dc2;
  if (offsetZ*dc3 < cut)
    cut = (double)offsetZ*dc3;
  //if(nogrdptrs)cut=cutlist
  // Allocation
  int nGridMax = nGridX_ * nGridY_ * nGridZ_;
  cells_.clear();
  cells_.resize( nGridMax );
  if (debug_ > 0) {
    mprintf("DEBUG: Number of grids per unit cell in each dimension: %i %i %i\n",
            nGridX_, nGridY_, nGridZ_);
    //mprintf("Unit cell edge lengths in each dimension: %9.3f %9.3f %9.3f\n",);
    mprintf("DEBUG: Distance between parallel faces of unit cell: %9.3f %9.3f %9.3f\n",
            recipLengths[0], recipLengths[1], recipLengths[2]);
    mprintf("DEBUG: Distance between faces of short range grid subcells: %9.3f %9.3f %9.3f\n",
            dc1, dc2, dc3);
    mprintf("DEBUG: Resulting cutoff from subcell neighborhoods is %f\n", cut);
    mprintf("%zu total grid cells\n", cells_.size());
  }
  if (cut < cutList_) {
    mprinterr("Error: Resulting cutoff %f too small for lower limit %f\n", cut, cutList_);
    return 1;
  }

  // NOTE: myindex are for parallelization later (maybe).
  int myindexlo = 0;
  int myindexhi = (int)cells_.size();
  CalcGridPointers(myindexlo, myindexhi);

  PrintMemory();

  return 0;
}
Example #7
0
int run_main(int argc, char *argv[], int numprocs0, int myid0) 
{
  int MD_iter,i,j,po,ip;
  char fileE[YOUSO10] = ".ene"; 
  char fileDRC[YOUSO10] = ".md";
  char fileMemory[YOUSO10]; 
  char fileRestart[YOUSO10];
  char operate[200];
  double TStime,TEtime;

  /* for idle CPUs */
  int tag;
  int complete;
  MPI_Request request;
  MPI_Status  status;

  /* for measuring elapsed time */

  dtime(&TStime);

  /* allocation of CompTime */
  CompTime = (double**)malloc(sizeof(double*)*numprocs0); 
  for (i=0; i<numprocs0; i++){
    CompTime[i] = (double*)malloc(sizeof(double)*30); 
    for (j=0; j<30; j++) CompTime[i][j] = 0.0;
  }

  if (myid0==Host_ID){  
    printf("\n*******************************************************\n"); 
    printf("*******************************************************\n"); 
    printf(" Welcome to OpenMX   Ver. %s                           \n",Version_OpenMX); 
    printf(" Copyright (C), 2002-2009, T.Ozaki                     \n"); 
    printf(" OpenMX comes with ABSOLUTELY NO WARRANTY.             \n"); 
    printf(" This is free software, and you are welcome to         \n"); 
    printf(" redistribute it under the constitution of the GNU-GPL.\n");
    printf("*******************************************************\n"); 
    printf("*******************************************************\n\n"); 
  } 

  Init_List_YOUSO();
  remake_headfile = 0;
  ScaleSize = 1.2; 

  /****************************************************
                   Read the input file
  ****************************************************/

  init_alloc_first();
  CompTime[myid0][1] = readfile(argv);
  MPI_Barrier(MPI_COMM_WORLD1);

  /* initialize PrintMemory routine */

  sprintf(fileMemory,"%s%s.memory%i",filepath,filename,myid0);
  PrintMemory(fileMemory,0,"init"); 
  PrintMemory_Fix();
 
  /* initialize */
  
  init();
  fnjoint(filepath,filename,fileE);
  fnjoint(filepath,filename,fileDRC);

  /****************************************************
      SCF-DFT calculations and MD and geometrical
      optimization.
  ****************************************************/

  MD_iter = 1;

  do {

    CompTime[myid0][2] += truncation(MD_iter,1);

    if (ML_flag==1 && myid0==Host_ID) Get_VSZ(MD_iter);  

    if (Solver==4) {
      TRAN_Calc_GridBound( mpi_comm_level1, atomnum, WhatSpecies, Spe_Atom_Cut1,
                           Ngrid1, Grid_Origin, Gxyz, tv, gtv, rgtv, Left_tv, Right_tv );

      /* output: TRAN_region[], TRAN_grid_bound */
    }

    CompTime[myid0][3] += DFT(MD_iter,(MD_iter-1)%orbitalOpt_per_MDIter+1);
    if (myid0==Host_ID) iterout(MD_iter,MD_TimeStep*MD_iter,fileE,fileDRC);

    if (ML_flag==0) CompTime[myid0][4] += MD_pac(MD_iter,argv[1]);

    MD_iter++;

  } while(MD_Opt_OK==0 && MD_iter<=MD_IterNumber);

  if ( TRAN_output_hks ) {
    /* left is dummy */
    TRAN_RestartFile(mpi_comm_level1, "write","left",filepath,TRAN_hksoutfilename);
  }

  /****************************************************
               calculate Voronoi charge
  ****************************************************/
 
  if (Voronoi_Charge_flag==1) Voronoi_Charge();

  /****************************************************
  making of a file *.frac for the fractional coordinates
  ****************************************************/

  Make_FracCoord(argv[1]);

  /****************************************************
   generate Wannier functions added by Hongming Weng
  ****************************************************/

  /* hmweng */
  if(Wannier_Func_Calc){
    if (myid0==Host_ID) printf("Calling Generate_Wannier...\n");fflush(0);

    Generate_Wannier(argv[1]);
  }

  /****************************************************
                  Making of output files
  ****************************************************/

  CompTime[myid0][20] = OutData(argv[1]);

  /****************************************************
    write connectivity, Hamiltonian, overlap, density
    matrices, and etc. to a file, filename.scfout 
  ****************************************************/

  if (HS_fileout==1) SCF2File("write",argv[1]);

  /* elapsed time */

  dtime(&TEtime);
  CompTime[myid0][0] = TEtime - TStime;
  Output_CompTime();
  for (i=0; i<numprocs0; i++){
    free(CompTime[i]);
  }
  free(CompTime);

  /* merge log files */

  Merge_LogFile(argv[1]);

  /* free arrays */

  Free_Arrays(0);

  /* print memory */

  PrintMemory("total",0,"sum");

  /****************************************************
         reconstruct the original MPI group
  ****************************************************/

  {
    int *new_ranks; 
    MPI_Group  new_group,old_group; 

    new_ranks = (int*)malloc(sizeof(int)*numprocs0);
    for (i=0; i<numprocs0; i++) {
      new_ranks[i]=i; /* a new group is made of original rank=0:Pnum[k]-1 */
    }

    MPI_Comm_group(MPI_COMM_WORLD1, &old_group);

    /* define a new group */
    MPI_Group_incl(old_group,numprocs0,new_ranks,&new_group);
    MPI_Comm_create(MPI_COMM_WORLD1,new_group,&mpi_comm_level1);

    MPI_Group_free(&new_group);
    free(new_ranks); /* never forget cleaning! */
  }

  MPI_Barrier(MPI_COMM_WORLD1);
  if (myid0==Host_ID){
    printf("\nThe calculation was normally finished.\n");
  }

  return 0;
}
Example #8
0
void Set_XC_Grid(int XC_P_switch, int XC_switch)
{
  /****************************************************
        XC_P_switch:
            0  \epsilon_XC (XC energy density)  
            1  \mu_XC      (XC potential)  
            2  \epsilon_XC - \mu_XC
  ****************************************************/

  int MN,MN1,MN2,i,j,k,ri,ri1,ri2;
  int i1,i2,j1,j2,k1,k2,n,nmax;
  double den_min=1.0e-14; 
  double Ec_unif[1],Vc_unif[2],Exc[2],Vxc[2];
  double Ex_unif[1],Vx_unif[2],tot_den;
  double ED[2],GDENS[3][2];
  double DEXDD[2],DECDD[2];
  double DEXDGD[3][2],DECDGD[3][2];
  double ***dEXC_dGD,***dDen_Grid;
  double up_x_a,up_x_b,up_x_c;
  double up_y_a,up_y_b,up_y_c;
  double up_z_a,up_z_b,up_z_c;
  double dn_x_a,dn_x_b,dn_x_c;
  double dn_y_a,dn_y_b,dn_y_c;
  double dn_z_a,dn_z_b,dn_z_c;
  double up_a,up_b,up_c;
  double dn_a,dn_b,dn_c;

  double tmp0,tmp1;
  double cot,sit,sip,cop,phi,theta;
  double detA,igtv[4][4];
  int numprocs,myid;

  /* for OpenMP */
  int OMPID,Nthrds,Nprocs;

  /****************************************************
   when GGA, allocation

   double dEXC_dGD[2][3][My_NumGrid1]
   double dDen_Grid[2][3][My_NumGrid1]
  ****************************************************/

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  if (XC_switch==4){

    dDen_Grid = (double***)malloc(sizeof(double**)*2); 
    for (k=0; k<=1; k++){
      dDen_Grid[k] = (double**)malloc(sizeof(double*)*3); 
      for (i=0; i<3; i++){
        dDen_Grid[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1); 
        for (j=0; j<My_NumGrid1; j++) dDen_Grid[k][i][j] = 0.0;
      }
    }

    if (XC_P_switch!=0){
      dEXC_dGD = (double***)malloc(sizeof(double**)*2); 
      for (k=0; k<=1; k++){
        dEXC_dGD[k] = (double**)malloc(sizeof(double*)*3); 
        for (i=0; i<3; i++){
          dEXC_dGD[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1); 
          for (j=0; j<My_NumGrid1; j++) dEXC_dGD[k][i][j] = 0.0;
        }
      }
    }

    /* PrintMemory */
    PrintMemory("Set_XC_Grid: dDen_Grid", sizeof(double)*6*My_NumGrid1, NULL);
    PrintMemory("Set_XC_Grid: dEXC_dGD",  sizeof(double)*6*My_NumGrid1, NULL);

    /****************************************************
     calculate dDen_Grid
    ****************************************************/
 
    detA =   gtv[1][1]*gtv[2][2]*gtv[3][3]
           + gtv[1][2]*gtv[2][3]*gtv[3][1]
           + gtv[1][3]*gtv[2][1]*gtv[3][2]
           - gtv[1][3]*gtv[2][2]*gtv[3][1]
           - gtv[1][2]*gtv[2][1]*gtv[3][3]
           - gtv[1][1]*gtv[2][3]*gtv[3][2];     

    igtv[1][1] =  (gtv[2][2]*gtv[3][3] - gtv[2][3]*gtv[3][2])/detA;
    igtv[2][1] = -(gtv[2][1]*gtv[3][3] - gtv[2][3]*gtv[3][1])/detA;
    igtv[3][1] =  (gtv[2][1]*gtv[3][2] - gtv[2][2]*gtv[3][1])/detA; 

    igtv[1][2] = -(gtv[1][2]*gtv[3][3] - gtv[1][3]*gtv[3][2])/detA;
    igtv[2][2] =  (gtv[1][1]*gtv[3][3] - gtv[1][3]*gtv[3][1])/detA;
    igtv[3][2] = -(gtv[1][1]*gtv[3][2] - gtv[1][2]*gtv[3][1])/detA; 

    igtv[1][3] =  (gtv[1][2]*gtv[2][3] - gtv[1][3]*gtv[2][2])/detA;
    igtv[2][3] = -(gtv[1][1]*gtv[2][3] - gtv[1][3]*gtv[2][1])/detA;
    igtv[3][3] =  (gtv[1][1]*gtv[2][2] - gtv[1][2]*gtv[2][1])/detA; 

#pragma omp parallel shared(igtv,dDen_Grid,PCCDensity_Grid,PCC_switch,Density_Grid,den_min,My_Cell0,My_Cell1,Ngrid3,Ngrid2,Num_Cells0) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_a,dn_a,up_b,dn_b,up_c,dn_c)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();
      nmax = Num_Cells0*Ngrid2*Ngrid3; 

      for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){

	i = n/(Ngrid2*Ngrid3);
	j = (n-i*Ngrid2*Ngrid3)/Ngrid3;
	k = n - i*Ngrid2*Ngrid3 - j*Ngrid3; 
	ri = My_Cell1[i];

	/* find ri1, ri2, i1, and i2 */

	if (ri==0){
	  ri1 = Ngrid1 - 1;
	  ri2 = 1;        
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}
	else if (ri==(Ngrid1-1)){
	  ri1 = Ngrid1 - 2;
	  ri2 = 0;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}      
	else{
	  ri1 = ri - 1;
	  ri2 = ri + 1;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}

	/* because we have +-1 buffer cells. */

	if (i1!=-1 && i2!=-1){

	  /* find j1 and j2 */

	  if (j==0){
	    j1 = Ngrid2 - 1;
	    j2 = 1;
	  }
	  else if (j==(Ngrid2-1)){
	    j1 = Ngrid2 - 2;
	    j2 = 0;
	  }
	  else{
	    j1 = j - 1;
	    j2 = j + 1;
	  }

	  /* find k1 and k2 */

	  if (k==0){
	    k1 = Ngrid3 - 1;
	    k2 = 1;
	  }
	  else if (k==(Ngrid3-1)){
	    k1 = Ngrid3 - 2;
	    k2 = 0;
	  }
	  else{
	    k1 = k - 1;
	    k2 = k + 1;
	  }  

	  /* set MN */

	  MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k; 

	  /* set dDen_Grid */

	  if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){

	    /* a-axis */

	    MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k;
	    MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k;

	    if (PCC_switch==0) {
	      up_a = Density_Grid[0][MN2] - Density_Grid[0][MN1];
	      dn_a = Density_Grid[1][MN2] - Density_Grid[1][MN1];
	    }
	    else if (PCC_switch==1) {
	      up_a = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
	      dn_a = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
	    }

	    /* b-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k; 
	    MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k; 

	    if (PCC_switch==0) {
	      up_b = Density_Grid[0][MN2] - Density_Grid[0][MN1];
	      dn_b = Density_Grid[1][MN2] - Density_Grid[1][MN1];
	    }
	    else if (PCC_switch==1) {
	      up_b = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
	      dn_b = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
	    }

	    /* c-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1; 
	    MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2; 

	    if (PCC_switch==0) {
	      up_c = Density_Grid[0][MN2] - Density_Grid[0][MN1];
	      dn_c = Density_Grid[1][MN2] - Density_Grid[1][MN1];
	    }
	    else if (PCC_switch==1) {
	      up_c = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
	      dn_c = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
	    }

	    /* up */
	    dDen_Grid[0][0][MN] = 0.5*(igtv[1][1]*up_a + igtv[1][2]*up_b + igtv[1][3]*up_c);
	    dDen_Grid[0][1][MN] = 0.5*(igtv[2][1]*up_a + igtv[2][2]*up_b + igtv[2][3]*up_c);
	    dDen_Grid[0][2][MN] = 0.5*(igtv[3][1]*up_a + igtv[3][2]*up_b + igtv[3][3]*up_c);

	    /* down */
	    dDen_Grid[1][0][MN] = 0.5*(igtv[1][1]*dn_a + igtv[1][2]*dn_b + igtv[1][3]*dn_c);
	    dDen_Grid[1][1][MN] = 0.5*(igtv[2][1]*dn_a + igtv[2][2]*dn_b + igtv[2][3]*dn_c);
	    dDen_Grid[1][2][MN] = 0.5*(igtv[3][1]*dn_a + igtv[3][2]*dn_b + igtv[3][3]*dn_c);
	  }

	  else{
	    dDen_Grid[0][0][MN] = 0.0;
	    dDen_Grid[0][1][MN] = 0.0;
	    dDen_Grid[0][2][MN] = 0.0;
	    dDen_Grid[1][0][MN] = 0.0;
	    dDen_Grid[1][1][MN] = 0.0;
	    dDen_Grid[1][2][MN] = 0.0;
	  }

	} /* if (i1!=-1 && i2!=-1) */
      } /* n */

#pragma omp flush(dDen_Grid)

    } /* #pragma omp parallel */
  } /* if (XC_switch==4) */ 

  /****************************************************
   loop MN
  ****************************************************/

#pragma omp parallel shared(dDen_Grid,dEXC_dGD,den_min,Vxc_Grid,My_NumGrid1,XC_P_switch,XC_switch,Density_Grid,PCC_switch,PCCDensity_Grid) private(OMPID,Nthrds,Nprocs,MN,tot_den,tmp0,ED,Exc,Ec_unif,Vc_unif,Vxc,Ex_unif,Vx_unif,GDENS,DEXDD,DECDD,DEXDGD,DECDGD)
  {

    OMPID = omp_get_thread_num();
    Nthrds = omp_get_num_threads();
    Nprocs = omp_get_num_procs();

    for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){

      switch(XC_switch){
        
	/******************************************************************
         LDA (Ceperly-Alder)

         constructed by Ceperly and Alder,
         ref.
         D. M. Ceperley, Phys. Rev. B18, 3126 (1978)
         D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980) 

         and parametrized by Perdew and Zunger.
         ref.
         J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981)
	******************************************************************/
        
      case 1:
        
	tot_den = Density_Grid[0][MN] + Density_Grid[1][MN];

	/* partial core correction */
	if (PCC_switch==1) {
	  tot_den += PCCDensity_Grid[MN]*2.0;
	}

	tmp0 = XC_Ceperly_Alder(tot_den,XC_P_switch);
	Vxc_Grid[0][MN] = tmp0;
	Vxc_Grid[1][MN] = tmp0;
        
	break;

	/******************************************************************
         LSDA-CA (Ceperly-Alder)

         constructed by Ceperly and Alder,
         ref.
         D. M. Ceperley, Phys. Rev. B18, 3126 (1978)
         D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980) 

         and parametrized by Perdew and Zunger.
         ref.
         J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981)
	******************************************************************/

      case 2:

	ED[0] = Density_Grid[0][MN];
	ED[1] = Density_Grid[1][MN];

	/* partial core correction */
	if (PCC_switch==1) {
	  ED[0] += PCCDensity_Grid[MN];
	  ED[1] += PCCDensity_Grid[MN];
	}

	XC_CA_LSDA(ED[0], ED[1], Exc, XC_P_switch);
	Vxc_Grid[0][MN] = Exc[0];
	Vxc_Grid[1][MN] = Exc[1];

	break;

	/******************************************************************
         LSDA-PW (PW91)
         used as Grad\rho = 0 in their GGA formalism

         ref.
         J.P.Perdew and Yue Wang, Phys. Rev. B45, 13244 (1992) 
	******************************************************************/

      case 3:

	ED[0] = Density_Grid[0][MN];
	ED[1] = Density_Grid[1][MN];

	/* partial core correction */
	if (PCC_switch==1) {
	  ED[0] += PCCDensity_Grid[MN];
	  ED[1] += PCCDensity_Grid[MN];
	}

	if ((ED[0]+ED[1])<den_min){
	  Vxc_Grid[0][MN] = 0.0;
	  Vxc_Grid[1][MN] = 0.0;
	}
	else{
      
	  if (XC_P_switch==0){

	    XC_PW91C(ED,Ec_unif,Vc_unif);

	    Vxc[0] = Vc_unif[0];
	    Vxc[1] = Vc_unif[1];
	    Exc[0] = Ec_unif[0];

	    XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
	    Vxc[0] = Vxc[0] + Vx_unif[0];
	    Exc[1] = 2.0*ED[0]*Ex_unif[0];

	    XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
	    Vxc[1] += Vx_unif[0];
	    Exc[1] += 2.0*ED[1]*Ex_unif[0];

	    Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]);

	    Vxc_Grid[0][MN] = Exc[0] + Exc[1];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1];
	  }

	  else if (XC_P_switch==1){
	    XC_PW91C(ED,Ec_unif,Vc_unif);
	    Vxc_Grid[0][MN] = Vc_unif[0];
	    Vxc_Grid[1][MN] = Vc_unif[1];

	    XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
	    Vxc_Grid[0][MN] = Vxc_Grid[0][MN] + Vx_unif[0];

	    XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
	    Vxc_Grid[1][MN] = Vxc_Grid[1][MN] + Vx_unif[0];
	  }

	  else if (XC_P_switch==2){

	    XC_PW91C(ED,Ec_unif,Vc_unif);

	    Vxc[0] = Vc_unif[0];
	    Vxc[1] = Vc_unif[1];
	    Exc[0] = Ec_unif[0];

	    XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
	    Vxc[0]  = Vxc[0] + Vx_unif[0];
	    Exc[1]  = 2.0*ED[0]*Ex_unif[0];

	    XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
	    Vxc[1] += Vx_unif[0];
	    Exc[1] += 2.0*ED[1]*Ex_unif[0];

	    Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]);

	    Vxc_Grid[0][MN] = Exc[0] + Exc[1] - Vxc[0];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1] - Vxc[1];
	  }
	}

	break;

	/******************************************************************
         GGA-PBE
         ref.
         J. P. Perdew, K. Burke, and M. Ernzerhof,
         Phys. Rev. Lett. 77, 3865 (1996).
	******************************************************************/

      case 4:

	/****************************************************
         ED[0]       density of up spin:     n_up   
         ED[1]       density of down spin:   n_down

         GDENS[0][0] derivative (x) of density of up spin
         GDENS[1][0] derivative (y) of density of up spin
         GDENS[2][0] derivative (z) of density of up spin
         GDENS[0][1] derivative (x) of density of down spin
         GDENS[1][1] derivative (y) of density of down spin
         GDENS[2][1] derivative (z) of density of down spin

         DEXDD[0]    d(fx)/d(n_up) 
         DEXDD[1]    d(fx)/d(n_down) 
         DECDD[0]    d(fc)/d(n_up) 
         DECDD[1]    d(fc)/d(n_down) 

         n'_up_x   = d(n_up)/d(x)
         n'_up_y   = d(n_up)/d(y)
         n'_up_z   = d(n_up)/d(z)
         n'_down_x = d(n_down)/d(x)
         n'_down_y = d(n_down)/d(y)
         n'_down_z = d(n_down)/d(z)
       
         DEXDGD[0][0] d(fx)/d(n'_up_x) 
         DEXDGD[1][0] d(fx)/d(n'_up_y) 
         DEXDGD[2][0] d(fx)/d(n'_up_z) 
         DEXDGD[0][1] d(fx)/d(n'_down_x) 
         DEXDGD[1][1] d(fx)/d(n'_down_y) 
         DEXDGD[2][1] d(fx)/d(n'_down_z) 

         DECDGD[0][0] d(fc)/d(n'_up_x) 
         DECDGD[1][0] d(fc)/d(n'_up_y) 
         DECDGD[2][0] d(fc)/d(n'_up_z) 
         DECDGD[0][1] d(fc)/d(n'_down_x) 
         DECDGD[1][1] d(fc)/d(n'_down_y) 
         DECDGD[2][1] d(fc)/d(n'_down_z) 
	****************************************************/

	ED[0] = Density_Grid[0][MN];
	ED[1] = Density_Grid[1][MN];

	if ((ED[0]+ED[1])<den_min){
	  Vxc_Grid[0][MN] = 0.0;
	  Vxc_Grid[1][MN] = 0.0;

	  /* later add its derivatives */
	  if (XC_P_switch!=0){
	    dEXC_dGD[0][0][MN] = 0.0;
	    dEXC_dGD[0][1][MN] = 0.0;
	    dEXC_dGD[0][2][MN] = 0.0;

	    dEXC_dGD[1][0][MN] = 0.0;
	    dEXC_dGD[1][1][MN] = 0.0;
	    dEXC_dGD[1][2][MN] = 0.0;
	  }
	}
     
	else{

	  GDENS[0][0] = dDen_Grid[0][0][MN];
	  GDENS[1][0] = dDen_Grid[0][1][MN];
	  GDENS[2][0] = dDen_Grid[0][2][MN];
	  GDENS[0][1] = dDen_Grid[1][0][MN];
	  GDENS[1][1] = dDen_Grid[1][1][MN];
	  GDENS[2][1] = dDen_Grid[1][2][MN];

	  if (PCC_switch==1) {
	    ED[0] += PCCDensity_Grid[MN];
	    ED[1] += PCCDensity_Grid[MN];
	  }

	  XC_PBE(ED, GDENS, Exc, DEXDD, DECDD, DEXDGD, DECDGD);

	  /* XC energy density */
	  if      (XC_P_switch==0){
	    Vxc_Grid[0][MN] = Exc[0] + Exc[1];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1];
	  }

	  /* XC potential */
	  else if (XC_P_switch==1){
	    Vxc_Grid[0][MN] = DEXDD[0] + DECDD[0];
	    Vxc_Grid[1][MN] = DEXDD[1] + DECDD[1];
	  }

	  /* XC energy density - XC potential */
	  else if (XC_P_switch==2){
	    Vxc_Grid[0][MN] = Exc[0] + Exc[1] - DEXDD[0] - DECDD[0];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1] - DEXDD[1] - DECDD[1];
	  }

	  /* later add its derivatives */
	  if (XC_P_switch!=0){
	    dEXC_dGD[0][0][MN] = DEXDGD[0][0] + DECDGD[0][0];
	    dEXC_dGD[0][1][MN] = DEXDGD[1][0] + DECDGD[1][0];
	    dEXC_dGD[0][2][MN] = DEXDGD[2][0] + DECDGD[2][0];

	    dEXC_dGD[1][0][MN] = DEXDGD[0][1] + DECDGD[0][1];
	    dEXC_dGD[1][1][MN] = DEXDGD[1][1] + DECDGD[1][1];
	    dEXC_dGD[1][2][MN] = DEXDGD[2][1] + DECDGD[2][1];
	  }
	}

	break;

      } /* switch(XC_switch) */
    }   /* MN */

#pragma omp flush(dEXC_dGD)

  } /* #pragma omp parallel */

  /****************************************************
        calculate the second part of XC potential
               when GGA and XC_P_switch!=0
  ****************************************************/

  if (XC_switch==4 && XC_P_switch!=0){

#pragma omp parallel shared(XC_P_switch,Vxc_Grid,igtv,dEXC_dGD,Density_Grid,den_min,My_Cell0,My_Cell1,Num_Cells0,Ngrid2,Ngrid3) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_x_a,up_y_a,up_z_a,dn_x_a,dn_y_a,dn_z_a,up_x_b,up_y_b,up_z_b,dn_x_b,dn_y_b,dn_z_b,up_x_c,up_y_c,up_z_c,dn_x_c,dn_y_c,dn_z_c,tmp0,tmp1)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();
      nmax = Num_Cells0*Ngrid2*Ngrid3; 

      for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){

	i = n/(Ngrid2*Ngrid3);
	j = (n-i*Ngrid2*Ngrid3)/Ngrid3;
	k = n - i*Ngrid2*Ngrid3 - j*Ngrid3; 
	ri = My_Cell1[i];

	/* find ri1, ri2, i1, and i2 */

	if (ri==0){
	  ri1 = Ngrid1 - 1;
	  ri2 = 1;        
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}
	else if (ri==(Ngrid1-1)){
	  ri1 = Ngrid1 - 2;
	  ri2 = 0;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}      
	else{
	  ri1 = ri - 1;
	  ri2 = ri + 1;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}

	if (i1!=-1 && i2!=-1){

	  /* find j1 and j2 */

	  if (j==0){
	    j1 = Ngrid2 - 1;
	    j2 = 1;
	  }
	  else if (j==(Ngrid2-1)){
	    j1 = Ngrid2 - 2;
	    j2 = 0;
	  }
	  else{
	    j1 = j - 1;
	    j2 = j + 1;
	  }

	  /* find k1 and k2 */

	  if (k==0){
	    k1 = Ngrid3 - 1;
	    k2 = 1;
	  }
	  else if (k==(Ngrid3-1)){
	    k1 = Ngrid3 - 2;
	    k2 = 0;
	  }
	  else{
	    k1 = k - 1;
	    k2 = k + 1;
	  }  

	  /* set MN */

	  MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k; 

	  /* set Vxc_Grid */

	  if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){

	    /* a-axis */

	    MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k;
	    MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k;

	    up_x_a = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
	    up_y_a = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
	    up_z_a = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

	    dn_x_a = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
	    dn_y_a = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
	    dn_z_a = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

	    /* b-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k; 
	    MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k; 

	    up_x_b = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
	    up_y_b = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
	    up_z_b = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

	    dn_x_b = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
	    dn_y_b = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
	    dn_z_b = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

	    /* c-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1; 
	    MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2; 

	    up_x_c = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
	    up_y_c = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
	    up_z_c = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

	    dn_x_c = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
	    dn_y_c = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
	    dn_z_c = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

	    /* up */

	    tmp0 = igtv[1][1]*up_x_a + igtv[1][2]*up_x_b + igtv[1][3]*up_x_c
	      + igtv[2][1]*up_y_a + igtv[2][2]*up_y_b + igtv[2][3]*up_y_c
	      + igtv[3][1]*up_z_a + igtv[3][2]*up_z_b + igtv[3][3]*up_z_c;
	    tmp0 = 0.5*tmp0;

	    /* down */

	    tmp1 = igtv[1][1]*dn_x_a + igtv[1][2]*dn_x_b + igtv[1][3]*dn_x_c
	      + igtv[2][1]*dn_y_a + igtv[2][2]*dn_y_b + igtv[2][3]*dn_y_c
	      + igtv[3][1]*dn_z_a + igtv[3][2]*dn_z_b + igtv[3][3]*dn_z_c;
	    tmp1 = 0.5*tmp1;

	    /* XC potential */

	    if (XC_P_switch==1){
	      Vxc_Grid[0][MN] -= tmp0; 
	      Vxc_Grid[1][MN] -= tmp1;
	    }

	    /* XC energy density - XC potential */

	    else if (XC_P_switch==2){
	      Vxc_Grid[0][MN] += tmp0; 
	      Vxc_Grid[1][MN] += tmp1;
	    }

	  }
	}
      }

#pragma omp flush(Vxc_Grid)

    } /* #pragma omp parallel */
  } /* if (XC_switch==4 && XC_P_switch!=0) */

  /****************************************************
            In case of non-collinear spin DFT 
  ****************************************************/

  if (SpinP_switch==3 && XC_P_switch!=2){

#pragma omp parallel shared(Density_Grid,Vxc_Grid,My_NumGrid1) private(OMPID,Nthrds,Nprocs,MN,tmp0,tmp1,theta,phi,sit,cot,sip,cop)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){

	tmp0 = 0.5*(Vxc_Grid[0][MN] + Vxc_Grid[1][MN]);
	tmp1 = 0.5*(Vxc_Grid[0][MN] - Vxc_Grid[1][MN]);
	theta = Density_Grid[2][MN];
	phi   = Density_Grid[3][MN];
	sit = sin(theta);
	cot = cos(theta);
	sip = sin(phi);
	cop = cos(phi);

	Vxc_Grid[0][MN] =  tmp0 + cot*tmp1;  /* Re Vxc11 */
	Vxc_Grid[1][MN] =  tmp0 - cot*tmp1;  /* Re Vxc22 */
	Vxc_Grid[2][MN] =  tmp1*sit*cop;     /* Re Vxc12 */
	Vxc_Grid[3][MN] = -tmp1*sit*sip;     /* Im Vxc12 */ 
      }

#pragma omp flush(Vxc_Grid)

    } /* #pragma omp parallel */ 
  }

  /*
  {
    int hN1,hN2,hN3,i;
    double Re11,Re22,Re12,Im12;

    hN1 = Ngrid1/2;
    hN2 = Ngrid2/2;
    hN3 = Ngrid3/2;

    for (i=0; i<Num_Cells0; i++){

    MN = i*Ngrid2*Ngrid3 + hN2*Ngrid3 + hN3;
 

    Re11 = Vxc_Grid[0][MN];
    Re22 = Vxc_Grid[1][MN];
    Re12 = Vxc_Grid[2][MN];
    Im12 = Vxc_Grid[3][MN];

    printf("MN=%4d %15.12f %15.12f %15.12f %15.12f\n",
           MN,Re11,Re22,Re12,Im12);
    }
  }


  MPI_Finalize();
  exit(0);
  */


  /****************************************************
   In case of GGA,
   free arrays
   double dEXC_dGD[2][3][My_NumGrid1]
   double dDen_Grid[2][3][My_NumGrid1]
  ****************************************************/

  if (XC_switch==4){

    for (k=0; k<=1; k++){
      for (i=0; i<3; i++){
        free(dDen_Grid[k][i]);
      }
      free(dDen_Grid[k]);
    }
    free(dDen_Grid);

    if (XC_P_switch!=0){
      for (k=0; k<=1; k++){
        for (i=0; i<3; i++){
          free(dEXC_dGD[k][i]);
        }
        free(dEXC_dGD[k]);
      }
      free(dEXC_dGD);
    }
  }
}
Example #9
0
void Overlap_Band(int Host_ID1,
                  double ****OLP,
                  dcomplex **S, int *MP,
                  double k1, double k2, double k3)
{
  static int firsttime=1;
  int i,j,k,wanA,wanB,tnoA,tnoB,Anum,Bnum;
  int NUM,MA_AN,GA_AN,LB_AN,GB_AN;
  int l1,l2,l3,Rn,n2;
  double **S1,**S2;
  double *tmp_array1,*tmp_array2;
  double kRn,si,co,s;
  int ID,myid,numprocs,tag=999;

  MPI_Status stat;
  MPI_Request request;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  MPI_Barrier(mpi_comm_level1);

  /* set MP */
  Anum = 1;
  for (i=1; i<=atomnum; i++){
    MP[i] = Anum;
    wanA = WhatSpecies[i];
    tnoA = Spe_Total_CNO[wanA];
    Anum = Anum + tnoA;
  }
  NUM = Anum - 1;

  /****************************************************
                       Allocation
  ****************************************************/

  n2 = NUM + 2;

  S1 = (double**)malloc(sizeof(double*)*n2);
  for (i=0; i<n2; i++){
    S1[i] = (double*)malloc(sizeof(double)*n2);
  }
  if (firsttime)
  PrintMemory("Overlap_Band: S1",sizeof(double)*n2*n2,NULL);

  S2 = (double**)malloc(sizeof(double*)*n2);
  for (i=0; i<n2; i++){
    S2[i] = (double*)malloc(sizeof(double)*n2);
  }
  if (firsttime)
  PrintMemory("Overlap_Band: S2",sizeof(double)*n2*n2,NULL);

  /* for PrintMemory */
  firsttime=0;

  /****************************************************
                       set overlap
  ****************************************************/

  S[0][0].r = NUM;
  for (i=1; i<=NUM; i++){
    for (j=1; j<=NUM; j++){
      S1[i][j] = 0.0;
      S2[i][j] = 0.0;
    }
  }

  for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
    GA_AN = M2G[MA_AN];    
    wanA = WhatSpecies[GA_AN];
    tnoA = Spe_Total_CNO[wanA];
    Anum = MP[GA_AN];

    for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
      GB_AN = natn[GA_AN][LB_AN];
      Rn = ncn[GA_AN][LB_AN];
      wanB = WhatSpecies[GB_AN];
      tnoB = Spe_Total_CNO[wanB];

      /*
      kRn = k1*( (double)atv_ijk[Rn][1] + Cell_Gxyz[GB_AN][1] - Cell_Gxyz[GA_AN][1] )
          + k2*( (double)atv_ijk[Rn][2] + Cell_Gxyz[GB_AN][2] - Cell_Gxyz[GA_AN][2] )
          + k3*( (double)atv_ijk[Rn][3] + Cell_Gxyz[GB_AN][3] - Cell_Gxyz[GA_AN][3] );
      */

      l1 = atv_ijk[Rn][1];
      l2 = atv_ijk[Rn][2];
      l3 = atv_ijk[Rn][3];
      kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;

      si = sin(2.0*PI*kRn);
      co = cos(2.0*PI*kRn);
      Bnum = MP[GB_AN];
      for (i=0; i<tnoA; i++){
	for (j=0; j<tnoB; j++){
	  s = OLP[MA_AN][LB_AN][i][j];
	  S1[Anum+i][Bnum+j] += s*co;
	  S2[Anum+i][Bnum+j] += s*si;
	}
      }
    }
  }

  /******************************************************
    MPI: S1 and S2
  ******************************************************/

  tmp_array1 = (double*)malloc(sizeof(double)*2*n2*List_YOUSO[7]);
  tmp_array2 = (double*)malloc(sizeof(double)*2*n2*List_YOUSO[7]);

  /* S1 and S2 */

  if (myid!=Host_ID1){
    for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
      GA_AN = M2G[MA_AN];    
      wanA = WhatSpecies[GA_AN];
      tnoA = Spe_Total_CNO[wanA];
      Anum = MP[GA_AN];

      k = 0;  
      for (i=0; i<tnoA; i++){
        for (j=0; j<n2; j++){
          tmp_array1[k] = S1[Anum+i][j];          
	  k++; 
	}
      }
      for (i=0; i<tnoA; i++){
        for (j=0; j<n2; j++){
          tmp_array1[k] = S2[Anum+i][j];          
	  k++; 
	}
      }

      tag = 999;
      MPI_Isend(&tmp_array1[0], 2*tnoA*n2, MPI_DOUBLE, Host_ID1,
                tag, mpi_comm_level1, &request);
      MPI_Wait(&request,&stat);
    }
  }
  else{
    for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
      wanA = WhatSpecies[GA_AN];
      tnoA = Spe_Total_CNO[wanA];
      Anum = MP[GA_AN];
      ID = G2ID[GA_AN];
      if (ID!=Host_ID1){

        tag = 999;
        MPI_Recv(&tmp_array2[0], 2*tnoA*n2, MPI_DOUBLE, ID, tag, mpi_comm_level1, &stat);

	k = 0;
        for (i=0; i<tnoA; i++){
          for (j=0; j<n2; j++){
            S1[Anum+i][j] = tmp_array2[k];
	    k++;
	  }
	}
        for (i=0; i<tnoA; i++){
          for (j=0; j<n2; j++){
            S2[Anum+i][j] = tmp_array2[k];
	    k++;
	  }
	}

      }    
    }
  }

  free(tmp_array1);
  free(tmp_array2);

  /******************************************************
    Make the full complex matrix of S
  ******************************************************/

  for (i=1; i<=NUM; i++){
    for (j=1; j<=NUM; j++){
      S[i][j].r = S1[i][j];
      S[i][j].i = S2[i][j];
    }
  }

  /****************************************************
                       free arrays
  ****************************************************/

  for (i=0; i<n2; i++){
    free(S2[i]);
  }
  free(S2);
 
  for (i=0; i<n2; i++){
    free(S1[i]);
  }
  free(S1);

}
Example #10
0
double Set_Aden_Grid(int init_density)
{
  /****************************************************
          Densities by the atomic superposition
                   densities on grids
  ****************************************************/

  static int firsttime=1;
  int i,k,MN,ct_AN,Gc_AN,Mc_AN,top_num;
  int Rn,Cwan,Nc,GNc,GRc,n,size1,size2;
  int My_Max,Max_Size;
  int size_AtomDen_Grid;
  double time0;
  double x,y,z,DenA,DenPCC,dDenA,dDenPCC;
  double tmp0,dx,dy,dz,r,rmin=10e-14;
  double Nele,Nu,Nd,M,ocupcy_u,ocupcy_d;
  double rho,mag,magx,magy,magz,theta,phi;
  double TStime,TEtime;
  double Cxyz[4];
  double S_coordinate[3];
  double *tmp_array;
  double *tmp_array2;
  double **AtomDen_Grid;
  double **PCCDen_Grid;
  double *AtomDen2_Grid;
  double *PCCDen2_Grid;
  int *Snd_Size,*Rcv_Size;
  int numprocs,myid,tag=999,ID,IDS,IDR;
  double Stime_atom, Etime_atom;
  double Nup,Ndown,sit,cot,sip,cop;
  dcomplex U[2][2];

  MPI_Status stat;
  MPI_Request request;

  /* for OpenMP */
  int OMPID,Nthrds,Nprocs;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  
  dtime(&TStime);

  /****************************************************
    allocation of arrays:

    int Snd_Size[numprocs]
    int Rcv_Size[numprocs]

    double AtomDen_Grid[Matomnum+MatomnumF+1]
                       [GridN_Atom[Gc_AN]]

    double PCCDen_Grid[Matomnum+MatomnumF+1]
                       [GridN_Atom[Gc_AN]]
  ****************************************************/

  Snd_Size = (int*)malloc(sizeof(int)*numprocs); 
  Rcv_Size = (int*)malloc(sizeof(int)*numprocs); 

  size_AtomDen_Grid = Matomnum+MatomnumF+1;
  AtomDen_Grid = (double**)malloc(sizeof(double*)*(Matomnum+MatomnumF+1)); 
  AtomDen_Grid[0] = (double*)malloc(sizeof(double)*1); 
  for (Mc_AN=1; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
    Gc_AN = F_M2G[Mc_AN];
    AtomDen_Grid[Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]);
    size_AtomDen_Grid += GridN_Atom[Gc_AN];
  }

  PCCDen_Grid = (double**)malloc(sizeof(double*)*(Matomnum+MatomnumF+1)); 
  PCCDen_Grid[0] = (double*)malloc(sizeof(double)*1); 
  for (Mc_AN=1; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
    Gc_AN = F_M2G[Mc_AN];
    PCCDen_Grid[Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]); 
  }

  /* PrintMemory */

  if (firsttime==1){
    PrintMemory("Set_Aden_Grid: AtomDen_Grid", sizeof(double)*size_AtomDen_Grid, NULL);
    PrintMemory("Set_Aden_Grid: PCCDen_Grid",  sizeof(double)*size_AtomDen_Grid, NULL);
    firsttime = 0;
  }

  /******************************************************
                 setting of AtomDen_Grid
  ******************************************************/
 
  /**************************************
   for spin non-collinear
   1. set rho, mx, my, mz
   2. calculate theta and phi 
   3. n_up = (rho+m)/2 
      n_dn = (rho-m)/2 
  **************************************/

  if (SpinP_switch==3){
    for (MN=0; MN<My_NumGrid1; MN++){
      Density_Grid[0][MN] = 0.0;
      Density_Grid[1][MN] = 0.0;
      Density_Grid[2][MN] = 0.0;
      Density_Grid[3][MN] = 0.0;
      ADensity_Grid[MN]   = 0.0;
    }
  }

  /* spin collinear */
  else{ 
    for (MN=0; MN<My_NumGrid1; MN++){
      Density_Grid[0][MN] = 0.0;
      Density_Grid[1][MN] = 0.0;
      ADensity_Grid[MN]   = 0.0;
    }
  }  

  /* PCC */
  if (PCC_switch==1) {
    for (MN=0; MN<My_NumGrid1; MN++){
      PCCDensity_Grid[MN] = 0.0;
    }
  }

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    dtime(&Stime_atom);

    Gc_AN = M2G[Mc_AN];    
    Cwan = WhatSpecies[Gc_AN];
 
#pragma omp parallel shared(PCCDen_Grid,PCC_switch,AtomDen_Grid,Cwan,Gxyz,atv,Mc_AN,CellListAtom,GridListAtom,GridN_Atom,Gc_AN) private(OMPID,Nthrds,Nprocs,Nc,GNc,GRc,Cxyz,dx,dy,dz,r,DenA)
    {

      /* get info. on OpenMP */ 

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      for (Nc=OMPID*GridN_Atom[Gc_AN]/Nthrds; Nc<(OMPID+1)*GridN_Atom[Gc_AN]/Nthrds; Nc++){
      
	GNc = GridListAtom[Mc_AN][Nc];
	GRc = CellListAtom[Mc_AN][Nc];
      
	Get_Grid_XYZ(GNc,Cxyz);
	dx = Cxyz[1] + atv[GRc][1] - Gxyz[Gc_AN][1];
	dy = Cxyz[2] + atv[GRc][2] - Gxyz[Gc_AN][2];
	dz = Cxyz[3] + atv[GRc][3] - Gxyz[Gc_AN][3];
      
	r = sqrt(dx*dx + dy*dy + dz*dz); 

	/* AtomicDenF */
	DenA = AtomicDenF(Cwan,r);
	AtomDen_Grid[Mc_AN][Nc] = DenA;
      
	/*  partial core correction */
	if (PCC_switch==1) {
	  PCCDen_Grid[Mc_AN][Nc] = AtomicPCCF(Cwan,r);
	}
      }

    } /* #pragma omp parallel */

    dtime(&Etime_atom);
    time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
  }

  /******************************************************
   MPI:
        AtomDen_Grid
  ******************************************************/

  /* find data size for sending and recieving */

  tag = 999;
  My_Max = -10000;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){
      /*  sending size */
      if (F_Snd_Num[IDS]!=0){
        /* find data size  */
        size1 = 0; 
        for (n=0; n<F_Snd_Num[IDS]; n++){
          Gc_AN = Snd_GAN[IDS][n];
          size1 += GridN_Atom[Gc_AN];
	}

        Snd_Size[IDS] = size1;
        MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
      }
      else{
        Snd_Size[IDS] = 0;
      }

      /*  receiving size */
      if (F_Rcv_Num[IDR]!=0){
        MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
        Rcv_Size[IDR] = size2;
      }
      else{
        Rcv_Size[IDR] = 0;
      }

      if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);

    } 
    else{
      Snd_Size[IDS] = 0;
      Rcv_Size[IDR] = 0;
    }

    if (My_Max<Snd_Size[IDS]) My_Max = Snd_Size[IDS];
    if (My_Max<Rcv_Size[IDR]) My_Max = Rcv_Size[IDR];
  }  

  MPI_Allreduce(&My_Max, &Max_Size, 1, MPI_INT, MPI_MAX, mpi_comm_level1);
  tmp_array  = (double*)malloc(sizeof(double)*Max_Size);
  tmp_array2 = (double*)malloc(sizeof(double)*Max_Size);

  /* send and recieve AtomDen_Grid */

  tag = 999;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){

      /*****************************
              sending of data 
      *****************************/

      if (F_Snd_Num[IDS]!=0){

        /* find data size  */
        size1 = Snd_Size[IDS];

        /* multidimentional array to vector array */
        k = 0; 
        for (n=0; n<F_Snd_Num[IDS]; n++){
          Mc_AN = Snd_MAN[IDS][n];
          Gc_AN = Snd_GAN[IDS][n];

          for (i=0; i<GridN_Atom[Gc_AN]; i++){
            tmp_array[k] = AtomDen_Grid[Mc_AN][i];
            k++;
          }          
	} 

        /* MPI_Isend */
        MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
      }

      /*****************************
         receiving of block data
      *****************************/

      if (F_Rcv_Num[IDR]!=0){

        /* find data size */
        size2 = Rcv_Size[IDR]; 

        /* MPI_Recv */
        MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);

        k = 0;
        Mc_AN = F_TopMAN[IDR] - 1;
        for (n=0; n<F_Rcv_Num[IDR]; n++){
          Mc_AN++;
          Gc_AN = Rcv_GAN[IDR][n];

          for (i=0; i<GridN_Atom[Gc_AN]; i++){
            AtomDen_Grid[Mc_AN][i] = tmp_array2[k];
            k++;
          }
        }
      }
      if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
    } 
  }  

  /******************************************************
   MPI:
         PCCDen_Grid
  ******************************************************/

  /* send and receive PCCDen_Grid */

  tag = 999;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){

      /*****************************
              sending of data 
      *****************************/

      if (F_Snd_Num[IDS]!=0){

        /* find data size  */
        size1 = Snd_Size[IDS];

        /* multidimentional array to vector array */
        k = 0; 
        for (n=0; n<F_Snd_Num[IDS]; n++){
          Mc_AN = Snd_MAN[IDS][n];
          Gc_AN = Snd_GAN[IDS][n];

          for (i=0; i<GridN_Atom[Gc_AN]; i++){
            tmp_array[k] = PCCDen_Grid[Mc_AN][i];
            k++;
          }
	} 

        /* MPI_Isend */
        MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
      }

      /*****************************
         receiving of block data
      *****************************/

      if (F_Rcv_Num[IDR]!=0){

        /* find data size */
        size2 = Rcv_Size[IDR]; 

        /* MPI_Recv */
        MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);

        k = 0;
        Mc_AN = F_TopMAN[IDR] - 1;
        for (n=0; n<F_Rcv_Num[IDR]; n++){
          Mc_AN++;
          Gc_AN = Rcv_GAN[IDR][n];

          for (i=0; i<GridN_Atom[Gc_AN]; i++){
            PCCDen_Grid[Mc_AN][i] = tmp_array2[k];
            k++;
          }          
        }
      }
      if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
    } 
  }  

  /****************************************************
    freeing of arrays:

     tmp_array
     tmp_array2
  ****************************************************/

  free(tmp_array);
  free(tmp_array2);

  /******************************************************
            superposition of atomic densities
  ******************************************************/

  for (Mc_AN=1; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){

    dtime(&Stime_atom);

    Gc_AN = F_M2G[Mc_AN];
    Cwan = WhatSpecies[Gc_AN];
    Nele = fabs(InitN_USpin[Gc_AN] + InitN_DSpin[Gc_AN]);
    Nu = InitN_USpin[Gc_AN];
    Nd = InitN_DSpin[Gc_AN];

    if (1.0e-15<Nele){
      ocupcy_u = Nu/Nele;
      ocupcy_d = Nd/Nele;
    }
    else{
      ocupcy_u = 0.0;
      ocupcy_d = 0.0;
    }
 
    if (2<=level_stdout){
      printf("  Mc_AN=%3d Gc_AN=%3d ocupcy_u=%15.12f ocupcy_d=%15.12f\n",
                Mc_AN,Gc_AN,ocupcy_u,ocupcy_d);
    }

    for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
      MN = MGridListAtom[Mc_AN][Nc];
      if (0<=MN){ 

        DenA = AtomDen_Grid[Mc_AN][Nc];

        /* spin collinear */
        if ( init_density==1 && SpinP_switch==1 ){
          Density_Grid[0][MN] += ocupcy_u*DenA;
          Density_Grid[1][MN] += ocupcy_d*DenA;
        } 

        /* spin non-collinear */
        else if ( init_density==1 && SpinP_switch==3 ){

          theta = Angle0_Spin[Gc_AN];
          phi   = Angle1_Spin[Gc_AN];
          
          rho = DenA;
          mag = (ocupcy_u - ocupcy_d)*DenA;             
          magx = mag*sin(theta)*cos(phi);
          magy = mag*sin(theta)*sin(phi);
          magz = mag*cos(theta);

          Density_Grid[0][MN] += rho;
          Density_Grid[1][MN] += magx;
          Density_Grid[2][MN] += magy;
          Density_Grid[3][MN] += magz;
        } 

        else if (init_density==1){
          Density_Grid[0][MN] += 0.5*DenA;
        }
        else{
          ADensity_Grid[MN] += DenA;
        }

        /*  partial core correction  */
        if (PCC_switch==1) {
          DenPCC = PCCDen_Grid[Mc_AN][Nc];
          /* later add this in Set_XC_Grid */
          PCCDensity_Grid[MN] += 0.5*DenPCC;
        }
      }
    }

    dtime(&Etime_atom);
    time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
  }

  /******************************************************
    atomic densities (AtomDen2_Grid) in terms of FNAN2 
  ******************************************************/

  AtomDen2_Grid = (double*)malloc(sizeof(double)*FNAN2_Grid);

  /* MPI */
  tag = 999;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (ID!=0){

      /*****************************
              sending of data 
      *****************************/

      if (Num_Snd_FNAN2_Grid[IDS]!=0){

        tmp_array = (double*)malloc(sizeof(double)*Num_Snd_FNAN2_Grid[IDS]);

        /* vector array */
        for (i=0; i<Num_Snd_FNAN2_Grid[IDS]; i++){
          Gc_AN = Snd_FNAN2_At[IDS][i];
          Mc_AN = F_G2M[Gc_AN];
          Nc    = Snd_FNAN2_Nc[IDS][i];
          tmp_array[i] = AtomDen_Grid[Mc_AN][Nc];
        }

        /* MPI_Isend */
        MPI_Isend(&tmp_array[0], Num_Snd_FNAN2_Grid[IDS], MPI_DOUBLE,
                  IDS, tag, mpi_comm_level1, &request);
      }

      /*****************************
            receiving of data
      *****************************/

      if (Num_Rcv_FNAN2_Grid[IDR]!=0){
        top_num = TopMAN2_Grid[IDR];
        /* MPI_Recv */
        MPI_Recv(&AtomDen2_Grid[top_num], Num_Rcv_FNAN2_Grid[IDR], MPI_DOUBLE,
                  IDR, tag, mpi_comm_level1, &stat);
      }

      if (Num_Snd_FNAN2_Grid[IDS]!=0){
         MPI_Wait(&request,&stat);
         free(tmp_array);
      }

    }
  }

  /* Density_Grid += AtomDen2_Grid */

  for (i=0; i<FNAN2_Grid; i++){
    DenA = AtomDen2_Grid[i];
    MN    = Rcv_FNAN2_MN[i];
    Gc_AN = Rcv_FNAN2_GA[i];
    Nele = InitN_USpin[Gc_AN] + InitN_DSpin[Gc_AN];
    Nu = InitN_USpin[Gc_AN];
    Nd = InitN_DSpin[Gc_AN];

    if (1.0e-13<Nele){
      ocupcy_u = Nu/Nele;
      ocupcy_d = Nd/Nele;
    }
    else{
      ocupcy_u = 0.0;
      ocupcy_d = 0.0;
    }

    /* spin collinear */
    if ( init_density==1 && SpinP_switch==1 ){
      Density_Grid[0][MN] += ocupcy_u*DenA;
      Density_Grid[1][MN] += ocupcy_d*DenA;
    } 

    /* spin non-collinear */
    else if ( init_density==1 && SpinP_switch==3 ){

      theta = Angle0_Spin[Gc_AN];
      phi   = Angle1_Spin[Gc_AN];

      rho = DenA;
      mag = (ocupcy_u - ocupcy_d)*DenA;
      magx = mag*sin(theta)*cos(phi);
      magy = mag*sin(theta)*sin(phi);
      magz = mag*cos(theta);

      Density_Grid[0][MN] += rho;
      Density_Grid[1][MN] += magx;
      Density_Grid[2][MN] += magy;
      Density_Grid[3][MN] += magz;
    } 

    else if (init_density==1){
      Density_Grid[0][MN] += 0.5*DenA;
    }
  }

  /******************************************************
    pcc densities (PCCDen2_Grid) in terms of FNAN2 
  ******************************************************/

  PCCDen2_Grid = (double*)malloc(sizeof(double)*FNAN2_Grid);

  if (PCC_switch==1) {

    /* MPI */
    tag = 999;
    for (ID=0; ID<numprocs; ID++){

      IDS = (myid + ID) % numprocs;
      IDR = (myid - ID + numprocs) % numprocs;

      if (ID!=0){

        /*****************************
                sending of data 
        *****************************/

        if (Num_Snd_FNAN2_Grid[IDS]!=0){

          tmp_array = (double*)malloc(sizeof(double)*Num_Snd_FNAN2_Grid[IDS]);

          /* vector array */
          for (i=0; i<Num_Snd_FNAN2_Grid[IDS]; i++){
            Gc_AN = Snd_FNAN2_At[IDS][i];
            Mc_AN = F_G2M[Gc_AN];
            Nc    = Snd_FNAN2_Nc[IDS][i];
            tmp_array[i] = PCCDen_Grid[Mc_AN][Nc];
          }

          /* MPI_Isend */
          MPI_Isend(&tmp_array[0], Num_Snd_FNAN2_Grid[IDS], MPI_DOUBLE,
                    IDS, tag, mpi_comm_level1, &request);
        }

        /*****************************
              receiving of data
        *****************************/

        if (Num_Rcv_FNAN2_Grid[IDR]!=0){
          top_num = TopMAN2_Grid[IDR];
          /* MPI_Recv */
          MPI_Recv(&PCCDen2_Grid[top_num], Num_Rcv_FNAN2_Grid[IDR], MPI_DOUBLE,
                    IDR, tag, mpi_comm_level1, &stat);
        }

        if (Num_Snd_FNAN2_Grid[IDS]!=0){
           MPI_Wait(&request,&stat);
           free(tmp_array);
        }

      }
    }

    /* PCCDensity_Grid += PCCDen2_Grid */
    for (i=0; i<FNAN2_Grid; i++){
      MN = Rcv_FNAN2_MN[i];
      PCCDensity_Grid[MN] += 0.5*PCCDen2_Grid[i];
    }
  }

  /****************************************************
     initialize diagonal and off-diagonal densities
           in case of spin non-collinear DFT
  ****************************************************/

  if (init_density==1 && SpinP_switch==3){
    for (MN=0; MN<My_NumGrid1; MN++){

      rho  = Density_Grid[0][MN];
      magx = Density_Grid[1][MN];
      magy = Density_Grid[2][MN];
      magz = Density_Grid[3][MN];

      Density_Grid[0][MN] = 0.5*(rho + magz);
      Density_Grid[1][MN] = 0.5*(rho - magz);
      Density_Grid[2][MN] = 0.5*magx;
      Density_Grid[3][MN] =-0.5*magy;
    }
  }

  /******************************************************
               Density_Grid to ADensity_Grid
  ******************************************************/

  if ( init_density==1 && (SpinP_switch==1 || SpinP_switch==3) ){
    for (MN=0; MN<My_NumGrid1; MN++){
      ADensity_Grid[MN] = Density_Grid[0][MN] + Density_Grid[1][MN];
    }
  }
  else if (init_density==1){
    for (MN=0; MN<My_NumGrid1; MN++){
      ADensity_Grid[MN] = 2.0*Density_Grid[0][MN];
    }
  } 

  /****************************************************
            in case of non-spin polarization
                up-spin to down-spin
  ****************************************************/
  
  if (init_density==1 && SpinP_switch==0){
  
    for (MN=0; MN<My_NumGrid1; MN++){
      Density_Grid[1][MN] = Density_Grid[0][MN];
    }
  }

  /****************************************************
    freeing of arrays:

     Snd_Size
     Rcv_Size
     AtomDen_Grid
     PCCDen_Grid
  ****************************************************/

  free(Snd_Size);
  free(Rcv_Size);

  for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
    free(AtomDen_Grid[Mc_AN]);
  }
  free(AtomDen_Grid);

  for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
    free(PCCDen_Grid[Mc_AN]);
  }
  free(PCCDen_Grid);

  free(AtomDen2_Grid);
  free(PCCDen2_Grid);

  /* elapsed time */
  dtime(&TEtime);
  time0 = TEtime - TStime;
  return time0;
}
Example #11
0
void Make_VNA_Grid()
{
  static int firsttime=1;
  unsigned long long int n2D,N2D,GNc,GN;
  int i,Mc_AN,Gc_AN,BN,CN,LN,GRc,N3[4];
  int AN,Nc,MN,Cwan,NN_S,NN_R;
  int size_AtomVNA_Grid;
  int size_AtomVNA_Snd_Grid_A2B;
  int size_AtomVNA_Rcv_Grid_A2B;
  double Cxyz[4];
  double r,dx,dy,dz;
  double **AtomVNA_Grid;
  double **AtomVNA_Snd_Grid_A2B;
  double **AtomVNA_Rcv_Grid_A2B;
  double Stime_atom, Etime_atom;
  int numprocs,myid,tag=999,ID,IDS,IDR;
  int OMPID,Nthrds,Nprocs;
  
  MPI_Status stat;
  MPI_Request request;
  MPI_Status *stat_send;
  MPI_Status *stat_recv;
  MPI_Request *request_send;
  MPI_Request *request_recv;
  
  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  
  /* allocation of arrays */
  
  size_AtomVNA_Grid = 1;
  AtomVNA_Grid = (double**)malloc(sizeof(double*)*(Matomnum+1)); 
  AtomVNA_Grid[0] = (double*)malloc(sizeof(double)*1); 
  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
    Gc_AN = F_M2G[Mc_AN];
    AtomVNA_Grid[Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]);
    size_AtomVNA_Grid += GridN_Atom[Gc_AN];
  }
  
  size_AtomVNA_Snd_Grid_A2B = 0; 
  AtomVNA_Snd_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
  for (ID=0; ID<numprocs; ID++){
    AtomVNA_Snd_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Snd_Grid_A2B[ID]);
    size_AtomVNA_Snd_Grid_A2B += Num_Snd_Grid_A2B[ID];
  }  
  
  size_AtomVNA_Rcv_Grid_A2B = 0;   
  AtomVNA_Rcv_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
  for (ID=0; ID<numprocs; ID++){
    AtomVNA_Rcv_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Rcv_Grid_A2B[ID]);
    size_AtomVNA_Rcv_Grid_A2B += Num_Rcv_Grid_A2B[ID];   
  }

  /* PrintMemory */
  if (firsttime) {
    PrintMemory("Set_Vpot: AtomVNA_Grid",sizeof(double)*size_AtomVNA_Grid,NULL);
    PrintMemory("Set_Vpot: AtomVNA_Snd_Grid_A2B",sizeof(double)*size_AtomVNA_Snd_Grid_A2B,NULL);
    PrintMemory("Set_Vpot: AtomVNA_Rcv_Grid_A2B",sizeof(double)*size_AtomVNA_Rcv_Grid_A2B,NULL);
  }

  /* calculation of AtomVNA_Grid */

  for (MN=0; MN<My_NumGridC; MN++) VNA_Grid[MN] = 0.0;

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    dtime(&Stime_atom);

    Gc_AN = M2G[Mc_AN];    
    Cwan = WhatSpecies[Gc_AN];

#pragma omp parallel shared(AtomVNA_Grid,GridN_Atom,atv,Gxyz,Gc_AN,Cwan,Mc_AN,GridListAtom,CellListAtom) private(OMPID,Nthrds,Nprocs,Nc,GNc,GRc,Cxyz,dx,dy,dz,r)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      for (Nc=OMPID*GridN_Atom[Gc_AN]/Nthrds; Nc<(OMPID+1)*GridN_Atom[Gc_AN]/Nthrds; Nc++){

	GNc = GridListAtom[Mc_AN][Nc];
	GRc = CellListAtom[Mc_AN][Nc];

	Get_Grid_XYZ(GNc,Cxyz);
	dx = Cxyz[1] + atv[GRc][1] - Gxyz[Gc_AN][1];
	dy = Cxyz[2] + atv[GRc][2] - Gxyz[Gc_AN][2];
	dz = Cxyz[3] + atv[GRc][3] - Gxyz[Gc_AN][3];

	r = sqrt(dx*dx + dy*dy + dz*dz);
	AtomVNA_Grid[Mc_AN][Nc] = VNAF(Cwan,r);
      }

#pragma omp flush(AtomVNA_Grid)

    } /* #pragma omp parallel */

    dtime(&Etime_atom);
    time_per_atom[Gc_AN] += Etime_atom - Stime_atom;

  } /* Mc_AN */

  /******************************************************
    MPI communication from the partitions A to B 
  ******************************************************/
  
  /* copy AtomVNA_Grid to AtomVNA_Snd_Grid_A2B */

  for (ID=0; ID<numprocs; ID++) Num_Snd_Grid_A2B[ID] = 0;
  
  N2D = Ngrid1*Ngrid2;

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];

    for (AN=0; AN<GridN_Atom[Gc_AN]; AN++){

      GN = GridListAtom[Mc_AN][AN];
      GN2N(GN,N3);
      n2D = N3[1]*Ngrid2 + N3[2];
      ID = (int)(n2D*(unsigned long long int)numprocs/N2D);
      AtomVNA_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]] = AtomVNA_Grid[Mc_AN][AN];

      Num_Snd_Grid_A2B[ID]++;
    }
  }    

  /* MPI: A to B */  

  request_send = malloc(sizeof(MPI_Request)*NN_A2B_S);
  request_recv = malloc(sizeof(MPI_Request)*NN_A2B_R);
  stat_send = malloc(sizeof(MPI_Status)*NN_A2B_S);
  stat_recv = malloc(sizeof(MPI_Status)*NN_A2B_R);

  NN_S = 0;
  NN_R = 0;

  tag = 999;
  for (ID=1; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (Num_Snd_Grid_A2B[IDS]!=0){
      MPI_Isend(&AtomVNA_Snd_Grid_A2B[IDS][0], Num_Snd_Grid_A2B[IDS], MPI_DOUBLE,
		IDS, tag, mpi_comm_level1, &request_send[NN_S]);
      NN_S++;
    }

    if (Num_Rcv_Grid_A2B[IDR]!=0){
      MPI_Irecv( &AtomVNA_Rcv_Grid_A2B[IDR][0], Num_Rcv_Grid_A2B[IDR],
  	         MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
      NN_R++;
    }
  }

  if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
  if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);

  free(request_send);
  free(request_recv);
  free(stat_send);
  free(stat_recv);

  /* for myid */
  for (i=0; i<Num_Rcv_Grid_A2B[myid]; i++){
    AtomVNA_Rcv_Grid_A2B[myid][i] = AtomVNA_Snd_Grid_A2B[myid][i];
  }

  /******************************************************
           superposition of VNA in the partition B
  ******************************************************/

  /* initialize VNA_Grid_B */

  for (BN=0; BN<My_NumGridB_AB; BN++) VNA_Grid_B[BN] = 0.0;

  /* superposition of VNA */

  for (ID=0; ID<numprocs; ID++){
    for (LN=0; LN<Num_Rcv_Grid_A2B[ID]; LN++){

      BN = Index_Rcv_Grid_A2B[ID][3*LN+0];      
      VNA_Grid_B[BN] += AtomVNA_Rcv_Grid_A2B[ID][LN];

    } /* LN */ 
  } /* ID */  

  /******************************************************
           MPI: from the partitions B to C
  ******************************************************/

  Data_Grid_Copy_B2C_1( VNA_Grid_B, VNA_Grid );

  /* freeing of arrays */

  for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
    free(AtomVNA_Grid[Mc_AN]);
  }
  free(AtomVNA_Grid);

  for (ID=0; ID<numprocs; ID++){
    free(AtomVNA_Snd_Grid_A2B[ID]);
  }  
  free(AtomVNA_Snd_Grid_A2B);

  for (ID=0; ID<numprocs; ID++){
    free(AtomVNA_Rcv_Grid_A2B[ID]);
  }
  free(AtomVNA_Rcv_Grid_A2B);
}
int main(void){
	ListOfProcesses *LReady;
	ListOfProcesses *LBlock;
	ListOfProcesses *LFinished;
	Memory *Mmemory;
	FILE *pEntry;
	FILE *pOut;
	int i;
	int counter = 0;
	int retRunning;
	int TamReady = 0;

	tempoTotal = 0;

	pOut = fopen("OutputFIRST.txt", "w");
	if (pOut == NULL){
		printf("Erro ao tentar abrir o arquivo!\n");
		exit(1);
	}

	pEntry = fopen("teste.txt", "r");
	if (pEntry == NULL){
		printf("Erro ao tentar abrir o arquivo!\n");
		exit(1);
	}

	LReady = FileReader(pEntry, &TamReady);

	Mmemory = MemoryCreator();
	LBlock = BlockCreator(TamReady);
	LFinished = FinishedCreator(TamReady);
	Mmemory->LMemory->quantity = 0;

	for (i = 0; i < LReady->quantity; i++){
		PrintProcess(&(LReady->proc[i]), pOut);
	}

	while ((Mmemory->LMemory->quantity + LReady->quantity + LBlock->quantity) != 0){
		while ((CheckMemory(Mmemory) >= LReady->proc[0].Memory) && (LReady->proc[0].order != -1)){
			ReadyToMemory(LReady, Mmemory);
			OrganizeList(LReady);
			fprintf(pOut, "\n-----------------------Mapa de Alocação de Memória-----------------------\n");
			PrintMemory(Mmemory, pOut);
			fprintf(pOut, "-----------------------Fim Mapa de Alocação de Memória-----------------------\n");
		}
		counter++;
		retRunning = RunningProcess(Mmemory, LBlock, pOut);
		BlockToReady(LReady, LBlock);
		ProcessToEverywhere(Mmemory, LBlock, LReady, LFinished, retRunning);
		PrintMemory(Mmemory, pOut);
		fprintf(pOut, "\n-----------------------Processos na Fila de Prontos-----------------------\n");
		PrintListOfProcesses(LReady, pOut);
		fprintf(pOut, "\n-----------------------Processos em IO-----------------------\n");
		PrintListOfProcesses(LBlock, pOut);
		fprintf(pOut, "\n-----------------------Processos Já Terminados-----------------------\n");
		PrintListOfProcesses(LFinished, pOut);

		if ((Mmemory->LMemory->quantity == 0) && (LReady->quantity == 0) && (LBlock->quantity != 0)){
			for (i = 0; i < LBlock->quantity; i++){
				while (LBlock->proc[i].TimeIO[0] != 0){
					ExecutaBloqueados(LBlock);
					Mmemory->LMemory->quantity = 1;
				}
				LBlock->proc[i].order = -1;
				LBlock->quantity = LBlock->quantity - 1;
			}

		}

	}

	printf("Tempo Decorrido: %ds\n", tempoTotal);
	fprintf(pOut, "Tempo Total Decorrido: %d\n", tempoTotal);
	printf("Counter: %d\n", counter);
	fprintf(pOut, "Numero de iterações do while: %d\n", counter);

	free(LReady->proc);
	free(LBlock->proc);
	free(LFinished->proc);
	free(LReady);
	free(LBlock);
	free(LFinished);

	free(Mmemory->LMemory->proc);
	free(Mmemory->LMemory);
	free(Mmemory);

	fclose(pEntry);
	fclose(pOut);

	return 0;
}
Example #13
0
void CCommandProcessor::ParseCommand ( const char * const cmdBegin,
                                       const uint64_t currentTime )
{
  const char * const cmdEnd = SkipCharsNotInSet( cmdBegin, SPACE_AND_TAB );
  assert( cmdBegin != cmdEnd );

  const char * const paramBegin = SkipCharsInSet( cmdEnd, SPACE_AND_TAB );

  bool extraParamsFound = false;

  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_QUESTION_MARK, true, false, &extraParamsFound ) ||
       IsCmd( cmdBegin, cmdEnd, CMDNAME_HELP, false, false, &extraParamsFound ) )
  {
    PrintStr( "This console is similar to the Bus Pirate console." EOL );
    PrintStr( "Commands longer than 1 character are case insensitive." EOL );
    PrintStr( "WARNING: If a command takes too long to run, the watchdog may reset the board." EOL );
    PrintStr( "Commands are:" EOL );

    Printf( "  %s, %s: Show this help text." EOL, CMDNAME_QUESTION_MARK, CMDNAME_HELP );
    Printf( "  %s: Show version information." EOL, CMDNAME_I );
    Printf( "  %s: Test USB transfer speed." EOL, CMDNAME_USBSPEEDTEST );
    Printf( "  %s: Show JTAG pin status (read as inputs)." EOL, CMDNAME_JTAGPINS );
    Printf( "  %s: Test JTAG shift speed. WARNING: Do NOT connect any JTAG device." EOL, CMDNAME_JTAGSHIFTSPEEDTEST );
    Printf( "  %s: Exercises malloc()." EOL, CMDNAME_MALLOCTEST );
    Printf( "  %s: Exercises C++ exceptions." EOL, CMDNAME_CPP_EXCEPTION_TEST );
    Printf( "  %s: Shows memory usage." EOL, CMDNAME_MEMORY_USAGE );
    Printf( "  %s" EOL, CMDNAME_CPU_LOAD );
    Printf( "  %s" EOL, CMDNAME_UPTIME );
    Printf( "  %s" EOL, CMDNAME_RESET );
    Printf( "  %s" EOL, CMDNAME_RESET_CAUSE );
    Printf( "  %s <addr> <byte count>" EOL, CMDNAME_PRINT_MEMORY );
    Printf( "  %s <milliseconds>" EOL, CMDNAME_BUSY_WAIT );
    Printf( "  %s <command|protocol>" EOL, CMDNAME_SIMULATE_ERROR );

    return;
  }

  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_I, true, false, &extraParamsFound ) )
  {
    #ifndef NDEBUG
      const char buildType[] = "Debug build";
    #else
      const char buildType[] = "Release build";
    #endif

    Printf( "JtagDue %s" EOL, PACKAGE_VERSION );
    Printf( "%s, compiler version %s" EOL, buildType, __VERSION__ );
    Printf( "Watchdog %s" EOL, ENABLE_WDT ? "enabled" : "disabled" );

    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_RESET, false, false, &extraParamsFound ) )
  {
    // This message does not reach the other side, we would need to add some delay.
    //   UsbPrint( txBuffer, "Resetting the board..." EOL );
    __disable_irq();
    // Note that this message always goes to the serial port console,
    // even if the user is connected over USB. It might be possible to send
    // it over USB and then wait for the outgoing buffer to be empty.
    SerialSyncWriteStr( "Resetting the board..." EOL );
    SerialWaitForDataSent();
    ResetBoard( ENABLE_WDT );
    assert( false );  // We should never reach this point.
    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_CPU_LOAD, false, false, &extraParamsFound ) )
  {
    if ( ENABLE_CPU_SLEEP )
      PrintStr( "CPU load statistics not available." EOL );
    else
      DisplayCpuLoad();

    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_UPTIME, false, false, &extraParamsFound ) )
  {
    char buffer[ CONVERT_TO_DEC_BUF_SIZE ];
    Printf( "Uptime: %s seconds." EOL, convert_unsigned_to_dec_th( GetUptime() / 1000, buffer, ',' ) );
    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_RESET_CAUSE, false, false, &extraParamsFound ) )
  {
    DisplayResetCause();
    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_PRINT_MEMORY, false, true, &extraParamsFound ) )
  {
    PrintMemory( paramBegin );
    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_BUSY_WAIT, false, true, &extraParamsFound ) )
  {
    BusyWait( paramBegin );
    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_USBSPEEDTEST, false, true, &extraParamsFound ) )
  {
    ProcessUsbSpeedTestCmd( paramBegin, currentTime );
    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_JTAGPINS, false, false, &extraParamsFound ) )
  {
    PrintJtagPinStatus();
    return;
  }

  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_JTAGSHIFTSPEEDTEST, false, false, &extraParamsFound ) )
  {
    if ( !IsNativeUsbPort() )
      throw std::runtime_error( "This command is only available on the 'Native' USB port." );


    // Fill the Rx buffer with some test data.
    assert( m_rxBuffer != NULL );

    m_rxBuffer->Reset();
    for ( uint32_t i = 0; !m_rxBuffer->IsFull(); ++i )
    {
      m_rxBuffer->WriteElem( CUsbRxBuffer::ElemType( i ) );
    }


    // If the mode is set to MODE_HIZ, you cannot see the generated signal with the oscilloscope.
    // Note also that the built-in pull-ups on the Atmel ATSAM3X8 are too weak (between 50 and 100 KOhm,
    // yields too slow a rising time) to be of any use.

    const bool oldPullUps = GetJtagPullups();
    SetJtagPullups( false );

    const JtagPinModeEnum oldMode = GetJtagPinMode();
    SetJtagPinMode ( MODE_JTAG );


    // Each JTAG transfer needs 2 bits in the Rx buffer, TMS and TDI,
    // but produces only 1 bit, TDO.
    const uint32_t jtagByteCount = m_rxBuffer->GetElemCount() / 2;

    const uint16_t bitCount = jtagByteCount * 8;

    // Shift all JTAG data through several times.

    const uint64_t startTime = GetUptime();
    const uint32_t iterCount = 50;

    for ( uint32_t i = 0; i < iterCount; ++i )
    {
      // We hope that this will not clear the buffer contents.
      assert( m_rxBuffer != NULL );
      assert( m_txBuffer != NULL );

      m_rxBuffer->Reset();
      m_rxBuffer->CommitWrittenElements( jtagByteCount * 2 );

      m_txBuffer->Reset();

      ShiftJtagData( m_rxBuffer,
                     m_txBuffer,
                     bitCount );

      assert( m_txBuffer->GetElemCount() == jtagByteCount );
    }

    const uint64_t finishTime = GetUptime();
    const uint32_t elapsedTime = uint32_t( finishTime - startTime );

    m_rxBuffer->Reset();
    m_txBuffer->Reset();
    const unsigned kBitsPerSec = unsigned( uint64_t(bitCount) * iterCount * 1000 / elapsedTime / 1024 );

    SetJtagPinMode( oldMode );
    SetJtagPullups( oldPullUps );

    // I am getting 221 KiB/s with GCC 4.7.3 and optimisation level "-O3".
    Printf( EOL "Finished JTAG shift speed test, throughput %u Kbits/s (%u KiB/s)." EOL,
               kBitsPerSec, kBitsPerSec / 8 );

    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_MALLOCTEST, false, false, &extraParamsFound ) )
  {
    PrintStr( "Allocalling memory..." EOL );

    volatile uint32_t * const volatile mallocTest = (volatile uint32_t *) malloc(123);
    *mallocTest = 123;

    PrintStr( "Releasing memory..." EOL );

    free( const_cast< uint32_t * >( mallocTest ) );

    PrintStr( "Test finished." EOL );

    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_CPP_EXCEPTION_TEST, false, false, &extraParamsFound ) )
  {
    try
    {
      PrintStr( "Throwing integer exception..." EOL );
      throw 123;
      PrintStr( "Throw did not work." EOL );
      assert( false );
    }
    catch ( ... )
    {
      PrintStr( "Caught integer exception." EOL );
    }
    PrintStr( "Test finished." EOL );

    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_SIMULATE_ERROR, false, true, &extraParamsFound ) )
  {
    SimulateError( paramBegin );
    return;
  }


  if ( IsCmd( cmdBegin, cmdEnd, CMDNAME_MEMORY_USAGE, false, false, &extraParamsFound ) )
  {
    const unsigned heapSize = unsigned( GetHeapEndAddr() - uintptr_t( &_end ) );

    Printf( "Partitions: malloc heap: %u bytes, free: %u bytes, stack: %u bytes." EOL,
               heapSize,
               GetStackStartAddr() - GetHeapEndAddr(),
               STACK_SIZE );

    Printf( "Used stack (estimated): %u from %u bytes." EOL,
               unsigned( GetStackSizeUsageEstimate() ),
               STACK_SIZE );

    const struct mallinfo mi = mallinfo();
    const unsigned heapSizeAccordingToNewlib = unsigned( mi.arena );

    Printf( "Heap: %u allocated from %u bytes." EOL,
               unsigned( mi.uordblks ),
               unsigned( mi.arena ) );

    assert( heapSize == heapSizeAccordingToNewlib );
    UNUSED_IN_RELEASE( heapSizeAccordingToNewlib );

    return;
  }

  if ( extraParamsFound )
    Printf( "Command \"%.*s\" does not take any parameters." EOL, cmdEnd - cmdBegin, cmdBegin );
  else
    Printf( "Unknown command \"%.*s\"." EOL, cmdEnd - cmdBegin, cmdBegin );
}
Example #14
0
void Data_Grid_Copy_B2C_1(double *data_B, double *data_C)
{
  static int firsttime=1;
  int CN,BN,LN,spin,i,gp,NN_S,NN_R;
  double *Work_Array_Snd_Grid_B2C;
  double *Work_Array_Rcv_Grid_B2C;
  int numprocs,myid,tag=999,ID,IDS,IDR;
  MPI_Status stat;
  MPI_Request request;
  MPI_Status *stat_send;
  MPI_Status *stat_recv;
  MPI_Request *request_send;
  MPI_Request *request_recv;

  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  /* allocation of arrays */
  
  Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]); 
  Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]); 

  if (firsttime==1){
    PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Snd_Grid_B2C",
		sizeof(double)*GP_B2C_S[NN_B2C_S], NULL);
    PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Rcv_Grid_B2C",
		sizeof(double)*GP_B2C_R[NN_B2C_R], NULL);
    firsttime = 0;
  }

  /******************************************************
             MPI: from the partitions B to C
  ******************************************************/

  request_send = malloc(sizeof(MPI_Request)*NN_B2C_S);
  request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R);
  stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S);
  stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R);

  NN_S = 0;
  NN_R = 0;

  /* MPI_Irecv */

  for (ID=0; ID<NN_B2C_R; ID++){

    IDR = ID_NN_B2C_R[ID];
    gp = GP_B2C_R[ID];

    if (IDR!=myid){ 
      MPI_Irecv( &Work_Array_Rcv_Grid_B2C[gp], Num_Rcv_Grid_B2C[IDR],
                 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
      NN_R++;
    }
  }
 
  /* MPI_Isend */

  for (ID=0; ID<NN_B2C_S; ID++){

    IDS = ID_NN_B2C_S[ID];
    gp = GP_B2C_S[ID];

    /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */

    for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){
      BN = Index_Snd_Grid_B2C[IDS][LN];
      Work_Array_Snd_Grid_B2C[gp+LN] = data_B[BN];
    } 

    if (IDS!=myid){
      MPI_Isend( &Work_Array_Snd_Grid_B2C[gp], Num_Snd_Grid_B2C[IDS], 
		 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
      NN_S++;
    }
  }

  /* MPI_Waitall */

  if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
  if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);

  free(request_send);
  free(request_recv);
  free(stat_send);
  free(stat_recv);

  /* copy Work_Array_Rcv_Grid_B2C to data_C */

  for (ID=0; ID<NN_B2C_R; ID++){

    IDR = ID_NN_B2C_R[ID];

    if (IDR==myid){
      gp = GP_B2C_S[ID];
      for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
	CN = Index_Rcv_Grid_B2C[IDR][LN];
	data_C[CN] = Work_Array_Snd_Grid_B2C[gp+LN];
      } 
    }
    else{

      gp = GP_B2C_R[ID];
      for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
	CN = Index_Rcv_Grid_B2C[IDR][LN];
	data_C[CN] = Work_Array_Rcv_Grid_B2C[gp+LN];
      }
    }
  }

  /* freeing of arrays */
  free(Work_Array_Snd_Grid_B2C);
  free(Work_Array_Rcv_Grid_B2C);
}
Example #15
0
void Data_Grid_Copy_B2C_2(double **data_B, double **data_C)
{
  static int firsttime=1;
  int CN,BN,LN,spin,i,gp,NN_S,NN_R;
  double *Work_Array_Snd_Grid_B2C;
  double *Work_Array_Rcv_Grid_B2C;
  int numprocs,myid,tag=999,ID,IDS,IDR;
  MPI_Status stat;
  MPI_Request request;
  MPI_Status *stat_send;
  MPI_Status *stat_recv;
  MPI_Request *request_send;
  MPI_Request *request_recv;

  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  /* allocation of arrays */
  
  Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1)); 
  Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1)); 

  if (firsttime==1){
    PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Snd_Grid_B2C",
		sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1), NULL);
    PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Rcv_Grid_B2C",
		sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1), NULL);
    firsttime = 0;
  }

  /******************************************************
             MPI: from the partitions B to C
  ******************************************************/

  request_send = malloc(sizeof(MPI_Request)*NN_B2C_S);
  request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R);
  stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S);
  stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R);

  NN_S = 0;
  NN_R = 0;

  /* MPI_Irecv */

  for (ID=0; ID<NN_B2C_R; ID++){

    IDR = ID_NN_B2C_R[ID];
    gp = GP_B2C_R[ID];

    if (IDR!=myid){ 
      MPI_Irecv( &Work_Array_Rcv_Grid_B2C[(SpinP_switch+1)*gp], Num_Rcv_Grid_B2C[IDR]*(SpinP_switch+1),
                 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
      NN_R++;
    }

  }

  /* MPI_Isend */

  for (ID=0; ID<NN_B2C_S; ID++){

    IDS = ID_NN_B2C_S[ID];
    gp = GP_B2C_S[ID];

    /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */

    for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){
      BN = Index_Snd_Grid_B2C[IDS][LN];

      if (SpinP_switch==0){
        Work_Array_Snd_Grid_B2C[gp+LN]       = data_B[0][BN];
      }
      else if (SpinP_switch==1){
        Work_Array_Snd_Grid_B2C[2*gp+2*LN+0] = data_B[0][BN];
        Work_Array_Snd_Grid_B2C[2*gp+2*LN+1] = data_B[1][BN];
      }
      else if (SpinP_switch==3){
        Work_Array_Snd_Grid_B2C[4*gp+4*LN+0] = data_B[0][BN];
        Work_Array_Snd_Grid_B2C[4*gp+4*LN+1] = data_B[1][BN];
        Work_Array_Snd_Grid_B2C[4*gp+4*LN+2] = data_B[2][BN];
        Work_Array_Snd_Grid_B2C[4*gp+4*LN+3] = data_B[3][BN];
      }
    } /* LN */        

    if (IDS!=myid){
      MPI_Isend( &Work_Array_Snd_Grid_B2C[(SpinP_switch+1)*gp], Num_Snd_Grid_B2C[IDS]*(SpinP_switch+1), 
		 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
      NN_S++;
    }
  }

  /* MPI_Waitall */

  if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
  if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);

  free(request_send);
  free(request_recv);
  free(stat_send);
  free(stat_recv);

  /* copy Work_Array_Rcv_Grid_B2C to data_C */

  for (ID=0; ID<NN_B2C_R; ID++){

    IDR = ID_NN_B2C_R[ID];

    if (IDR==myid){

      gp = GP_B2C_S[ID];

      for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){

	CN = Index_Rcv_Grid_B2C[IDR][LN];

	if (SpinP_switch==0){
	  data_C[0][CN] = Work_Array_Snd_Grid_B2C[gp+LN];
	}     
	else if (SpinP_switch==1){
	  data_C[0][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+0];
	  data_C[1][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+1];
	}     
	else if (SpinP_switch==3){
	  data_C[0][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+0];
	  data_C[1][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+1];
	  data_C[2][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+2];
	  data_C[3][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+3];
	}
      } /* LN */   

    }
    else {

      gp = GP_B2C_R[ID];

      for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
	CN = Index_Rcv_Grid_B2C[IDR][LN];

	if (SpinP_switch==0){
	  data_C[0][CN] = Work_Array_Rcv_Grid_B2C[gp+LN];
	}
	else if (SpinP_switch==1){
	  data_C[0][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+0];
	  data_C[1][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+1];
	}     
	else if (SpinP_switch==3){
	  data_C[0][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+0];
	  data_C[1][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+1];
	  data_C[2][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+2];
	  data_C[3][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+3];
	}
      }
    }
  }

  /* if (SpinP_switch==0), 
     copy data_B[0] to data_B[1]
     copy data_C[0] to data_C[1]
  */

  if (SpinP_switch==0){
    for (BN=0; BN<My_NumGridB_AB; BN++){
      data_B[1][BN] = data_B[0][BN]; 
    }

    for (CN=0; CN<My_NumGridC; CN++){
      data_C[1][CN] = data_C[0][CN]; 
    }
  }

  /* freeing of arrays */
  free(Work_Array_Snd_Grid_B2C);
  free(Work_Array_Rcv_Grid_B2C);
}
Example #16
0
double Set_Density_Grid(int Cnt_kind, int Calc_CntOrbital_ON, double *****CDM)
{
  static int firsttime=1;
  int al,L0,Mul0,M0,p,size1,size2;
  int Gc_AN,Mc_AN,Mh_AN,LN,AN,BN,CN;
  int n1,n2,n3,k1,k2,k3,N3[4];
  int Cwan,NO0,NO1,Rn,N,Hwan,i,j,k,n;
  int NN_S,NN_R;
  unsigned long long int N2D,n2D,GN; 
  int Max_Size,My_Max;
  int size_Tmp_Den_Grid;
  int size_Den_Snd_Grid_A2B;
  int size_Den_Rcv_Grid_A2B;
  int h_AN,Gh_AN,Rnh,spin,Nc,GRc,Nh,Nog;
  int Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3;

  double threshold;
  double tmp0,tmp1,sk1,sk2,sk3,tot_den,sum;
  double tmp0_0,tmp0_1,tmp0_2,tmp0_3;
  double sum_0,sum_1,sum_2,sum_3;
  double d1,d2,d3,cop,sip,sit,cot;
  double x,y,z,Cxyz[4];
  double TStime,TEtime;
  double ***Tmp_Den_Grid;
  double **Den_Snd_Grid_A2B;
  double **Den_Rcv_Grid_A2B;
  double *tmp_array;
  double *tmp_array2;
  double *orbs0,*orbs1;
  double *orbs0_0,*orbs0_1,*orbs0_2,*orbs0_3;
  double *orbs1_0,*orbs1_1,*orbs1_2,*orbs1_3;
  double ***tmp_CDM;
  int *Snd_Size,*Rcv_Size;
  int numprocs,myid,tag=999,ID,IDS,IDR;
  double Stime_atom, Etime_atom;
  double time0,time1,time2;

  MPI_Status stat;
  MPI_Request request;
  MPI_Status *stat_send;
  MPI_Status *stat_recv;
  MPI_Request *request_send;
  MPI_Request *request_recv;

  /* for OpenMP */
  int OMPID,Nthrds;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  
  dtime(&TStime);

  /* allocation of arrays */

  size_Tmp_Den_Grid = 0;
  Tmp_Den_Grid = (double***)malloc(sizeof(double**)*(SpinP_switch+1)); 
  for (i=0; i<(SpinP_switch+1); i++){
    Tmp_Den_Grid[i] = (double**)malloc(sizeof(double*)*(Matomnum+1)); 
    Tmp_Den_Grid[i][0] = (double*)malloc(sizeof(double)*1); 
    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = F_M2G[Mc_AN];
      Tmp_Den_Grid[i][Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]);
	  
      /* AITUNE */
      for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
	Tmp_Den_Grid[i][Mc_AN][Nc] = 0.0;
      }
      
      size_Tmp_Den_Grid += GridN_Atom[Gc_AN];
    }
  }

  size_Den_Snd_Grid_A2B = 0; 
  Den_Snd_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
  for (ID=0; ID<numprocs; ID++){
    Den_Snd_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Snd_Grid_A2B[ID]*(SpinP_switch+1));
    size_Den_Snd_Grid_A2B += Num_Snd_Grid_A2B[ID]*(SpinP_switch+1);
  }  

  size_Den_Rcv_Grid_A2B = 0;   
  Den_Rcv_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
  for (ID=0; ID<numprocs; ID++){
    Den_Rcv_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1));
    size_Den_Rcv_Grid_A2B += Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1);   
  }

  /* PrintMemory */

  if (firsttime==1){
    PrintMemory("Set_Density_Grid: AtomDen_Grid",    sizeof(double)*size_Tmp_Den_Grid, NULL);
    PrintMemory("Set_Density_Grid: Den_Snd_Grid_A2B",sizeof(double)*size_Den_Snd_Grid_A2B, NULL);
    PrintMemory("Set_Density_Grid: Den_Rcv_Grid_A2B",sizeof(double)*size_Den_Rcv_Grid_A2B, NULL);
    firsttime = 0;
  }

  /****************************************************
                when orbital optimization
  ****************************************************/

  if (Calc_CntOrbital_ON==1 && Cnt_kind==0 && Cnt_switch==1){
      
    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
       
      dtime(&Stime_atom);
      
      /* COrbs_Grid */
 
      Gc_AN = M2G[Mc_AN];
      Cwan = WhatSpecies[Gc_AN];
      NO0 = Spe_Total_CNO[Cwan]; 
      for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){

        al = -1;
	for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
	  for (Mul0=0; Mul0<Spe_Num_CBasis[Cwan][L0]; Mul0++){
	    for (M0=0; M0<=2*L0; M0++){

	      al++;
	      tmp0 = 0.0;

	      for (p=0; p<Spe_Specified_Num[Cwan][al]; p++){
	        j = Spe_Trans_Orbital[Cwan][al][p];  
	        tmp0 += CntCoes[Mc_AN][al][p]*Orbs_Grid[Mc_AN][Nc][j];/* AITUNE */
	      }

	      COrbs_Grid[Mc_AN][al][Nc] = (Type_Orbs_Grid)tmp0;
	    }
	  }
        }
      }

      dtime(&Etime_atom);
      time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
    }

    /**********************************************
     MPI:

     COrbs_Grid    
    ***********************************************/

    /* allocation of arrays  */
    Snd_Size = (int*)malloc(sizeof(int)*numprocs); 
    Rcv_Size = (int*)malloc(sizeof(int)*numprocs); 

    /* find data size for sending and receiving */

    My_Max = -10000;
    for (ID=0; ID<numprocs; ID++){

      IDS = (myid + ID) % numprocs;
      IDR = (myid - ID + numprocs) % numprocs;

      if (ID!=0){
        /*  sending size */
        if (F_Snd_Num[IDS]!=0){
          /* find data size */ 
          size1 = 0; 
          for (n=0; n<F_Snd_Num[IDS]; n++){
            Gc_AN = Snd_GAN[IDS][n];
            Cwan = WhatSpecies[Gc_AN];
            size1 += GridN_Atom[Gc_AN]*Spe_Total_CNO[Cwan];
          }

          Snd_Size[IDS] = size1;
          MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
        }
        else{
          Snd_Size[IDS] = 0;
        }

        /*  receiving size */
        if (F_Rcv_Num[IDR]!=0){
          MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
          Rcv_Size[IDR] = size2;
        }
        else{
          Rcv_Size[IDR] = 0;
        }
        if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
      } 
      else{
        Snd_Size[IDS] = 0;
        Rcv_Size[IDR] = 0;
      }

      if (My_Max<Snd_Size[IDS]) My_Max = Snd_Size[IDS];
      if (My_Max<Rcv_Size[IDR]) My_Max = Rcv_Size[IDR];

    }  

    MPI_Allreduce(&My_Max, &Max_Size, 1, MPI_INT, MPI_MAX, mpi_comm_level1);
    /* allocation of arrays */ 
    tmp_array  = (double*)malloc(sizeof(double)*Max_Size);
    tmp_array2 = (double*)malloc(sizeof(double)*Max_Size);

    /* send and receive COrbs_Grid */

    for (ID=0; ID<numprocs; ID++){

      IDS = (myid + ID) % numprocs;
      IDR = (myid - ID + numprocs) % numprocs;

      if (ID!=0){

        /* sending of data */ 

        if (F_Snd_Num[IDS]!=0){

          /* find data size */
          size1 = Snd_Size[IDS];

          /* multidimentional array to vector array */
          k = 0; 
          for (n=0; n<F_Snd_Num[IDS]; n++){
            Mc_AN = Snd_MAN[IDS][n];
            Gc_AN = Snd_GAN[IDS][n];
            Cwan = WhatSpecies[Gc_AN];
            NO0 = Spe_Total_CNO[Cwan]; 
            for (i=0; i<NO0; i++){
              for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
                tmp_array[k] = COrbs_Grid[Mc_AN][i][Nc];
                k++;
              }          
            }
          } 

          /* MPI_Isend */
          MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS,
                    tag, mpi_comm_level1, &request);
        }

        /* receiving of block data */

        if (F_Rcv_Num[IDR]!=0){

          /* find data size */
          size2 = Rcv_Size[IDR]; 

          /* MPI_Recv */
          MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);

          k = 0;
          Mc_AN = F_TopMAN[IDR] - 1;
          for (n=0; n<F_Rcv_Num[IDR]; n++){
            Mc_AN++;
            Gc_AN = Rcv_GAN[IDR][n];
            Cwan = WhatSpecies[Gc_AN];
            NO0 = Spe_Total_CNO[Cwan]; 

            for (i=0; i<NO0; i++){
              for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
                COrbs_Grid[Mc_AN][i][Nc] = tmp_array2[k];
                k++;
              }          
            }
          }
        }
        if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
      } 
    }  

    /* freeing of arrays  */
    free(tmp_array);
    free(tmp_array2);
    free(Snd_Size);
    free(Rcv_Size);
  }

  /**********************************************
              calculate Tmp_Den_Grid
  ***********************************************/
    
  dtime(&time1);
  
  
  /* AITUNE ========================== */ 
  int OneD_Nloop = 0;
  int ai_MaxNc = 0;
  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
    int Gc_AN = M2G[Mc_AN];    
    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
      OneD_Nloop++;
      if(ai_MaxNc < GridN_Atom[Gc_AN]) {ai_MaxNc = GridN_Atom[Gc_AN];}
    }
  }  
  /* ai_MaxNc is maximum of GridN_Atom[] */

  int gNthrds;
#pragma omp parallel
  {
    gNthrds = omp_get_num_threads();
  }

  double*** ai_tmpDG_all = (double***)malloc(sizeof(double**)*gNthrds);
	
  /* ========================== AITUNE */ 

#pragma omp parallel shared(myid,G2ID,Orbs_Grid_FNAN,List_YOUSO,time_per_atom,Tmp_Den_Grid,Orbs_Grid,COrbs_Grid,Cnt_switch,Cnt_kind,GListTAtoms2,GListTAtoms1,NumOLG,CDM,SpinP_switch,WhatSpecies,ncn,F_G2M,natn,Spe_Total_CNO,M2G) private(OMPID,Nthrds,Mc_AN,h_AN,Stime_atom,Etime_atom,Gc_AN,Cwan,NO0,Gh_AN,Mh_AN,Rnh,Hwan,NO1,spin,i,j,tmp_CDM,Nog,Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3,orbs0_0,orbs0_1,orbs0_2,orbs0_3,orbs1_0,orbs1_1,orbs1_2,orbs1_3,sum_0,sum_1,sum_2,sum_3,tmp0_0,tmp0_1,tmp0_2,tmp0_3,Nc,Nh,orbs0,orbs1,sum,tmp0)
  {

    orbs0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);

    orbs0_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs0_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs0_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs0_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs1_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs1_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs1_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
    orbs1_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]);

    tmp_CDM = (double***)malloc(sizeof(double**)*(SpinP_switch+1)); 
    for (i=0; i<(SpinP_switch+1); i++){
      tmp_CDM[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]); 
      for (j=0; j<List_YOUSO[7]; j++){
	tmp_CDM[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]); 
      }
    }

    /* get info. on OpenMP */ 

    OMPID = omp_get_thread_num();
    Nthrds = omp_get_num_threads();

	
    /* AITUNE ========================== */  


    double *ai_tmpDGs[4];
    {
      int spin;
      for (spin=0; spin<=SpinP_switch; spin++){
	ai_tmpDGs[spin] = (double*)malloc(sizeof(double)* ai_MaxNc);
      }
    }
    ai_tmpDG_all[OMPID] = ai_tmpDGs;
    /* ==================================== AITUNE */


    /* for (Mc_AN=(OMPID+1); Mc_AN<=Matomnum; Mc_AN+=Nthrds){ AITUNE */
    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

      dtime(&Stime_atom);

      /* set data on Mc_AN */

      Gc_AN = M2G[Mc_AN];
      Cwan = WhatSpecies[Gc_AN];
      NO0 = Spe_Total_CNO[Cwan]; 
	  
      int spin;
      for (spin=0; spin<=SpinP_switch; spin++){
	int Nc;
	for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
	  ai_tmpDGs[spin][Nc] = 0.0;
	}
      }

      for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){

	/* set data on h_AN */
    
	Gh_AN = natn[Gc_AN][h_AN];
	Mh_AN = F_G2M[Gh_AN];
	Rnh = ncn[Gc_AN][h_AN];
	Hwan = WhatSpecies[Gh_AN];
	NO1 = Spe_Total_CNO[Hwan];

	/* store CDM into tmp_CDM */

	for (spin=0; spin<=SpinP_switch; spin++){
	  for (i=0; i<NO0; i++){
	    for (j=0; j<NO1; j++){
	      tmp_CDM[spin][i][j] = CDM[spin][Mc_AN][h_AN][i][j];
	    }
	  }
	}

	/* summation of non-zero elements */
	/* for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]; Nog++){ */
#pragma omp for
	for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]-3; Nog+=4){

	  Nc_0 = GListTAtoms1[Mc_AN][h_AN][Nog];
	  Nc_1 = GListTAtoms1[Mc_AN][h_AN][Nog+1];
	  Nc_2 = GListTAtoms1[Mc_AN][h_AN][Nog+2];
	  Nc_3 = GListTAtoms1[Mc_AN][h_AN][Nog+3];
	  
	  Nh_0 = GListTAtoms2[Mc_AN][h_AN][Nog];
	  Nh_1 = GListTAtoms2[Mc_AN][h_AN][Nog+1];
	  Nh_2 = GListTAtoms2[Mc_AN][h_AN][Nog+2];
	  Nh_3 = GListTAtoms2[Mc_AN][h_AN][Nog+3];
	  
	  /* Now under the orbital optimization */
	  if (Cnt_kind==0 && Cnt_switch==1){
	    for (i=0; i<NO0; i++){
	      orbs0_0[i] = COrbs_Grid[Mc_AN][i][Nc_0];
	      orbs0_1[i] = COrbs_Grid[Mc_AN][i][Nc_1];
	      orbs0_2[i] = COrbs_Grid[Mc_AN][i][Nc_2];
	      orbs0_3[i] = COrbs_Grid[Mc_AN][i][Nc_3];
	    }
	    for (j=0; j<NO1; j++){
	      orbs1_0[j] = COrbs_Grid[Mh_AN][j][Nh_0];
	      orbs1_1[j] = COrbs_Grid[Mh_AN][j][Nh_1];
	      orbs1_2[j] = COrbs_Grid[Mh_AN][j][Nh_2];
	      orbs1_3[j] = COrbs_Grid[Mh_AN][j][Nh_3];
	    }
	  }
	  /* else if ! "now under the orbital optimization" */
	  else{
	    for (i=0; i<NO0; i++){
	      orbs0_0[i] = Orbs_Grid[Mc_AN][Nc_0][i];
	      orbs0_1[i] = Orbs_Grid[Mc_AN][Nc_1][i];
	      orbs0_2[i] = Orbs_Grid[Mc_AN][Nc_2][i];
	      orbs0_3[i] = Orbs_Grid[Mc_AN][Nc_3][i]; 
	    }

            if (G2ID[Gh_AN]==myid){
	      for (j=0; j<NO1; j++){
		orbs1_0[j] = Orbs_Grid[Mh_AN][Nh_0][j];
		orbs1_1[j] = Orbs_Grid[Mh_AN][Nh_1][j];
		orbs1_2[j] = Orbs_Grid[Mh_AN][Nh_2][j];
		orbs1_3[j] = Orbs_Grid[Mh_AN][Nh_3][j]; 
	      }
	    }
            else{
	      for (j=0; j<NO1; j++){
		orbs1_0[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog  ][j];
		orbs1_1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+1][j];
		orbs1_2[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+2][j];
		orbs1_3[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+3][j]; 
	      }
	    }
	  }
	  
	  for (spin=0; spin<=SpinP_switch; spin++){

	    /* Tmp_Den_Grid */

	    sum_0 = 0.0;
	    sum_1 = 0.0;
	    sum_2 = 0.0;
	    sum_3 = 0.0;

	    for (i=0; i<NO0; i++){

	      tmp0_0 = 0.0;
	      tmp0_1 = 0.0;
	      tmp0_2 = 0.0;
	      tmp0_3 = 0.0;

	      for (j=0; j<NO1; j++){
		tmp0_0 += orbs1_0[j]*tmp_CDM[spin][i][j];
		tmp0_1 += orbs1_1[j]*tmp_CDM[spin][i][j];
		tmp0_2 += orbs1_2[j]*tmp_CDM[spin][i][j];
		tmp0_3 += orbs1_3[j]*tmp_CDM[spin][i][j];
	      }

	      sum_0 += orbs0_0[i]*tmp0_0;
	      sum_1 += orbs0_1[i]*tmp0_1;
	      sum_2 += orbs0_2[i]*tmp0_2;
	      sum_3 += orbs0_3[i]*tmp0_3;
	    }
		
	    ai_tmpDGs[spin][Nc_0] += sum_0;
	    ai_tmpDGs[spin][Nc_1] += sum_1;
	    ai_tmpDGs[spin][Nc_2] += sum_2;
	    ai_tmpDGs[spin][Nc_3] += sum_3;

	    /*
	      Tmp_Den_Grid[spin][Mc_AN][Nc_0] += sum_0;
	      Tmp_Den_Grid[spin][Mc_AN][Nc_1] += sum_1;
	      Tmp_Den_Grid[spin][Mc_AN][Nc_2] += sum_2;
	      Tmp_Den_Grid[spin][Mc_AN][Nc_3] += sum_3;
	    */

	  } /* spin */
	} /* Nog */

#pragma omp for
	for (Nog = NumOLG[Mc_AN][h_AN] - (NumOLG[Mc_AN][h_AN] % 4); Nog<NumOLG[Mc_AN][h_AN]; Nog++){
	  /*for (; Nog<NumOLG[Mc_AN][h_AN]; Nog++){*/
	
	  Nc = GListTAtoms1[Mc_AN][h_AN][Nog];
	  Nh = GListTAtoms2[Mc_AN][h_AN][Nog]; 
 

	  if (Cnt_kind==0 && Cnt_switch==1){
	    for (i=0; i<NO0; i++){
	      orbs0[i] = COrbs_Grid[Mc_AN][i][Nc];
	    }
	    for (j=0; j<NO1; j++){
	      orbs1[j] = COrbs_Grid[Mh_AN][j][Nh];
	    }
	  }
	  else{
	    for (i=0; i<NO0; i++){
	      orbs0[i] = Orbs_Grid[Mc_AN][Nc][i];
	    }

	    if (G2ID[Gh_AN]==myid){
	      for (j=0; j<NO1; j++){
		orbs1[j] = Orbs_Grid[Mh_AN][Nh][j];
	      }
	    }
	    else{
	      for (j=0; j<NO1; j++){
		orbs1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog][j];
	      }
	    }
	  }

	  for (spin=0; spin<=SpinP_switch; spin++){
 
 
	    sum = 0.0;
	    for (i=0; i<NO0; i++){
	      tmp0 = 0.0;
	      for (j=0; j<NO1; j++){
		tmp0 += orbs1[j]*tmp_CDM[spin][i][j];
	      }
	      sum += orbs0[i]*tmp0;
	    }
 
	    ai_tmpDGs[spin][Nc] += sum;
	    /*Tmp_Den_Grid[spin][Mc_AN][Nc] += sum;*/
	  }

	} /* Nog */
	
      } /* h_AN */

      /* AITUNE   merge temporary buffer for all omp threads */	
      for (spin=0; spin<=SpinP_switch; spin++){
	int Nc;
#pragma omp for
	for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
	  double sum = 0.0;
	  int th;
	  for(th = 0; th < Nthrds; th++){
	    sum += ai_tmpDG_all[th][spin][Nc];
	  }
	  Tmp_Den_Grid[spin][Mc_AN][Nc] += sum;
	}
      }
		

      dtime(&Etime_atom);
      time_per_atom[Gc_AN] += Etime_atom - Stime_atom;

    } /* Mc_AN */

    /* freeing of arrays */ 

    free(orbs0);
    free(orbs1);

    free(orbs0_0);
    free(orbs0_1);
    free(orbs0_2);
    free(orbs0_3);
    free(orbs1_0);
    free(orbs1_1);
    free(orbs1_2);
    free(orbs1_3);

    for (i=0; i<(SpinP_switch+1); i++){
      for (j=0; j<List_YOUSO[7]; j++){
	free(tmp_CDM[i][j]);
      }
      free(tmp_CDM[i]);
	
      free(ai_tmpDGs[i]); /* AITUNE */
	
    }
    free(tmp_CDM);

#pragma omp flush(Tmp_Den_Grid)

  } /* #pragma omp parallel */
  
  free(ai_tmpDG_all);

  dtime(&time2);
  if(myid==0 && measure_time){
    printf("Time for Part1=%18.5f\n",(time2-time1));fflush(stdout);
  }

  /******************************************************
      MPI communication from the partitions A to B 
  ******************************************************/
  
  /* copy Tmp_Den_Grid to Den_Snd_Grid_A2B */

  for (ID=0; ID<numprocs; ID++) Num_Snd_Grid_A2B[ID] = 0;
  
  N2D = Ngrid1*Ngrid2;

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];

    for (AN=0; AN<GridN_Atom[Gc_AN]; AN++){

      GN = GridListAtom[Mc_AN][AN];
      GN2N(GN,N3);
      n2D = N3[1]*Ngrid2 + N3[2];
      ID = (int)(n2D*(unsigned long long int)numprocs/N2D);

      if (SpinP_switch==0){
        Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]] = Tmp_Den_Grid[0][Mc_AN][AN];
      }
      else if (SpinP_switch==1){
        Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+0] = Tmp_Den_Grid[0][Mc_AN][AN];
        Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+1] = Tmp_Den_Grid[1][Mc_AN][AN];
      }
      else if (SpinP_switch==3){
        Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+0] = Tmp_Den_Grid[0][Mc_AN][AN];
        Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+1] = Tmp_Den_Grid[1][Mc_AN][AN];
        Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+2] = Tmp_Den_Grid[2][Mc_AN][AN];
        Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+3] = Tmp_Den_Grid[3][Mc_AN][AN];
      }

      Num_Snd_Grid_A2B[ID]++;
    }
  }    

  /* MPI: A to B */  

  request_send = malloc(sizeof(MPI_Request)*NN_A2B_S);
  request_recv = malloc(sizeof(MPI_Request)*NN_A2B_R);
  stat_send = malloc(sizeof(MPI_Status)*NN_A2B_S);
  stat_recv = malloc(sizeof(MPI_Status)*NN_A2B_R);

  NN_S = 0;
  NN_R = 0;

  tag = 999;
  for (ID=1; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    if (Num_Snd_Grid_A2B[IDS]!=0){
      MPI_Isend( &Den_Snd_Grid_A2B[IDS][0], Num_Snd_Grid_A2B[IDS]*(SpinP_switch+1), 
	         MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
      NN_S++;
    }

    if (Num_Rcv_Grid_A2B[IDR]!=0){
      MPI_Irecv( &Den_Rcv_Grid_A2B[IDR][0], Num_Rcv_Grid_A2B[IDR]*(SpinP_switch+1), 
  	         MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
      NN_R++;
    }
  }

  if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
  if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);

  free(request_send);
  free(request_recv);
  free(stat_send);
  free(stat_recv);

  /* for myid */
  for (i=0; i<Num_Rcv_Grid_A2B[myid]*(SpinP_switch+1); i++){
    Den_Rcv_Grid_A2B[myid][i] = Den_Snd_Grid_A2B[myid][i];
  }

  /******************************************************
   superposition of rho_i to calculate charge density 
   in the partition B.
  ******************************************************/

  /* initialize arrays */

  for (spin=0; spin<(SpinP_switch+1); spin++){
    for (BN=0; BN<My_NumGridB_AB; BN++){
      Density_Grid_B[spin][BN] = 0.0;
    }
  }

  /* superposition of densities rho_i */

  for (ID=0; ID<numprocs; ID++){

    for (LN=0; LN<Num_Rcv_Grid_A2B[ID]; LN++){

      BN    = Index_Rcv_Grid_A2B[ID][3*LN+0];      
      Gc_AN = Index_Rcv_Grid_A2B[ID][3*LN+1];        
      GRc   = Index_Rcv_Grid_A2B[ID][3*LN+2]; 

      if (Solver!=4 || (Solver==4 && atv_ijk[GRc][1]==0 )){

	/* spin collinear non-polarization */
	if ( SpinP_switch==0 ){
	  Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN];
	}

	/* spin collinear polarization */
	else if ( SpinP_switch==1 ){
	  Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*2  ];
	  Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*2+1];
	} 

	/* spin non-collinear */
	else if ( SpinP_switch==3 ){
	  Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*4  ];
	  Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*4+1];
	  Density_Grid_B[2][BN] += Den_Rcv_Grid_A2B[ID][LN*4+2];
	  Density_Grid_B[3][BN] += Den_Rcv_Grid_A2B[ID][LN*4+3];
	} 

      } /* if (Solve!=4.....) */           

    } /* AN */ 
  } /* ID */  

  /****************************************************
   Conjugate complex of Density_Grid[3][MN] due to
   difference in the definition between density matrix
   and charge density
  ****************************************************/

  if (SpinP_switch==3){

    for (BN=0; BN<My_NumGridB_AB; BN++){
      Density_Grid_B[3][BN] = -Density_Grid_B[3][BN]; 
    }
  }

  /******************************************************
             MPI: from the partitions B to D
  ******************************************************/

  Density_Grid_Copy_B2D();

  /* freeing of arrays */

  for (i=0; i<(SpinP_switch+1); i++){
    for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
      free(Tmp_Den_Grid[i][Mc_AN]);
    }
    free(Tmp_Den_Grid[i]);
  }
  free(Tmp_Den_Grid);

  for (ID=0; ID<numprocs; ID++){
    free(Den_Snd_Grid_A2B[ID]);
  }  
  free(Den_Snd_Grid_A2B);

  for (ID=0; ID<numprocs; ID++){
    free(Den_Rcv_Grid_A2B[ID]);
  }
  free(Den_Rcv_Grid_A2B);

  /* elapsed time */
  dtime(&TEtime);
  time0 = TEtime - TStime;
  if(myid==0 && measure_time) printf("time0=%18.5f\n",time0);

  return time0;
}