F_VOID_FUNC blacs_gridinit_(int *ConTxt, F_CHAR order, int *nprow, int *npcol)
#endif
/*
 *  Purpose
 *  =======
 *  Builds the natural process-to-grid mapping for an nprow x npcol grid and
 *  passes it to blacs_gridmap, which sets the grid up.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  Ptr to int
 *          Forwarded unchanged to blacs_gridmap.
 *
 *  ORDER   (input) Ptr to char
 *          = 'C' or 'c' : map entry k is set to k (column-major numbering).
 *          ELSE         : map entry (i,j) is set to i*npcol + j
 *                         (row-major numbering).
 *
 *  NPROW   (input) Ptr to int
 *          Number of process rows; also used as the leading dimension of
 *          the map handed to blacs_gridmap.
 *
 *  NPCOL   (input) Ptr to int
 *          Number of process columns.
 */
{
   int *tmpgrid, *iptr;
   int i, j, nprocs;

/*
 * Grid can be row- or column-major natural ordering when blacs_gridinit is
 * called.  Define a tmpgrid to reflect this, and call blacs_gridmap to
 * set it up
 */
   nprocs = Mpval(nprow) * Mpval(npcol);
   iptr = tmpgrid = (int*) malloc( nprocs*sizeof(*tmpgrid) );
/*
 * Every write below happens only when nprocs > 0, so a NULL result for a
 * positive request is the one case that must not fall through.
 * NOTE(review): -1 is passed as the context because no BLACS context exists
 * yet at this point; confirm BI_BlacsErr accepts a negative context.
 */
   if ( (tmpgrid == NULL) && (nprocs > 0) )
   {
      BI_BlacsErr(-1, __LINE__, __FILE__,
                  "Not enough memory to allocate %d-entry process map",
                  nprocs);
      return;
   }
   if (Mlowcase(F2C_CharTrans(order)) == 'c')
   {
      for (j=0; j < nprocs; j++) iptr[j] = j;
   }
   else
   {
/*
 *    The map is stored column by column: column j holds the row-major
 *    process numbers i*npcol + j for i = 0 .. nprow-1
 */
      for (j=0; j < Mpval(npcol); j++)
      {
         for (i=0; i < Mpval(nprow); i++) iptr[i] = i * Mpval(npcol) + j;
         iptr += Mpval(nprow);
      }
   }
#if (INTFACE == C_CALL)
   Cblacs_gridmap(ConTxt, tmpgrid, nprow, nprow, npcol);
#else
   blacs_gridmap_(ConTxt, tmpgrid, nprow, nprow, npcol);
#endif
   free(tmpgrid);
}
F_VOID_FUNC dgamn2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
                     double *A, int *lda, int *rA, int *cA, int *ldia,
                     int *rdest, int *cdest)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Combine amn operation for double precision rectangular matrices.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation.
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (input/output) Ptr to double precision two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.  On entry it supplies this process's contribution to
 *          the combine; on the receiving process(es) it is overwritten
 *          with the result.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *
 *  RA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process row that the amn of each element
 *          of A was found on: i.e., rA(1,2) contains the process
 *          row that the amn of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  CA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process column that the amn of each element
 *          of A was found on: i.e., cA(1,2) contains the process
 *          column that the amn of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  LDIA    (input) Ptr to int
 *          If (LDIA == -1), then the arrays RA and CA are not accessed.
 *          ELSE leading dimension of the arrays RA and CA.  LDIA >= M.
 *
 *  RDEST   (input) Ptr to int
 *          The process row of the destination of the amn.
 *          If rdest == -1, then result is left on all processes in scope.
 *
 *  CDEST   (input) Ptr to int
 *          The process column of the destination of the amn.
 *          If rdest == -1, then CDEST ignored.
 *
 *  NOTE(review): the code below selects the "all processes" case by testing
 *  CDEST (not RDEST) against -1; RDEST is only read when CDEST != -1.
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                     int, int);
   void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                    int, int);
   void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
   void BI_dvvamn(int, char *, char *);
   void BI_dvvamn2(int, char *, char *);
   void BI_dMPI_amn(void *, void *, int *, MPI_Datatype *);
   void BI_dMPI_amn2(void *, void *, int *, MPI_Datatype *);
/*
 *  Variable Declarations
 */
   VVFUNPTR vvop;
   BLACBUFF *bp, *bp2;
   BLACSCONTEXT *ctxt;
   char ttop, tscope;
   int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
   int len[2];
   MPI_Aint disp[2];
   MPI_Datatype dtypes[2];
   MPI_Op BlacComb;
   MPI_Datatype MyType;
   BI_DistType *dist, mydist;
   extern BLACBUFF *BI_ActiveQ;
   extern BLACBUFF BI_AuxBuff;

   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
/*
 *  If the user has set the default combine topology, use it instead of
 *  BLACS default
 */
#ifdef DefCombTop
   if (ttop == ' ') ttop = DefCombTop;
#endif
   if (Mpval(cdest) == -1) trdest = -1;
   else trdest = Mpval(rdest);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
               Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
   if (Mpval(ldia) < Mpval(m))
   {
      if (Mpval(ldia) != -1)
         BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
                      "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
                      Mpval(m));
   }
#endif
/*
 * Leading dimensions smaller than M are silently raised to M
 */
   if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
   else tlda = Mpval(m);
   if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
   else tldia = Mpval(ldia);
/*
 * Select the scope and express the destination as a rank within it
 * (dest == -1 means every process in scope receives the result)
 */
   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      if (trdest == -1) dest = -1;
      else dest = Mpval(cdest);
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      dest = trdest;
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      if (trdest == -1) dest = -1;
      else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
   }


/*
 * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
 * topology if we've got one
 */
   if (ttop == ' ')
      if ( (Mpval(m) < 1) || (Mpval(n) < 1) || (ctxt->TopsRepeat) ) ttop = '1';
   N = Mpval(m) * Mpval(n);
/*
 * If process who has amn is to be communicated, must set up distance
 * vector after value vector
 */
   if (Mpval(ldia) != -1)
   {
      vvop = BI_dvvamn;
      length = N * sizeof(double);
      i = length % sizeof(BI_DistType);  /* ensure dist vec aligned correctly */
      if (i) length += sizeof(BI_DistType) - i;
      idist = length;
      length += N * sizeof(BI_DistType);
/*
 *    For performance, insist second buffer is at least 8-byte aligned
 */
      j = 8;
      if (sizeof(double) > j) j = sizeof(double);
      i = length % j;
      if (i) length += j - i;
      i = 2 * length;

/*
 *    One allocation of 2*length bytes: bp uses the first half, and bp2
 *    (the auxiliary buffer header) is pointed at the second half
 */
      bp = BI_GetBuff(i);
      bp2 = &BI_AuxBuff;
      bp2->Buff = &bp->Buff[length];
      BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
/*
 *    Fill in distance vector
 */
      if (dest == -1) mydist = ctxt->scp->Iam;
      else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
      dist = (BI_DistType *) &bp->Buff[idist];
      for (i=0; i < N; i++) dist[i] = mydist;

/*
 *    Create the MPI datatype holding both user's buffer and distance vector
 */
      len[0] = len[1] = N;
      disp[0] = 0;
      disp[1] = idist;
      dtypes[0] = MPI_DOUBLE;
      dtypes[1] = BI_MpiDistType;
#ifdef ZeroByteTypeBug
      if (N > 0)
      {
#endif
      i = 2;
/*
 *    MPI_Type_struct was removed from the MPI standard in MPI-3.0;
 *    MPI_Type_create_struct takes the same arguments
 */
      ierr=MPI_Type_create_struct(i, len, disp, dtypes, &MyType);
      ierr=MPI_Type_commit(&MyType);
      bp->N = bp2->N = 1;
      bp->dtype = bp2->dtype = MyType;
#ifdef ZeroByteTypeBug
      }
      else
      {
         bp->N = bp2->N = 0;
         bp->dtype = bp2->dtype = MPI_INT;
      }
#endif
   }
   else
   {
      vvop = BI_dvvamn2;
      length = N * sizeof(double);
/*
 *    If A is contiguous, we can use it as one of our buffers
 */
      if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
      {
         bp = &BI_AuxBuff;
         bp->Buff = (char *) A;
         bp2 = BI_GetBuff(length);
      }
      else
      {
         bp = BI_GetBuff(length*2);
         bp2 = &BI_AuxBuff;
         bp2->Buff = &bp->Buff[length];
         BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
      }
      bp->N = bp2->N = N;
      bp->dtype = bp2->dtype = MPI_DOUBLE;
   }

   switch(ttop)
   {
   case ' ':         /* use MPI's reduction by default */
      i = 1;
      if (Mpval(ldia) == -1)
      {
         ierr=MPI_Op_create(BI_dMPI_amn2, i, &BlacComb);
      }
      else
      {
         ierr=MPI_Op_create(BI_dMPI_amn, i, &BlacComb);
         BI_AuxBuff.Len = N;  /* set this up for the MPI OP wrappers */
      }

      if (trdest != -1)
      {
         ierr=MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
                         ctxt->scp->comm);
         if (ctxt->scp->Iam == dest)
         {
            BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
            if (Mpval(ldia) != -1)
               BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                            (BI_DistType *) &bp2->Buff[idist],
                            trdest, Mpval(cdest));
         }
      }
      else
      {
         ierr=MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
                            ctxt->scp->comm);
         BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
         if (Mpval(ldia) != -1)
            BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                         (BI_DistType *) &bp2->Buff[idist],
                         trdest, Mpval(cdest));
      }
      ierr=MPI_Op_free(&BlacComb);
/*
 *    MyType was only created when LDIA != -1 (and, with ZeroByteTypeBug,
 *    only when N > 0), so free it under the same conditions
 */
      if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
         if (N > 0)
#endif
         ierr=BI_MPI_TYPE_FREE(&MyType);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
      break;
   case 'i':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
      break;
   case 'd':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
      break;
   case 's':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   case 'm':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
      /* ttop-47 maps the characters '1'..'9' to the values 2..10 */
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
      break;
   case 'f':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
      break;
   case 't':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co);
      break;
   case 'h':
/*
 *    Use bidirectional exchange if everyone wants answer
 */
      if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
         BI_BeComb(ctxt, bp, bp2, N, vvop);
      else
         BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

   if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
      if (N > 0)
#endif
      ierr=BI_MPI_TYPE_FREE(&MyType);
/*
 * If I am selected to receive answer
 */
   if ( (ctxt->scp->Iam == dest) || (dest == -1) )
   {
/*
 *    Translate the distances stored in the latter part of bp->Buff into
 *    process grid coordinates, and output these coordinates in the
 *    arrays rA and cA.
 */
      if (Mpval(ldia) != -1)
         BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                      dist, trdest, Mpval(cdest));
/*
 *    Unpack the amn array (not needed when A itself served as bp's buffer)
 */
      if (bp != &BI_AuxBuff) BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
   }
}
F_VOID_FUNC ztrbr2d_(int *ConTxt, F_CHAR scope, F_CHAR top, F_CHAR uplo,
                     F_CHAR diag, int *m, int *n, double *A, int *lda,
                     int *rsrc, int *csrc)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Broadcast/receive for trapezoidal double complex arrays.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation.
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation.
 *
 *  UPLO    (input) Ptr to char
 *          Specifies the part of the matrix to be sent.
 *          = 'U':      Upper trapezoidal part
 *          ELSE :      Lower trapezoidal part
 *
 *  DIAG    (input) Ptr to char
 *          Specifies whether the matrix is unit diagonal or not.
 *          = 'U':      Matrix is unit diagonal, diagonal not communicated.
 *          ELSE :      Matrix is not unit diagonal, diagonal is communicated.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (output) Ptr to double complex two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *          If UPLO = 'U', only the upper trapezoid is accessed;
 *          if UPLO = 'L', only the lower trapezoid is accessed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *
 *
 *  RSRC    (input) Ptr to int
 *          The process row of the source of the matrix.
 *
 *  CSRC    (input) Ptr to int
 *          The process column of the source of the matrix.
 *
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   int BI_HypBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_IdringBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int, int);
   void BI_SringBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_MpathBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int, int);
   void BI_TreeBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int, int);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   MPI_Datatype BI_GetMpiTrType(BLACSCONTEXT *, char, char, int, int, int,
                                   MPI_Datatype, int *);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_Ssend(BLACSCONTEXT *, int, int, BLACBUFF *);
   void BI_Asend(BLACSCONTEXT *, int, int, BLACBUFF *);

   BLACSCONTEXT *ctxt;
   BLACBUFF *bp=NULL;
   SDRVPTR send;
   MPI_Datatype MatTyp;
   int length, src, tlda, error, one=1;
   char ttop, tscope, tuplo, tdiag;
   extern BLACBUFF BI_AuxBuff, *BI_ActiveQ;

   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
   tdiag = F2C_CharTrans(diag);
   tdiag = Mlowcase(tdiag);
   tuplo = F2C_CharTrans(uplo);
   tuplo = Mlowcase(tuplo);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_BR, __FILE__, tscope, tuplo, tdiag, Mpval(m),
               Mpval(n), Mpval(lda), 1, Mpaddress(rsrc), Mpaddress(csrc));
#endif
/*
 *  If the user has set the default broadcast topology, use it instead of
 *  BLACS default
 */
#ifdef DefBSTop
   if (ttop == ' ') ttop = DefBSTop;
#endif
/*
 * A leading dimension smaller than M is silently raised to M
 */
   if (Mpval(m) <= Mpval(lda)) tlda = Mpval(lda);
   else tlda = Mpval(m);

/*
 * Select the scope and express the source as a rank within it
 */
   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      src = Mpval(csrc);
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      src = Mpval(rsrc);
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      src = Mvkpnum(ctxt, Mpval(rsrc), Mpval(csrc));
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
   }

   MatTyp = BI_GetMpiTrType(ctxt, tuplo, tdiag, Mpval(m), Mpval(n), tlda,
                            MPI_DOUBLE_COMPLEX, &BI_AuxBuff.N);
/*
 * If using default topology, use MPI native broadcast
 */
   if (ttop == ' ')
   {
      error=MPI_Bcast(A, BI_AuxBuff.N, MatTyp, src, ctxt->scp->comm);
/*
 *    Under ZeroByteTypeBug MatTyp may be the predefined MPI_BYTE (see the
 *    check further down), which must not be handed to MPI_Type_free; use
 *    the same BI_MPI_TYPE_FREE wrapper the other routines in this file use.
 *    NOTE(review): assumes the wrapper skips predefined types -- confirm.
 */
      error=BI_MPI_TYPE_FREE(&MatTyp);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
   }
/*
 * If MPI handles non-contiguous buffering well, always use MPI data types
 * instead of packing
 */
#ifdef MpiBuffGood
   send = BI_Ssend;
   BI_AuxBuff.Buff = (char *) A;
   BI_AuxBuff.dtype = MatTyp;
   bp = &BI_AuxBuff;
#else

   send = BI_Asend;
/*
 * MPI_Pack_size takes four arguments and returns its status as the function
 * value
 */
   error=MPI_Pack_size(one, MatTyp, ctxt->scp->comm, &length);
   bp = BI_GetBuff(length);
   bp->N = length;
   bp->dtype = MPI_PACKED;
#if ZeroByteTypeBug
   if (MatTyp == MPI_BYTE)
   {
      send = BI_Ssend;
      bp->N = 0;
      bp->dtype = MPI_BYTE;
   }
#endif

#endif

   switch(ttop)
   {
   case 'h':
      error = BI_HypBR(ctxt, bp, send, src);
      if (error == NPOW2) BI_TreeBR(ctxt, bp, send, src, 2);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
      /* ttop-47 maps the characters '1'..'9' to the values 2..10 */
      BI_TreeBR(ctxt, bp, send, src, ttop-47);
      break;
   case 't':
      BI_TreeBR(ctxt, bp, send, src, ctxt->Nb_bs);
      break;
   case 'i':
      BI_IdringBR(ctxt, bp, send, src, 1);
      break;
   case 'd':
      BI_IdringBR(ctxt, bp, send, src, -1);
      break;
   case 's':
      BI_SringBR(ctxt, bp, send, src);
      break;
   case 'm':
      BI_MpathBR(ctxt, bp, send, src, ctxt->Nr_bs);
      break;
   case 'f':
      BI_MpathBR(ctxt, bp, send, src, FULLCON);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

#ifdef MpiBuffGood
   error=BI_MPI_TYPE_FREE(&MatTyp);
   if (BI_ActiveQ) BI_UpdateBuffs(NULL);
#endif
#ifndef MpiBuffGood
   BI_Unpack(ctxt, (BVOID *) A, bp, MatTyp);
   BI_UpdateBuffs(bp);
#endif
}
F_VOID_FUNC ztrbs2d_(int *ConTxt, F_CHAR scope, F_CHAR top, F_CHAR uplo,
                     F_CHAR diag, int *m, int *n, double *A, int *lda)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Broadcast/send for trapezoidal double complex arrays.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation.
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation.
 *
 *  UPLO    (input) Ptr to char
 *          Specifies the part of the matrix to be sent.
 *          = 'U':      Upper trapezoidal part
 *          ELSE :      Lower trapezoidal part
 *
 *  DIAG    (input) Ptr to char
 *          Specifies whether the matrix is unit diagonal or not.
 *          = 'U':      Matrix is unit diagonal, diagonal not communicated.
 *          ELSE :      Matrix is not unit diagonal, diagonal is communicated.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (input) Ptr to double complex two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *          If UPLO = 'U', only the upper trapezoid is accessed;
 *          if UPLO = 'L', only the lower trapezoid is accessed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   int BI_HypBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR);
   void BI_IdringBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_SringBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR);
   void BI_MpathBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_TreeBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   MPI_Datatype BI_GetMpiTrType(BLACSCONTEXT *, char, char, int, int, int,
                                   MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_Ssend(BLACSCONTEXT *, int, int, BLACBUFF *);
   void BI_Asend(BLACSCONTEXT *, int, int, BLACBUFF *);

   char ttop, tscope, tuplo, tdiag;
   int error, tlda;
   MPI_Datatype MatTyp;
   SDRVPTR send;
   BLACBUFF *bp;
   BLACSCONTEXT *ctxt;
   extern BLACBUFF BI_AuxBuff, *BI_ActiveQ;
/*
 * get context, lowcase char variables, and perform parameter checking
 */
   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
   tuplo = F2C_CharTrans(uplo);
   tuplo = Mlowcase(tuplo);
   tdiag = F2C_CharTrans(diag);
   tdiag = Mlowcase(tdiag);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_BS, __FILE__, 'a', tuplo, tdiag, Mpval(m),
               Mpval(n), Mpval(lda), 0, NULL, NULL);
#endif
/*
 *  If the user has set the default broadcast topology, use it instead of
 *  BLACS default
 */
#ifdef DefBSTop
   if (ttop == ' ') ttop = DefBSTop;
#endif
/*
 * A leading dimension smaller than M is silently raised to M
 */
   if (Mpval(lda) < Mpval(m)) tlda = Mpval(m);
   else tlda = Mpval(lda);

   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
   }
   MatTyp = BI_GetMpiTrType(ctxt, tuplo, tdiag, Mpval(m), Mpval(n), tlda,
                            MPI_DOUBLE_COMPLEX, &BI_AuxBuff.N);
/*
 * If using default topology, use MPI native broadcast
 * (the caller is the root, hence its own rank in scope is passed)
 */
   if (ttop == ' ')
   {
      error=MPI_Bcast(A, BI_AuxBuff.N, MatTyp, ctxt->scp->Iam, ctxt->scp->comm);
/*
 *    The receive-side routine in this file compares the type returned by
 *    BI_GetMpiTrType against MPI_BYTE under ZeroByteTypeBug, so MatTyp may be
 *    a predefined type that must not be handed to MPI_Type_free; use the same
 *    BI_MPI_TYPE_FREE wrapper the other routines in this file use.
 *    NOTE(review): assumes the wrapper skips predefined types -- confirm.
 */
      error=BI_MPI_TYPE_FREE(&MatTyp);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
   }
/*
 * If MPI handles non-contiguous buffering well, always use MPI data types
 * instead of packing
 */
#ifdef MpiBuffGood
   send = BI_Ssend;
   BI_AuxBuff.Buff = (char *) A;
   BI_AuxBuff.dtype = MatTyp;
   bp = &BI_AuxBuff;
#endif
/*
 * Pack and use non-blocking sends for broadcast if MPI's data types aren't
 * more efficient
 */
#ifndef MpiBuffGood
   send = BI_Asend;
   bp = BI_Pack(ctxt, (BVOID *) A, NULL, MatTyp);
#endif

/*
 * Call correct topology for BS/BR
 */
   switch(ttop)
   {
   case 'h':
      error = BI_HypBS(ctxt, bp, send);
      if (error == NPOW2) BI_TreeBS(ctxt, bp, send, 2);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
      /* ttop-47 maps the characters '1'..'9' to the values 2..10 */
      BI_TreeBS(ctxt, bp, send, ttop-47);
      break;
   case 't':
      BI_TreeBS(ctxt, bp, send, ctxt->Nb_bs);
      break;
   case 'i':
      BI_IdringBS(ctxt, bp, send, 1);
      break;
   case 'd':
      BI_IdringBS(ctxt, bp, send, -1);
      break;
   case 's':
      BI_SringBS(ctxt, bp, send);
      break;
   case 'f':
      BI_MpathBS(ctxt, bp, send, FULLCON);
      break;
   case 'm':
      BI_MpathBS(ctxt, bp, send, ctxt->Nr_bs);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

   error=BI_MPI_TYPE_FREE(&MatTyp);
/*
 * When A itself was the send buffer there is no BLACS buffer to hand back;
 * only give the active queue a chance to progress
 */
   if (bp == &BI_AuxBuff)
   {
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
   }
   else BI_UpdateBuffs(bp);
}  /* end  ztrbs2d_  */
F_VOID_FUNC strrv2d_(int *ConTxt, F_CHAR uplo, F_CHAR diag, int *m, int *n,
                     float *A, int *lda, int *rsrc, int *csrc)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Globally-blocking point to point receive of a real trapezoidal matrix.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  UPLO    (input) Ptr to char
 *          Which part of the matrix is communicated.
 *          = 'U':      Upper trapezoidal part
 *          ELSE :      Lower trapezoidal part
 *
 *  DIAG    (input) Ptr to char
 *          Whether the matrix is unit diagonal.
 *          = 'U':      Unit diagonal, diagonal not communicated.
 *          ELSE :      Not unit diagonal, diagonal is communicated.
 *
 *  M       (input) Ptr to int
 *          Number of rows of A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          Number of columns of A.  N >= 0.
 *
 *  A       (output) Ptr to real two dimensional array
 *          The m by n matrix A, Fortran77 (column-major) storage.
 *          If UPLO = 'U', only the upper trapezoid is accessed;
 *          if UPLO = 'L', only the lower trapezoid is accessed.
 *
 *  LDA     (input) Ptr to int
 *          Leading dimension of A.  LDA >= M.
 *
 *  RSRC    (input) Ptr to int
 *          Process row of the sender.
 *
 *  CSRC    (input) Ptr to int
 *          Process column of the sender.
 *
 * ------------------------------------------------------------------------
 */
{
/*
 *  Prototypes of the BLACS internals declared locally for this routine
 */
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   MPI_Datatype BI_GetMpiTrType(BLACSCONTEXT *, char, char, int, int, int,
                                   MPI_Datatype, int *);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_Srecv(BLACSCONTEXT *, int, int, BLACBUFF *);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
/*
 *  Local variables
 */
   extern BLACBUFF BI_AuxBuff, *BI_ActiveQ;
   BLACSCONTEXT *ctxt;
   BLACBUFF *bp;
   MPI_Datatype MatTyp;
   int uplo_lc, diag_lc;
   int ld_used, sender, mpi_rc;

   MGetConTxt(Mpval(ConTxt), ctxt);

/*
 * Lower-case copies of the two option characters
 */
   uplo_lc = F2C_CharTrans(uplo);
   uplo_lc = Mlowcase(uplo_lc);
   diag_lc = F2C_CharTrans(diag);
   diag_lc = Mlowcase(diag_lc);

#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_RV, __FILE__, 'a', uplo_lc, diag_lc, Mpval(m),
               Mpval(n), Mpval(lda), 1, Mpaddress(rsrc), Mpaddress(csrc));
#endif

/*
 * A leading dimension smaller than M is raised to M
 */
   ld_used = (Mpval(lda) < Mpval(m)) ? Mpval(m) : Mpval(lda);
   ctxt->scp = &ctxt->pscp;

/*
 * Receive straight into the user's array through the auxiliary buffer
 * header, described by a trapezoidal MPI datatype
 */
   bp = &BI_AuxBuff;
   MatTyp = BI_GetMpiTrType(ctxt, uplo_lc, diag_lc, Mpval(m), Mpval(n),
                            ld_used, MPI_FLOAT, &bp->N);
   bp->dtype = MatTyp;
   bp->Buff = (char *) A;

   sender = Mkpnum(ctxt, Mpval(rsrc), Mpval(csrc));
   BI_Srecv(ctxt, sender, PT2PTID, bp);

   mpi_rc=BI_MPI_TYPE_FREE(&MatTyp);
   if (BI_ActiveQ) BI_UpdateBuffs(NULL);
}
/* Example #6 */
/* 0 */
F_VOID_FUNC igsum2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
                     int *A, int *lda, int *rdest, int *cdest)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Combine sum operation for integer rectangular matrices.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation.
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (output) Ptr to integer two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *
 *  RDEST   (input) Ptr to int
 *          The process row of the destination of the sum.
 *          If rdest == -1, then result is left on all processes in scope.
 *
 *  CDEST   (input) Ptr to int
 *          The process column of the destination of the sum.
 *          If rdest == -1, then CDEST ignored.
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   MPI_Datatype, int *);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                     int, int);
   void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                    int, int);
   void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
   void BI_ivvsum(int, char *, char *);
/*
 *  Variable Declarations
 */
   BLACBUFF *bp, *bp2;
   BLACSCONTEXT *ctxt;
   char ttop, tscope;
   int N, length, dest, tlda, trdest, ierr, itr;
   extern BLACBUFF *BI_ActiveQ;
   extern BLACBUFF BI_AuxBuff;

   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
/*
 *  If the user has set the default combine topology, use it instead of
 *  BLACS default
 */
#ifdef DefCombTop
   if (ttop == ' ') ttop = DefCombTop;
#endif
   if (Mpval(cdest) == -1) trdest = -1;
   else trdest = Mpval(rdest);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
               Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
#endif
   if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
   else tlda = Mpval(m);
   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      if (trdest == -1) dest = -1;
      else dest = Mpval(cdest);
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      dest = trdest;
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      if (trdest == -1) dest = -1;
      else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
   }


/*
 * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
 * topology if we've got one.  Note that integer operations are always
 * repeatable.
 */
   if (ttop == ' ') if ( (Mpval(m) < 1) || (Mpval(n) < 1) ) ttop = '1';
   N = Mpval(m) * Mpval(n);
   length = N * sizeof(int);
/*
 * If A is contiguous, we can use it as one of the buffers
 */
   if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
   {
      bp = &BI_AuxBuff;
      bp->Buff = (char *) A;
      bp2 = BI_GetBuff(length);
   }
/*
 * Otherwise, we must allocate both buffers
 */
   else
   {
      bp = BI_GetBuff(length*2);
      bp2 = &BI_AuxBuff;
      bp2->Buff = &bp->Buff[length];
      BI_imvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
   }
   bp->dtype = bp2->dtype = MPI_INT;
   bp->N = bp2->N = N;

   switch(ttop)
   {
   case ' ':         /* use MPI's reduction by default */
      if (dest != -1)
      {
         ierr=MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, MPI_SUM,
                       dest, ctxt->scp->comm);
         if (ctxt->scp->Iam == dest)
	    BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
      }
      else
      {
         ierr=MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, MPI_SUM,
		          ctxt->scp->comm);
	 BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
      }
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
      break;
   case 'i':
      BI_MringComb(ctxt, bp, bp2, N, BI_ivvsum, dest, 1);
      break;
   case 'd':
      BI_MringComb(ctxt, bp, bp2, N, BI_ivvsum, dest, -1);
      break;
   case 's':
      BI_MringComb(ctxt, bp, bp2, N, BI_ivvsum, dest, 2);
      break;
   case 'm':
      BI_MringComb(ctxt, bp, bp2, N, BI_ivvsum, dest, ctxt->Nr_co);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
      BI_TreeComb(ctxt, bp, bp2, N, BI_ivvsum, dest, ttop-47);
      break;
   case 'f':
      BI_TreeComb(ctxt, bp, bp2, N, BI_ivvsum, dest, FULLCON);
      break;
   case 't':
      BI_TreeComb(ctxt, bp, bp2, N, BI_ivvsum, dest, ctxt->Nb_co);
      break;
   case 'h':
/*
 *    Use bidirectional exchange if everyone wants answer
 */
      if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
         BI_BeComb(ctxt, bp, bp2, N, BI_ivvsum);
      else
         BI_TreeComb(ctxt, bp, bp2, N, BI_ivvsum, dest, 2);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

/*
 * If I am selected to receive answer
 */
   if (bp != &BI_AuxBuff)
   {
      if ( (ctxt->scp->Iam == dest) || (dest == -1) )
         BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
      BI_UpdateBuffs(bp);
   }
   else
   {
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      BI_BuffIsFree(bp, 1);
   }
}
Beispiel #7
0
/*
 * Usage: <prog> [-p <pre>] [-n <nb>]
 *
 * Chooses between the "4x1" and "4x4" cases for precision <pre> (default
 * 'd') and blocking factor <nb> (default 28):
 *   1. Reads three values from res/M<pre>NB<nb>_4x1x1_0-1.mflop and from
 *      res/M<pre>NB<nb>_4x4x1_0-1.mflop.  If either file is missing,
 *      "make <pre>hc_cases nb=<nb>" is run first to generate it.
 *   2. Reduces each triple to one number with GetAvg(3, 1.2, ...).
 *   3. Writes res/<pre>mmcase.h, which defines Use4x1 when the 4x1 number
 *      is >= the 4x4 number and Use4x4 otherwise.
 *   4. Writes both numbers, one per line, to res/<pre>HCRES.
 *
 * Exits with status 0 on success and -1 on any failure.  All checks are
 * explicit (not assert-based) so that they, and the commands they guard,
 * still run when the file is compiled with NDEBUG.
 */
int main(int nargs, char *args[])
{
   char ln[127], pre='d', ln2[127], ch;
   int i, nb=28;
   double mflops[3];
   double mf1, mf4;
   FILE *fpin, *fp;

   for (i=1; i < nargs; i++)
   {
      if (args[i][0] != '-') PrintUsage(args[0]);
      switch(args[i][1])
      {
      case 'p':
/*
 *       A flag that is the last argument has no value to consume; reading
 *       args[++i] would dereference the NULL that terminates the list.
 *       PrintUsage presumably does not return; the break keeps us safe
 *       even if it does.
 */
         if (i+1 >= nargs)
         {
            PrintUsage(args[0]);
            break;
         }
         ch = args[++i][0];
         pre = Mlowcase(ch);
         break;
      case 'n':
         if (i+1 >= nargs)
         {
            PrintUsage(args[0]);
            break;
         }
         nb = atoi(args[++i]);
         break;
      default:
         PrintUsage(args[0]);
      }
   }

/*
 * Timings for the 4x1 case
 */
   sprintf(ln, "res/M%cNB%d_4x1x1_0-1.mflop", pre, nb);
   if (!FileExists(ln))
   {
      sprintf(ln2, "make %chc_cases nb=%d\n", pre, nb);
      if (system(ln2) != 0)
      {
         fprintf(stderr, "Error in command: %s", ln2);
         exit(-1);
      }
   }
   fpin = fopen(ln, "r");
   if (!fpin)
   {
      fprintf(stderr, "Unable to open file for reading: %s\n", ln);
      exit(-1);
   }
   for (i=0; i < 3; i++)
   {
/*
 *    fscanf returns EOF (nonzero) on input failure, so the result must be
 *    compared against the number of expected conversions, not tested for
 *    truth.
 */
      if (fscanf(fpin, "%lf", mflops+i) != 1)
      {
         fprintf(stderr, "Unable to read 3 values from file: %s\n", ln);
         exit(-1);
      }
   }
   mf1 = GetAvg(3, 1.2, mflops);
   fclose(fpin);

/*
 * Timings for the 4x4 case
 */
   sprintf(ln, "res/M%cNB%d_4x4x1_0-1.mflop", pre, nb);
   if (!FileExists(ln))
   {
      sprintf(ln2, "make %chc_cases nb=%d\n", pre, nb);
      if (system(ln2) != 0)
      {
/*
 *       Remove the result files for this precision before giving up
 */
         sprintf(ln, "rm -f res/M%cNB*\n", pre);
         system(ln);
         fprintf(stderr, "Error in command: %s", ln2);
         exit(-1);
      }
   }
   fpin = fopen(ln, "r");
   if (!fpin)
   {
      fprintf(stderr, "Unable to open file for reading: %s\n", ln);
      exit(-1);
   }
   for (i=0; i < 3; i++)
   {
      if (fscanf(fpin, "%lf", mflops+i) != 1)
      {
         fprintf(stderr, "Unable to read 3 values from file: %s\n", ln);
         exit(-1);
      }
   }
   mf4 = GetAvg(3, 1.2, mflops);
   fclose(fpin);

/*
 * Header selecting the winning case (ties go to 4x1)
 */
   sprintf(ln, "res/%cmmcase.h", pre);
   fp = fopen(ln, "w");
   if (!fp)
   {
      fprintf(stderr, "Unable to open file for writing: %s\n", ln);
      exit(-1);
   }
   fprintf(fp, "#ifndef %cMMCASE_H\n", Mupcase(pre));
   fprintf(fp, "   #define %cMMCASE_H\n", Mupcase(pre));
   if (mf1 >= mf4) fprintf(fp, "   #define Use4x1\n");
   else fprintf(fp, "   #define Use4x4\n");
   fprintf(fp, "#endif\n");
   fclose(fp);

/*
 * Raw results: 4x1 value, then 4x4 value
 */
   sprintf(ln, "res/%cHCRES", pre);
   fp = fopen(ln, "w");
   if (!fp)
   {
      fprintf(stderr, "Unable to open file for writing: %s\n", ln);
      exit(-1);
   }
   fprintf(fp, "%f\n", mf1);
   fprintf(fp, "%f\n", mf4);
   fclose(fp);

   exit(0);
}
F_VOID_FUNC igebs2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
                     int *A, int *lda)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Broadcast/send of a general integer matrix.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Which set of processes takes part (case-insensitive).
 *          = 'R' :   the process row     (ctxt->rscp is used)
 *          = 'C' :   the process column  (ctxt->cscp is used)
 *          = 'A' :   every process in the grid (ctxt->ascp is used)
 *          Anything else is reported through BI_BlacsErr.
 *
 *  TOP     (input) Ptr to char
 *          Selects how messages flow (case-insensitive).
 *          = ' '      : MPI_Bcast (or DefBSTop, when that macro is defined)
 *          = 'H'      : BI_HypBS, falling back to BI_TreeBS with 2
 *                       if BI_HypBS returns NPOW2
 *          = '1'-'9'  : BI_TreeBS with (character code - 47)
 *          = 'T'      : BI_TreeBS with ctxt->Nb_bs
 *          = 'I'/'D'  : BI_IdringBS with step +1 / -1
 *          = 'S'      : BI_SringBS
 *          = 'F'      : BI_MpathBS with FULLCON
 *          = 'M'      : BI_MpathBS with ctxt->Nr_bs
 *          Anything else is reported through BI_BlacsErr.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (input) Ptr to integer two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M; a smaller
 *          value is replaced by M.
 *
 * ------------------------------------------------------------------------
 */
{
/*
 * Prototypes of the internal routines referenced below
 */
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   void BI_UpdateBuffs(BLACBUFF *);
   void BI_Ssend(BLACSCONTEXT *, int, int, BLACBUFF *);
   void BI_Asend(BLACSCONTEXT *, int, int, BLACBUFF *);
   int BI_HypBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR);
   void BI_TreeBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_IdringBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_SringBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR);
   void BI_MpathBS(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);

   extern BLACBUFF BI_AuxBuff, *BI_ActiveQ;
   BLACSCONTEXT *ctxt;
   BLACBUFF *buf;
   SDRVPTR sendfn;
   MPI_Datatype geType;
   int ierr, ldause;
   char topch, scpch;

/*
 * Look up the context and fold the character arguments to lower case
 */
   MGetConTxt(Mpval(ConTxt), ctxt);
   scpch = F2C_CharTrans(scope);
   scpch = Mlowcase(scpch);
   topch = F2C_CharTrans(top);
   topch = Mlowcase(topch);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_BS, __FILE__, 'a', 'u', 'u', Mpval(m),
               Mpval(n), Mpval(lda), 0, NULL, NULL);
#endif
/*
 * A user-configured default broadcast topology overrides the BLACS default
 */
#ifdef DefBSTop
   if (topch == ' ') topch = DefBSTop;
#endif
/*
 * Never work with a leading dimension smaller than the row count
 */
   ldause = (Mpval(lda) < Mpval(m)) ? Mpval(m) : Mpval(lda);

/*
 * Point the context at the scope the caller asked for
 */
   if (scpch == 'r')
   {
      ctxt->scp = &ctxt->rscp;
   }
   else if (scpch == 'c')
   {
      ctxt->scp = &ctxt->cscp;
   }
   else if (scpch == 'a')
   {
      ctxt->scp = &ctxt->ascp;
   }
   else
   {
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  scpch);
   }

   geType = BI_GetMpiGeType(ctxt, Mpval(m), Mpval(n), ldause,
                            MPI_INT, &BI_AuxBuff.N);
/*
 * Default topology: hand the whole job to MPI's own broadcast and leave
 */
   if (topch == ' ')
   {
      ierr = MPI_Bcast(A, BI_AuxBuff.N, geType, ctxt->scp->Iam,
                       ctxt->scp->comm);
      ierr = MPI_Type_free(&geType);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
   }

/*
 * Choose the send buffer.  When MPI copes well with non-contiguous data
 * (MpiBuffGood), A is always sent in place through the MPI data type.
 * Otherwise A is sent in place only if it is contiguous, and is packed
 * into a BLACS buffer if it is not.
 */
#ifdef MpiBuffGood
   sendfn = BI_Ssend;
   BI_AuxBuff.Buff = (char *) A;
   BI_AuxBuff.dtype = geType;
   buf = &BI_AuxBuff;
#else
   if ( (ldause == Mpval(m)) || (Mpval(n) == 1) )
   {
      sendfn = BI_Ssend;
      BI_AuxBuff.Buff = (char *) A;
      BI_AuxBuff.dtype = geType;
      buf = &BI_AuxBuff;
   }
   else
   {
      sendfn = BI_Asend;
      buf = BI_Pack(ctxt, (BVOID *) A, NULL, geType);
   }
#endif

/*
 * Dispatch on the requested topology; the digits '1'..'9' share one case
 */
   if ( (topch >= '1') && (topch <= '9') )
   {
      BI_TreeBS(ctxt, buf, sendfn, topch-47);
   }
   else
   {
      switch(topch)
      {
      case 'h':
         ierr = BI_HypBS(ctxt, buf, sendfn);
         if (ierr == NPOW2) BI_TreeBS(ctxt, buf, sendfn, 2);
         break;
      case 't':
         BI_TreeBS(ctxt, buf, sendfn, ctxt->Nb_bs);
         break;
      case 'i':
         BI_IdringBS(ctxt, buf, sendfn, 1);
         break;
      case 'd':
         BI_IdringBS(ctxt, buf, sendfn, -1);
         break;
      case 's':
         BI_SringBS(ctxt, buf, sendfn);
         break;
      case 'f':
         BI_MpathBS(ctxt, buf, sendfn, FULLCON);
         break;
      case 'm':
         BI_MpathBS(ctxt, buf, sendfn, ctxt->Nr_bs);
         break;
      default :
         BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",topch);
      }
   }

/*
 * Release the MPI type, then update the buffer bookkeeping: a packed buffer
 * is handed to BI_UpdateBuffs, the auxiliary buffer only triggers an update
 * when the active queue is non-empty
 */
   ierr = MPI_Type_free(&geType);
   if (buf != &BI_AuxBuff) BI_UpdateBuffs(buf);
   else if (BI_ActiveQ) BI_UpdateBuffs(NULL);
}  /* end  igebs2d_  */
F_VOID_FUNC ztrsd2d_(int *ConTxt, F_CHAR uplo, F_CHAR diag, int *m, int *n,
                     double *A, int *lda, int *rdest, int *cdest)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Point-to-point send of a trapezoidal double complex matrix; the send
 *  is locally blocking.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  UPLO    (input) Ptr to char
 *          Which part of the matrix is sent.
 *          = 'U':      Upper trapezoidal part
 *          ELSE :      Lower trapezoidal part
 *
 *  DIAG    (input) Ptr to char
 *          Whether the diagonal takes part in the communication.
 *          = 'U':      Unit diagonal; the diagonal is not communicated.
 *          ELSE :      Non-unit diagonal; the diagonal is communicated.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (input) Ptr to double complex two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *          If UPLO = 'U', only the upper trapezoid is accessed;
 *          if UPLO = 'L', only the lower trapezoid is accessed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M; a smaller
 *          value is replaced by M.
 *
 *  RDEST   (input) Ptr to int
 *          The process row of the destination process.
 *
 *  CDEST   (input) Ptr to int
 *          The process column of the destination process.
 *
 * ------------------------------------------------------------------------
 */
{
/*
 * Prototypes of the internal routines referenced below
 */
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   MPI_Datatype BI_GetMpiTrType(BLACSCONTEXT *, char, char, int, int, int,
                                   MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   void BI_UpdateBuffs(BLACBUFF *);
   void BI_Ssend(BLACSCONTEXT *, int, int, BLACBUFF *);
   void BI_Asend(BLACSCONTEXT *, int, int, BLACBUFF *);

   extern BLACBUFF BI_AuxBuff, *BI_ActiveQ;
   BLACSCONTEXT *ctxt;
   BLACBUFF *buf;
   MPI_Datatype trType;
   int dest, length, ldause, ierr;
   char uploch, diagch;

/*
 * Look up the context and fold the character arguments to lower case
 */
   MGetConTxt(Mpval(ConTxt), ctxt);
   uploch = F2C_CharTrans(uplo);
   uploch = Mlowcase(uploch);
   diagch = F2C_CharTrans(diag);
   diagch = Mlowcase(diagch);

#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_SD, "ZTRSD2D", 'a', uploch, diagch, Mpval(m),
               Mpval(n), Mpval(lda), 1, Mpaddress(rdest), Mpaddress(cdest));
#endif
/*
 * Never work with a leading dimension smaller than the row count
 */
   ldause = (Mpval(lda) < Mpval(m)) ? Mpval(m) : Mpval(lda);
   dest = Mvkpnum(ctxt, Mpval(rdest), Mpval(cdest));
   ctxt->scp = &ctxt->pscp;

   trType = BI_GetMpiTrType(ctxt, uploch, diagch, Mpval(m), Mpval(n), ldause,
                            MPI_DOUBLE_COMPLEX, &BI_AuxBuff.N);
/*
 * In both configurations the order is: send, free the MPI type, then update
 * the buffer bookkeeping (reform links, check if active buffers have become
 * inactive, etc.)
 */
#ifdef SndIsLocBlk
/*
 * Send straight out of A through the auxiliary buffer
 */
   BI_AuxBuff.Buff = (char *) A;
   BI_AuxBuff.dtype = trType;
   BI_Ssend(ctxt, dest, PT2PTID, &BI_AuxBuff);
   ierr = MPI_Type_free(&trType);
   if (BI_ActiveQ) BI_UpdateBuffs(NULL);
#else
/*
 * Pack A into a BLACS buffer and start an asynchronous send of the copy
 */
   buf = BI_Pack(ctxt, (BVOID *) A, NULL, trType);
   BI_Asend(ctxt, Mkpnum(ctxt, Mpval(rdest), Mpval(cdest)), PT2PTID, buf);
   ierr = MPI_Type_free(&trType);
   BI_UpdateBuffs(buf);
#endif
}  /* end of ztrsd2d */