Exemple #1
0
/*************************************************************************
* This function takes a graph and produces a bisection of it
**************************************************************************/
int MlevelRecursiveBisection(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, floattype *tpwgts, floattype ubfactor, int fpart)
{
  int i, j, nvtxs, cut, tvwgt, tpwgts2[2];
  idxtype *label, *where;
  GraphType lgraph, rgraph;
  floattype wsum;

  nvtxs = graph->nvtxs;
  if (nvtxs == 0) {
    printf("\t***Cannot bisect a graph with 0 vertices!\n\t***You are trying to partition a graph into too many parts!\n");
    return 0;
  }

  /* Determine the weights of the partitions */
  tvwgt = idxsum(nvtxs, graph->vwgt);
  tpwgts2[0] = tvwgt*ssum(nparts/2, tpwgts);
  tpwgts2[1] = tvwgt-tpwgts2[0];

  MlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor);
  cut = graph->mincut;

  /* printf("%5d %5d %5d [%5d %f]\n", tpwgts2[0], tpwgts2[1], cut, tvwgt, ssum(nparts/2, tpwgts));*/

  label = graph->label;
  where = graph->where;
  for (i=0; i<nvtxs; i++)
    part[label[i]] = where[i] + fpart;

  if (nparts > 2) {
    SplitGraphPart(ctrl, graph, &lgraph, &rgraph);
    /* printf("%d %d\n", lgraph.nvtxs, rgraph.nvtxs); */
  }


  /* Free the memory of the top level graph */
  GKfree(&graph->gdata, &graph->rdata, &graph->label, LTERM);

  /* Scale the fractions in the tpwgts according to the true weight */
  wsum = ssum(nparts/2, tpwgts);
  sscale(nparts/2, 1.0/wsum, tpwgts);
  sscale(nparts-nparts/2, 1.0/(1.0-wsum), tpwgts+nparts/2);
  /*
  for (i=0; i<nparts; i++)
    printf("%5.3f ", tpwgts[i]);
  printf("[%5.3f]\n", wsum);
  */

  /* Do the recursive call */
  if (nparts > 3) {
    cut += MlevelRecursiveBisection(ctrl, &lgraph, nparts/2, part, tpwgts, ubfactor, fpart);
    cut += MlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, tpwgts+nparts/2, ubfactor, fpart+nparts/2);
  }
  else if (nparts == 3) {
    cut += MlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, tpwgts+nparts/2, ubfactor, fpart+nparts/2);
    GKfree(&lgraph.gdata, &lgraph.label, LTERM);
  }

  return cut;

}
Exemple #2
0
/*************************************************************************
* This function takes a graph and produces a bisection of it
**************************************************************************/
int MCMlevelRecursiveBisection2(CtrlType *ctrl, GraphType *graph, int nparts,
    float *tpwgts, idxtype *part, float ubfactor, int fpart)
{
  int i, nvtxs, cut;
  float wsum, tpwgts2[2];
  idxtype *label, *where;
  GraphType lgraph, rgraph;

  nvtxs = graph->nvtxs;
  if (nvtxs == 0) 
    return 0;

  /* Determine the weights of the partitions */
  tpwgts2[0] = ssum(nparts/2, tpwgts);
  tpwgts2[1] = 1.0-tpwgts2[0];

  MCMlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor);
  cut = graph->mincut;

  label = graph->label;
  where = graph->where;
  for (i=0; i<nvtxs; i++)
    part[label[i]] = where[i] + fpart;

  if (nparts > 2) 
    SplitGraphPart(ctrl, graph, &lgraph, &rgraph);

  /* Free the memory of the top level graph */
  GKfree((void**)&graph->gdata, &graph->nvwgt, &graph->rdata, &graph->label, &graph->npwgts, LTERM);

  /* Scale the fractions in the tpwgts according to the true weight */
  wsum = ssum(nparts/2, tpwgts);
  sscale(nparts/2, 1.0/wsum, tpwgts);
  sscale(nparts-nparts/2, 1.0/(1.0-wsum), tpwgts+nparts/2);

  /* Do the recursive call */
  if (nparts > 3) {
    cut += MCMlevelRecursiveBisection2(ctrl, &lgraph, nparts/2, tpwgts, part, ubfactor, fpart);
    cut += MCMlevelRecursiveBisection2(ctrl, &rgraph, nparts-nparts/2, tpwgts+nparts/2, part, ubfactor, fpart+nparts/2);
  }
  else if (nparts == 3) {
    cut += MCMlevelRecursiveBisection2(ctrl, &rgraph, nparts-nparts/2, tpwgts+nparts/2, part, ubfactor, fpart+nparts/2);
    GKfree((void**)&lgraph.gdata, &lgraph.nvwgt, &lgraph.label, LTERM);
  }

  return cut;

}
static double numbers_per_second(const uint64_t count)
{
  printf("Generating %s numbers... ", sscale(count));
  fflush(stdout);

  Timer timer;

  for ( uint64_t n = 0; n < count; ++n )
    mt::rand_u32();

  const double secs = timer.elapsed_secs();
  printf("%f seconds\n", secs);

  return count/secs;
}
Exemple #4
0
/*************************************************************************
* This function is the entry point of the initial partition algorithm
* that does recursive bissection.
* This algorithm assembles the graph to all the processors and preceeds
* by parallelizing the recursive bisection step.
**************************************************************************/
void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
    int i, j;
    int ncon, mype, npes, gnvtxs, ngroups;
    idxtype *xadj, *adjncy, *adjwgt, *vwgt;
    idxtype *part, *gwhere0, *gwhere1;
    idxtype *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt;
    GraphType *agraph;
    int lnparts, fpart, fpe, lnpes;
    int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut;
    float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum;
    MPI_Comm ipcomm;
    struct {
        float sum;
        int rank;
    } lpesum, gpesum;

    ncon = graph->ncon;
    ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1);

    IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
    IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

    agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
    part = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part");
    xadj = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj");
    adjncy = idxmalloc(agraph->nedges, "Mc_IP_RB: adjncy");
    adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt");
    vwgt = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt");

    idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt);
    idxcopy(agraph->nvtxs+1, agraph->xadj, xadj);
    idxcopy(agraph->nedges, agraph->adjncy, adjncy);
    idxcopy(agraph->nedges, agraph->adjwgt, adjwgt);

    MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm);
    MPI_Comm_rank(ipcomm, &mype);
    MPI_Comm_size(ipcomm, &npes);

    gnvtxs = agraph->nvtxs;

    gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0");
    gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1");

    /* ADD: this assumes that tpwgts for all constraints is the same */
    /* ADD: this is necessary because serial metis does not support the general case */
    mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
    for (i=0; i<ctrl->nparts; i++)
        for (j=0; j<ncon; j++)
            mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
    for (i=0; i<ctrl->nparts; i++)
        mytpwgts[i] /= (float)ncon;

    /* Go into the recursive bisection */
    /* ADD: consider changing this to breadth-first type bisection */
    moptions[0] = 0;
    moptions[7] = ctrl->sync + (ctrl->mype % ngroups) + 1;

    lnparts = ctrl->nparts;
    fpart = fpe = 0;
    lnpes = npes;
    while (lnpes > 1 && lnparts > 1) {
        /* Determine the weights of the partitions */
        mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart);
        mytpwgts2[1] = 1.0-mytpwgts2[0];

        if (ncon == 1)
            METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy,
                                  agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2,
                                  moptions, &edgecut, part);
        else {
            METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj,
                                        agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                        &twoparts, mytpwgts2, moptions, &edgecut, part);
        }

        wsum = ssum(lnparts/2, mytpwgts+fpart);
        sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart);
        sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2);

        /* I'm picking the left branch */
        if (mype < fpe+lnpes/2) {
            Mc_KeepPart(agraph, wspace, part, 0);
            lnpes = lnpes/2;
            lnparts = lnparts/2;
        }
        else {
            Mc_KeepPart(agraph, wspace, part, 1);
            fpart = fpart + lnparts/2;
            fpe = fpe + lnpes/2;
            lnpes = lnpes - lnpes/2;
            lnparts = lnparts - lnparts/2;
        }
    }

    /* In case npes is greater than or equal to nparts */
    if (lnparts == 1) {
        /* Only the first process will assign labels (for the reduction to work) */
        if (mype == fpe) {
            for (i=0; i<agraph->nvtxs; i++)
                gwhere0[agraph->label[i]] = fpart;
        }
    }
    /* In case npes is smaller than nparts */
    else {
        if (ncon == 1)
            METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy,
                                  agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart,
                                  moptions, &edgecut, part);
        else
            METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj,
                                        agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                        &lnparts, mytpwgts+fpart, moptions, &edgecut, part);

        for (i=0; i<agraph->nvtxs; i++)
            gwhere0[agraph->label[i]] = fpart + part[i];
    }

    MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm);

    if (ngroups > 1) {
        tmpxadj = agraph->xadj;
        tmpadjncy = agraph->adjncy;
        tmpadjwgt = agraph->adjwgt;
        tmpvwgt = agraph->vwgt;
        tmpwhere = agraph->where;
        agraph->xadj = xadj;
        agraph->adjncy = adjncy;
        agraph->adjwgt = adjwgt;
        agraph->vwgt = vwgt;
        agraph->where = gwhere1;
        agraph->vwgt = vwgt;
        agraph->nvtxs = gnvtxs;
        Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec);
        lbsum = ssum(ncon, lbvec);

        edgecut = ComputeSerialEdgeCut(agraph);
        MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm);
        MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm);

        lpesum.sum = lbsum;
        if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
            if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                lpesum.sum = (float) (edgecut);
            else
                lpesum.sum = (float) (max_cut);
        }

        MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank));
        MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm);
        MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm);

        agraph->xadj = tmpxadj;
        agraph->adjncy = tmpadjncy;
        agraph->adjwgt = tmpadjwgt;
        agraph->vwgt = tmpvwgt;
        agraph->where = tmpwhere;
    }

    idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where);

    FreeGraph(agraph);
    MPI_Comm_free(&ipcomm);
    GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part, (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM);

    IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
    IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

}
Exemple #5
0
/**
Symmetrize and invert the hessian
*/
void function_minimizer::hess_inv(void)
{
  initial_params::set_inactive_only_random_effects();
  int nvar=initial_params::nvarcalc(); // get the number of active parameters
  independent_variables x(1,nvar);

  initial_params::xinit(x);        // get the initial values into the x vector
  //double f;
  dmatrix hess(1,nvar,1,nvar);
  uistream ifs("admodel.hes");
  int file_nvar = 0;
  ifs >> file_nvar;
  if (nvar != file_nvar)
  {
    cerr << "Number of active variables in file mod_hess.rpt is wrong"
         << endl;
  }

  for (int i = 1;i <= nvar; i++)
  {
    ifs >> hess(i);
    if (!ifs)
    {
      cerr << "Error reading line " << i  << " of the hessian"
           << " in routine hess_inv()" << endl;
      exit(1);
    }
  }
  int hybflag = 0;
  ifs >> hybflag;
  dvector sscale(1,nvar);
  ifs >> sscale;
  if (!ifs)
  {
    cerr << "Error reading sscale"
         << " in routine hess_inv()" << endl;
  }

  double maxerr=0.0;
  for (int i = 1;i <= nvar; i++)
  {
    for (int j=1;j<i;j++)
    {
      double tmp=(hess(i,j)+hess(j,i))/2.;
      double tmp1=fabs(hess(i,j)-hess(j,i));
      tmp1/=(1.e-4+fabs(hess(i,j))+fabs(hess(j,i)));
      if (tmp1>maxerr) maxerr=tmp1;
      hess(i,j)=tmp;
      hess(j,i)=tmp;
    }
  }
  /*
  if (maxerr>1.e-2)
  {
    cerr << "warning -- hessian aprroximation is poor" << endl;
  }
 */

  for (int i = 1;i <= nvar; i++)
  {
    int zero_switch=0;
    for (int j=1;j<=nvar;j++)
    {
      if (hess(i,j)!=0.0)
      {
        zero_switch=1;
      }
    }
    if (!zero_switch)
    {
      cerr << " Hessian is 0 in row " << i << endl;
      cerr << " This means that the derivative if probably identically 0 "
              " for this parameter" << endl;
    }
  }

  int ssggnn;
  ln_det(hess,ssggnn);
  int on1=0;
  {
    ofstream ofs3((char*)(ad_comm::adprogram_name + adstring(".eva")));
    {
      dvector se=eigenvalues(hess);
      ofs3 << setshowpoint() << setw(14) << setprecision(10)
           << "unsorted:\t" << se << endl;
     se=sort(se);
     ofs3 << setshowpoint() << setw(14) << setprecision(10)
     << "sorted:\t" << se << endl;
     if (se(se.indexmin())<=0.0)
      {
        negative_eigenvalue_flag=1;
        cout << "Warning -- Hessian does not appear to be"
         " positive definite" << endl;
      }
    }
    ivector negflags(0,hess.indexmax());
    int num_negflags=0;
    {
      int on = option_match(ad_comm::argc,ad_comm::argv,"-eigvec");
      on1=option_match(ad_comm::argc,ad_comm::argv,"-spmin");
      if (on > -1 || on1 >-1 )
      {
        ofs3 << setshowpoint() << setw(14) << setprecision(10)
          << eigenvalues(hess) << endl;
        dmatrix ev=trans(eigenvectors(hess));
        ofs3 << setshowpoint() << setw(14) << setprecision(10)
          << ev << endl;
        for (int i=1;i<=ev.indexmax();i++)
        {
          double lam=ev(i)*hess*ev(i);
          ofs3 << setshowpoint() << setw(14) << setprecision(10)
            << lam << "  "  << ev(i)*ev(i) << endl;
          if (lam<0.0)
          {
            num_negflags++;
            negflags(num_negflags)=i;
          }
        }
        if ( (on1>-1) && (num_negflags>0))   // we will try to get away from
        {                                     // saddle point
          negative_eigenvalue_flag=0;
          spminflag=1;
          if(negdirections)
          {
            delete negdirections;
          }
          negdirections = new dmatrix(1,num_negflags);
          for (int i=1;i<=num_negflags;i++)
          {
            (*negdirections)(i)=ev(negflags(i));
          }
        }
        int on2 = option_match(ad_comm::argc,ad_comm::argv,"-cross");
        if (on2>-1)
        {                                     // saddle point
          dmatrix cross(1,ev.indexmax(),1,ev.indexmax());
          for (int i = 1;i <= ev.indexmax(); i++)
          {
            for (int j=1;j<=ev.indexmax();j++)
            {
              cross(i,j)=ev(i)*ev(j);
            }
          }
          ofs3 <<  endl << "  e(i)*e(j) ";
          ofs3 << endl << cross << endl;
        }
      }
    }

    if (spminflag==0)
    {
      if (num_negflags==0)
      {
        hess=inv(hess);
        int on=0;
        if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-eigvec"))>-1)
        {
          int i;
          ofs3 << "choleski decomp of correlation" << endl;
          dmatrix ch=choleski_decomp(hess);
          for (i=1;i<=ch.indexmax();i++)
            ofs3 << ch(i)/norm(ch(i)) << endl;
          ofs3 << "parameterization of choleski decomnp of correlation" << endl;
          for (i=1;i<=ch.indexmax();i++)
          {
            dvector tmp=ch(i)/norm(ch(i));
            ofs3 << tmp(1,i)/tmp(i) << endl;
          }
        }
      }
    }
  }
  if (spminflag==0)
  {
    if (on1<0)
    {
      for (int i = 1;i <= nvar; i++)
      {
        if (hess(i,i) <= 0.0)
        {
          hess_errorreport();
          ad_exit(1);
        }
      }
    }
    {
      adstring tmpstring="admodel.cov";
      if (ad_comm::wd_flag)
        tmpstring = ad_comm::adprogram_name + ".cov";
      uostream ofs((char*)tmpstring);
      ofs << nvar << hess;
      ofs << gradient_structure::Hybrid_bounded_flag;
      ofs << sscale;
    }
  }
}
Exemple #6
0
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph to all the processors and preceeds
* with the balancing step.
**************************************************************************/
void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
    int i, j, mype, npes, nvtxs, nedges, ncon;
    idxtype *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
    idxtype *part, *lwhere, *home;
    GraphType *agraph, cgraph;
    CtrlType myctrl;
    int lnparts, fpart, fpe, lnpes, ngroups, srnpes, srmype;
    int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut;
    int sr_pe, gd_pe, sr, gd, who_wins, *rcounts, *rdispls;
    float my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
    float rating, max_rating, your_cost = -1.0, your_balance = -1.0;
    float lbvec[MAXNCON], lbsum, min_lbsum, *mytpwgts, mytpwgts2[2], buffer[2];
    MPI_Status status;
    MPI_Comm ipcomm, srcomm;
    struct {
        float cost;
        int rank;
    } lpecost, gpecost;

    IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

    vtxdist = graph->vtxdist;
    agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
    nvtxs = cgraph.nvtxs = agraph->nvtxs;
    nedges = cgraph.nedges = agraph->nedges;
    ncon = cgraph.ncon = agraph->ncon;

    xadj = cgraph.xadj = idxmalloc(nvtxs*(5+ncon)+1+nedges*2, "U_IP: xadj");
    vwgt = cgraph.vwgt = xadj + nvtxs+1;
    vsize = cgraph.vsize = xadj + nvtxs*(1+ncon)+1;
    cgraph.where = agraph->where = part = xadj + nvtxs*(2+ncon)+1;
    lwhere = xadj + nvtxs*(3+ncon)+1;
    home = xadj + nvtxs*(4+ncon)+1;
    adjncy = cgraph.adjncy = xadj + nvtxs*(5+ncon)+1;
    adjwgt = cgraph.adjwgt = xadj + nvtxs*(5+ncon)+1 + nedges;

    /* ADD: this assumes that tpwgts for all constraints is the same */
    /* ADD: this is necessary because serial metis does not support the general case */
    mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
    for (i=0; i<ctrl->nparts; i++)
        for (j=0; j<ncon; j++)
            mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
    for (i=0; i<ctrl->nparts; i++)
        mytpwgts[i] /= (float)ncon;

    idxcopy(nvtxs+1, agraph->xadj, xadj);
    idxcopy(nvtxs*ncon, agraph->vwgt, vwgt);
    idxcopy(nvtxs, agraph->vsize, vsize);
    idxcopy(nedges, agraph->adjncy, adjncy);
    idxcopy(nedges, agraph->adjwgt, adjwgt);

    /****************************************/
    /****************************************/
    if (ctrl->ps_relation == DISCOUPLED) {
        rcounts = imalloc(ctrl->npes, "rcounts");
        rdispls = imalloc(ctrl->npes+1, "rdispls");

        for (i=0; i<ctrl->npes; i++) {
            rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
        }
        MAKECSR(i, ctrl->npes, rdispls);

        MPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_DATATYPE,
                       (void *)part, rcounts, rdispls, IDX_DATATYPE, ctrl->comm);

        for (i=0; i<agraph->nvtxs; i++)
            home[i] = part[i];

        GKfree((void **)&rcounts, (void **)&rdispls, LTERM);
    }
    else {
        for (i=0; i<ctrl->npes; i++)
            for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
                part[j] = home[j] = i;
    }

    /* Ensure that the initial partitioning is legal */
    for (i=0; i<agraph->nvtxs; i++) {
        if (part[i] >= ctrl->nparts)
            part[i] = home[i] = part[i] % ctrl->nparts;
        if (part[i] < 0)
            part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
    }
    /****************************************/
    /****************************************/

    IFSET(ctrl->dbglvl, DBG_REFINEINFO, Mc_ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "input cut: %d, balance: ", ComputeSerialEdgeCut(agraph)));
    for (i=0; i<agraph->ncon; i++)
        IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3f ", lbvec[i]));
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

    /****************************************/
    /* Split the processors into two groups */
    /****************************************/
    sr = (ctrl->mype % 2 == 0) ? 1 : 0;
    gd = (ctrl->mype % 2 == 1) ? 1 : 0;

    if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) {
        sr = 1;
        gd = 0;
    }

    sr_pe = 0;
    gd_pe = 1;

    MPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
    MPI_Comm_rank(ipcomm, &mype);
    MPI_Comm_size(ipcomm, &npes);

    myctrl.dbglvl = 0;
    myctrl.mype = mype;
    myctrl.npes = npes;
    myctrl.comm = ipcomm;
    myctrl.sync = ctrl->sync;
    myctrl.seed = ctrl->seed;
    myctrl.nparts = ctrl->nparts;
    myctrl.ipc_factor = ctrl->ipc_factor;
    myctrl.redist_factor = ctrl->redist_base;
    myctrl.partType = ADAPTIVE_PARTITION;
    myctrl.ps_relation = DISCOUPLED;
    myctrl.tpwgts = ctrl->tpwgts;
    icopy(ncon, ctrl->tvwgts, myctrl.tvwgts);
    icopy(ncon, ctrl->ubvec, myctrl.ubvec);

    if (sr == 1) {
        /*******************************************/
        /* Half of the processors do scratch-remap */
        /*******************************************/
        ngroups = amax(amin(RIP_SPLIT_FACTOR, npes), 1);
        MPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
        MPI_Comm_rank(srcomm, &srmype);
        MPI_Comm_size(srcomm, &srnpes);

        moptions[0] = 0;
        moptions[7] = ctrl->sync + (mype % ngroups) + 1;

        idxset(nvtxs, 0, lwhere);
        lnparts = ctrl->nparts;
        fpart = fpe = 0;
        lnpes = srnpes;
        while (lnpes > 1 && lnparts > 1) {
            ASSERT(ctrl, agraph->nvtxs > 1);
            /* Determine the weights of the partitions */
            mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart);
            mytpwgts2[1] = 1.0-mytpwgts2[0];


            if (agraph->ncon == 1) {
                METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                                      agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut,
                                      part);
            }
            else {
                METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2,
                                            moptions, &edgecut, part);
            }

            wsum = ssum(lnparts/2, mytpwgts+fpart);
            sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart);
            sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2);

            /* I'm picking the left branch */
            if (srmype < fpe+lnpes/2) {
                Mc_KeepPart(agraph, wspace, part, 0);
                lnpes = lnpes/2;
                lnparts = lnparts/2;
            }
            else {
                Mc_KeepPart(agraph, wspace, part, 1);
                fpart = fpart + lnparts/2;
                fpe = fpe + lnpes/2;
                lnpes = lnpes - lnpes/2;
                lnparts = lnparts - lnparts/2;
            }
        }

        /* In case srnpes is greater than or equal to nparts */
        if (lnparts == 1) {
            /* Only the first process will assign labels (for the reduction to work) */
            if (srmype == fpe) {
                for (i=0; i<agraph->nvtxs; i++)
                    lwhere[agraph->label[i]] = fpart;
            }
        }
        /* In case srnpes is smaller than nparts */
        else {
            if (ncon == 1)
                METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                                      agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions,
                                      &edgecut, part);
            else
                METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart,
                                            moptions, &edgecut, part);

            for (i=0; i<agraph->nvtxs; i++)
                lwhere[agraph->label[i]] = fpart + part[i];
        }

        MPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_DATATYPE, MPI_SUM, srcomm);

        edgecut = ComputeSerialEdgeCut(&cgraph);
        Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
        lbsum = ssum(ncon, lbvec);
        MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ipcomm);
        MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);
        lpecost.rank = ctrl->mype;
        lpecost.cost = lbsum;
        if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
            if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                lpecost.cost = (float)edgecut;
            else
                lpecost.cost = (float)max_cut + lbsum;
        }
        MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

        if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) {
            MPI_Send((void *)part, nvtxs, IDX_DATATYPE, sr_pe, 1, ctrl->comm);
        }

        if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) {
            MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);
        }

        if (ctrl->mype == sr_pe) {
            idxcopy(nvtxs, part, lwhere);
            SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
        }

        MPI_Comm_free(&srcomm);
    }
    /**************************************/
    /* The other half do global diffusion */
    /**************************************/
    else {
        /******************************************************************/
        /* The next stmt is required to balance out the sr MPI_Comm_split */
        /******************************************************************/
        MPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm);

        if (ncon == 1) {
            rating = WavefrontDiffusion(&myctrl, agraph, home);
            Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
            lbsum = ssum(ncon, lbvec);

            /* Determine which PE computed the best partitioning */
            MPI_Allreduce((void *)&rating, (void *)&max_rating, 1, MPI_FLOAT, MPI_MAX, ipcomm);
            MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);

            lpecost.rank = ctrl->mype;
            lpecost.cost = lbsum;
            if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
                if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                    lpecost.cost = rating;
                else
                    lpecost.cost = max_rating + lbsum;
            }

            MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

            /* Now send this to the coordinating processor */
            if (ctrl->mype == gpecost.rank && ctrl->mype != gd_pe)
                MPI_Send((void *)part, nvtxs, IDX_DATATYPE, gd_pe, 1, ctrl->comm);

            if (ctrl->mype != gpecost.rank && ctrl->mype == gd_pe)
                MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);

            if (ctrl->mype == gd_pe) {
                idxcopy(nvtxs, part, lwhere);
                SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
            }
        }
        else {
            Mc_Diffusion(&myctrl, agraph, graph->vtxdist, agraph->where, home, wspace, N_MOC_GD_PASSES);
        }
    }

    if (graph->ncon <= MAX_NCON_FOR_DIFFUSION) {
        if (ctrl->mype == sr_pe  || ctrl->mype == gd_pe) {
            /********************************************************************/
            /* The coordinators from each group decide on the best partitioning */
            /********************************************************************/
            my_cut = (float) ComputeSerialEdgeCut(&cgraph);
            my_totalv = (float) Mc_ComputeSerialTotalV(&cgraph, home);
            Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
            my_balance = ssum(cgraph.ncon, lbvec);
            my_balance /= (float) cgraph.ncon;
            my_cost = ctrl->ipc_factor * my_cut + REDIST_WGT * ctrl->redist_base * my_totalv;

            IFSET(ctrl->dbglvl, DBG_REFINEINFO, printf("%s initial cut: %.1f, totalv: %.1f, balance: %.3f\n",
                    (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"), my_cut, my_totalv, my_balance));

            if (ctrl->mype == gd_pe) {
                buffer[0] = my_cost;
                buffer[1] = my_balance;
                MPI_Send((void *)buffer, 2, MPI_FLOAT, sr_pe, 1, ctrl->comm);
            }
            else {
                MPI_Recv((void *)buffer, 2, MPI_FLOAT, gd_pe, 1, ctrl->comm, &status);
                your_cost = buffer[0];
                your_balance = buffer[1];
            }
        }

        if (ctrl->mype == sr_pe) {
            who_wins = gd_pe;
            if ((my_balance < 1.1 && your_balance > 1.1) ||
                    (my_balance < 1.1 && your_balance < 1.1 && my_cost < your_cost) ||
                    (my_balance > 1.1 && your_balance > 1.1 && my_balance < your_balance)) {
                who_wins = sr_pe;
            }
        }

        MPI_Bcast((void *)&who_wins, 1, MPI_INT, sr_pe, ctrl->comm);
    }
    else {
        who_wins = sr_pe;
    }

    MPI_Bcast((void *)part, nvtxs, IDX_DATATYPE, who_wins, ctrl->comm);
    idxcopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where);

    MPI_Comm_free(&ipcomm);
    GKfree((void **)&xadj, (void **)&mytpwgts, LTERM);

    /* For whatever reason, FreeGraph crashes here...so explicitly free the memory.
      FreeGraph(agraph);
    */
    GKfree((void **)&agraph->xadj, (void **)&agraph->adjncy, (void **)&agraph->vwgt, (void **)&agraph->nvwgt, LTERM);
    GKfree((void **)&agraph->vsize, (void **)&agraph->adjwgt, (void **)&agraph->label, LTERM);
    GKfree((void **)&agraph, LTERM);

    IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

}
int main()
{
  printf("Mersenne Twister MT19937 non-rigorous benchmarking\n");
  printf("\n");

  mt::seed(5769);

  // Find out how many numbers we expect to generate per second
  printf("Priming system performance... ");
  fflush(stdout);

  double speed = estimate_calls_per_second();
  printf("ca. %s / second\n\n", sscale(speed, 2));

  // Multiply up an amount and benchmark again in batches
  uint64_t part = 40;
  uint64_t count = part*speed;

  printf("Will generate %" PRIu64 " batches of numbers\n", part);
  printf("Using getrusage(), i.e., not wall-clock time\n");
  printf("\n");

  std::vector<double> persec;
  uint64_t total = 0;
  Timer timer;

  // smaller batches
  for ( uint64_t n=0; n<part-30; ++n ) {
    persec.push_back(numbers_per_second(count/(2*part)));
    total += count/(2*part);
  }

  // normal batches
  for ( uint64_t n=0; n<part-30; ++n ) {
    persec.push_back(numbers_per_second(count/part));
    total += count/part;
  }

  // bigger batches
  for ( uint64_t n=0; n<10; ++n ) {
    persec.push_back(numbers_per_second(2*count/part));
    total += 2*count/part;
  }

  printf("\n");
  printf("RESULTS\n");
  printf("\n");
  printf("  Total numbers generated: %s\n", sscale(total, 2));
  printf("  Total speed: %s numbers/second\n", sscale(total/timer.elapsed_secs(), 4));
  printf("\n");
  printf("  Worst performance: %s numbers/second\n", sscale(min(persec), 4));
  printf("  Best performance:  %s numbers/second\n", sscale(max(persec), 4));
  printf("\n");
  printf("  Mean performance:  %s numbers/second\n", sscale(mean(persec), 4));
  printf("  Standard deviation: %s\n\n", sscale(stddev(persec), 4));

  printf("If we assume a normal distribution, you can plot the above with R:\n"
         "\n"
         "  mean=%f;\n"
         "  sd=%f;\n"
         "  x=seq(mean-4*sd, mean+4*sd, length=200);\n"
         "  y=dnorm(x, mean=mean, sd=sd);\n"
         "  plot(x, y, type=\"l\", xlab=\"Numbers / second\", ylab=\"\");\n"
         "  title(\"Mersenne Twister performance\");\n"
         "\n", mean(persec), stddev(persec));

  printf("Note that while the mean is quite consistent between runs, standard\n"
         "deviation may not.  Be sure to compile at maximum optimization levels,\n"
         "using your native instruction set.\n\n");

  printf("Update: What you really want to look at is the *best* performance.\n"
         "It shows what is possible given the least amount of interruption,\n"
         "and should therefore be closer to the true performance of the code.\n"
         "\n"
         "The standard deviation may tell you how well the code manages to\n"
         "avoid preemption.\n\n");

  return 0;
}
Exemple #8
0
void function_minimizer::pvm_master_mcmc_routine(int nmcmc,int iseed0,double dscale,
  int restart_flag)
{
  uostream * pofs_psave=NULL;
  dmatrix mcmc_display_matrix;
  int mcmc_save_index=1;
  int mcmc_wrap_flag=0;
  int mcmc_gui_length=10000;
  int no_sd_mcmc=0;

  int on2=-1;
  if ( (on2=option_match(ad_comm::argc,ad_comm::argv,"-nosdmcmc"))>-1)
    no_sd_mcmc=1;

  if (stddev_params::num_stddev_params==0)
  {
    cerr << " You must declare at least one object of type sdreport "
         << endl << " to do the mcmc calculations" << endl;
     return;
  }
  {
    //ofstream of_bf("testbf");

    //if (adjm_ptr) set_labels_for_mcmc();

    ivector number_offsets;
    dvector lkvector;
    //double current_bf=0;
    double lcurrent_bf=0;
    double size_scale=1.0;
    double total_spread=200;
    //double total_spread=2500;
    uostream * pofs_sd = NULL;
    int nvar=initial_params::nvarcalc(); // get the number of active parameters
    int scov_option=0;
    dmatrix s_covar;
    dvector s_mean;
    int on=-1;
    int ncsim=25000;
    int nslots=800;
    //int nslots=3600;
    int initial_nsim=4800;
    int ntmp=0;
    int ncor=0;
    double bfsum=0;
    int ibfcount=0;
    double llbest;
    double lbmax;

    //if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcscov",ntmp))>-1)
    //{
    scov_option=1;
    s_covar.allocate(1,nvar,1,nvar);
    s_mean.allocate(1,nvar);
    s_mean.initialize();
    s_covar.initialize();

    int ndvar=stddev_params::num_stddev_calc();
    int numdvar=stddev_params::num_stddev_number_calc();
    /*
    if (adjm_ptr)
    {
      mcmc_display_matrix.allocate(1,numdvar,1,mcmc_gui_length);
      number_offsets.allocate(1,numdvar);
      number_offsets=stddev_params::copy_all_number_offsets();
    }
    */
    dvector x(1,nvar);
    dvector scale(1,nvar);
    dmatrix values;
    int have_hist_flag=0;
    initial_params::xinit(x);
    dvector pen_vector(1,nvar);
    {
      initial_params::reset(dvar_vector(x),pen_vector);
      cout << pen_vector << endl << endl;
    }

    initial_params::mc_phase=0;
    initial_params::stddev_scale(scale,x);
    initial_params::mc_phase=1;
    dvector bmn(1,nvar);
    dvector mean_mcmc_values(1,ndvar);
    dvector s(1,ndvar);
    dvector h(1,ndvar);
    //dvector h;
    dvector square_mcmc_values(1,ndvar);
    square_mcmc_values.initialize();
    mean_mcmc_values.initialize();
    bmn.initialize();
    int use_empirical_flag=0;
    int diag_option=0;
    int topt=0;
    if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcdiag"))>-1)
    {
      diag_option=1;
      cout << " Setting covariance matrix to diagonal with entries " << dscale
           << endl;
    }
    dmatrix S(1,nvar,1,nvar);
    dvector sscale(1,nvar);
    if (!diag_option)
    {
      int on,nopt;
      int rescale_bounded_flag=0;
      double rescale_bounded_power=0.5;
      if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcrb",nopt))>-1)
      {
        if (nopt)
        {
          int iii=atoi(ad_comm::argv[on+1]);
          if (iii < 1 || iii > 9)
          {
            cerr << " -mcrb argument must be integer between 1 and 9 --"
                    " using default of 5" << endl;
            rescale_bounded_power=0.5;
          }
          else
            rescale_bounded_power=iii/10.0;
        }
        else
        {
          rescale_bounded_power=0.5;
        }
        rescale_bounded_flag=1;
      }
      if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcec"))>-1)
      {
        use_empirical_flag=1;
      }
      if (use_empirical_flag)
      {
        read_empirical_covariance_matrix(nvar,S,ad_comm::adprogram_name);
      }
      else if (!rescale_bounded_flag)
      {
        int tmp;
        read_covariance_matrix(S,nvar,tmp,sscale);
      }
      else
      {
        read_hessian_matrix_and_scale1(nvar,S,rescale_bounded_power,
          mcmc2_flag);
        //read_hessian_matrix_and_scale(nvar,S,pen_vector);
      }

      {  // scale covariance matrix for model space
        dmatrix tmp(1,nvar,1,nvar);
        for (int i=1;i<=nvar;i++)
        {
          tmp(i,i)=S(i,i)*(scale(i)*scale(i));
          for (int j=1;j<i;j++)
          {
            tmp(i,j)=S(i,j)*(scale(i)*scale(j));
            tmp(j,i)=tmp(i,j);
          }
        }
        S=tmp;
      }
    }
    else
    {
      S.initialize();
      for (int i=1;i<=nvar;i++)
      {
        S(i,i)=dscale;
      }
    }

    cout << sort(eigenvalues(S)) << endl;
    dmatrix chd = choleski_decomp( (dscale*2.4/sqrt(double(nvar))) * S);
    dmatrix chdinv=inv(chd);
    int sgn;

    dmatrix symbds(1,2,1,nvar);
    initial_params::set_all_simulation_bounds(symbds);
    ofstream ofs_sd1((char*)(ad_comm::adprogram_name + adstring(".mc2")));

    {
      long int iseed=0;
      int number_sims;
      if (nmcmc<=0)
      {
        number_sims=  100000;
      }
      else
      {
        number_sims=  nmcmc;
      }
      //cin >> iseed;
      if (iseed0<=0)
      {
        iseed=-36519;
      }
      else
      {
        iseed=-iseed0;
      }
      if (iseed>0)
      {
        iseed=-iseed;
      }
      cout << "Initial seed value " << iseed << endl;
      random_number_generator rng(iseed);
      rng.better_rand();
      //better_rand(iseed);
      double lprob=0.0;
      double lpinv=0.0;
      double lprob3=0.0;
      // get lower and upper bounds

      independent_variables y(1,nvar);
      independent_variables parsave(1,nvar);

      // read in the mcmc values to date
      int ii=1;
      dmatrix hist;
      if (restart_flag)
      {
        int tmp=0;
        if (!no_sd_mcmc) {
          hist.allocate(1,ndvar,-nslots,nslots);
          tmp=read_hist_data(hist,h,mean_mcmc_values,s,parsave,iseed,
            size_scale);
          values.allocate(1,ndvar,-nslots,nslots);
          for (int i=1;i<=ndvar;i++)
          {
            values(i).fill_seqadd(mean_mcmc_values(i)-0.5*total_spread*s(i)
              +.5*h(i),h(i));
          }
        }
        if (iseed>0)
        {
          iseed=-iseed;
        }
        double br=rng.better_rand();
        if (tmp) have_hist_flag=1;
        chd=size_scale*chd;
        chdinv=chdinv/size_scale;
      }
      else
      {
        int on=-1;
        int nopt=0;
        if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcpin",nopt))>-1)
        {
          if (nopt)
          {
            cifstream cif((char *)ad_comm::argv[on+1]);
            if (!cif)
            {
              cerr << "Error trying to open mcmc par input file "
                   << ad_comm::argv[on+1] << endl;
              exit(1);
            }
            cif >> parsave;
            if (!cif)
            {
              cerr << "Error reading from mcmc par input file "
                   << ad_comm::argv[on+1] << endl;
              exit(1);
            }
          }
          else
          {
            cerr << "Illegal option with -mcpin" << endl;
          }
        }
        else
        {
          ii=1;
          initial_params::copy_all_values(parsave,ii);
        }
      }

      ii=1;
      initial_params::restore_all_values(parsave,ii);

      gradient_structure::set_NO_DERIVATIVES();
      ofstream ogs("sims");
      ogs << nvar << " " << number_sims << endl;
      initial_params::xinit(y);

      send_int_to_slaves(1);
      double llc=-pvm_master_get_monte_carlo_value(nvar,y);
      send_int_to_slaves(1);
      llbest=-pvm_master_get_monte_carlo_value(nvar,y);



      lbmax=llbest;
      // store current mcmc variable values in param_values
      //dmatrix store_mcmc_values(1,number_sims,1,ndvar);
#if defined(USE_BAYES_FACTORS)
      lkvector.allocate(1,number_sims);
#endif
      dvector mcmc_values(1,ndvar);
      dvector mcmc_number_values;
      //if (adjm_ptr) mcmc_number_values.allocate(1,numdvar);
      int offs=1;
      stddev_params::copy_all_values(mcmc_values,offs);

      /*
      if (adjm_ptr)
      {
        offs=1;
        stddev_params::copy_all_number_values(mcmc_number_values,offs);
      }
      */
      int change_ball=2500;
      int nopt;
      if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcscale",nopt))>-1)
      {
        if (nopt)
        {
          int iii=atoi(ad_comm::argv[on+1]);
          if (iii <=0)
          {
            cerr << " Invalid option following command line option -mcball -- "
              << endl << " ignored" << endl;
          }
          else
            change_ball=iii;
        }
      }
      int iac=0;
      int liac=0;
      int isim=0;
      int itmp=0;
      double logr;
      int u_option=0;
      double ll;
      int s_option=1;
      int psvflag=0;
      if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcu"))>-1)
      {
        u_option=1;
      }
      if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcnoscale"))>-1)
      {
        s_option=0;
      }
      //cout << llc << " " << llc << endl;
      int iac_old=0;
      int i_old=0;

      {
       if (!restart_flag)
       {
         pofs_sd =
           new uostream((char*)(ad_comm::adprogram_name + adstring(".mcm")));
       }

      int mcsave_flag=0;
      int mcrestart_flag=option_match(ad_comm::argc,ad_comm::argv,"-mcr");

      if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcsave"))>-1)
      {
        int jj=(int)atof(ad_comm::argv[on+1]);
        if (jj <=0)
        {
          cerr << " Invalid option following command line option -mcsave -- "
            << endl;
        }
        else
        {
          mcsave_flag=jj;
          if ( mcrestart_flag>-1)
          {
            // check that nvar is correct
            {
              uistream uis((char*)(ad_comm::adprogram_name + adstring(".psv")));
              if (!uis)
              {
                cerr << "Error trying to open file" <<
                  ad_comm::adprogram_name + adstring(".psv") <<
                  " for mcrestart" <<   endl;
                cerr << " I am starting a new file " << endl;
                psvflag=1;
              }
              else
              {
                int nv1;
                uis >> nv1;
                if (nv1 !=nvar)
                {
                  cerr << "wrong number of independent variables in" <<
                    ad_comm::adprogram_name + adstring(".psv") <<
                  cerr << " I am starting a new file " << endl;
                  psvflag=1;
                }
              }
            }

            if (!psvflag) {
              pofs_psave=
                new uostream(
                  (char*)(ad_comm::adprogram_name + adstring(".psv")),ios::app);
            } else {
              pofs_psave=
                new uostream((char*)(ad_comm::adprogram_name + adstring(".psv")));
            }

          } else {