示例#1
0
文件: conpar.c 项目: F-A/pydstool
int 
conpar_mpi_wrapper(integer *nov, integer *na, integer *nra, 
		   integer *nca, doublereal ***a, integer *ncb, 
		   doublereal ***b, integer *nbc, integer *nrc, 
		   doublereal ***c, doublereal *d, integer *irf, integer *icf)

{
    integer loop_start,loop_end;
    integer loop_start_tmp,loop_end_tmp;
    int i,comm_size;
    int *a_counts,*a_displacements;
    int *b_counts,*b_displacements;
    int *c_counts,*c_displacements;
    int *irf_counts,*irf_displacements;
    int *icf_counts,*icf_displacements;


    MPI_Comm_size(MPI_COMM_WORLD,&comm_size);
    a_counts=(int *)MALLOC(sizeof(int)*comm_size);
    a_displacements=(int *)MALLOC(sizeof(int)*comm_size);
    b_counts=(int *)MALLOC(sizeof(int)*comm_size);
    b_displacements=(int *)MALLOC(sizeof(int)*comm_size);
    c_counts=(int *)MALLOC(sizeof(int)*comm_size);
    c_displacements=(int *)MALLOC(sizeof(int)*comm_size);
    irf_counts=(int *)MALLOC(sizeof(int)*comm_size);
    irf_displacements=(int *)MALLOC(sizeof(int)*comm_size);
    icf_counts=(int *)MALLOC(sizeof(int)*comm_size);
    icf_displacements=(int *)MALLOC(sizeof(int)*comm_size);
    a_counts[0] = 0;
    a_displacements[0] = 0;
    b_counts[0] = 0;
    b_displacements[0] = 0;
    c_counts[0] = 0;
    c_displacements[0] = 0;
    irf_counts[0] = 0;
    irf_displacements[0] = 0;
    icf_counts[0] = 0;
    icf_displacements[0] = 0;

    for(i=1;i<comm_size;i++){
      
      /*Send message to get worker into conpar mode*/
      {
	int message=AUTO_MPI_CONPAR_MESSAGE;
	MPI_Send(&message,1,MPI_INT,i,0,MPI_COMM_WORLD);
      }
      loop_start = ((i-1)*(*na))/(comm_size - 1);
      loop_end = ((i)*(*na))/(comm_size - 1);
      a_counts[i] = (*nca)*(*nra)*(loop_end-loop_start);
      a_displacements[i] = (*nca)*(*nra)*loop_start;
      b_counts[i] = (*ncb)*(*nra)*(loop_end-loop_start);
      b_displacements[i] = (*ncb)*(*nra)*loop_start;
      c_counts[i] = (*nca)*(*nrc)*(loop_end-loop_start);
      c_displacements[i] = (*nca)*(*nrc)*loop_start;
      irf_counts[i] = (*nra)*(loop_end-loop_start);
      irf_displacements[i] = (*nra)*loop_start;
      icf_counts[i] = (*nca)*(loop_end-loop_start);
      icf_displacements[i] = (*nca)*loop_start;
      loop_start_tmp = 0;
      loop_end_tmp = loop_end-loop_start;
      MPI_Send(&loop_start_tmp ,1,MPI_LONG,i,0,MPI_COMM_WORLD);
      MPI_Send(&loop_end_tmp   ,1,MPI_LONG,i,0,MPI_COMM_WORLD);
    }
    {
      integer params[6];
      params[0]=*nov;
      params[1]=*nra;
      params[2]=*nca;
      params[3]=*ncb;
      params[4]=*nbc;
      params[5]=*nrc;

      
      MPI_Bcast(params        ,6,MPI_LONG,0,MPI_COMM_WORLD);
    }
    MPI_Scatterv(irf,irf_counts,irf_displacements,MPI_LONG,
		 NULL,0,MPI_LONG,
		 0,MPI_COMM_WORLD);
    MPI_Scatterv(icf,icf_counts,icf_displacements,MPI_LONG,
		 NULL,0,MPI_LONG,
		 0,MPI_COMM_WORLD);

    /* Worker is running now */

    {
      /*I create a temporary send buffer for the MPI_Reduce
	command.  This is because there isn't an
	asymmetric version (like MPI_Scatterv).*/
      double **dtemp;
      dtemp = DMATRIX(*nrc,*ncb);
      for(i=0;i<(*nrc);i++)
        for(j=0;i<(*ncb);i++)
          dtemp[i][j]=d[i][j];
      MPI_Reduce(dtemp[0],d[0],(*ncb)*(*nrc),MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
      FREE_DMATRIX(dtemp);
    }
    MPI_Gatherv(NULL,0,MPI_DOUBLE,
		a[0][0],a_counts,a_displacements,MPI_DOUBLE,
		0,MPI_COMM_WORLD);
    MPI_Gatherv(NULL,0,MPI_DOUBLE,
		b[0][0],b_counts,b_displacements,MPI_DOUBLE,
		0,MPI_COMM_WORLD);
    MPI_Gatherv(NULL,0,MPI_DOUBLE,
		c[0][0],c_counts,c_displacements,MPI_DOUBLE,
		0,MPI_COMM_WORLD);
    MPI_Gatherv(NULL,0,MPI_LONG,
		irf,irf_counts,irf_displacements,MPI_LONG,
		0,MPI_COMM_WORLD);
    MPI_Gatherv(NULL,0,MPI_LONG,
		icf,icf_counts,icf_displacements,MPI_LONG,
		0,MPI_COMM_WORLD);
    return 0;
}
/* Uses Data info to initialize certain aspects of special point, so make sure Data is initialized and
populated with proper parameter values before calling this function. */
int CreateSpecialPoint(AutoData *Data, integer itp, integer lab, doublereal *u, 
                       integer npar, integer *ipar, doublereal *par, integer *icp,
                       doublereal *ups, doublereal *udotps, doublereal *rldot) {
    integer i, j, k, nsp;
    
    nsp = Data->num_sp;
    if (nsp == Data->sp_len) {
        Data->sp_len += 1;
        Data->sp = (AutoSPData *)REALLOC(Data->sp, Data->sp_len*sizeof(AutoSPData));
        Data->sp[nsp].u = (doublereal *)MALLOC((Data->iap.ndim+1)*sizeof(doublereal));
        Data->sp[nsp].ups = NULL;
        Data->sp[nsp].udotps = NULL;
        Data->sp[nsp].rldot = NULL;
        Data->sp[nsp].icp = NULL;
        Data->sp[nsp].a1 = NULL;
        Data->sp[nsp].a2 = NULL;
    }
    
    Data->sp[nsp].ibr = 1;
    Data->sp[nsp].mtot = 1;
    Data->sp[nsp].itp = itp;
    Data->sp[nsp].lab = lab;
    Data->sp[nsp].nfpr = Data->iap.nicp;
    Data->sp[nsp].isw = Data->iap.isw;
    Data->sp[nsp].ntpl = Data->iap.ncol*Data->iap.ntst+1;
    Data->sp[nsp].nar = Data->iap.ndim+1;
    Data->sp[nsp].nrowpr = 0;
    Data->sp[nsp].ntst = Data->iap.ntst;
    Data->sp[nsp].ncol = Data->iap.ncol;
    Data->sp[nsp].nparx = NPARX;
    
    Data->npar = npar-1;      // Number of parameters in model (subtract period T)
    
    // u
    for (i=0; i<Data->iap.ndim+1; i++)
        Data->sp[nsp].u[i] = u[i];
    
    // par
    for (i=0; i<NPARX; i++)
        Data->sp[nsp].par[i] = 0.0;
    for (i=0; i<npar; i++)
        Data->sp[nsp].par[ipar[i]] = par[i];
    
    // icp
    if (icp != NULL) {
        Data->sp[nsp].icp = (integer *)MALLOC(Data->iap.nicp*sizeof(integer));
        for (i=0; i<Data->iap.nicp; i++)
            Data->sp[nsp].icp[i] = icp[i];
    }
    
    // ups
    if (ups != NULL) {
        Data->sp[nsp].ups = DMATRIX(Data->iap.ncol*Data->iap.ntst+1, Data->iap.ndim+1);
        for (i=0; i<Data->iap.ncol*Data->iap.ntst+1; i++)
            for (j=0; j<Data->iap.ndim+1; j++)
                Data->sp[nsp].ups[i][j] = ups[i*(Data->iap.ndim+1)+j];
    }
    
    // udotps
    if (udotps != NULL) {
        Data->sp[nsp].udotps = DMATRIX(Data->iap.ncol*Data->iap.ntst+1, Data->iap.ndim);
        for (i=0; i<Data->iap.ncol*Data->iap.ntst+1; i++)
            for (j=0; j<Data->iap.ndim; j++)
                Data->sp[nsp].udotps[i][j] = udotps[i*(Data->iap.ndim)+j];
    }
    
    // rldot
    if (rldot != NULL) {
        Data->sp[nsp].rldot = (doublereal *)MALLOC((Data->iap.nicp)*sizeof(doublereal));
        for (i=0; i<Data->iap.nicp; i++)
            Data->sp[nsp].rldot[i] = rldot[i];
    }
    
    // a1 and a2
    if (Data->sflow) {
        Data->sp[nsp].a1 = DMATRIX_3D(Data->iap.ntst, Data->iap.ndim, Data->iap.ndim);
        Data->sp[nsp].a2 = DMATRIX_3D(Data->iap.ntst, Data->iap.ndim, Data->iap.ndim);
    }
    
    Data->num_sp += 1;
}
示例#3
0
文件: conpar.c 项目: F-A/pydstool
void *conpar_process(void * arg)
{
  integer icf_dim1, irf_dim1;
  
  /* Local variables */
  integer ipiv, jpiv, itmp;
  doublereal tpiv;
  integer i, j, l, k1, k2, m1, m2, ic, ir;
  doublereal rm;
  integer ir1, irp;
  doublereal piv;
  integer icp1;

  integer *nov, *nra, *nca;
  doublereal ***a;
  integer *ncb;
  doublereal ***b;
  integer *nbc, *nrc;
  doublereal ***c, **d;
  integer *irf, *icf;
  integer loop_start,loop_end;

#ifdef PTHREADS
  doublereal **dlocal;
#endif

#ifdef USAGE
  struct rusage *conpar_process_usage;
  usage_start(&conpar_process_usage);
#endif

  nov = ((conpar_parallel_arglist *)arg)->nov;
  nra = ((conpar_parallel_arglist *)arg)->nra;
  nca = ((conpar_parallel_arglist *)arg)->nca;
  a = ((conpar_parallel_arglist *)arg)->a;
  ncb = ((conpar_parallel_arglist *)arg)->ncb;
  b = ((conpar_parallel_arglist *)arg)->b;
  nbc = ((conpar_parallel_arglist *)arg)->nbc;
  nrc = ((conpar_parallel_arglist *)arg)->nrc;
  c = ((conpar_parallel_arglist *)arg)->c;
  d = ((conpar_parallel_arglist *)arg)->d;
  irf = ((conpar_parallel_arglist *)arg)->irf;
  icf = ((conpar_parallel_arglist *)arg)->icf;
  loop_start = ((conpar_parallel_arglist *)arg)->loop_start;
  loop_end = ((conpar_parallel_arglist *)arg)->loop_end;

#ifdef PTHREADS
  dlocal=DMATRIX(*nrc, *ncb);
#endif
  /* In the default case we don't need to do anything special */
  if(global_conpar_type == CONPAR_DEFAULT) {
    ;
  }
  /* In the message passing case we set d to be
     0.0, do a sum here, and then do the final
     sum (with the true copy of d) in the
     master */
  else if (global_conpar_type == CONPAR_MPI) {
    for(i=0;i<*nrc;i++)
      for (j=0; j<*ncb;j++)
          d[i][j]=0.0;
  }
  /* In the shared memory case we create a local
     variable for doing this threads part of the
     sum, then we do a final sum into shared memory
     at the end */
  else if (global_conpar_type == CONPAR_PTHREADS) {
#ifdef PTHREADS
    for(i=0;i<*nrc;i++)
      for (j=0; j<*ncb;j++)
          dlocal[i][j]=0.0;
#else
    ;
#endif
  }

  /* Note that the summation of the adjacent overlapped part of C */
  /* is delayed until REDUCE, in order to merge it with other communications.*/
  /* NA is the local NTST. */
  
  irf_dim1 = *nra;
  icf_dim1 = *nca;
  
  /* Condensation of parameters (Elimination of local variables). */
  m1 = *nov + 1;
  m2 = *nca - *nov;

  for (i = loop_start;i < loop_end; i++) {
    for (ic = m1; ic <= m2; ++ic) {
      ir1 = ic - *nov + 1;
      irp = ir1 - 1;
      icp1 = ic + 1;
      /*	     **Search for pivot (Complete pivoting) */
      piv = 0.0;
      ipiv = irp;
      jpiv = ic;
      for (k1 = irp; k1 <= *nra; ++k1) {
	int irf_k1_i = irf[-1 + k1 + i*irf_dim1];
	for (k2 = ic; k2 <= m2; ++k2) {
	  int icf_k2_i = icf[-1 + k2 + i*icf_dim1];
	  tpiv = a[i][-1 + irf_k1_i][-1 + icf_k2_i];
	  if (tpiv < 0.0) {
	    tpiv = -tpiv;
	  }
	  if (piv < tpiv) {
	    piv = tpiv;
	    ipiv = k1;
	    jpiv = k2;
	  }
	}
      }
      /*	     **Move indices */
      itmp = icf[-1 + ic + i*icf_dim1];
      icf[-1 + ic + i*icf_dim1] = icf[-1 + jpiv + i*icf_dim1];
      icf[-1 + jpiv + i*icf_dim1] = itmp;
      itmp = irf[-1 + irp + i*irf_dim1];
      irf[-1 + irp + i*irf_dim1] = irf[-1 + ipiv + i*irf_dim1];
      irf[-1 + ipiv + i*irf_dim1] = itmp;
      {
	int icf_ic_i = icf[-1 + ic + i*icf_dim1];
	int irf_irp_i = irf[-1 + irp + i*irf_dim1];
	doublereal *a_offset2 = a[i][-1 + irf_irp_i];
	doublereal *b_offset2 = b[i][-1 + irf_irp_i];
	/*	     **End of pivoting; elimination starts here */
	for (ir = ir1; ir <= *nra; ++ir) {
	  int irf_ir_i = irf[-1 + ir + i*irf_dim1];
	  doublereal *a_offset1 = a[i][-1 + irf_ir_i];
          doublereal *b_offset1 = b[i][-1 + irf_ir_i];
	  rm = a_offset1[-1 + icf_ic_i]/a_offset2[-1 + icf_ic_i];
	  a_offset1[-1 + icf_ic_i] = rm;
	  if (rm != (double)0.) {
	    for (l = 0; l < *nov; ++l) {
	      a_offset1[l] -= rm * a_offset2[l];
	    }
	    for (l = icp1 -1; l < *nca; ++l) {
	      int icf_l_i = icf[l + i*icf_dim1];
	      a_offset1[-1 + icf_l_i] -= rm * a_offset2[-1 + icf_l_i];
	    }
	    for (l = 0; l < *ncb; ++l) {
	      b_offset1[l] -= rm * b_offset2[l];
	    }
	  }
	}
	for (ir = *nbc + 1; ir <= *nrc; ++ir) {
          doublereal *c_offset1 = c[i][-1 + ir];
          doublereal *d_offset1 = d[-1 + ir];
	  rm = c_offset1[-1 + icf_ic_i]/a_offset2[-1 + icf_ic_i];
	  c_offset1[-1 + icf_ic_i]=rm;
	  if (rm != (double)0.) {
	    for (l = 0; l < *nov; ++l) {
	      c_offset1[l] -= rm * a_offset2[l];
	    }
	    for (l = icp1 -1 ; l < *nca; ++l) {
	      int icf_l_i = icf[l + i*icf_dim1];
	      c_offset1[-1 + icf_l_i] -= rm * a_offset2[-1 + icf_l_i];
	    }
	    for (l = 0; l < *ncb; ++l) {
	      /* 
		 A little explanation of what is going on here
		 is in order I believe.  This array is
		 created by a summation across all workers,
		 hence it needs a mutex to avoid concurrent
		 writes (in the shared memory case) or a summation
		 in the master (in the message passing case).
		 Since mutex's can be somewhat slow, we will do the
		 summation into a local variable, and then do a
		 final summation back into global memory when the
		 main loop is done.
	      */
	      /* Nothing special for the default case */
	      if(global_conpar_type == CONPAR_DEFAULT) {
		d_offset1[l] -= rm * b_offset2[l];
	      }
	      /* In the message passing case we sum into d,
		 which is a local variable initialized to 0.0.
		 We then sum our part with the masters part
		 in the master. */
	      else if (global_conpar_type == CONPAR_MPI) {
		d_offset1[l] -= rm * b_offset2[l];
	      }
	      /* In the shared memory case we sum into a local
		 variable our contribution, and then sum
		 into shared memory at the end (inside a mutex */
	      else if (global_conpar_type == CONPAR_PTHREADS) {
#ifdef PTHREADS
		dlocal[-1 + ir][l] -= rm * b_offset2[l];

#else
		;
#endif
	      }
	    }
	  }
	}
      }
    }
  }
#ifdef PTHREADS
  /* This is were we sum into the global copy of the d
     array, in the shared memory case */
  if(global_conpar_type == CONPAR_PTHREADS) {
#ifdef PTHREADS
    pthread_mutex_lock(&mutex_for_d);
    for(i=0;i<*nrc;i++)
      for (j=0; j<*ncb;j++)
        d[i][j] += dlocal[i][j];
    pthread_mutex_unlock(&mutex_for_d);
    FREE_DMATRIX(dlocal);
#else
    ;
#endif
  }
#endif
#ifdef USAGE
  usage_end(conpar_process_usage,"in conpar worker");
#endif
  return NULL;
}
int AUTO(AutoData *Data)
{
  struct timeval  *time0,*time1;
  integer icp[NPARX2];
  doublereal par[NPARX2], thl[NPARX];
  iap_type *iap;
  rap_type *rap;
  doublereal *thu;
  integer *iuz;
  doublereal *vuz;
  function_list list;
  
  integer i, j, k;

  // Initialize structures and constants
  gData = Data;
  
  iap = &(Data->iap);
  rap = &(Data->rap);

  Data->sp_len = Data->num_sp + (1 + floor(iap->nmx/iap->npr));
  Data->sp_inc = 5;

#ifdef USAGE
  struct rusage *init_usage,*total_usage;
  usage_start(&init_usage);
  usage_start(&total_usage);
#endif

#ifdef FLOATING_POINT_TRAP
  trapfpe();
#endif

#ifdef PTHREADS
  global_conpar_type = CONPAR_PTHREADS;
  global_setubv_type = SETUBV_PTHREADS;
  global_reduce_type = REDUCE_PTHREADS;
#endif

  fp9 = fopen("fort.9","w");
  if(fp9 == NULL) {
    fprintf(stderr,"Error:  Could not open fort.9\n");
    exit(1);
  }
  

  /* Initialization : */

  iap->mynode = mynode();
  iap->numnodes = numnodes();
  if (iap->numnodes > 1) {
    iap->parallel_flag = 1;
  } else {
    iap->parallel_flag = 0;
  }


    /* NOTE:  thu is allocated inside this function, and the
       pointer is passed back.  I know this is ugly, but
       this function does a bit of work to get thu setup correctly,
       as well as figuring out the size the array should be.
       What really should happen is to have one function which
       reads fort.2 and another fuction which initializes the array.
       That way the allocation could happen between the two calls.
    */
    init0(iap, rap, par, icp, thl, &thu, &iuz, &vuz);      

    /* Find restart label and determine type of restart point. */
    if (iap->irs > 0) {
      logical found = FALSE_;

      findlb(iap, rap, iap->irs, &(iap->nfpr), &found);
      if (! found) {
	if (iap->mynode == 0) {
	  fprintf(stderr,"\nRestart label %4ld not found\n",iap->irs);
	}
	exit(0);
      }
    }
    set_function_pointers(*iap,&list);
    init1(iap, rap, icp, par);
    chdim(iap);

    /* Create the allocations for the global structures used in 
       autlib3.c and autlib5.c.  These are purely an efficiency thing.
       The allocation and deallocation of these scratch areas takes
       up a nontrivial amount of time if done directly in the
       wrapper functions in autlib3.c*/
    allocate_global_memory(*iap);

    /* ---------------------------------------------------------- */
    /* ---------------------------------------------------------- */
    /*  One-parameter continuations */
    /* ---------------------------------------------------------- */
    /* ---------------------------------------------------------- */

#ifdef USAGE
    usage_end(init_usage,"main initialization");
#endif
    
    if (Data->print_input)
        PrintInput(Data, par, icp);
    
    // Initialize output variables
    if(list.type==AUTOAE)
        Data->u = DMATRIX(iap->nmx, iap->ndim);
    else {
        // Solution measures
        Data->usm = (doublereal ***)MALLOC((2+(int)(log2(Data->nsm)))*sizeof(doublereal **));
        Data->usm[0] = DMATRIX(iap->nmx, iap->ndim);    // MAX
        Data->usm[1] = DMATRIX(iap->nmx, iap->ndim);    // MIN
        for (i=0; i<(int)(log2(Data->nsm)); i++)
            Data->usm[2+i] = DMATRIX(iap->nmx, iap->ndim);
        
        // Jacobian of flow
        if (Data->sjac) {
            Data->c0 = DMATRIX_3D(iap->nmx, iap->ndim, iap->ndim);
            Data->c1 = DMATRIX_3D(iap->nmx, iap->ndim, iap->ndim);
        }
        
        // Jacobian of flow along cycles (temporary storage)
        if (Data->sflow) {
            Data->a1 = DMATRIX_3D(iap->ntst, iap->ndim, iap->ndim);
            Data->a2 = DMATRIX_3D(iap->ntst, iap->ndim, iap->ndim);
        }
    }
    Data->par = DMATRIX(iap->nmx, iap->nicp);
    if (iap->isp >= 1) {
        Data->ev = DCMATRIX(iap->nmx, iap->ndim);
        for (i=0; i<iap->nmx; i++) {
            for (j=0; j<iap->ndim; j++) {
                Data->ev[i][j].r = NAN; // This is a flag for bad floquet multipliers
                Data->ev[i][j].i = NAN;
            }
        }
    }
    Data->num_u = 0;

    if (Data->sp == NULL)
        Data->num_sp = 0;
    
    Data->sp = (AutoSPData *)REALLOC(Data->sp, (Data->sp_len)*sizeof(AutoSPData));

    for (i=Data->num_sp; i<Data->sp_len; i++) {
        Data->sp[i].u = NULL;
        Data->sp[i].icp = NULL;
        Data->sp[i].ups = NULL;
        Data->sp[i].udotps = NULL;
        Data->sp[i].rldot = NULL;
        Data->sp[i].a1 = NULL;
        Data->sp[i].a2 = NULL;
    }

    if(list.type==AUTOAE)
      autoae(iap, rap, par, icp, list.aelist.funi, list.aelist.stpnt, list.aelist.pvli, thl, thu, iuz, vuz);
    if(list.type==AUTOBV)
      autobv(iap, rap, par, icp, list.bvlist.funi, list.bvlist.bcni, 
	     list.bvlist.icni, list.bvlist.stpnt, list.bvlist.pvli, thl, thu, iuz, vuz);
     
    // Testing output
    if (Data->print_output)
        PrintOutput(Data);
     
#ifdef USAGE
    usage_end(total_usage,"total");

#endif
    //time_end(time0,"Total Time ",fp9);
    fprintf(fp9,"----------------------------------------------");
    fprintf(fp9,"----------------------------------------------\n");
    //time_end(time1,"",stdout);


  //}
  FREE(thu);
  FREE(iuz);
  FREE(vuz);
  fclose(fp9);

  // Clean up special solution points that were allocated and not used
  Data->sp = (AutoSPData *)REALLOC(Data->sp, (Data->num_sp)*sizeof(AutoSPData));
  assert(Data->sp);
  Data->sp_len = Data->num_sp;

  return 1;
}