コード例 #1
0
ファイル: conpar.c プロジェクト: F-A/pydstool
void *conpar_process(void * arg)
{
  integer icf_dim1, irf_dim1;
  
  /* Local variables */
  integer ipiv, jpiv, itmp;
  doublereal tpiv;
  integer i, j, l, k1, k2, m1, m2, ic, ir;
  doublereal rm;
  integer ir1, irp;
  doublereal piv;
  integer icp1;

  integer *nov, *nra, *nca;
  doublereal ***a;
  integer *ncb;
  doublereal ***b;
  integer *nbc, *nrc;
  doublereal ***c, **d;
  integer *irf, *icf;
  integer loop_start,loop_end;

#ifdef PTHREADS
  doublereal **dlocal;
#endif

#ifdef USAGE
  struct rusage *conpar_process_usage;
  usage_start(&conpar_process_usage);
#endif

  nov = ((conpar_parallel_arglist *)arg)->nov;
  nra = ((conpar_parallel_arglist *)arg)->nra;
  nca = ((conpar_parallel_arglist *)arg)->nca;
  a = ((conpar_parallel_arglist *)arg)->a;
  ncb = ((conpar_parallel_arglist *)arg)->ncb;
  b = ((conpar_parallel_arglist *)arg)->b;
  nbc = ((conpar_parallel_arglist *)arg)->nbc;
  nrc = ((conpar_parallel_arglist *)arg)->nrc;
  c = ((conpar_parallel_arglist *)arg)->c;
  d = ((conpar_parallel_arglist *)arg)->d;
  irf = ((conpar_parallel_arglist *)arg)->irf;
  icf = ((conpar_parallel_arglist *)arg)->icf;
  loop_start = ((conpar_parallel_arglist *)arg)->loop_start;
  loop_end = ((conpar_parallel_arglist *)arg)->loop_end;

#ifdef PTHREADS
  dlocal=DMATRIX(*nrc, *ncb);
#endif
  /* In the default case we don't need to do anything special */
  if(global_conpar_type == CONPAR_DEFAULT) {
    ;
  }
  /* In the message passing case we set d to be
     0.0, do a sum here, and then do the final
     sum (with the true copy of d) in the
     master */
  else if (global_conpar_type == CONPAR_MPI) {
    for(i=0;i<*nrc;i++)
      for (j=0; j<*ncb;j++)
          d[i][j]=0.0;
  }
  /* In the shared memory case we create a local
     variable for doing this threads part of the
     sum, then we do a final sum into shared memory
     at the end */
  else if (global_conpar_type == CONPAR_PTHREADS) {
#ifdef PTHREADS
    for(i=0;i<*nrc;i++)
      for (j=0; j<*ncb;j++)
          dlocal[i][j]=0.0;
#else
    ;
#endif
  }

  /* Note that the summation of the adjacent overlapped part of C */
  /* is delayed until REDUCE, in order to merge it with other communications.*/
  /* NA is the local NTST. */
  
  irf_dim1 = *nra;
  icf_dim1 = *nca;
  
  /* Condensation of parameters (Elimination of local variables). */
  m1 = *nov + 1;
  m2 = *nca - *nov;

  for (i = loop_start;i < loop_end; i++) {
    for (ic = m1; ic <= m2; ++ic) {
      ir1 = ic - *nov + 1;
      irp = ir1 - 1;
      icp1 = ic + 1;
      /*	     **Search for pivot (Complete pivoting) */
      piv = 0.0;
      ipiv = irp;
      jpiv = ic;
      for (k1 = irp; k1 <= *nra; ++k1) {
	int irf_k1_i = irf[-1 + k1 + i*irf_dim1];
	for (k2 = ic; k2 <= m2; ++k2) {
	  int icf_k2_i = icf[-1 + k2 + i*icf_dim1];
	  tpiv = a[i][-1 + irf_k1_i][-1 + icf_k2_i];
	  if (tpiv < 0.0) {
	    tpiv = -tpiv;
	  }
	  if (piv < tpiv) {
	    piv = tpiv;
	    ipiv = k1;
	    jpiv = k2;
	  }
	}
      }
      /*	     **Move indices */
      itmp = icf[-1 + ic + i*icf_dim1];
      icf[-1 + ic + i*icf_dim1] = icf[-1 + jpiv + i*icf_dim1];
      icf[-1 + jpiv + i*icf_dim1] = itmp;
      itmp = irf[-1 + irp + i*irf_dim1];
      irf[-1 + irp + i*irf_dim1] = irf[-1 + ipiv + i*irf_dim1];
      irf[-1 + ipiv + i*irf_dim1] = itmp;
      {
	int icf_ic_i = icf[-1 + ic + i*icf_dim1];
	int irf_irp_i = irf[-1 + irp + i*irf_dim1];
	doublereal *a_offset2 = a[i][-1 + irf_irp_i];
	doublereal *b_offset2 = b[i][-1 + irf_irp_i];
	/*	     **End of pivoting; elimination starts here */
	for (ir = ir1; ir <= *nra; ++ir) {
	  int irf_ir_i = irf[-1 + ir + i*irf_dim1];
	  doublereal *a_offset1 = a[i][-1 + irf_ir_i];
          doublereal *b_offset1 = b[i][-1 + irf_ir_i];
	  rm = a_offset1[-1 + icf_ic_i]/a_offset2[-1 + icf_ic_i];
	  a_offset1[-1 + icf_ic_i] = rm;
	  if (rm != (double)0.) {
	    for (l = 0; l < *nov; ++l) {
	      a_offset1[l] -= rm * a_offset2[l];
	    }
	    for (l = icp1 -1; l < *nca; ++l) {
	      int icf_l_i = icf[l + i*icf_dim1];
	      a_offset1[-1 + icf_l_i] -= rm * a_offset2[-1 + icf_l_i];
	    }
	    for (l = 0; l < *ncb; ++l) {
	      b_offset1[l] -= rm * b_offset2[l];
	    }
	  }
	}
	for (ir = *nbc + 1; ir <= *nrc; ++ir) {
          doublereal *c_offset1 = c[i][-1 + ir];
          doublereal *d_offset1 = d[-1 + ir];
	  rm = c_offset1[-1 + icf_ic_i]/a_offset2[-1 + icf_ic_i];
	  c_offset1[-1 + icf_ic_i]=rm;
	  if (rm != (double)0.) {
	    for (l = 0; l < *nov; ++l) {
	      c_offset1[l] -= rm * a_offset2[l];
	    }
	    for (l = icp1 -1 ; l < *nca; ++l) {
	      int icf_l_i = icf[l + i*icf_dim1];
	      c_offset1[-1 + icf_l_i] -= rm * a_offset2[-1 + icf_l_i];
	    }
	    for (l = 0; l < *ncb; ++l) {
	      /* 
		 A little explanation of what is going on here
		 is in order I believe.  This array is
		 created by a summation across all workers,
		 hence it needs a mutex to avoid concurrent
		 writes (in the shared memory case) or a summation
		 in the master (in the message passing case).
		 Since mutex's can be somewhat slow, we will do the
		 summation into a local variable, and then do a
		 final summation back into global memory when the
		 main loop is done.
	      */
	      /* Nothing special for the default case */
	      if(global_conpar_type == CONPAR_DEFAULT) {
		d_offset1[l] -= rm * b_offset2[l];
	      }
	      /* In the message passing case we sum into d,
		 which is a local variable initialized to 0.0.
		 We then sum our part with the masters part
		 in the master. */
	      else if (global_conpar_type == CONPAR_MPI) {
		d_offset1[l] -= rm * b_offset2[l];
	      }
	      /* In the shared memory case we sum into a local
		 variable our contribution, and then sum
		 into shared memory at the end (inside a mutex */
	      else if (global_conpar_type == CONPAR_PTHREADS) {
#ifdef PTHREADS
		dlocal[-1 + ir][l] -= rm * b_offset2[l];

#else
		;
#endif
	      }
	    }
	  }
	}
      }
    }
  }
#ifdef PTHREADS
  /* This is were we sum into the global copy of the d
     array, in the shared memory case */
  if(global_conpar_type == CONPAR_PTHREADS) {
#ifdef PTHREADS
    pthread_mutex_lock(&mutex_for_d);
    for(i=0;i<*nrc;i++)
      for (j=0; j<*ncb;j++)
        d[i][j] += dlocal[i][j];
    pthread_mutex_unlock(&mutex_for_d);
    FREE_DMATRIX(dlocal);
#else
    ;
#endif
  }
#endif
#ifdef USAGE
  usage_end(conpar_process_usage,"in conpar worker");
#endif
  return NULL;
}
コード例 #2
0
ファイル: reduce.c プロジェクト: F-A/pydstool
int 
reduce(integer *iam, integer *kwt, logical *par, doublereal ***a1, doublereal ***a2, doublereal ***bb, doublereal ***cc, doublereal **dd, integer *na, integer *nov, integer *ncb, integer *nrc, doublereal ***s1, doublereal ***s2, doublereal ***ca1, integer *icf1, integer *icf2, integer *icf11, integer *ipr, integer *nbc)
{
  /* System generated locals */
    integer icf1_dim1, icf2_dim1, icf11_dim1, ipr_dim1;

  doublereal zero, tpiv;
  real xkwt;
  integer nbcp1, ibuf1, ipiv1, jpiv1, ipiv2, jpiv2, i, k, l;

  integer i1, i2, k1, k2, i3, iprow, k3, l2, l3, ic, ir;
  doublereal rm;
  doublereal tmp;
  integer icp1;
  integer itmp;
  doublereal piv1, piv2;
  doublereal *buf=NULL;

#ifdef USAGE
  struct rusage *init, *mainloop,*pivoting,*elimination;
  usage_start(&init);
#endif

  /* Parameter adjustments */
  ipr_dim1 = *nov;
  icf11_dim1 = *nov;
  icf2_dim1 = *nov;
  icf1_dim1 = *nov;
    
  zero = 0.;
  nbcp1 = *nbc + 1;
  xkwt = (real) (*kwt);


  /* Initialization */

  for (i = 0; i < *na; ++i) {
    for (k1 = 0; k1 < *nov; ++k1) {
      ARRAY2D(icf1, k1, i) = k1 + 1;
      ARRAY2D(icf2, k1, i) = k1 + 1;
      ARRAY2D(ipr, k1, i) = k1 + 1;
      for (k2 = 0; k2 < *nov; ++k2) {
	s2[i][k1][k2] = 0.;
	s1[i][k1][k2] = 0.;
      }
    }
  }

  for (ir = 0; ir < *nov; ++ir) {
    for (ic = 0; ic < *nov; ++ic) {
      s1[0][ir][ic] = a1[0][ir][ic];
    }
  }
#ifdef USAGE
  usage_end(init,"reduce initialization");
  usage_start(&mainloop);
#endif

  /* The reduction process is done concurrently */
  for (i1 = 0; i1 < *na - 1; ++i1) {
    i2 = i1 + 1;
    i3 = i2 + 1;

    for (ic = 0; ic < *nov; ++ic) {
      icp1 = ic + 1;

      /* Complete pivoting; rows are swapped physically, columns swap in
	 dices */
      piv1 = zero;
      ipiv1 = ic + 1;
      jpiv1 = ic + 1;
      for (k1 = ic; k1 < *nov; ++k1) {
	for (k2 = ic; k2 < *nov; ++k2) {
	  tpiv = a2[i1][k1][ARRAY2D(icf2, k2, i1) - 1];
	  if (tpiv < zero) {
	    tpiv = -tpiv;
	  }
	  if (piv1 < tpiv) {
	    piv1 = tpiv;
	    ipiv1 = k1 + 1;
	    jpiv1 = k2 + 1;
	  }
	}
      }

      piv2 = zero;
      ipiv2 = 1;
      jpiv2 = ic + 1;
      for (k1 = 0; k1 < *nov; ++k1) {
	for (k2 = ic; k2 < *nov; ++k2) {
          tpiv = a1[i2][k1][ARRAY2D(icf1, k2, i2) - 1];
	  if (tpiv < zero) {
	    tpiv = -tpiv;
	  }
	  if (piv2 < tpiv) {
	    piv2 = tpiv;
	    ipiv2 = k1 + 1;
	    jpiv2 = k2 + 1;
	  }
	}
      }
      if (piv1 >= piv2) {
	ARRAY2D(ipr, ic, i1) = ipiv1;
	itmp = ARRAY2D(icf2, ic, i1);
	ARRAY2D(icf2, ic, i1) = ARRAY2D(icf2, (jpiv1 - 1), i1);
	ARRAY2D(icf2, (jpiv1 - 1), i1) = itmp;
	itmp = ARRAY2D(icf1, ic, i2);
	ARRAY2D(icf1, ic, i2) = ARRAY2D(icf1, (jpiv1 - 1), i2);
	ARRAY2D(icf1, (jpiv1 - 1), i2) = itmp;
	/* Swapping */
	for (l = 0; l < *nov; ++l) {
	  tmp = s1[i1][ic][l];
	  s1[i1][ic][l] = s1[i1][ipiv1 - 1][l];
	  s1[i1][ipiv1 - 1][l] = tmp;
	  if (l >= ic) {
	    tmp = a2[i1][ic][ARRAY2D(icf2, l, i1) - 1];
	    a2[i1][ic][ARRAY2D(icf2, l, i1) - 1] = 
	      a2[i1][ipiv1 - 1][ARRAY2D(icf2, l, i1) - 1];
	    a2[i1][ipiv1 - 1][ARRAY2D(icf2, l, i1) - 1] = tmp;
	  }
	  tmp = s2[i1][ic][l];
	  s2[i1][ic][l] = s2[i1][ipiv1 - 1][l];
	  s2[i1][ipiv1 - 1][l] = tmp;
	}

	for (l = 0; l < *ncb; ++l) {
	  tmp = bb[i1][ic][l];
	  bb[i1][ic][l] = bb[i1][ipiv1 - 1][l];
	  bb[i1][ipiv1 - 1][l] = tmp;
	}
      } else {
	ARRAY2D(ipr, ic, i1) = *nov + ipiv2;
	itmp = ARRAY2D(icf2, ic, i1);
	ARRAY2D(icf2, ic, i1) = ARRAY2D(icf2, (jpiv2 - 1), i1);
	ARRAY2D(icf2, (jpiv2 - 1), i1) = itmp;
	itmp = ARRAY2D(icf1, ic, i2);
	ARRAY2D(icf1, ic, i2) = ARRAY2D(icf1, (jpiv2 - 1), i2);
	ARRAY2D(icf1, (jpiv2 - 1), i2) = itmp;
	/* Swapping */
	for (l = 0; l < *nov; ++l) {
	  if (l >= ic) {
	    tmp = a2[i1][ic][ARRAY2D(icf2, l, i1) - 1];
	    a2[i1][ic][ARRAY2D(icf2, l, i1) - 1] = 
                a1[i2][ipiv2 - 1][ARRAY2D(icf2, l, i1) - 1];
	    a1[i2][ipiv2 - 1][ARRAY2D(icf2, l, i1) - 1] = tmp;
	  }
	  tmp = s2[i1][ic][l];
	  s2[i1][ic][l] = a2[i2][ipiv2 - 1][l];
	  a2[i2][ipiv2 - 1][l] = tmp;
	  tmp = s1[i1][ic][l];
	  s1[i1][ic][l] = s1[i2][ipiv2 - 1][l];
	  s1[i2][ipiv2 - 1][l] = tmp;
	}
	for (l = 0; l < *ncb; ++l) {
	  tmp = bb[i1][ic][l];
	  bb[i1][ic][l] = bb[i2][ipiv2 - 1][l];
	  bb[i2][ipiv2 - 1][l] = tmp;
	}
      }
      /* End of pivoting; Elimination starts here */

      for (ir = icp1; ir < *nov; ++ir) {
	/*for (ir = *nov - 1; ir >= icp1; ir--) {*/
	rm = a2[i1][ir][ARRAY2D(icf2, ic, i1) - 1] / 
	  a2[i1][ic][ARRAY2D(icf2, ic, i1) - 1];
	a2[i1][ir][ARRAY2D(icf2, ic, i1) - 1] = rm;

	if (rm != (double)0.) {
	  for (l = icp1; l < *nov; ++l) {
	    a2[i1][ir][ARRAY2D(icf2, l, i1) - 1] -= 
	      rm * a2[i1][ic][ARRAY2D(icf2, l, i1) - 1];
	  }

	  for (l = 0; l < *nov; ++l) {
	    s1[i1][ir][l] -= rm * s1[i1][ic][l];
	    s2[i1][ir][l] -= rm * s2[i1][ic][l];
	  }

	  for (l = 0; l < *ncb; ++l) {
	    bb[i1][ir][l] -= rm * bb[i1][ic][l];
	  }
	}
      }

      for (ir = 0; ir < *nov; ++ir) {
	/*for (ir = *nov - 1; ir >= 0; ir--) {*/
	rm = a1[i2][ir][ARRAY2D(icf1, ic, i2) - 1] / 
	  a2[i1][ic][ARRAY2D(icf2, ic, i1) - 1];
	a1[i2][ir][ARRAY2D(icf1, ic, i2) - 1] = rm;

	if (rm != (double)0.) {
	  for (l = icp1; l < *nov; ++l) {
	    a1[i2][ir][ARRAY2D(icf1, l, i2) - 1] -= 
	      rm * a2[i1][ic][ARRAY2D(icf2, l, i1) - 1];
	  }
	  for (l = 0; l < *nov; ++l) {
	    s1[i2][ir][l] -= rm * s1[i1][ic][l];
	    a2[i2][ir][l] -= rm * s2[i1][ic][l];
	  }
	  for (l = 0; l < *ncb; ++l) {
            bb[i2][ir][l] -= rm * bb[i1][ic][l];
	  }
	}
      }

      for (ir = nbcp1 - 1; ir < *nrc; ++ir) {
	/*for (ir = *nrc - 1; ir >= nbcp1 - 1; ir--) {*/
	rm = cc[i2][ir][ARRAY2D(icf2, ic, i1) - 1] / 
	  a2[i1][ic][ARRAY2D(icf2, ic, i1) - 1];
	cc[i2][ir][ARRAY2D(icf2, ic, i1) - 1] = rm;

	if (rm != (double)0.) {
	  for (l = icp1; l < *nov; ++l) {
	    cc[i2][ir][ARRAY2D(icf2, l, i1) - 1] -= 
	      rm * a2[i1][ic][ARRAY2D(icf2, l, i1) - 1];
	  }
	  for (l = 0; l < *nov; ++l) {
	    cc[0][ir][l] -= rm * s1[i1][ic][l];
	    cc[i3][ir][l] -= rm * s2[i1][ic][l];
	  }
	  for (l = 0; l < *ncb; ++l) {
	    dd[ir][l] -= rm * bb[i1][ic][l];
	  }
	}
      }

      /* L2: */
    }
    /* L3: */
  }

  /* Initialization */
  for (i = 0; i < *nov; ++i) {
    ARRAY2D(icf2, i, (*na - 1)) = i + 1;
  }
#ifdef USAGE
  usage_end(mainloop,"reduce mainloop");
#endif    

#ifdef DEBUG
  {
    FILE *icf1_fp,*icf2_fp,*ipr_fp,*s1_fp,*s2_fp;
    FILE *a1_fp,*a2_fp,*bb_fp,*cc_fp,*dd_fp;
    int i,j,k;
    char *prefix="test";
    char filename[80];

    strcpy(filename,prefix);
    strcat(filename,".icf1");
    icf1_fp = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".icf2");
    icf2_fp = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".ipr");
    ipr_fp  = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".s1");
    s1_fp   = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".s2");
    s2_fp   = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".a1");
    a1_fp   = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".a2");
    a2_fp   = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".bb");
    bb_fp   = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".cc");
    cc_fp   = fopen(filename,"w");

    strcpy(filename,prefix);
    strcat(filename,".dd");
    dd_fp   = fopen(filename,"w");

    for (i = 0; i < *na; ++i) {
      for (j = 0; j < *nov; ++j) {
	fprintf(icf1_fp,"%d \n",ARRAY2D(icf1, j, i));
	fprintf(icf2_fp,"%d \n",ARRAY2D(icf2, j, i));
	fprintf(ipr_fp, "%d \n",ARRAY2D(ipr, j, i));
	for (k = 0; k < *nov; ++k) {
	  fprintf(s1_fp,"%d \n",s1[i][j][k]);
	  fprintf(s2_fp,"%d \n",s2[i][j][k]);
	  fprintf(a1_fp,"%d \n",a1[i][j][k]);
	  fprintf(a2_fp,"%d \n",a2[i][j][k]);
	}
	for (k = 0; k < *ncb;k++) {
	  fprintf(bb_fp,"%d \n",bb[i][j][k]);
	}
	for (k = 0; k < *nrc;k++) {
	  fprintf(cc_fp,"%d \n",cc[i][k][j]);
	}
      }
    }
    for(i=0;i < *nrc;i++) {
      for(j=0;j < *ncb;j++) {
	  fprintf(dd_fp,"%d \n",dd[i][j]);
      }
    }
  }
  exit(0);
#endif
  return 0;
} 
コード例 #3
0
ファイル: setubv.c プロジェクト: pratikmallya/AUTO
int 
setubv(integer ndim, integer ips, integer na, integer ncol, integer nint, 
#ifdef MANIFOLD
       integer nalc,
#endif
       integer ncb, integer nrc, integer nra, integer nca, 
       FUNI_TYPE((*funi)), ICNI_TYPE((*icni)), integer ndxloc, iap_type *iap, rap_type *rap, doublereal *par, integer *icp, doublereal *rds,
       doublereal ***aa, doublereal ***bb, doublereal ***cc, doublereal **dd, doublereal **fa, doublereal *fc, doublereal *rlcur, 
       doublereal *rlold, doublereal *rldot, doublereal **ups, doublereal **uoldps, doublereal **udotps, doublereal **upoldp, doublereal **dups, 
       doublereal *dtm, doublereal *thl, doublereal *thu)
{
  doublereal *wi, **wp, **wt;
  
#ifdef USAGE
  struct rusage *initialization_usage,*fc_usage,*parallel_overhead_usage;
  usage_start(&initialization_usage);
#endif
  wi   = (doublereal *)malloc(sizeof(doublereal)*(ncol+1) );
  wp   = dmatrix(ncol+1, ncol);
  wt   = dmatrix(ncol+1, ncol);

  wint(ncol + 1, wi);
  genwts(ncol, ncol + 1, wt, wp);
  
#ifdef USAGE
  usage_end(initialization_usage,"setubv initialization");
#endif

#ifdef USAGE
  usage_start(&fc_usage);
#endif

  setubv_pseudo_arclength(ndim,
#ifdef MANIFOLD
		    nalc,
#endif
		    ncb, nrc, iap, rds, dd, fc, rlcur, rlold, rldot,
		    udotps, dups, dtm, thl, thu);

#ifdef USAGE
  usage_end(fc_usage,"setubv make fc");
#endif

  switch(global_setubv_type) {

#ifdef PTHREADS
  case SETUBV_PTHREADS:
    {
      setubv_threads_wrapper(ndim, na, ncol, nint,
#ifdef MANIFOLD
			     nalc,
#endif
			     ncb, nrc, nra, funi, icni, iap, rap, par, icp,
			     aa, bb, cc, dd, fa,
			     fc, ups, uoldps, udotps, upoldp, dtm,
			     thu, wi, wp, wt);
      break;
    }
#endif

#ifdef MPI
    case SETUBV_MPI:
      if(global_verbose_flag)
	printf("Setubv MPI start\n");
      setubv_mpi_wrapper(ndim, na, ncol, nint,
#ifdef MANIFOLD
       nalc,
#endif
       ncb, nrc, nra, nca, ndxloc, iap, rap, par, icp,
       fa, fc, rldot, ups, uoldps,
       udotps, upoldp, dtm, thl, thu, wi, wp, wt);
      if(global_verbose_flag)
	printf("Setubv MPI end\n");
      break;
#endif

    default:
      setubv_make_aa_bb_cc_dd(ndim, na, ncol, nint, 
#ifdef MANIFOLD
       nalc,
#endif
       ncb, nrc, nra, funi, icni, iap, rap, par, icp,
       aa, bb, cc, dd, fa,
       fc, ups, uoldps, udotps, upoldp, dtm,
       thu, wi, wp, wt);
      break;
    }

  free(wi   );
  free_dmatrix(wp);
  free_dmatrix(wt);
  return 0;
}
コード例 #4
0
ファイル: setubv.c プロジェクト: pratikmallya/AUTO
int 
setubv_make_aa_bb_cc_dd(integer ndim, integer na, integer ncol, integer nint, 
#ifdef MANIFOLD
			integer nalc,
#endif
			integer ncb, integer nrc, integer nra, 
			FUNI_TYPE((*funi)), ICNI_TYPE((*icni)),
			iap_type *iap, rap_type *rap, doublereal *par, integer *icp,
			doublereal ***aa, doublereal ***bb, doublereal ***cc, doublereal **dd,
			doublereal **fa, doublereal *fc,
			doublereal **ups, doublereal **uoldps, doublereal **udotps, doublereal **upoldp,
			doublereal *dtm, doublereal *thu, doublereal *wi, doublereal **wp, doublereal **wt)
{
  /* System generated locals */
  integer dicd_dim1, dfdu_dim1, dfdp_dim1;
  
  /* Local variables */
  integer i, j, k, l, m;
  integer k1, l1;
  integer i1,j1;

  integer ib, ic;
  integer ic1;
  doublereal ddt;
#ifdef MANIFOLD
  integer udotps_off;
#endif

  doublereal *dicd, *ficd, *dfdp, *dfdu, *uold;
  doublereal *f;
  doublereal *u, *wploc;
  doublereal *uic, *uio, *prm, *uid, *uip;

#ifdef USAGE
  struct rusage *setubv_make_aa_bb_cc_usage,*fa_usage;
  usage_start(&setubv_make_aa_bb_cc_usage);
#endif

  if (nint > 0) {
      dicd = (doublereal *)malloc(sizeof(doublereal)*nint*(ndim + NPARX));
      ficd = (doublereal *)malloc(sizeof(doublereal)*nint);
  }
  else
      ficd = dicd = NULL;
  
  dfdp = (doublereal *)malloc(sizeof(doublereal)*ndim*NPARX);
  dfdu = (doublereal *)malloc(sizeof(doublereal)*ndim*ndim);
  uold = (doublereal *)malloc(sizeof(doublereal)*ndim);
  f    = (doublereal *)malloc(sizeof(doublereal)*ndim);
  u    = (doublereal *)malloc(sizeof(doublereal)*ndim);
  wploc= (doublereal *)malloc(sizeof(doublereal)*(ncol+1));
  uic  = (doublereal *)malloc(sizeof(doublereal)*ndim);
  uio  = (doublereal *)malloc(sizeof(doublereal)*ndim);
  prm  = (doublereal *)malloc(sizeof(doublereal)*NPARX);
  uid  = (doublereal *)malloc(sizeof(doublereal)*ndim);
  uip  = (doublereal *)malloc(sizeof(doublereal)*ndim);

  dicd_dim1 = nint;
  dfdu_dim1 = ndim;
  dfdp_dim1 = ndim;

  /* Generate AA, BB and DD: */
  
  /* Initialize to zero. */
  for (i = 0; i < nint; ++i) {
    for (k = 0; k < ncb; ++k) {
      dd[i][k] = 0.;
    }
    fc[i] = 0;
  }

  /*      Partition the mesh intervals */
  /*j will be replaced with 0 and na*/
  for (j = 0; j < na; ++j) {
    doublereal *up = ups[j];
    doublereal *up1 = ups[j + 1];
    doublereal *uoldp = uoldps[j];
    doublereal *uoldp1 = uoldps[j + 1];
      
    ddt = 1. / dtm[j];
    for (ic = 0; ic < ncol; ++ic) {
      for (k = 0; k < ndim; ++k) {
	u[k] = wt[ncol][ic] * up1[k];
	uold[k] = wt[ncol][ic] * uoldp1[k];
	for (l = 0; l < ncol; ++l) {
	  l1 = l * ndim + k;
	  u[k] += wt[l][ic] * up[l1];
	  uold[k] += wt[l][ic] * uoldp[l1];
	}
      }

      for (i = 0; i < NPARX; ++i) {
	prm[i] = par[i];
      }
      /*  
	  Ok this is a little wierd, so hold tight.  This function
	  is actually a pointer to a wrapper function, which eventually
	  calls the user defined func_.  Which wrapper is used
	  depends on what kind of problem it is.
      */
      (*(funi))(iap, rap, ndim, u, uold, icp, prm, 2, f, dfdu, dfdp);
      /* transpose dfdu for optimal access */
      {
      integer ii, jj;
      doublereal tmp;
      for (ii = 0; ii < ndim; ++ii) {
        for (jj = 0; jj < ii; ++jj) {
          tmp = dfdu[ii + jj * ndim];
          dfdu[ii + jj * ndim] = dfdu[jj + ii * ndim];
          dfdu[jj + ii * ndim] = tmp;
        }
      }
      ic1 = ic * ndim;
      for (ib = 0; ib < ncol + 1; ++ib) {
	wploc[ib] = ddt * wp[ib][ic];
      }
      for (i = 0; i < ndim; ++i) {
	double *aa_offset = aa[j][ic1 + i];
	double *dfdu_offset = &ARRAY2D(dfdu, 0, i);
	for (ib = 0; ib < ncol + 1; ++ib) {
	  double wt_tmp = -wt[ib][ic];
	  for (k = 0; k < ndim; ++k) {
	    aa_offset[k] = wt_tmp * dfdu_offset[k];
	  }
	  aa_offset[i] += wploc[ib];
	  aa_offset += ndim;
	}
	for (k = 0; k < ncb; ++k) {
	  bb[j][ic1 + i][k] = -ARRAY2D(dfdp, i, icp[k]);
	}
	fa[j][ic1 + i] = f[i] - wploc[ncol] * up1[i];
	for (k = 0; k < ncol; ++k) {
	  k1 = k * ndim + i;
	  fa[j][ic1 + i] -= wploc[k] * up[k1];
	}
      }
      }
    }
  }

  /* generate CC and DD; the boundary conditions are not
     done parallelly */
  
  /*     Integral constraints : */
  if (nint > 0) {
    for (j = 0; j < na; ++j) {
      int jp1 = j + 1;
      for (k = 0; k <= ncol; ++k) {
	for (i = 0; i < ndim; ++i) {
	  i1 = k * ndim + i;
	  j1 = j;
	  if (k == ncol) {
	    i1 = i;
	  }
	  if (k == ncol) {
	    j1 = jp1;
	  }
	  uic[i] = ups[j1][i1];
	  uio[i] = uoldps[j1][i1];
	  uid[i] = udotps[j1][i1];
	  uip[i] = upoldp[j1][i1];
	}
	
	(*(icni))(iap, rap, ndim, par, icp, nint, uic, uio,
		  uid, uip, ficd, 2, dicd);
	
	for (m = 0; m < nint; ++m) {
	  k1 = k * ndim;
	  for (i = 0; i < ndim; ++i) {
	    cc[j][m][k1+i] = 
	      dtm[j] * wi[k ] * ARRAY2D(dicd, m, i);
	  }
	  fc[m] -= dtm[j] * wi[k] * ficd[m];
	  for (i = 0; i < ncb; ++i) {
	    dd[m][i] += dtm[j] * wi[k] * ARRAY2D(dicd, m, ndim + icp[i]);
	  }
	}
      }
    }
  }
  /*     Pseudo-arclength equation : */
#ifdef MANIFOLD
  udotps_off=iap->ntst + 1;
#endif
  for (j = 0; j < na; ++j) {
#ifdef MANIFOLD
    for (m = 0; m < nalc; ++m) {
    doublereal *udot_offset = udotps[j + m * udotps_off];
    doublereal *cc_offset = cc[j][nrc - nalc + m];
#else    
    doublereal *udot_offset = udotps[j];
    doublereal *cc_offset = cc[j][nrc - 1];
#endif
    for (i = 0; i < ndim; ++i) {
      for (k = 0; k < ncol; ++k) {
	k1 = k * ndim + i;
	cc_offset[k1] = 
	  dtm[j] * thu[i] * wi[k] * udot_offset[k1];
      }
      cc_offset[nra + i] = 
	dtm[j] * thu[i] * wi[ncol] * 
#ifndef MANIFOLD
	udotps[j + 1][i];
#else
        udotps[j + 1 + m*udotps_off][i];
      }
#endif
    }
  }

  free(dicd );
  free(ficd );
  free(dfdp );
  free(dfdu );
  free(uold );
  free(f    );
  free(u    );
  free(wploc);
  free(uic  );
  free(uio  );
  free(prm  );
  free(uid  );
  free(uip  );

#ifdef USAGE
  usage_end(setubv_make_aa_bb_cc_usage,"in setubv worker");
#endif
  return 0;
}
コード例 #5
0
int AUTO(AutoData *Data)
{
  struct timeval  *time0,*time1;
  integer icp[NPARX2];
  doublereal par[NPARX2], thl[NPARX];
  iap_type *iap;
  rap_type *rap;
  doublereal *thu;
  integer *iuz;
  doublereal *vuz;
  function_list list;
  
  integer i, j, k;

  // Initialize structures and constants
  gData = Data;
  
  iap = &(Data->iap);
  rap = &(Data->rap);

  Data->sp_len = Data->num_sp + (1 + floor(iap->nmx/iap->npr));
  Data->sp_inc = 5;

#ifdef USAGE
  struct rusage *init_usage,*total_usage;
  usage_start(&init_usage);
  usage_start(&total_usage);
#endif

#ifdef FLOATING_POINT_TRAP
  trapfpe();
#endif

#ifdef PTHREADS
  global_conpar_type = CONPAR_PTHREADS;
  global_setubv_type = SETUBV_PTHREADS;
  global_reduce_type = REDUCE_PTHREADS;
#endif

  fp9 = fopen("fort.9","w");
  if(fp9 == NULL) {
    fprintf(stderr,"Error:  Could not open fort.9\n");
    exit(1);
  }
  

  /* Initialization : */

  iap->mynode = mynode();
  iap->numnodes = numnodes();
  if (iap->numnodes > 1) {
    iap->parallel_flag = 1;
  } else {
    iap->parallel_flag = 0;
  }


    /* NOTE:  thu is allocated inside this function, and the
       pointer is passed back.  I know this is ugly, but
       this function does a bit of work to get thu setup correctly,
       as well as figuring out the size the array should be.
       What really should happen is to have one function which
       reads fort.2 and another fuction which initializes the array.
       That way the allocation could happen between the two calls.
    */
    init0(iap, rap, par, icp, thl, &thu, &iuz, &vuz);      

    /* Find restart label and determine type of restart point. */
    if (iap->irs > 0) {
      logical found = FALSE_;

      findlb(iap, rap, iap->irs, &(iap->nfpr), &found);
      if (! found) {
	if (iap->mynode == 0) {
	  fprintf(stderr,"\nRestart label %4ld not found\n",iap->irs);
	}
	exit(0);
      }
    }
    set_function_pointers(*iap,&list);
    init1(iap, rap, icp, par);
    chdim(iap);

    /* Create the allocations for the global structures used in 
       autlib3.c and autlib5.c.  These are purely an efficiency thing.
       The allocation and deallocation of these scratch areas takes
       up a nontrivial amount of time if done directly in the
       wrapper functions in autlib3.c*/
    allocate_global_memory(*iap);

    /* ---------------------------------------------------------- */
    /* ---------------------------------------------------------- */
    /*  One-parameter continuations */
    /* ---------------------------------------------------------- */
    /* ---------------------------------------------------------- */

#ifdef USAGE
    usage_end(init_usage,"main initialization");
#endif
    
    if (Data->print_input)
        PrintInput(Data, par, icp);
    
    // Initialize output variables
    if(list.type==AUTOAE)
        Data->u = DMATRIX(iap->nmx, iap->ndim);
    else {
        // Solution measures
        Data->usm = (doublereal ***)MALLOC((2+(int)(log2(Data->nsm)))*sizeof(doublereal **));
        Data->usm[0] = DMATRIX(iap->nmx, iap->ndim);    // MAX
        Data->usm[1] = DMATRIX(iap->nmx, iap->ndim);    // MIN
        for (i=0; i<(int)(log2(Data->nsm)); i++)
            Data->usm[2+i] = DMATRIX(iap->nmx, iap->ndim);
        
        // Jacobian of flow
        if (Data->sjac) {
            Data->c0 = DMATRIX_3D(iap->nmx, iap->ndim, iap->ndim);
            Data->c1 = DMATRIX_3D(iap->nmx, iap->ndim, iap->ndim);
        }
        
        // Jacobian of flow along cycles (temporary storage)
        if (Data->sflow) {
            Data->a1 = DMATRIX_3D(iap->ntst, iap->ndim, iap->ndim);
            Data->a2 = DMATRIX_3D(iap->ntst, iap->ndim, iap->ndim);
        }
    }
    Data->par = DMATRIX(iap->nmx, iap->nicp);
    if (iap->isp >= 1) {
        Data->ev = DCMATRIX(iap->nmx, iap->ndim);
        for (i=0; i<iap->nmx; i++) {
            for (j=0; j<iap->ndim; j++) {
                Data->ev[i][j].r = NAN; // This is a flag for bad floquet multipliers
                Data->ev[i][j].i = NAN;
            }
        }
    }
    Data->num_u = 0;

    if (Data->sp == NULL)
        Data->num_sp = 0;
    
    Data->sp = (AutoSPData *)REALLOC(Data->sp, (Data->sp_len)*sizeof(AutoSPData));

    for (i=Data->num_sp; i<Data->sp_len; i++) {
        Data->sp[i].u = NULL;
        Data->sp[i].icp = NULL;
        Data->sp[i].ups = NULL;
        Data->sp[i].udotps = NULL;
        Data->sp[i].rldot = NULL;
        Data->sp[i].a1 = NULL;
        Data->sp[i].a2 = NULL;
    }

    if(list.type==AUTOAE)
      autoae(iap, rap, par, icp, list.aelist.funi, list.aelist.stpnt, list.aelist.pvli, thl, thu, iuz, vuz);
    if(list.type==AUTOBV)
      autobv(iap, rap, par, icp, list.bvlist.funi, list.bvlist.bcni, 
	     list.bvlist.icni, list.bvlist.stpnt, list.bvlist.pvli, thl, thu, iuz, vuz);
     
    // Testing output
    if (Data->print_output)
        PrintOutput(Data);
     
#ifdef USAGE
    usage_end(total_usage,"total");

#endif
    //time_end(time0,"Total Time ",fp9);
    fprintf(fp9,"----------------------------------------------");
    fprintf(fp9,"----------------------------------------------\n");
    //time_end(time1,"",stdout);


  //}
  FREE(thu);
  FREE(iuz);
  FREE(vuz);
  fclose(fp9);

  // Clean up special solution points that were allocated and not used
  Data->sp = (AutoSPData *)REALLOC(Data->sp, (Data->num_sp)*sizeof(AutoSPData));
  assert(Data->sp);
  Data->sp_len = Data->num_sp;

  return 1;
} 
コード例 #6
0
ファイル: conpar.c プロジェクト: pratikmallya/Multifario
int conpar_process(integer nov, integer na, integer nra, integer nca, doublereal ***a, integer ncb, doublereal ***b, integer nrc, doublereal ***c, doublereal **d, integer *irf, integer *icf)
{
  /* Local variables */
  integer ipiv, jpiv, itmp;
  doublereal tpiv;
  integer i, j, l, k1, m2, ic, ir;
  doublereal rm;
  doublereal piv;
  int *amaxima;

#ifdef USAGE
  struct rusage *conpar_process_usage;
  usage_start(&conpar_process_usage);
#endif

  amaxima = malloc(nra*sizeof(*amaxima));

  /* Note that the summation of the adjacent overlapped part of C */
  /* is delayed until REDUCE, in order to merge it with other communications.*/
  /* NA is the local NTST. */
  
  /* Condensation of parameters (Elimination of local variables). */
  m2 = nca - nov;

  for (i = 0;i < na; i++) {
    doublereal **a_i = a[i];
    doublereal **b_i = b[i];
    integer *icf_i = &icf[i * nca];
    integer *irf_i = &irf[i * nra];
    for (j = 0; j < nra; ++j) {
      integer one = 1;
      irf_i[j] = j;
      amaxima[j] = nov + auto_idamax(m2 - nov, &a_i[j][nov], &one) - 1;
    }
    for (j = 0; j < nca; ++j) {
      icf_i[j] = j;
    }
    for (ic = nov; ic < m2; ++ic) {
      int irp = ic - nov;
      /*	     **Search for pivot (Complete pivoting) */
      piv = 0.0;
      ipiv = irp;
      jpiv = ic;
      for (k1 = irp; k1 < nra; ++k1) {
	int row = irf_i[k1];
	tpiv = fabs(a_i[row][icf_i[amaxima[row]]]);
	if (piv < tpiv) {
	  piv = tpiv;
	  ipiv = k1;
	  jpiv = amaxima[row];
	}
      }
      /*	     **Move indices */
      itmp = icf_i[ic];
      icf_i[ic] = icf_i[jpiv];
      icf_i[jpiv] = itmp;
      itmp = irf_i[irp];
      irf_i[irp] = irf_i[ipiv];
      irf_i[ipiv] = itmp;
      {
	int icf_ic_i = icf_i[ic];
	int irf_irp_i = irf_i[irp];
	doublereal *a_offset2 = a_i[irf_irp_i];
	doublereal *b_offset2 = b_i[irf_irp_i];
	/*	     **End of pivoting; elimination starts here */
	int icp1 = ic + 1;
	piv = a_offset2[icf_ic_i];
	for (ir = irp + 1; ir < nra; ++ir) {
	  int irf_ir_i = irf_i[ir];
          doublereal *a_offset1 = a_i[irf_ir_i];
	  doublereal *b_offset1 = b_i[irf_ir_i];
	  rm = a_offset1[icf_ic_i] / piv;
	  a_offset1[icf_ic_i] = rm;
	  if (rm != (double)0.) {
	    for (l = 0; l < nov; ++l) {
	      a_offset1[l] -= rm * a_offset2[l];
	    }
	    for (l = icp1; l < nca; ++l) {
	      int icf_l_i = icf_i[l];
	      a_offset1[icf_l_i] -= rm * a_offset2[icf_l_i];
	    }
	    for (l = 0; l < ncb; ++l) {
	      b_offset1[l] -= rm * b_offset2[l];
	    }
	  }
	  if (rm != (double)0. || amaxima[irf_ir_i] == jpiv) {
	    doublereal ppiv = 0;
	    integer jppiv = icp1;
	    /* recalculate absolute maximum for current row */
	    for (l = icp1; l < m2; ++l) {
	      tpiv = fabs(a_offset1[icf_i[l]]);
	      if (ppiv < tpiv) {
		ppiv = tpiv;
		jppiv = l;
	      }
	    }
	    amaxima[irf_ir_i] = jppiv;
	  } else if (amaxima[irf_ir_i] == ic) {
	    amaxima[irf_ir_i] = jpiv;
	  }
	}
	for (ir = 0; ir < nrc; ++ir) {
	  doublereal *c_offset1 = c[i][ir];
	  rm = c_offset1[icf_ic_i] / piv;
	  c_offset1[icf_ic_i]=rm;
	  if (rm != (double)0.) {
	    doublereal *d_offset1 = d[ir];
	    for (l = 0; l < nov; ++l) {
	      c_offset1[l] -= rm * a_offset2[l];
	    }
	    for (l = icp1; l < nca; ++l) {
	      int icf_l_i = icf_i[l];
	      c_offset1[icf_l_i] -= rm * a_offset2[icf_l_i];
	    }
	    for (l = 0; l < ncb; ++l) {
              d_offset1[l] -= rm * b_offset2[l];
	    }
	  }
	}
      }
    }
  }
  free(amaxima);
#ifdef USAGE
  usage_end(conpar_process_usage,"in conpar worker");
#endif
  return 0;
}