void *conpar_process(void * arg) { integer icf_dim1, irf_dim1; /* Local variables */ integer ipiv, jpiv, itmp; doublereal tpiv; integer i, j, l, k1, k2, m1, m2, ic, ir; doublereal rm; integer ir1, irp; doublereal piv; integer icp1; integer *nov, *nra, *nca; doublereal ***a; integer *ncb; doublereal ***b; integer *nbc, *nrc; doublereal ***c, **d; integer *irf, *icf; integer loop_start,loop_end; #ifdef PTHREADS doublereal **dlocal; #endif #ifdef USAGE struct rusage *conpar_process_usage; usage_start(&conpar_process_usage); #endif nov = ((conpar_parallel_arglist *)arg)->nov; nra = ((conpar_parallel_arglist *)arg)->nra; nca = ((conpar_parallel_arglist *)arg)->nca; a = ((conpar_parallel_arglist *)arg)->a; ncb = ((conpar_parallel_arglist *)arg)->ncb; b = ((conpar_parallel_arglist *)arg)->b; nbc = ((conpar_parallel_arglist *)arg)->nbc; nrc = ((conpar_parallel_arglist *)arg)->nrc; c = ((conpar_parallel_arglist *)arg)->c; d = ((conpar_parallel_arglist *)arg)->d; irf = ((conpar_parallel_arglist *)arg)->irf; icf = ((conpar_parallel_arglist *)arg)->icf; loop_start = ((conpar_parallel_arglist *)arg)->loop_start; loop_end = ((conpar_parallel_arglist *)arg)->loop_end; #ifdef PTHREADS dlocal=DMATRIX(*nrc, *ncb); #endif /* In the default case we don't need to do anything special */ if(global_conpar_type == CONPAR_DEFAULT) { ; } /* In the message passing case we set d to be 0.0, do a sum here, and then do the final sum (with the true copy of d) in the master */ else if (global_conpar_type == CONPAR_MPI) { for(i=0;i<*nrc;i++) for (j=0; j<*ncb;j++) d[i][j]=0.0; } /* In the shared memory case we create a local variable for doing this threads part of the sum, then we do a final sum into shared memory at the end */ else if (global_conpar_type == CONPAR_PTHREADS) { #ifdef PTHREADS for(i=0;i<*nrc;i++) for (j=0; j<*ncb;j++) dlocal[i][j]=0.0; #else ; #endif } /* Note that the summation of the adjacent overlapped part of C */ /* is delayed until REDUCE, in order to merge it with other communications.*/ /* NA is the local NTST. */ irf_dim1 = *nra; icf_dim1 = *nca; /* Condensation of parameters (Elimination of local variables). */ m1 = *nov + 1; m2 = *nca - *nov; for (i = loop_start;i < loop_end; i++) { for (ic = m1; ic <= m2; ++ic) { ir1 = ic - *nov + 1; irp = ir1 - 1; icp1 = ic + 1; /* **Search for pivot (Complete pivoting) */ piv = 0.0; ipiv = irp; jpiv = ic; for (k1 = irp; k1 <= *nra; ++k1) { int irf_k1_i = irf[-1 + k1 + i*irf_dim1]; for (k2 = ic; k2 <= m2; ++k2) { int icf_k2_i = icf[-1 + k2 + i*icf_dim1]; tpiv = a[i][-1 + irf_k1_i][-1 + icf_k2_i]; if (tpiv < 0.0) { tpiv = -tpiv; } if (piv < tpiv) { piv = tpiv; ipiv = k1; jpiv = k2; } } } /* **Move indices */ itmp = icf[-1 + ic + i*icf_dim1]; icf[-1 + ic + i*icf_dim1] = icf[-1 + jpiv + i*icf_dim1]; icf[-1 + jpiv + i*icf_dim1] = itmp; itmp = irf[-1 + irp + i*irf_dim1]; irf[-1 + irp + i*irf_dim1] = irf[-1 + ipiv + i*irf_dim1]; irf[-1 + ipiv + i*irf_dim1] = itmp; { int icf_ic_i = icf[-1 + ic + i*icf_dim1]; int irf_irp_i = irf[-1 + irp + i*irf_dim1]; doublereal *a_offset2 = a[i][-1 + irf_irp_i]; doublereal *b_offset2 = b[i][-1 + irf_irp_i]; /* **End of pivoting; elimination starts here */ for (ir = ir1; ir <= *nra; ++ir) { int irf_ir_i = irf[-1 + ir + i*irf_dim1]; doublereal *a_offset1 = a[i][-1 + irf_ir_i]; doublereal *b_offset1 = b[i][-1 + irf_ir_i]; rm = a_offset1[-1 + icf_ic_i]/a_offset2[-1 + icf_ic_i]; a_offset1[-1 + icf_ic_i] = rm; if (rm != (double)0.) { for (l = 0; l < *nov; ++l) { a_offset1[l] -= rm * a_offset2[l]; } for (l = icp1 -1; l < *nca; ++l) { int icf_l_i = icf[l + i*icf_dim1]; a_offset1[-1 + icf_l_i] -= rm * a_offset2[-1 + icf_l_i]; } for (l = 0; l < *ncb; ++l) { b_offset1[l] -= rm * b_offset2[l]; } } } for (ir = *nbc + 1; ir <= *nrc; ++ir) { doublereal *c_offset1 = c[i][-1 + ir]; doublereal *d_offset1 = d[-1 + ir]; rm = c_offset1[-1 + icf_ic_i]/a_offset2[-1 + icf_ic_i]; c_offset1[-1 + icf_ic_i]=rm; if (rm != (double)0.) { for (l = 0; l < *nov; ++l) { c_offset1[l] -= rm * a_offset2[l]; } for (l = icp1 -1 ; l < *nca; ++l) { int icf_l_i = icf[l + i*icf_dim1]; c_offset1[-1 + icf_l_i] -= rm * a_offset2[-1 + icf_l_i]; } for (l = 0; l < *ncb; ++l) { /* A little explanation of what is going on here is in order I believe. This array is created by a summation across all workers, hence it needs a mutex to avoid concurrent writes (in the shared memory case) or a summation in the master (in the message passing case). Since mutex's can be somewhat slow, we will do the summation into a local variable, and then do a final summation back into global memory when the main loop is done. */ /* Nothing special for the default case */ if(global_conpar_type == CONPAR_DEFAULT) { d_offset1[l] -= rm * b_offset2[l]; } /* In the message passing case we sum into d, which is a local variable initialized to 0.0. We then sum our part with the masters part in the master. */ else if (global_conpar_type == CONPAR_MPI) { d_offset1[l] -= rm * b_offset2[l]; } /* In the shared memory case we sum into a local variable our contribution, and then sum into shared memory at the end (inside a mutex */ else if (global_conpar_type == CONPAR_PTHREADS) { #ifdef PTHREADS dlocal[-1 + ir][l] -= rm * b_offset2[l]; #else ; #endif } } } } } } } #ifdef PTHREADS /* This is were we sum into the global copy of the d array, in the shared memory case */ if(global_conpar_type == CONPAR_PTHREADS) { #ifdef PTHREADS pthread_mutex_lock(&mutex_for_d); for(i=0;i<*nrc;i++) for (j=0; j<*ncb;j++) d[i][j] += dlocal[i][j]; pthread_mutex_unlock(&mutex_for_d); FREE_DMATRIX(dlocal); #else ; #endif } #endif #ifdef USAGE usage_end(conpar_process_usage,"in conpar worker"); #endif return NULL; }
int reduce(integer *iam, integer *kwt, logical *par, doublereal ***a1, doublereal ***a2, doublereal ***bb, doublereal ***cc, doublereal **dd, integer *na, integer *nov, integer *ncb, integer *nrc, doublereal ***s1, doublereal ***s2, doublereal ***ca1, integer *icf1, integer *icf2, integer *icf11, integer *ipr, integer *nbc) { /* System generated locals */ integer icf1_dim1, icf2_dim1, icf11_dim1, ipr_dim1; doublereal zero, tpiv; real xkwt; integer nbcp1, ibuf1, ipiv1, jpiv1, ipiv2, jpiv2, i, k, l; integer i1, i2, k1, k2, i3, iprow, k3, l2, l3, ic, ir; doublereal rm; doublereal tmp; integer icp1; integer itmp; doublereal piv1, piv2; doublereal *buf=NULL; #ifdef USAGE struct rusage *init, *mainloop,*pivoting,*elimination; usage_start(&init); #endif /* Parameter adjustments */ ipr_dim1 = *nov; icf11_dim1 = *nov; icf2_dim1 = *nov; icf1_dim1 = *nov; zero = 0.; nbcp1 = *nbc + 1; xkwt = (real) (*kwt); /* Initialization */ for (i = 0; i < *na; ++i) { for (k1 = 0; k1 < *nov; ++k1) { ARRAY2D(icf1, k1, i) = k1 + 1; ARRAY2D(icf2, k1, i) = k1 + 1; ARRAY2D(ipr, k1, i) = k1 + 1; for (k2 = 0; k2 < *nov; ++k2) { s2[i][k1][k2] = 0.; s1[i][k1][k2] = 0.; } } } for (ir = 0; ir < *nov; ++ir) { for (ic = 0; ic < *nov; ++ic) { s1[0][ir][ic] = a1[0][ir][ic]; } } #ifdef USAGE usage_end(init,"reduce initialization"); usage_start(&mainloop); #endif /* The reduction process is done concurrently */ for (i1 = 0; i1 < *na - 1; ++i1) { i2 = i1 + 1; i3 = i2 + 1; for (ic = 0; ic < *nov; ++ic) { icp1 = ic + 1; /* Complete pivoting; rows are swapped physically, columns swap in dices */ piv1 = zero; ipiv1 = ic + 1; jpiv1 = ic + 1; for (k1 = ic; k1 < *nov; ++k1) { for (k2 = ic; k2 < *nov; ++k2) { tpiv = a2[i1][k1][ARRAY2D(icf2, k2, i1) - 1]; if (tpiv < zero) { tpiv = -tpiv; } if (piv1 < tpiv) { piv1 = tpiv; ipiv1 = k1 + 1; jpiv1 = k2 + 1; } } } piv2 = zero; ipiv2 = 1; jpiv2 = ic + 1; for (k1 = 0; k1 < *nov; ++k1) { for (k2 = ic; k2 < *nov; ++k2) { tpiv = a1[i2][k1][ARRAY2D(icf1, k2, i2) - 1]; if (tpiv < zero) { tpiv = -tpiv; } if (piv2 < tpiv) { piv2 = tpiv; ipiv2 = k1 + 1; jpiv2 = k2 + 1; } } } if (piv1 >= piv2) { ARRAY2D(ipr, ic, i1) = ipiv1; itmp = ARRAY2D(icf2, ic, i1); ARRAY2D(icf2, ic, i1) = ARRAY2D(icf2, (jpiv1 - 1), i1); ARRAY2D(icf2, (jpiv1 - 1), i1) = itmp; itmp = ARRAY2D(icf1, ic, i2); ARRAY2D(icf1, ic, i2) = ARRAY2D(icf1, (jpiv1 - 1), i2); ARRAY2D(icf1, (jpiv1 - 1), i2) = itmp; /* Swapping */ for (l = 0; l < *nov; ++l) { tmp = s1[i1][ic][l]; s1[i1][ic][l] = s1[i1][ipiv1 - 1][l]; s1[i1][ipiv1 - 1][l] = tmp; if (l >= ic) { tmp = a2[i1][ic][ARRAY2D(icf2, l, i1) - 1]; a2[i1][ic][ARRAY2D(icf2, l, i1) - 1] = a2[i1][ipiv1 - 1][ARRAY2D(icf2, l, i1) - 1]; a2[i1][ipiv1 - 1][ARRAY2D(icf2, l, i1) - 1] = tmp; } tmp = s2[i1][ic][l]; s2[i1][ic][l] = s2[i1][ipiv1 - 1][l]; s2[i1][ipiv1 - 1][l] = tmp; } for (l = 0; l < *ncb; ++l) { tmp = bb[i1][ic][l]; bb[i1][ic][l] = bb[i1][ipiv1 - 1][l]; bb[i1][ipiv1 - 1][l] = tmp; } } else { ARRAY2D(ipr, ic, i1) = *nov + ipiv2; itmp = ARRAY2D(icf2, ic, i1); ARRAY2D(icf2, ic, i1) = ARRAY2D(icf2, (jpiv2 - 1), i1); ARRAY2D(icf2, (jpiv2 - 1), i1) = itmp; itmp = ARRAY2D(icf1, ic, i2); ARRAY2D(icf1, ic, i2) = ARRAY2D(icf1, (jpiv2 - 1), i2); ARRAY2D(icf1, (jpiv2 - 1), i2) = itmp; /* Swapping */ for (l = 0; l < *nov; ++l) { if (l >= ic) { tmp = a2[i1][ic][ARRAY2D(icf2, l, i1) - 1]; a2[i1][ic][ARRAY2D(icf2, l, i1) - 1] = a1[i2][ipiv2 - 1][ARRAY2D(icf2, l, i1) - 1]; a1[i2][ipiv2 - 1][ARRAY2D(icf2, l, i1) - 1] = tmp; } tmp = s2[i1][ic][l]; s2[i1][ic][l] = a2[i2][ipiv2 - 1][l]; a2[i2][ipiv2 - 1][l] = tmp; tmp = s1[i1][ic][l]; s1[i1][ic][l] = s1[i2][ipiv2 - 1][l]; s1[i2][ipiv2 - 1][l] = tmp; } for (l = 0; l < *ncb; ++l) { tmp = bb[i1][ic][l]; bb[i1][ic][l] = bb[i2][ipiv2 - 1][l]; bb[i2][ipiv2 - 1][l] = tmp; } } /* End of pivoting; Elimination starts here */ for (ir = icp1; ir < *nov; ++ir) { /*for (ir = *nov - 1; ir >= icp1; ir--) {*/ rm = a2[i1][ir][ARRAY2D(icf2, ic, i1) - 1] / a2[i1][ic][ARRAY2D(icf2, ic, i1) - 1]; a2[i1][ir][ARRAY2D(icf2, ic, i1) - 1] = rm; if (rm != (double)0.) { for (l = icp1; l < *nov; ++l) { a2[i1][ir][ARRAY2D(icf2, l, i1) - 1] -= rm * a2[i1][ic][ARRAY2D(icf2, l, i1) - 1]; } for (l = 0; l < *nov; ++l) { s1[i1][ir][l] -= rm * s1[i1][ic][l]; s2[i1][ir][l] -= rm * s2[i1][ic][l]; } for (l = 0; l < *ncb; ++l) { bb[i1][ir][l] -= rm * bb[i1][ic][l]; } } } for (ir = 0; ir < *nov; ++ir) { /*for (ir = *nov - 1; ir >= 0; ir--) {*/ rm = a1[i2][ir][ARRAY2D(icf1, ic, i2) - 1] / a2[i1][ic][ARRAY2D(icf2, ic, i1) - 1]; a1[i2][ir][ARRAY2D(icf1, ic, i2) - 1] = rm; if (rm != (double)0.) { for (l = icp1; l < *nov; ++l) { a1[i2][ir][ARRAY2D(icf1, l, i2) - 1] -= rm * a2[i1][ic][ARRAY2D(icf2, l, i1) - 1]; } for (l = 0; l < *nov; ++l) { s1[i2][ir][l] -= rm * s1[i1][ic][l]; a2[i2][ir][l] -= rm * s2[i1][ic][l]; } for (l = 0; l < *ncb; ++l) { bb[i2][ir][l] -= rm * bb[i1][ic][l]; } } } for (ir = nbcp1 - 1; ir < *nrc; ++ir) { /*for (ir = *nrc - 1; ir >= nbcp1 - 1; ir--) {*/ rm = cc[i2][ir][ARRAY2D(icf2, ic, i1) - 1] / a2[i1][ic][ARRAY2D(icf2, ic, i1) - 1]; cc[i2][ir][ARRAY2D(icf2, ic, i1) - 1] = rm; if (rm != (double)0.) { for (l = icp1; l < *nov; ++l) { cc[i2][ir][ARRAY2D(icf2, l, i1) - 1] -= rm * a2[i1][ic][ARRAY2D(icf2, l, i1) - 1]; } for (l = 0; l < *nov; ++l) { cc[0][ir][l] -= rm * s1[i1][ic][l]; cc[i3][ir][l] -= rm * s2[i1][ic][l]; } for (l = 0; l < *ncb; ++l) { dd[ir][l] -= rm * bb[i1][ic][l]; } } } /* L2: */ } /* L3: */ } /* Initialization */ for (i = 0; i < *nov; ++i) { ARRAY2D(icf2, i, (*na - 1)) = i + 1; } #ifdef USAGE usage_end(mainloop,"reduce mainloop"); #endif #ifdef DEBUG { FILE *icf1_fp,*icf2_fp,*ipr_fp,*s1_fp,*s2_fp; FILE *a1_fp,*a2_fp,*bb_fp,*cc_fp,*dd_fp; int i,j,k; char *prefix="test"; char filename[80]; strcpy(filename,prefix); strcat(filename,".icf1"); icf1_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".icf2"); icf2_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".ipr"); ipr_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".s1"); s1_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".s2"); s2_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".a1"); a1_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".a2"); a2_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".bb"); bb_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".cc"); cc_fp = fopen(filename,"w"); strcpy(filename,prefix); strcat(filename,".dd"); dd_fp = fopen(filename,"w"); for (i = 0; i < *na; ++i) { for (j = 0; j < *nov; ++j) { fprintf(icf1_fp,"%d \n",ARRAY2D(icf1, j, i)); fprintf(icf2_fp,"%d \n",ARRAY2D(icf2, j, i)); fprintf(ipr_fp, "%d \n",ARRAY2D(ipr, j, i)); for (k = 0; k < *nov; ++k) { fprintf(s1_fp,"%d \n",s1[i][j][k]); fprintf(s2_fp,"%d \n",s2[i][j][k]); fprintf(a1_fp,"%d \n",a1[i][j][k]); fprintf(a2_fp,"%d \n",a2[i][j][k]); } for (k = 0; k < *ncb;k++) { fprintf(bb_fp,"%d \n",bb[i][j][k]); } for (k = 0; k < *nrc;k++) { fprintf(cc_fp,"%d \n",cc[i][k][j]); } } } for(i=0;i < *nrc;i++) { for(j=0;j < *ncb;j++) { fprintf(dd_fp,"%d \n",dd[i][j]); } } } exit(0); #endif return 0; }
int setubv(integer ndim, integer ips, integer na, integer ncol, integer nint, #ifdef MANIFOLD integer nalc, #endif integer ncb, integer nrc, integer nra, integer nca, FUNI_TYPE((*funi)), ICNI_TYPE((*icni)), integer ndxloc, iap_type *iap, rap_type *rap, doublereal *par, integer *icp, doublereal *rds, doublereal ***aa, doublereal ***bb, doublereal ***cc, doublereal **dd, doublereal **fa, doublereal *fc, doublereal *rlcur, doublereal *rlold, doublereal *rldot, doublereal **ups, doublereal **uoldps, doublereal **udotps, doublereal **upoldp, doublereal **dups, doublereal *dtm, doublereal *thl, doublereal *thu) { doublereal *wi, **wp, **wt; #ifdef USAGE struct rusage *initialization_usage,*fc_usage,*parallel_overhead_usage; usage_start(&initialization_usage); #endif wi = (doublereal *)malloc(sizeof(doublereal)*(ncol+1) ); wp = dmatrix(ncol+1, ncol); wt = dmatrix(ncol+1, ncol); wint(ncol + 1, wi); genwts(ncol, ncol + 1, wt, wp); #ifdef USAGE usage_end(initialization_usage,"setubv initialization"); #endif #ifdef USAGE usage_start(&fc_usage); #endif setubv_pseudo_arclength(ndim, #ifdef MANIFOLD nalc, #endif ncb, nrc, iap, rds, dd, fc, rlcur, rlold, rldot, udotps, dups, dtm, thl, thu); #ifdef USAGE usage_end(fc_usage,"setubv make fc"); #endif switch(global_setubv_type) { #ifdef PTHREADS case SETUBV_PTHREADS: { setubv_threads_wrapper(ndim, na, ncol, nint, #ifdef MANIFOLD nalc, #endif ncb, nrc, nra, funi, icni, iap, rap, par, icp, aa, bb, cc, dd, fa, fc, ups, uoldps, udotps, upoldp, dtm, thu, wi, wp, wt); break; } #endif #ifdef MPI case SETUBV_MPI: if(global_verbose_flag) printf("Setubv MPI start\n"); setubv_mpi_wrapper(ndim, na, ncol, nint, #ifdef MANIFOLD nalc, #endif ncb, nrc, nra, nca, ndxloc, iap, rap, par, icp, fa, fc, rldot, ups, uoldps, udotps, upoldp, dtm, thl, thu, wi, wp, wt); if(global_verbose_flag) printf("Setubv MPI end\n"); break; #endif default: setubv_make_aa_bb_cc_dd(ndim, na, ncol, nint, #ifdef MANIFOLD nalc, #endif ncb, nrc, nra, funi, icni, iap, rap, par, icp, aa, bb, cc, dd, fa, fc, ups, uoldps, udotps, upoldp, dtm, thu, wi, wp, wt); break; } free(wi ); free_dmatrix(wp); free_dmatrix(wt); return 0; }
int setubv_make_aa_bb_cc_dd(integer ndim, integer na, integer ncol, integer nint, #ifdef MANIFOLD integer nalc, #endif integer ncb, integer nrc, integer nra, FUNI_TYPE((*funi)), ICNI_TYPE((*icni)), iap_type *iap, rap_type *rap, doublereal *par, integer *icp, doublereal ***aa, doublereal ***bb, doublereal ***cc, doublereal **dd, doublereal **fa, doublereal *fc, doublereal **ups, doublereal **uoldps, doublereal **udotps, doublereal **upoldp, doublereal *dtm, doublereal *thu, doublereal *wi, doublereal **wp, doublereal **wt) { /* System generated locals */ integer dicd_dim1, dfdu_dim1, dfdp_dim1; /* Local variables */ integer i, j, k, l, m; integer k1, l1; integer i1,j1; integer ib, ic; integer ic1; doublereal ddt; #ifdef MANIFOLD integer udotps_off; #endif doublereal *dicd, *ficd, *dfdp, *dfdu, *uold; doublereal *f; doublereal *u, *wploc; doublereal *uic, *uio, *prm, *uid, *uip; #ifdef USAGE struct rusage *setubv_make_aa_bb_cc_usage,*fa_usage; usage_start(&setubv_make_aa_bb_cc_usage); #endif if (nint > 0) { dicd = (doublereal *)malloc(sizeof(doublereal)*nint*(ndim + NPARX)); ficd = (doublereal *)malloc(sizeof(doublereal)*nint); } else ficd = dicd = NULL; dfdp = (doublereal *)malloc(sizeof(doublereal)*ndim*NPARX); dfdu = (doublereal *)malloc(sizeof(doublereal)*ndim*ndim); uold = (doublereal *)malloc(sizeof(doublereal)*ndim); f = (doublereal *)malloc(sizeof(doublereal)*ndim); u = (doublereal *)malloc(sizeof(doublereal)*ndim); wploc= (doublereal *)malloc(sizeof(doublereal)*(ncol+1)); uic = (doublereal *)malloc(sizeof(doublereal)*ndim); uio = (doublereal *)malloc(sizeof(doublereal)*ndim); prm = (doublereal *)malloc(sizeof(doublereal)*NPARX); uid = (doublereal *)malloc(sizeof(doublereal)*ndim); uip = (doublereal *)malloc(sizeof(doublereal)*ndim); dicd_dim1 = nint; dfdu_dim1 = ndim; dfdp_dim1 = ndim; /* Generate AA, BB and DD: */ /* Initialize to zero. */ for (i = 0; i < nint; ++i) { for (k = 0; k < ncb; ++k) { dd[i][k] = 0.; } fc[i] = 0; } /* Partition the mesh intervals */ /*j will be replaced with 0 and na*/ for (j = 0; j < na; ++j) { doublereal *up = ups[j]; doublereal *up1 = ups[j + 1]; doublereal *uoldp = uoldps[j]; doublereal *uoldp1 = uoldps[j + 1]; ddt = 1. / dtm[j]; for (ic = 0; ic < ncol; ++ic) { for (k = 0; k < ndim; ++k) { u[k] = wt[ncol][ic] * up1[k]; uold[k] = wt[ncol][ic] * uoldp1[k]; for (l = 0; l < ncol; ++l) { l1 = l * ndim + k; u[k] += wt[l][ic] * up[l1]; uold[k] += wt[l][ic] * uoldp[l1]; } } for (i = 0; i < NPARX; ++i) { prm[i] = par[i]; } /* Ok this is a little wierd, so hold tight. This function is actually a pointer to a wrapper function, which eventually calls the user defined func_. Which wrapper is used depends on what kind of problem it is. */ (*(funi))(iap, rap, ndim, u, uold, icp, prm, 2, f, dfdu, dfdp); /* transpose dfdu for optimal access */ { integer ii, jj; doublereal tmp; for (ii = 0; ii < ndim; ++ii) { for (jj = 0; jj < ii; ++jj) { tmp = dfdu[ii + jj * ndim]; dfdu[ii + jj * ndim] = dfdu[jj + ii * ndim]; dfdu[jj + ii * ndim] = tmp; } } ic1 = ic * ndim; for (ib = 0; ib < ncol + 1; ++ib) { wploc[ib] = ddt * wp[ib][ic]; } for (i = 0; i < ndim; ++i) { double *aa_offset = aa[j][ic1 + i]; double *dfdu_offset = &ARRAY2D(dfdu, 0, i); for (ib = 0; ib < ncol + 1; ++ib) { double wt_tmp = -wt[ib][ic]; for (k = 0; k < ndim; ++k) { aa_offset[k] = wt_tmp * dfdu_offset[k]; } aa_offset[i] += wploc[ib]; aa_offset += ndim; } for (k = 0; k < ncb; ++k) { bb[j][ic1 + i][k] = -ARRAY2D(dfdp, i, icp[k]); } fa[j][ic1 + i] = f[i] - wploc[ncol] * up1[i]; for (k = 0; k < ncol; ++k) { k1 = k * ndim + i; fa[j][ic1 + i] -= wploc[k] * up[k1]; } } } } } /* generate CC and DD; the boundary conditions are not done parallelly */ /* Integral constraints : */ if (nint > 0) { for (j = 0; j < na; ++j) { int jp1 = j + 1; for (k = 0; k <= ncol; ++k) { for (i = 0; i < ndim; ++i) { i1 = k * ndim + i; j1 = j; if (k == ncol) { i1 = i; } if (k == ncol) { j1 = jp1; } uic[i] = ups[j1][i1]; uio[i] = uoldps[j1][i1]; uid[i] = udotps[j1][i1]; uip[i] = upoldp[j1][i1]; } (*(icni))(iap, rap, ndim, par, icp, nint, uic, uio, uid, uip, ficd, 2, dicd); for (m = 0; m < nint; ++m) { k1 = k * ndim; for (i = 0; i < ndim; ++i) { cc[j][m][k1+i] = dtm[j] * wi[k ] * ARRAY2D(dicd, m, i); } fc[m] -= dtm[j] * wi[k] * ficd[m]; for (i = 0; i < ncb; ++i) { dd[m][i] += dtm[j] * wi[k] * ARRAY2D(dicd, m, ndim + icp[i]); } } } } } /* Pseudo-arclength equation : */ #ifdef MANIFOLD udotps_off=iap->ntst + 1; #endif for (j = 0; j < na; ++j) { #ifdef MANIFOLD for (m = 0; m < nalc; ++m) { doublereal *udot_offset = udotps[j + m * udotps_off]; doublereal *cc_offset = cc[j][nrc - nalc + m]; #else doublereal *udot_offset = udotps[j]; doublereal *cc_offset = cc[j][nrc - 1]; #endif for (i = 0; i < ndim; ++i) { for (k = 0; k < ncol; ++k) { k1 = k * ndim + i; cc_offset[k1] = dtm[j] * thu[i] * wi[k] * udot_offset[k1]; } cc_offset[nra + i] = dtm[j] * thu[i] * wi[ncol] * #ifndef MANIFOLD udotps[j + 1][i]; #else udotps[j + 1 + m*udotps_off][i]; } #endif } } free(dicd ); free(ficd ); free(dfdp ); free(dfdu ); free(uold ); free(f ); free(u ); free(wploc); free(uic ); free(uio ); free(prm ); free(uid ); free(uip ); #ifdef USAGE usage_end(setubv_make_aa_bb_cc_usage,"in setubv worker"); #endif return 0; }
int AUTO(AutoData *Data) { struct timeval *time0,*time1; integer icp[NPARX2]; doublereal par[NPARX2], thl[NPARX]; iap_type *iap; rap_type *rap; doublereal *thu; integer *iuz; doublereal *vuz; function_list list; integer i, j, k; // Initialize structures and constants gData = Data; iap = &(Data->iap); rap = &(Data->rap); Data->sp_len = Data->num_sp + (1 + floor(iap->nmx/iap->npr)); Data->sp_inc = 5; #ifdef USAGE struct rusage *init_usage,*total_usage; usage_start(&init_usage); usage_start(&total_usage); #endif #ifdef FLOATING_POINT_TRAP trapfpe(); #endif #ifdef PTHREADS global_conpar_type = CONPAR_PTHREADS; global_setubv_type = SETUBV_PTHREADS; global_reduce_type = REDUCE_PTHREADS; #endif fp9 = fopen("fort.9","w"); if(fp9 == NULL) { fprintf(stderr,"Error: Could not open fort.9\n"); exit(1); } /* Initialization : */ iap->mynode = mynode(); iap->numnodes = numnodes(); if (iap->numnodes > 1) { iap->parallel_flag = 1; } else { iap->parallel_flag = 0; } /* NOTE: thu is allocated inside this function, and the pointer is passed back. I know this is ugly, but this function does a bit of work to get thu setup correctly, as well as figuring out the size the array should be. What really should happen is to have one function which reads fort.2 and another fuction which initializes the array. That way the allocation could happen between the two calls. */ init0(iap, rap, par, icp, thl, &thu, &iuz, &vuz); /* Find restart label and determine type of restart point. */ if (iap->irs > 0) { logical found = FALSE_; findlb(iap, rap, iap->irs, &(iap->nfpr), &found); if (! found) { if (iap->mynode == 0) { fprintf(stderr,"\nRestart label %4ld not found\n",iap->irs); } exit(0); } } set_function_pointers(*iap,&list); init1(iap, rap, icp, par); chdim(iap); /* Create the allocations for the global structures used in autlib3.c and autlib5.c. These are purely an efficiency thing. The allocation and deallocation of these scratch areas takes up a nontrivial amount of time if done directly in the wrapper functions in autlib3.c*/ allocate_global_memory(*iap); /* ---------------------------------------------------------- */ /* ---------------------------------------------------------- */ /* One-parameter continuations */ /* ---------------------------------------------------------- */ /* ---------------------------------------------------------- */ #ifdef USAGE usage_end(init_usage,"main initialization"); #endif if (Data->print_input) PrintInput(Data, par, icp); // Initialize output variables if(list.type==AUTOAE) Data->u = DMATRIX(iap->nmx, iap->ndim); else { // Solution measures Data->usm = (doublereal ***)MALLOC((2+(int)(log2(Data->nsm)))*sizeof(doublereal **)); Data->usm[0] = DMATRIX(iap->nmx, iap->ndim); // MAX Data->usm[1] = DMATRIX(iap->nmx, iap->ndim); // MIN for (i=0; i<(int)(log2(Data->nsm)); i++) Data->usm[2+i] = DMATRIX(iap->nmx, iap->ndim); // Jacobian of flow if (Data->sjac) { Data->c0 = DMATRIX_3D(iap->nmx, iap->ndim, iap->ndim); Data->c1 = DMATRIX_3D(iap->nmx, iap->ndim, iap->ndim); } // Jacobian of flow along cycles (temporary storage) if (Data->sflow) { Data->a1 = DMATRIX_3D(iap->ntst, iap->ndim, iap->ndim); Data->a2 = DMATRIX_3D(iap->ntst, iap->ndim, iap->ndim); } } Data->par = DMATRIX(iap->nmx, iap->nicp); if (iap->isp >= 1) { Data->ev = DCMATRIX(iap->nmx, iap->ndim); for (i=0; i<iap->nmx; i++) { for (j=0; j<iap->ndim; j++) { Data->ev[i][j].r = NAN; // This is a flag for bad floquet multipliers Data->ev[i][j].i = NAN; } } } Data->num_u = 0; if (Data->sp == NULL) Data->num_sp = 0; Data->sp = (AutoSPData *)REALLOC(Data->sp, (Data->sp_len)*sizeof(AutoSPData)); for (i=Data->num_sp; i<Data->sp_len; i++) { Data->sp[i].u = NULL; Data->sp[i].icp = NULL; Data->sp[i].ups = NULL; Data->sp[i].udotps = NULL; Data->sp[i].rldot = NULL; Data->sp[i].a1 = NULL; Data->sp[i].a2 = NULL; } if(list.type==AUTOAE) autoae(iap, rap, par, icp, list.aelist.funi, list.aelist.stpnt, list.aelist.pvli, thl, thu, iuz, vuz); if(list.type==AUTOBV) autobv(iap, rap, par, icp, list.bvlist.funi, list.bvlist.bcni, list.bvlist.icni, list.bvlist.stpnt, list.bvlist.pvli, thl, thu, iuz, vuz); // Testing output if (Data->print_output) PrintOutput(Data); #ifdef USAGE usage_end(total_usage,"total"); #endif //time_end(time0,"Total Time ",fp9); fprintf(fp9,"----------------------------------------------"); fprintf(fp9,"----------------------------------------------\n"); //time_end(time1,"",stdout); //} FREE(thu); FREE(iuz); FREE(vuz); fclose(fp9); // Clean up special solution points that were allocated and not used Data->sp = (AutoSPData *)REALLOC(Data->sp, (Data->num_sp)*sizeof(AutoSPData)); assert(Data->sp); Data->sp_len = Data->num_sp; return 1; }
int conpar_process(integer nov, integer na, integer nra, integer nca, doublereal ***a, integer ncb, doublereal ***b, integer nrc, doublereal ***c, doublereal **d, integer *irf, integer *icf) { /* Local variables */ integer ipiv, jpiv, itmp; doublereal tpiv; integer i, j, l, k1, m2, ic, ir; doublereal rm; doublereal piv; int *amaxima; #ifdef USAGE struct rusage *conpar_process_usage; usage_start(&conpar_process_usage); #endif amaxima = malloc(nra*sizeof(*amaxima)); /* Note that the summation of the adjacent overlapped part of C */ /* is delayed until REDUCE, in order to merge it with other communications.*/ /* NA is the local NTST. */ /* Condensation of parameters (Elimination of local variables). */ m2 = nca - nov; for (i = 0;i < na; i++) { doublereal **a_i = a[i]; doublereal **b_i = b[i]; integer *icf_i = &icf[i * nca]; integer *irf_i = &irf[i * nra]; for (j = 0; j < nra; ++j) { integer one = 1; irf_i[j] = j; amaxima[j] = nov + auto_idamax(m2 - nov, &a_i[j][nov], &one) - 1; } for (j = 0; j < nca; ++j) { icf_i[j] = j; } for (ic = nov; ic < m2; ++ic) { int irp = ic - nov; /* **Search for pivot (Complete pivoting) */ piv = 0.0; ipiv = irp; jpiv = ic; for (k1 = irp; k1 < nra; ++k1) { int row = irf_i[k1]; tpiv = fabs(a_i[row][icf_i[amaxima[row]]]); if (piv < tpiv) { piv = tpiv; ipiv = k1; jpiv = amaxima[row]; } } /* **Move indices */ itmp = icf_i[ic]; icf_i[ic] = icf_i[jpiv]; icf_i[jpiv] = itmp; itmp = irf_i[irp]; irf_i[irp] = irf_i[ipiv]; irf_i[ipiv] = itmp; { int icf_ic_i = icf_i[ic]; int irf_irp_i = irf_i[irp]; doublereal *a_offset2 = a_i[irf_irp_i]; doublereal *b_offset2 = b_i[irf_irp_i]; /* **End of pivoting; elimination starts here */ int icp1 = ic + 1; piv = a_offset2[icf_ic_i]; for (ir = irp + 1; ir < nra; ++ir) { int irf_ir_i = irf_i[ir]; doublereal *a_offset1 = a_i[irf_ir_i]; doublereal *b_offset1 = b_i[irf_ir_i]; rm = a_offset1[icf_ic_i] / piv; a_offset1[icf_ic_i] = rm; if (rm != (double)0.) { for (l = 0; l < nov; ++l) { a_offset1[l] -= rm * a_offset2[l]; } for (l = icp1; l < nca; ++l) { int icf_l_i = icf_i[l]; a_offset1[icf_l_i] -= rm * a_offset2[icf_l_i]; } for (l = 0; l < ncb; ++l) { b_offset1[l] -= rm * b_offset2[l]; } } if (rm != (double)0. || amaxima[irf_ir_i] == jpiv) { doublereal ppiv = 0; integer jppiv = icp1; /* recalculate absolute maximum for current row */ for (l = icp1; l < m2; ++l) { tpiv = fabs(a_offset1[icf_i[l]]); if (ppiv < tpiv) { ppiv = tpiv; jppiv = l; } } amaxima[irf_ir_i] = jppiv; } else if (amaxima[irf_ir_i] == ic) { amaxima[irf_ir_i] = jpiv; } } for (ir = 0; ir < nrc; ++ir) { doublereal *c_offset1 = c[i][ir]; rm = c_offset1[icf_ic_i] / piv; c_offset1[icf_ic_i]=rm; if (rm != (double)0.) { doublereal *d_offset1 = d[ir]; for (l = 0; l < nov; ++l) { c_offset1[l] -= rm * a_offset2[l]; } for (l = icp1; l < nca; ++l) { int icf_l_i = icf_i[l]; c_offset1[icf_l_i] -= rm * a_offset2[icf_l_i]; } for (l = 0; l < ncb; ++l) { d_offset1[l] -= rm * b_offset2[l]; } } } } } } free(amaxima); #ifdef USAGE usage_end(conpar_process_usage,"in conpar worker"); #endif return 0; }