/* ------------------------------------------------------------ PERMUTEP - Let L = tril(P(perm,perm)) INPUT Ljc, Lir - sparsity structure of output matrix L = tril(P(perm,perm)). Pjc, Pir, Ppr - Input matrix, before ordering. perm - length m pivot ordering. m - order: P is m x m. WORKING ARRAY Pj - Length m float work array. IMPORTANT: L, P and PERM in C style. ------------------------------------------------------------ */ void permuteP(const int *Ljc,const int *Lir,double *Lpr, const int *Pjc,const int *Pir,const double *Ppr, const int *perm, double *Pj, const int m) { int j,inz,jcol; /* ------------------------------------------------------------ Let Pj = all-0 ------------------------------------------------------------ */ fzeros(Pj,m); /* ------------------------------------------------------------ For each column j, let Pj(:) = P(:,PERM(j)) and L(:,j) = Pj(PERM(:)) (L sparse) ------------------------------------------------------------ */ for(j = 0; j < m; j++){ jcol = perm[j]; for(inz = Pjc[jcol]; inz < Pjc[jcol+1]; inz++) Pj[Pir[inz]] = Ppr[inz]; for(inz = Ljc[j]; inz < Ljc[j+1]; inz++) Lpr[inz] = Pj[perm[Lir[inz]]]; /* ------------------------------------------------------------ Let Pj = all-0 ------------------------------------------------------------ */ for(inz = Pjc[jcol]; inz < Pjc[jcol+1]; inz++) Pj[Pir[inz]] = 0.0; } }
/* ************************************************************ PROCEDURE: getada2 - Let ADA += ddota'*ddota. INPUT ada.{jc,ir} - sparsity structure of ada. ddota - sparse lorN x m matrix. perm, invperm - length(m) array, ordering in which ADA should be computed, and its inverse. We compute in order triu(ADA(perm,perm)), but store at original places. OPTIMAL PERM: sort(sum(spones(ddota))), i.e. start with sparsest. m - order of ADA, number of constraints. lorN - length(K.q), number of Lorentz blocks. UPDATED ada.pr - ada(i,j) += ddotai'*ddotaj. ONLY triu(ADA(perm,perm)) is updated. (So caller typically should symmetrize afterwards.) WORKING ARRAYS ddotaj - work vector, size lorN. ************************************************************ */ void getada2(jcir ada, jcir ddota, const mwIndex *perm, const mwIndex *invperm, const mwIndex m, const mwIndex lorN, double *ddotaj) { mwIndex i,j, knz,inz, permj; double adaij; /* ------------------------------------------------------------ Init ddotaj = all-0 (for Lorentz) ------------------------------------------------------------ */ fzeros(ddotaj, lorN); /* ============================================================ MAIN getada LOOP: loop over nodes perm(0:m-1) ============================================================ */ for(j = 0; j < m; j++){ permj = perm[j]; if(ddota.jc[permj] < ddota.jc[permj+1]){ /* Only work if nonempty */ /* ------------------------------------------------------------ Let ddotaj = ddota(:,j) in full ------------------------------------------------------------ */ for(i = ddota.jc[permj]; i < ddota.jc[permj+1]; i++) ddotaj[ddota.ir[i]] = ddota.pr[i]; /* ------------------------------------------------------------ For all i with invpermi < j: ada_ij += ddota_i'*ddotaj. ------------------------------------------------------------ */ for(inz = ada.jc[permj]; inz < ada.jc[permj+1]; inz++){ i = ada.ir[inz]; if(invperm[i] <= j){ adaij = ada.pr[inz]; if(invperm[i] < j) for(knz = ddota.jc[i]; knz < ddota.jc[i+1]; knz++) adaij += ddota.pr[knz] * ddotaj[ddota.ir[knz]]; else /* diag entry: += ||ddota(:,permj)||^2 */ adaij += realssqr(ddota.pr + ddota.jc[i], ddota.jc[i+1]-ddota.jc[i]); ada.pr[inz] = adaij; } } /* ------------------------------------------------------------ Re-initialize ddotaj = 0. ------------------------------------------------------------ */ for(i = ddota.jc[permj]; i < ddota.jc[permj+1]; i++) /* Lorentz */ ddotaj[ddota.ir[i]] = 0.0; } } /* j = 0:m-1 */ }
/* ************************************************************ isminoutprod -- Computes update from a column "xk" and stores it in "xj", using dense computations. If "xkk<=0", then let xj = 0. INPUT mk, nj - output "xj" is mk x nj - nj*(nj-1)/2. Its column lengths are {mk, mk-1, ..., mk-(nj-1)}. xkk - scalar, the 1st nj entries in xk are divided by this number. OUTPUT xj - On return, xj = -xk*xk(0:nj-1)'/xkk (NOTE THE MINUS !) BUT: if xkk <= 0, then xj = zeros(nj*(2m-nj+1)/2,1). UPDATED xk - On return, xk(0:nj-1) /= xkk if xkk > 0, otherwise unchanged. ************************************************************ */ void isminoutprod(double *xj, const mwIndex nj, double *xk, const double xkk, mwIndex mk) { mwIndex j; double xjk; if(xkk > 0.0) /* if not phase 2 node */ for(j = 0; j < nj; j++){ xjk = xk[0] / xkk; memcpy(xj,xk,mk * sizeof(double)); isscalarmul(xj, -xjk, mk); /* xj = -xjk * xk */ xk[0] = xjk; /* FINAL entry ljk */ xj += mk; /* point to next column which is 1 shorter */ --mk; ++xk; } else /* initialize to all-0 if phase-2 node */ fzeros(xj,(nj * (mk + mk-nj + 1))/2); }
int main(int argc, char ** argv) { /* BEGIN DECLARATIONS */ WINFO wi; /* struct for command line input */ /* workspace */ float * v; /* velocity field */ float * p1; /* pressure field, current time step */ float * p0; /* pressure field, last time step */ float * tr; /* storage for traces */ float * tmp; /* used to swap p1 and p0 */ int ix, it; /* counters */ int isrc; /* source counter */ int imf; /* movie frame counter */ int isx; /* source location, in units of dx */ int nxz; /* number of spatial grid points */ /* int nz; local number of gridpoints */ int ntr; /* number of traces */ int nsam; /* number of trace samples */ int nsrc; /* number of shots */ float rz,rx,s; /* precomputed coefficients */ float vmax,vmin; /* max, min velocity values */ /* float two; two */ /* END DECLARATIONS */ sf_init(argc,argv); /* read inputs from command line */ getinputs(true,&wi); /* compute number of shots */ nsrc = (wi.isxend-wi.isxbeg)/(wi.iskip); nsrc++; /* compute number of spatial grid points */ nxz=wi.nx * wi.nz; /* compute number of traces, samples in each record */ ntr=wi.igxend-wi.igxbeg+1; nsam=ntr*wi.nt; /* allocate, initialize p0, p1, v, traces */ p0=sf_floatalloc(nxz); p1=sf_floatalloc(nxz); v =sf_floatalloc(nxz); tr=sf_floatalloc(nsam); /* read velocity */ sf_floatread(v,nxz,wi.vfile); /* CFL, sanity checks */ vmax=fgetmax(v,nxz); vmin=fgetmin(v,nxz); if (vmax*wi.dt>CFL*fmaxf(wi.dx,wi.dz)) { sf_warning("CFL criterion violated"); sf_warning("vmax=%e dx=%e dz=%e dt=%e\n",vmax,wi.dx,wi.dz,wi.dt); sf_error("max permitted dt=%e\n",CFL*fmaxf(wi.dx,wi.dz)/vmax); } if (vmin<=0.0) sf_error("min velocity nonpositive"); /* only square of velocity array needed from here on */ fsquare(v,nxz); /* precalculate some coefficients */ rz=wi.dt*wi.dt/(wi.dz*wi.dz); rx=wi.dt*wi.dt/(wi.dx*wi.dx); s =2.0*(rz+rx); /* two=2.0; nz=wi.nz; */ /* shot loop */ isrc=0; isx=wi.isxbeg; while (isx <= wi.isxend) { /* initialize pressure fields, traces */ fzeros(p0,nxz); fzeros(p1,nxz); fzeros(tr,nsam); /* initialize movie frame counter */ imf=0; /* time loop */ for (it=0;it<wi.nt;it++) { /* construct next time step, overwrite on p0 */ step_forward(p0,p1,v,wi.nz,wi.nx,rz,rx,s); /* tack on source */ p0[wi.isz+isx*wi.nz]+=fgetrick(it*wi.dt,wi.freq); /* swap pointers */ tmp=p0; p0=p1; p1=tmp; /* store trace samples if necessary */ if (NULL != wi.tfile) for (ix=0;ix<ntr;ix++) tr[ix*wi.nt+it]=p1[(wi.igxbeg+ix)*wi.nz+wi.igz]; /* write movie snap to file if necessary */ if (NULL != wi.mfile && wi.nm && !(it%wi.nm)) { sf_floatwrite(p1,nxz,wi.mfile); imf++; } /* next t */ } /* write traces to file if necessary */ if (NULL != wi.tfile) sf_floatwrite(tr,nsam,wi.tfile); isx += wi.iskip; isrc++; } exit(0); }
/* ************************************************************ PROCEDURE mexFunction - Entry for Matlab y = bwblksolve(L,b, [y]) y(L.fullperm) = L.L' \ b ************************************************************ */ void mexFunction(const int nlhs, mxArray *plhs[], const int nrhs, const mxArray *prhs[]) { const mxArray *L_FIELD; mwIndex m,n, j, k, nsuper, inz; double *y, *fwork; const double *permPr, *b, *xsuperPr; const mwIndex *yjc, *yir, *bjc, *bir; mwIndex *perm, *xsuper, *iwork, *snode; jcir L; char bissparse; /* ------------------------------------------------------------ Check for proper number of arguments ------------------------------------------------------------ */ mxAssert(nrhs >= MINNPARIN, "fwblkslv requires more input arguments."); mxAssert(nlhs == 1, "fwblkslv generates only 1 output argument."); /* ------------------------------------------------------------ Disassemble block Cholesky structure L ------------------------------------------------------------ */ mxAssert(mxIsStruct(L_IN), "Parameter `L' should be a structure."); L_FIELD = mxGetField(L_IN,(mwIndex)0,"perm"); /* L.perm */ mxAssert( L_FIELD != NULL, "Missing field L.perm."); m = mxGetM(L_FIELD) * mxGetN(L_FIELD); permPr = mxGetPr(L_FIELD); L_FIELD = mxGetField(L_IN,(mwIndex)0,"L"); /* L.L */ mxAssert( L_FIELD != NULL, "Missing field L.L."); mxAssert( m == mxGetM(L_FIELD) && m == mxGetN(L_FIELD), "Size L.L mismatch."); mxAssert(mxIsSparse(L_FIELD), "L.L should be sparse."); L.jc = mxGetJc(L_FIELD); L.ir = mxGetIr(L_FIELD); L.pr = mxGetPr(L_FIELD); L_FIELD = mxGetField(L_IN,(mwIndex)0,"xsuper"); /* L.xsuper */ mxAssert( L_FIELD != NULL, "Missing field L.xsuper."); nsuper = mxGetM(L_FIELD) * mxGetN(L_FIELD) - 1; mxAssert( nsuper <= m, "Size L.xsuper mismatch."); xsuperPr = mxGetPr(L_FIELD); /* ------------------------------------------------------------ Get rhs matrix b. If it is sparse, then we also need the sparsity structure of y. ------------------------------------------------------------ */ b = mxGetPr(B_IN); mxAssert( mxGetM(B_IN) == m, "Size mismatch b."); n = mxGetN(B_IN); if( (bissparse = mxIsSparse(B_IN)) ){ bjc = mxGetJc(B_IN); bir = mxGetIr(B_IN); mxAssert(nrhs >= NPARIN, "bwblkslv requires more inputs in case of sparse b."); mxAssert(mxGetM(Y_IN) == m && mxGetN(Y_IN) == n, "Size mismatch y."); mxAssert(mxIsSparse(Y_IN), "y should be sparse."); } /* ------------------------------------------------------------ Allocate output y. If bissparse, then Y_IN gives the sparsity structure. ------------------------------------------------------------ */ if(!bissparse) Y_OUT = mxCreateDoubleMatrix(m, n, mxREAL); else{ yjc = mxGetJc(Y_IN); yir = mxGetIr(Y_IN); Y_OUT = mxCreateSparse(m,n, yjc[n],mxREAL); memcpy(mxGetJc(Y_OUT), yjc, (n+1) * sizeof(mwIndex)); memcpy(mxGetIr(Y_OUT), yir, yjc[n] * sizeof(mwIndex)); } y = mxGetPr(Y_OUT); /* ------------------------------------------------------------ Allocate working arrays ------------------------------------------------------------ */ fwork = (double *) mxCalloc(m, sizeof(double)); iwork = (mwIndex *) mxCalloc(2*m+nsuper+1, sizeof(mwIndex)); perm = iwork; /* m */ xsuper = iwork + m; /*nsuper+1*/ snode = xsuper + (nsuper+1); /* m */ /* ------------------------------------------------------------ Convert real to integer array, and from Fortran to C style. ------------------------------------------------------------ */ for(k = 0; k < m; k++) perm[k] = permPr[k] - 1; for(k = 0; k <= nsuper; k++) xsuper[k] = xsuperPr[k] - 1; /* ------------------------------------------------------------ In case of sparse b, we also create snode, which maps each subnode to the supernode containing it. ------------------------------------------------------------ */ if(bissparse) for(j = 0, k = 0; k < nsuper; k++) while(j < xsuper[k+1]) snode[j++] = k; /* ------------------------------------------------------------ The actual job is done here: y(perm) = L'\b. ------------------------------------------------------------ */ if(!bissparse) for(j = 0; j < n; j++){ memcpy(fwork,b, m * sizeof(double)); bwsolve(fwork,L.jc,L.ir,L.pr,xsuper,nsuper,y); /* y(m) as work */ for(k = 0; k < m; k++) /* y(perm) = fwork */ y[perm[k]] = fwork[k]; y += m; b += m; } else{ /* sparse y,b: don't use perm */ fzeros(fwork,m); for(j = 0; j < n; j++){ inz = yjc[j]; for(k = bjc[j]; k < bjc[j+1]; k++) /* fwork = b */ fwork[bir[k]] = b[k]; selbwsolve(fwork,L.jc,L.ir,L.pr,xsuper,nsuper, snode, yir+inz,yjc[j+1]-inz); for(k = inz; k < yjc[j+1]; k++) y[k] = fwork[yir[k]]; for(k = inz; k < yjc[j+1]; k++) /* fwork = all-0 */ fwork[yir[k]] = 0.0; } } /* ------------------------------------------------------------ RELEASE WORKING ARRAYS. ------------------------------------------------------------ */ mxFree(fwork); mxFree(iwork); }
/* ************************************************************ PROCEDURE mexFunction - Entry for Matlab ************************************************************ */ void mexFunction(const int nlhs, mxArray *plhs[], const int nrhs, const mxArray *prhs[]) { const mxArray *L_FIELD; mxArray *myplhs[NPAROUT]; int m, i, j, inz, iwsiz, nsuper, tmpsiz, fwsiz, nskip, nadd, m1; double *fwork, *d, *skipPr, *orgd; const double *permPr,*xsuperPr,*Ppr,*absd; int *perm, *snode, *xsuper, *iwork, *xlindx, *skip, *skipJc; const int *LINir, *Pjc, *Pir; double canceltol, maxu, abstol; jcir L; char useAbsd, useDelay; /* ------------------------------------------------------------ Check for proper number of arguments blkchol(L,P, pars,absd) with nparinmin=2. ------------------------------------------------------------ */ if(nrhs < NPARINMIN) mexErrMsgTxt("blkchol requires more input arguments"); if(nlhs > NPAROUT) mexErrMsgTxt("blkchol produces less output arguments"); /* ------------------------------------------------------------ Get input matrix P to be factored. ------------------------------------------------------------ */ if( (m = mxGetM(P_IN)) != mxGetN(P_IN)) mexErrMsgTxt("P must be square"); if(!mxIsSparse(P_IN)) mexErrMsgTxt("P must be sparse"); Pjc = mxGetJc(P_IN); Pir = mxGetIr(P_IN); Ppr = mxGetPr(P_IN); /* ------------------------------------------------------------ Disassemble block Cholesky structure L ------------------------------------------------------------ */ if(!mxIsStruct(L_IN)) mexErrMsgTxt("Parameter `L' should be a structure."); if( (L_FIELD = mxGetField(L_IN,0,"perm")) == NULL) /* L.perm */ mexErrMsgTxt("Missing field L.perm."); if(m != mxGetM(L_FIELD) * mxGetN(L_FIELD)) mexErrMsgTxt("perm size mismatch"); permPr = mxGetPr(L_FIELD); if( (L_FIELD = mxGetField(L_IN,0,"L")) == NULL) /* L.L */ mexErrMsgTxt("Missing field L.L."); if( m != mxGetM(L_FIELD) || m != mxGetN(L_FIELD) ) mexErrMsgTxt("Size L.L mismatch."); if(!mxIsSparse(L_FIELD)) mexErrMsgTxt("L.L should be sparse."); L.jc = mxGetJc(L_FIELD); LINir = mxGetIr(L_FIELD); if( (L_FIELD = mxGetField(L_IN,0,"xsuper")) == NULL) /* L.xsuper */ mexErrMsgTxt("Missing field L.xsuper."); nsuper = mxGetM(L_FIELD) * mxGetN(L_FIELD) - 1; if( nsuper > m ) mexErrMsgTxt("Size L.xsuper mismatch."); xsuperPr = mxGetPr(L_FIELD); if( (L_FIELD = mxGetField(L_IN,0,"tmpsiz")) == NULL) /* L.tmpsiz */ mexErrMsgTxt("Missing field L.tmpsiz."); tmpsiz = mxGetScalar(L_FIELD); /* ------------------------------------------------------------ Disassemble pars structure: canceltol, maxu ------------------------------------------------------------ */ canceltol = 1E-15; /* supply with defaults */ maxu = 5E5; abstol = 1E-20; useAbsd = 0; useDelay = 0; if(nrhs >= NPARINMIN + 1){ /* 3rd argument = pars */ if(!mxIsStruct(PARS_IN)) mexErrMsgTxt("Parameter `pars' should be a structure."); if( (L_FIELD = mxGetField(PARS_IN,0,"canceltol")) != NULL) canceltol = mxGetScalar(L_FIELD); /* pars.canceltol */ if( (L_FIELD = mxGetField(PARS_IN,0,"maxu")) != NULL) maxu = mxGetScalar(L_FIELD); /* pars.maxu */ if( (L_FIELD = mxGetField(PARS_IN,0,"abstol")) != NULL){ abstol = mxGetScalar(L_FIELD); /* pars.abstol */ abstol = MAX(abstol, 0.0); } if( (L_FIELD = mxGetField(PARS_IN,0,"delay")) != NULL) useDelay = mxGetScalar(L_FIELD); /* pars.delay */ /* ------------------------------------------------------------ Get optional vector absd ------------------------------------------------------------ */ if(nrhs >= NPARIN){ useAbsd = 1; absd = mxGetPr(ABSD_IN); if(m != mxGetM(ABSD_IN) * mxGetN(ABSD_IN)) mexErrMsgTxt("absd size mismatch"); } } /* ------------------------------------------------------------ Create sparse output matrix L(m x m). ------------------------------------------------------------ */ L_OUT = mxCreateSparse(m,m, L.jc[m],mxREAL); L.ir = mxGetIr(L_OUT); L.pr = mxGetPr(L_OUT); memcpy(mxGetJc(L_OUT), L.jc, (m+1) * sizeof(int)); memcpy(L.ir, LINir, L.jc[m] * sizeof(int)); /* ------------------------------------------------------------ Create ouput vector d(m). ------------------------------------------------------------ */ D_OUT = mxCreateDoubleMatrix(m,1,mxREAL); d = mxGetPr(D_OUT); /* ------------------------------------------------------------ Compute required sizes of working arrays: iwsiz = 2*(m + nsuper). fwsiz = tmpsiz. ------------------------------------------------------------ */ iwsiz = MAX(2*(m+nsuper), 1); fwsiz = MAX(tmpsiz, 1); /* ------------------------------------------------------------ Allocate working arrays: integer: perm(m), snode(m), xsuper(nsuper+1), iwork(iwsiz), xlindx(m+1), skip(m), double: orgd(m), fwork(fwsiz). ------------------------------------------------------------ */ m1 = MAX(m,1); /* avoid alloc to 0 */ perm = (int *) mxCalloc(m1,sizeof(int)); snode = (int *) mxCalloc(m1,sizeof(int)); xsuper = (int *) mxCalloc(nsuper+1,sizeof(int)); iwork = (int *) mxCalloc(iwsiz,sizeof(int)); xlindx = (int *) mxCalloc(m+1,sizeof(int)); skip = (int *) mxCalloc(m1, sizeof(int)); orgd = (double *) mxCalloc(m1,sizeof(double)); fwork = (double *) mxCalloc(fwsiz,sizeof(double)); /* ------------------------------------------------------------ Convert PERM, XSUPER to integer and C-Style ------------------------------------------------------------ */ for(i = 0; i < m; i++){ j = permPr[i]; perm[i] = --j; } for(i = 0; i <= nsuper; i++){ j = xsuperPr[i]; xsuper[i] = --j; } /* ------------------------------------------------------------ Let L = tril(P(PERM,PERM)), uses orgd(m) as temp working storage. ------------------------------------------------------------ */ permuteP(L.jc,L.ir,L.pr, Pjc,Pir,Ppr, perm, orgd, m); /* ------------------------------------------------------------ If no orgd has been supplied, take orgd = diag(L on input) Otherwise, let orgd = absd(perm). ------------------------------------------------------------ */ if(useAbsd) for(j = 0; j < m; j++) orgd[j] = absd[perm[j]]; else for(j = 0; j < m; j++) orgd[j] = L.pr[L.jc[j]]; /* ------------------------------------------------------------ Create "snode" and "xlindx"; change L.ir to the compact subscript array (with xlindx), and do BLOCK SPARSE CHOLESKY. ------------------------------------------------------------ */ nskip = spchol(m, nsuper, xsuper, snode, xlindx, L.ir, orgd, L.jc, L.pr, d, perm, abstol, canceltol, maxu, skip, &nadd, iwsiz, iwork, fwsiz, fwork); if(nskip < 0) mexErrMsgTxt("Insufficient workspace in pblkchol"); /* ------------------------------------------------------------ Copy original row-indices from LINir to L.ir. ------------------------------------------------------------ */ memcpy(L.ir, LINir, L.jc[m] * sizeof(int)); /* ------------------------------------------------------------ Create output matrices skip = sparse([],[],[],m,1,nskip), diagadd = sparse([],[],[],m,1,nadd), ------------------------------------------------------------ */ SKIP_OUT = mxCreateSparse(m,1, MAX(1,nskip),mxREAL); memcpy(mxGetIr(SKIP_OUT), skip, nskip * sizeof(int)); skipJc = mxGetJc(SKIP_OUT); skipJc[0] = 0; skipJc[1] = nskip; skipPr = mxGetPr(SKIP_OUT); /* ------------------------------------------------------------ useDelay = 1 then L(:,i) is i-th column before ith pivot; useful for pivot-delaying strategy. (Fwslv(L, L(:,i)) still required.) ------------------------------------------------------------ */ if(useDelay == 1) for(j = 0; j < nskip; j++) skipPr[j] = 1.0; else for(j = 0; j < nskip; j++){ i = skip[j]; skipPr[j] = L.pr[L.jc[i]]; /* Set skipped l(:,i)=ei. */ L.pr[L.jc[i]] = 1.0; fzeros(L.pr+L.jc[i]+1,L.jc[i+1]-L.jc[i]-1); } DIAGADD_OUT = mxCreateSparse(m,1, MAX(1,nadd),mxREAL); memcpy(mxGetIr(DIAGADD_OUT), iwork, nadd * sizeof(int)); skipJc = mxGetJc(DIAGADD_OUT); skipJc[0] = 0; skipJc[1] = nadd; skipPr = mxGetPr(DIAGADD_OUT); for(j = 0; j < nadd; j++) skipPr[j] = orgd[iwork[j]]; /* ------------------------------------------------------------ Release working arrays. ------------------------------------------------------------ */ mxFree(fwork); mxFree(orgd); mxFree(skip); mxFree(xlindx); mxFree(iwork); mxFree(xsuper); mxFree(snode); mxFree(perm); /* ------------------------------------------------------------ Copy requested output parameters (at least 1), release others. ------------------------------------------------------------ */ i = MAX(nlhs, 1); memcpy(plhs,myplhs, i * sizeof(mxArray *)); for(; i < NPAROUT; i++) mxDestroyArray(myplhs[i]); }