static int DTPUMatMult(void* AA, double x[], double y[], int n){ dtpumat* A=(dtpumat*) AA; ffinteger ione=1,N=n; double BETA=0.0,ALPHA=1.0; double *AP=A->val,*Y=y,*X=x; char UPLO=A->UPLO; if (A->n != n) return 1; if (x==0 && n>0) return 3; dspmv(&UPLO,&N,&ALPHA,AP,X,&ione,&BETA,Y,&ione); return 0; }
static int DTPUMatInverseMult(void* AA, int indx[], int nind, double x[], double y[], int n){ dtpumat* A=(dtpumat*) AA; ffinteger ione=1,N=n; double BETA=0.0,ALPHA=1.0; double *AP=A->v2,*Y=y,*X=x; int i,ii; char UPLO=A->UPLO; if (A->n != n) return 1; if (x==0 && n>0) return 3; if (nind<n/4 ){ memset((void*)y,0,n*sizeof(double)); for (ii=0;ii<nind;ii++){ i=indx[ii]; ALPHA=x[i]; daddrow(AP,ALPHA,i,y,n); } } else { ALPHA=1.0; dspmv(&UPLO,&N,&ALPHA,AP,X,&ione,&BETA,Y,&ione); } return 0; }
void A_Stokes(Element_List **V, Bsystem **B, double *p, double *w, double **wk) { int eDIM = V[0]->flist[0]->dim(); int nel = B[0]->nel, Nbmodes; double **a = B[eDIM]->Gmat->a; int **bmap = B[eDIM]->bmap; double *sc = B[0]->signchange; register int i,j,k; dzero(B[eDIM]->nsolve,w,1); for(k = 0; k < nel; ++k) { Nbmodes = V[0]->flist[k]->Nbmodes; /* Put p boundary modes into wk[0] and impose continuity */ dgathr(eDIM*Nbmodes+1, p, bmap[k], wk[0]); for(j = 0; j < eDIM; ++j) dvmul (Nbmodes, sc, 1, wk[0] + j*Nbmodes, 1, wk[0] + j*Nbmodes,1); dspmv('U',eDIM*Nbmodes+1,1.0,a[V[0]->flist[k]->geom->id], wk[0],1,0.0,wk[1],1); /* do sign change back and put into w */ for(j = 0; j < eDIM; ++j) for(i = 0; i < Nbmodes; ++i) w[bmap[k][i+j*Nbmodes]] += sc[i]*wk[1][i+j*Nbmodes]; w[bmap[k][eDIM*Nbmodes]] += wk[1][eDIM*Nbmodes]; sc += Nbmodes; } /* set first pressure constant to zero */ if(B[eDIM]->singular) w[eDIM*B[0]->nsolve] = 0.0; }
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) { mxArray *A, *B, *C, *X; int A_m, A_n, B_m, B_n; mxClassID class_id; size_t sizeofel; char diag = 'N'; char trans; char type, uplow; void *A_data, *B_data, *C_data, *temp_data; int j; /**********/ if (nrhs < 2) mexErrMsgIdAndTxt ("packed:mtimes:notEnoughInputs", \ "Not enough input parameters."); if (nrhs > 2) mexErrMsgIdAndTxt ("packed:mtimes:tooManyInputs", \ "Too many input parameters."); if (nlhs > 1) mexErrMsgIdAndTxt ("packed:mtimes:tooManyOutputs", \ "Too many output parameters."); A = prhs[0]; B = prhs[1]; /**********/ if (mx2IsEmpty(A) || mx2IsEmpty(B)) { plhs[0] = mxCreateNumericMatrix (0, 0, mxDOUBLE_CLASS, mxREAL); return; } else if (mx2IsScalar(A) || mx2IsScalar(B)) { int temp2; /*mxArray *temp1[] = {A, B};*/ mxArray *temp1[2]; temp1[0] = A; temp1[1] = B; temp2 = mexCallMATLAB (1, plhs, 2, temp1, "times"); if (temp2 != 0) mexErrMsgIdAndTxt ("packed:mtimes:badCall", \ "Error calling times."); return; } check_input (A, B); /**********/ /* BLAS makes available only packed pre-multiplication, i.e., * A * B, where A must be packed. * If A is not packed and B is packed, we do (B' * A')'. */ if (mx2IsPacked(A)) trans = 'N'; else { trans = 'T'; { mxArray *temp1; int temp2; temp2 = mexCallMATLAB (1, &temp1, 1, &A, "transpose"); if (temp2 != 0) mexErrMsgIdAndTxt ("packed:mtimes:badCall", \ "Error calling transpose."); A = temp1; } { mxArray *temp; temp = A; A = B; B = temp; } } mxAssert (mx2IsPacked(A), ""); class_id = mx2GetClassID (A); sizeofel = mx2GetElementSize (A); A_m = mx2GetN (A); A_n = mx2GetN (A); B_m = mx2GetM (B); B_n = mx2GetN (B); mxAssert (A_m == A_n, ""); mxAssert (A_n == B_m, ""); /**********/ if (mx2IsSparse(B) || mx2IsPacked(B)) { int temp2; temp2 = mexCallMATLAB (1, &C, 1, &B, "full"); if (temp2 != 0) mexErrMsgIdAndTxt ("packed:mtimes:badCall", \ "Error calling full."); C_data = mx2GetData (C); B_data = C_data; /* We need B in full. Please note that making B full * is not a waste of memory -- it is exactly the * amount of memory needed to hold C, and we are * going to overwrite B full with C. */ } else { B_data = mx2GetData (B); C_data = mxMalloc (A_m * B_n * sizeofel); C = mxCreateNumericMatrix (0, 0, class_id, mxREAL); mxSetM (C, A_m); mxSetN (C, B_n); mxSetData (C, C_data); } A_data = mx2GetData (A); temp_data = mxMalloc (B_m * sizeofel); /**********/ type = mx2GetPackedType (A); uplow = mx2GetPackedUplow (A); for (j=0; j<B_n; j++) { /* for each column of B */ void *B_col, *C_col, *temp_col; int one_int = 1; /*int temp[] = {0, j};*/ int temp[2]; temp[0] = 0; temp[1] = j; B_col = (void *) (((char *)B_data) + (j*B_m)*sizeofel); C_col = (void *) (((char *)C_data) + (j*B_m)*sizeofel); if (type == 't') { /* xTPMV's vector input is in C_col * xTPMV's vector output is in C_col, too. */ if (C_col != B_col) memcpy (C_col, B_col, B_m * sizeofel); if (class_id == mxSINGLE_CLASS) { single zero = 0.0F; single one = 1.0F; stpmv (&uplow, &trans, &diag, &A_n, A_data, \ C_col, &one_int); } else if (class_id == mxDOUBLE_CLASS) { double zero = 0.0; double one = 1.0; dtpmv (&uplow, &trans, &diag, &A_n, A_data, \ C_col, &one_int); } } else if (type == 's') { /* xSPMV's vector input is in B_col * * xSPMV's vector output is in temp_col */ if (C_col == B_col) temp_col = temp_data; else temp_col = C_col; if (class_id == mxSINGLE_CLASS) { single zero = 0.0F; single one = 1.0F; sspmv (&uplow, &A_n, &one, A_data, \ B_col, &one_int, &zero, temp_col, &one_int); } else if (class_id == mxDOUBLE_CLASS) { double zero = 0.0; double one = 1.0; dspmv (&uplow, &A_n, &one, A_data, \ B_col, &one_int, &zero, temp_col, &one_int); } if (C_col == B_col) memcpy (C_col, temp_col, B_m * sizeofel); } else { mexErrMsgIdAndTxt ("packed:mtimes:unknownMatrixType", \ "Unknown matrix type."); } } mxFree(temp_data); /**********/ if (mx2IsPacked(A) && mx2IsPacked(B)) { char A_type = mx2GetPackedType (A); char A_uplow = mx2GetPackedUplow (A); char B_type = mx2GetPackedType (B); char B_uplow = mx2GetPackedUplow (B); if ( (A_type == 't' && B_type == 't' && A_uplow == B_uplow) || (A_type == 's' && B_type == 's') ) C = create_packed (C, A_type, A_uplow); } if (trans == 'T') { mxArray *temp1; int temp2; temp2 = mexCallMATLAB (1, &temp1, 1, &C, "transpose"); if (temp2 != 0) mexErrMsgIdAndTxt ("packed:mtimes:badCall", \ "Error calling transpose."); C = temp1; } plhs[0] = C; }
static void setupRHS (Element_List **V, Element_List **Vf,double *rhs, double *u0, Bndry **Vbc, Bsystem **Vbsys){ register int i,k; int N,nbl; int eDIM = V[0]->flist[0]->dim(); Bsystem *PB = Vbsys[eDIM],*B = Vbsys[0]; int nel = B->nel,info; int **ipiv = B->Gmat->cipiv; double **binvc = B->Gmat->binvc; double **invc = B->Gmat->invc; double ***dbinvc = B->Gmat->dbinvc; double **p_binvc = PB->Gmat->binvc; Element *E,*E1; Bndry *Ebc; double *tmp; if(eDIM == 2) tmp = dvector(0,max(8*LGmax,(LGmax-2)*(LGmax-2))); else tmp = dvector(0,18*LGmax*LGmax); B = Vbsys[0]; PB = Vbsys[eDIM]; #ifdef __LIBCATAMOUNT__ st1 = dclock(); #else st1 = clock(); #endif /* save initial condition */ saveinit(V,u0,Vbsys); Timing1("saveinit.........."); /* take inner product if in physical space */ for(i = 0; i < eDIM; ++i){ if(Vf[i]->fhead->state == 'p') Vf[i]->Iprod(Vf[i]); } /* zero pressure field */ dzero(Vf[eDIM]->hjtot,Vf[eDIM]->base_hj,1); Timing1("zeroing..........."); /* condense out interior from u-vel + p */ for(i = 0; i < eDIM; ++i) for(E=Vf[i]->fhead;E;E=E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; if(N) dgemv('T', N, nbl, -1., binvc[E->geom->id], N, E->vert->hj+nbl, 1, 1., E->vert->hj,1); } Timing1("first condense(v)."); for(i = 0; i < eDIM; ++i) for(E=Vf[i]->fhead;E;E=E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; if(N) { E1 = Vf[eDIM]->flist[E->id]; if(B->lambda->wave){ dcopy(N,E->vert->hj+nbl,1,tmp,1); dgetrs('N', N, 1, invc[E->geom->id], N,ipiv[E->geom->id],tmp,N,info); dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, tmp, 1, 1., E1->vert->hj,1); } else{ dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, E->vert->hj+nbl, 1, 1., E1->vert->hj,1); } } } Timing1("first condense(p)."); /* add flux terms */ for(i = 0; i < eDIM; ++i) for(Ebc = Vbc[i]; Ebc; Ebc = Ebc->next) if(Ebc->type == 'F' || Ebc->type == 'R') Vf[i]->flist[Ebc->elmt->id]->Add_flux_terms(Ebc); /* second level of factorisation to orthogonalise basis to p */ for(E=Vf[eDIM]->fhead;E;E=E->next){ E1 = Vf[0]->flist[E->id]; nbl = eDIM*E1->Nbmodes + 1; N = E->Nmodes-1; dgemv('T', N, nbl, -1.0, p_binvc[E->geom->id], N, E->vert->hj+1, 1, 0.0, tmp,1); for(i = 0; i < eDIM; ++i){ E1 = Vf[i]->flist[E->id]; dvadd(E1->Nbmodes,tmp+i*E1->Nbmodes,1,E1->vert->hj,1,E1->vert->hj,1); } E->vert->hj[0] += tmp[nbl-1]; } Timing1("second condense..."); /* subtract boundary initial conditions */ if(PB->smeth == iterative){ double **wk; double **a = PB->Gmat->a; if(eDIM == 2) wk = dmatrix(0,1,0,eDIM*4*LGmax); else wk = dmatrix(0,1,0,eDIM*6*LGmax*LGmax); for(k = 0; k < nel; ++k){ nbl = V[0]->flist[k]->Nbmodes; /* gather vector */ for(i = 0; i < eDIM; ++i) dcopy(nbl,V[i]->flist[k]->vert->hj,1,wk[0]+i*nbl,1); dspmv('U',eDIM*nbl+1,1.0,a[V[0]->flist[k]->geom->id], wk[0],1,0.0,wk[1],1); /* subtract of Vf */ for(i = 0; i < eDIM; ++i) dvsub(nbl,Vf[i]->flist[k]->vert->hj,1,wk[1]+i*nbl,1, Vf[i]->flist[k]->vert->hj,1); Vf[eDIM]->flist[k]->vert->hj[0] -= wk[1][eDIM*nbl]; } GathrBndry_Stokes(Vf,rhs,Vbsys); free_dmatrix(wk,0,0); } else{ if(Vbc[0]->DirRHS){ GathrBndry_Stokes(Vf,rhs,Vbsys); /* subtract of bcs */ dvsub(PB->nsolve,rhs,1,Vbc[0]->DirRHS,1,rhs,1); /* zero ic vector */ dzero(PB->nsolve,u0,1); } else{ /* zero out interior components since only deal with boundary initial conditions (interior is always direct) */ for(i = 0; i < eDIM; ++i) for(E = V[i]->fhead; E; E = E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; dzero(N, E->vert->hj + nbl, 1); } /* inner product of divergence for pressure forcing */ for(i = 0; i < eDIM; ++i) V[i]->Trans(V[i], J_to_Q); V[0]->Grad(V[eDIM],0,0,'x'); V[1]->Grad(0,Vf[eDIM],0,'y'); dvadd(V[1]->htot,V[eDIM]->base_h,1,Vf[eDIM]->base_h,1, V[eDIM]->base_h,1); if(eDIM == 3){ V[2]->Grad(0,V[eDIM],0,'z'); dvadd(V[2]->htot,V[eDIM]->base_h,1,Vf[eDIM]->base_h,1, V[eDIM]->base_h,1); } #ifndef PCONTBASE for(k = 0; k < nel; ++k) V[eDIM]->flist[k]->Ofwd(*V[eDIM]->flist[k]->h, V[eDIM]->flist[k]->vert->hj, V[eDIM]->flist[k]->dgL); #else V[eDIM]->Iprod(V[eDIM]); #endif for(i = 0; i < eDIM; ++i){ for(k = 0; k < nel; ++k){ E = V[i]->flist[k]; nbl = E->Nbmodes; N = E->Nmodes - nbl; E->HelmHoltz(PB->lambda+k); dscal(E->Nmodes, -B->lambda[k].d, E->vert->hj, 1); if(N) { /* condense out interior terms in velocity */ dgemv('T', N, nbl, -1., binvc[E->geom->id], N, E->vert->hj+nbl, 1, 1., E->vert->hj,1); /* condense out interior terms in pressure*/ E1 = V[eDIM]->flist[k]; if(B->lambda->wave){ dcopy(N,E->vert->hj+nbl,1,tmp,1); dgetrs('N',N,1,invc[E->geom->id],N,ipiv[E->geom->id],tmp,N,info); dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, tmp, 1, 1., E1->vert->hj,1); } else{ dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, E->vert->hj+nbl, 1, 1., E1->vert->hj,1); } } } } /* second level of factorisation to orthogonalise basis to p */ /* p - vel */ for(E=V[eDIM]->fhead;E;E=E->next){ E1 = V[0]->flist[E->id]; nbl = eDIM*E1->Nbmodes + 1; N = E->Nmodes-1; dgemv('T', N, nbl, -1.0, p_binvc[E->geom->id], N, E->vert->hj+1, 1, 0.0, tmp,1); for(i = 0; i < eDIM; ++i){ E1 = V[i]->flist[E->id]; dvadd(E1->Nbmodes,tmp+i*E1->Nbmodes,1,E1->vert->hj,1,E1->vert->hj,1); dvadd(E1->Nbmodes,E1->vert->hj,1,Vf[i]->flist[E->id]->vert->hj,1, Vf[i]->flist[E->id]->vert->hj,1); } Vf[eDIM]->flist[E->id]->vert->hj[0] += E->vert->hj[0] + tmp[nbl-1]; } Timing1("bc condense......."); GathrBndry_Stokes(Vf,rhs,Vbsys); Timing1("GatherBndry......."); } } /* finally copy inner product of f into v for inner solve */ for(i = 0; i < eDIM; ++i) for(E = V[i]->fhead; E; E= E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; E1 = Vf[i]->flist[E->id]; dcopy(N, E1->vert->hj+nbl, 1, E->vert->hj+nbl, 1); } for(E = Vf[eDIM]->fhead; E; E = E->next){ E1 = V[eDIM]->flist[E->id]; dcopy(E->Nmodes,E->vert->hj,1,E1->vert->hj,1); } dzero(PB->nglobal-PB->nsolve, rhs + PB->nsolve, 1); free(tmp); }
static void divergence_free_init(Element_List **V, double *u0, double *r, Bsystem **B, double **wk){ register int i,j,k,cnt; int eDIM = V[0]->fhead->dim(); int asize,info,Nbmodes; int **bmap = B[eDIM]->bmap; int nel = B[eDIM]->nel; int nglobal = B[eDIM]->nglobal; int nsolve = B[eDIM]->nsolve; int vsolve = B[0]->nsolve; double **a = B[eDIM]->Gmat->a,*x,*b,*sc,*sc1; static double *invb; x = dvector(0,nel-1); /* form and invert B' B system */ if(!invb){ invb = dvector(0,nel*(nel+1)/2-1); sc = B[0]->signchange; for(cnt = 0,k = 0; k < nel; ++k){ /* gather B from local systems */ Nbmodes = V[0]->flist[k]->Nbmodes; asize = Nbmodes*eDIM+1; b = a[V[0]->flist[k]->geom->id] + asize*(asize-1)/2; dzero(nglobal,u0,1); for(j = 0; j < eDIM; ++j){ for(i = 0; i < Nbmodes; ++i) u0[bmap[k][i+j*Nbmodes]] += sc[i]*b[i+j*Nbmodes]; } dzero(nglobal-nsolve,u0+nsolve,1); /* take inner product with B' */ sc1 = B[0]->signchange; for(j = k; j < nel; ++j,cnt++){ dzero(asize,wk[0],1); dgathr(asize-1, u0, bmap[j], wk[0]); for(i = 0; i < eDIM; ++i) dvmul (Nbmodes, sc1, 1, wk[0]+i*Nbmodes, 1, wk[0]+i*Nbmodes, 1); Nbmodes = V[0]->flist[j]->Nbmodes; asize = Nbmodes*eDIM+1; b = a[V[0]->flist[j]->geom->id] + asize*(asize-1)/2; invb[cnt] = ddot(asize-1,b,1,wk[0],1); sc1 += Nbmodes; } sc += V[0]->flist[k]->Nbmodes; } /* take out first row to deal with singularity */ if(B[eDIM]->singular) dzero(nel,invb,1); invb[0] = 1.0; dpptrf('L', nel, invb, info); } /* solve B' B x = r */ dcopy (nel,r+eDIM*vsolve,1,x,1); dpptrs('L', nel, 1, invb, x, nel, info); dzero(B[eDIM]->nglobal,u0,1); /* generate initial vector as u0 = B x */ sc = B[0]->signchange; for(k = 0; k < nel; ++k){ Nbmodes = V[0]->flist[k]->Nbmodes; asize = Nbmodes*eDIM+1; b = a[V[0]->flist[k]->geom->id] + asize*(asize-1)/2; dsmul(asize-1,x[k],b,1,wk[0],1); for(j = 0; j < eDIM; ++j){ for(i = 0; i < Nbmodes; ++i) u0[bmap[k][i+j*Nbmodes]] += sc[i]*wk[0][i+j*Nbmodes]; } sc += Nbmodes; } dzero(nglobal-eDIM*vsolve, u0 + eDIM*vsolve, 1); /* subtract off a*u0 from r */ sc = B[0]->signchange; for(k = 0; k < nel; ++k){ Nbmodes = V[0]->flist[k]->Nbmodes; dzero(eDIM*Nbmodes+1,wk[0],1); dgathr(eDIM*Nbmodes+1, u0, bmap[k], wk[0]); for(j = 0; j < eDIM; ++j) dvmul (Nbmodes, sc, 1, wk[0] + j*Nbmodes, 1, wk[0] + j*Nbmodes,1); dspmv('U',eDIM*Nbmodes+1,1.0,a[V[0]->flist[k]->geom->id], wk[0],1,0.0,wk[1],1); for(j = 0; j < eDIM; ++j) for(i = 0; i < Nbmodes; ++i) r[bmap[k][i+j*Nbmodes]] -= sc[i]*wk[1][i+j*Nbmodes]; r[bmap[k][eDIM*Nbmodes]] -= wk[1][eDIM*Nbmodes]; sc += Nbmodes; } r[eDIM*vsolve] = 0.0; dzero(nglobal-nsolve, r + nsolve, 1); free(x); }