/* ----------------------------------------------- purpose -- to write the object to a matlab file return value -- 1 -- normal return -1 -- mtx is NULL -2 -- mtx is NULL -3 -- fp is NULL created -- 98may02, cca ----------------------------------------------- */ int DenseMtx_writeForMatlab ( DenseMtx *mtx, char *mtxname, FILE *fp ) { double *entries ; int inc1, inc2, irow, jcol, ncol, nrow ; int *colind, *rowind ; /* --------------- check the input --------------- */ if ( mtx == NULL ) { fprintf(stderr, "\n fatal error in DenseMtx_writeForMatlab()" "\n mtx is NULL\n") ; return(-1) ; } if ( mtxname == NULL ) { fprintf(stderr, "\n fatal error in DenseMtx_writeForMatlab()" "\n mtxname is NULL\n") ; return(-2) ; } if ( fp == NULL ) { fprintf(stderr, "\n fatal error in DenseMtx_writeForMatlab()" "\n fp is NULL\n") ; return(-3) ; } DenseMtx_rowIndices(mtx, &nrow, &rowind) ; DenseMtx_columnIndices(mtx, &ncol, &colind) ; DenseMtx_dimensions(mtx, &nrow, &ncol) ; inc1 = DenseMtx_rowIncrement(mtx) ; inc2 = DenseMtx_columnIncrement(mtx) ; entries = DenseMtx_entries(mtx) ; if ( DENSEMTX_IS_REAL(mtx) ) { for ( jcol = 0 ; jcol < ncol ; jcol++ ) { for ( irow = 0 ; irow < nrow ; irow++ ) { fprintf(fp, "\n %s(%d,%d) = %24.16e ;", mtxname, rowind[irow]+1, colind[jcol]+1, entries[irow*inc1+jcol*inc2]) ; } } } else if ( DENSEMTX_IS_COMPLEX(mtx) ) { for ( jcol = 0 ; jcol < ncol ; jcol++ ) { for ( irow = 0 ; irow < nrow ; irow++ ) { fprintf(fp, "\n %s(%d,%d) = %24.16e + %24.16e*i ;", mtxname, rowind[irow]+1, colind[jcol]+1, entries[2*(irow*inc1+jcol*inc2)], entries[2*(irow*inc1+jcol*inc2)+1]) ; } } } return(1) ; }
void spooles_solve(void *ptr, double *b, long neq) { /* rhs vector B * Note that there is only one rhs vector, thus * a bit simpler that the AllInOne example */ long size = neq; DenseMtx *mtxB,*mtxX; struct factorinfo *pfi_ = ptr; //printf(" Solving the system of equations using the symmetric spooles solver\n"); { int i; mtxB = DenseMtx_new(); DenseMtx_init(mtxB, SPOOLES_REAL, 0, 0, size, 1, 1, size); DenseMtx_zero(mtxB); for (i = 0; i < size; i++) { DenseMtx_setRealEntry(mtxB, i, 0, b[i]); } if (DEBUG_LVL > 1) { fprintf(msgFile, "\n\n rhs matrix in original ordering"); DenseMtx_writeForHumanEye(mtxB, msgFile); fflush(msgFile); } } #ifdef USE_MT //printf(" Using up to %d cpu(s) for spooles.\n\n", num_cpus); if (num_cpus > 1) { /* do not use the multithreaded solver unless * we have multiple threads - avoid the * locking overhead */ mtxX=fsolve_MT(pfi_, mtxB); } else { mtxX=fsolve(pfi_, mtxB); } #else //printf(" Using 1 cpu for spooles.\n\n"); mtxX=fsolve(pfi_, mtxB); #endif /* convert the result back to Calculix representation */ { int i; for (i = 0; i < size; i++) { b[i] = DenseMtx_entries(mtxX)[i]; } } /* cleanup */ DenseMtx_free(mtxX); }
/* ---------------------------------------------------- purpose -- to add a row of the matrix into a vector irow -- local row id vec -- double vector to supply the row entries created -- 98aug12, cca ---------------------------------------------------- */ void DenseMtx_addVectorIntoRow ( DenseMtx *mtx, int irow, double *vec ) { double *entries ; int inc1, inc2, jcol, jj, kk, nrow, ncol ; int *colind, *rowind ; /* --------------- check the input --------------- */ if ( mtx == NULL || irow < 0 || vec == NULL ) { fprintf(stderr, "\n fatal error in DenseMtx_addVectorIntoRow()" "\n bad input, mtx %p, irow %d, vec %p\n", mtx, irow, vec) ; spoolesFatal(); } DenseMtx_rowIndices(mtx, &nrow, &rowind) ; if ( irow >= nrow ) { fprintf(stderr, "\n fatal error in DenseMtx_addVectorIntoRow()" "\n irow = %d, nrow = %d\n", irow, nrow) ; spoolesFatal(); } DenseMtx_columnIndices(mtx, &ncol, &colind) ; inc1 = DenseMtx_rowIncrement(mtx) ; inc2 = DenseMtx_columnIncrement(mtx) ; entries = DenseMtx_entries(mtx) ; if ( DENSEMTX_IS_REAL(mtx) ) { for ( jcol = jj = 0, kk = irow*inc1 ; jcol < ncol ; jcol++, jj++, kk += inc2 ) { entries[kk] += vec[jj] ; } } else if ( DENSEMTX_IS_COMPLEX(mtx) ) { for ( jcol = jj = 0, kk = irow*inc1 ; jcol < ncol ; jcol++, jj++, kk += inc2 ) { entries[2*kk] += vec[2*jj] ; entries[2*kk+1] += vec[2*jj+1] ; } } return ; }
/* ----------------------------------- compute three checksums sums[0] = sum of row indices sums[1] = sum of columns indices sums[2] = sum of entry magnitudes created -- 98may16, cca ----------------------------------- */ void DenseMtx_checksums ( DenseMtx *mtx, double sums[] ) { double *entries ; int ii, ncol, nent, nrow ; int *colind, *rowind ; /* --------------- check the input --------------- */ if ( mtx == NULL || sums == NULL ) { fprintf(stderr, "\n fatal error in DenseMtx_checksums(%p,%p)" "\n bad input\n", mtx, sums) ; spoolesFatal(); } sums[0] = sums[1] = sums[2] = 0.0 ; DenseMtx_rowIndices(mtx, &nrow, &rowind) ; for ( ii = 0 ; ii < nrow ; ii++ ) { sums[0] += rowind[ii] ; } DenseMtx_columnIndices(mtx, &ncol, &colind) ; for ( ii = 0 ; ii < ncol ; ii++ ) { sums[1] += colind[ii] ; } entries = DenseMtx_entries(mtx) ; nent = nrow*ncol ; if ( DENSEMTX_IS_REAL(mtx) ) { for ( ii = 0 ; ii < nent ; ii++ ) { sums[2] += fabs(entries[ii]) ; } } else if ( DENSEMTX_IS_COMPLEX(mtx) ) { for ( ii = 0 ; ii < nent ; ii++ ) { sums[2] += Zabs(entries[2*ii], entries[2*ii+1]) ; } } return ; }
/* --------------------------------------------------- purpose -- move the solution from the individual SubMtx objects into the global solution SubMtx object created -- 98feb20 --------------------------------------------------- */ void FrontMtx_storeSolution ( FrontMtx *frontmtx, int owners[], int myid, SubMtxManager *manager, SubMtx *p_mtx[], DenseMtx *solmtx, int msglvl, FILE *msgFile ) { char localsol ; SubMtx *xmtxJ ; double *sol, *xJ ; int inc1, inc2, irow, jrhs, J, kk, ncolJ, neqns, nfront, nJ, nrhs, nrowInSol, nrowJ ; int *colindJ, *colmap, *rowind ; if ( (nrowInSol = solmtx->nrow) != (neqns = frontmtx->neqns) ) { /* -------------------------------------------------------------- the solution matrix is only part of the total solution matrix. (this happens in an MPI environment where the rhs is partitioned among the processors.) create a map from the global row indices to the indices local to this solution matrix. -------------------------------------------------------------- */ colmap = IVinit(neqns, -1) ; rowind = solmtx->rowind ; if ( msglvl > 1 ) { fprintf(msgFile, "\n solmtx->rowind") ; IVfprintf(msgFile, solmtx->nrow, rowind) ; fflush(msgFile) ; } for ( irow = 0 ; irow < nrowInSol ; irow++ ) { colmap[rowind[irow]] = irow ; } localsol = 'T' ; if ( msglvl > 1 ) { fprintf(msgFile, "\n colmap") ; IVfprintf(msgFile, neqns, colmap) ; fflush(msgFile) ; } } else { localsol = 'F' ; } DenseMtx_dimensions(solmtx, &neqns, &nrhs) ; nfront = FrontMtx_nfront(frontmtx) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (owners == NULL || owners[J] == myid) && (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; xmtxJ = p_mtx[J] ; if ( xmtxJ == NULL ) { fprintf(stderr, "\n fatal error in storeSolution(%d)" "\n thread %d, xmtxJ = NULL", J, myid) ; exit(-1) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n storing solution for front %d", J) ; SubMtx_writeForHumanEye(xmtxJ, msgFile) ; fflush(msgFile) ; } if ( localsol == 'T' ) { /* ------------------------------------------------------ map the global row indices into the local row indices ------------------------------------------------------ */ if ( msglvl > 1 ) { fprintf(msgFile, "\n global row indices") ; IVfprintf(msgFile, nJ, colindJ) ; fflush(msgFile) ; } for ( irow = 0 ; irow < nJ ; irow++ ) { colindJ[irow] = colmap[colindJ[irow]] ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n local row indices") ; IVfprintf(msgFile, nJ, colindJ) ; fflush(msgFile) ; } } /* ---------------------------------- store x_{J,*} into solution matrix ---------------------------------- */ sol = DenseMtx_entries(solmtx) ; SubMtx_denseInfo(xmtxJ, &nrowJ, &ncolJ, &inc1, &inc2, &xJ) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { for ( irow = 0 ; irow < nJ ; irow++ ) { kk = colindJ[irow] ; sol[kk] = xJ[irow] ; } sol += neqns ; xJ += nJ ; } } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { for ( irow = 0 ; irow < nJ ; irow++ ) { kk = colindJ[irow] ; sol[2*kk] = xJ[2*irow] ; sol[2*kk+1] = xJ[2*irow+1] ; } sol += 2*neqns ; xJ += 2*nJ ; } } /* fprintf(msgFile, "\n solution for front %d stored", J) ; */ SubMtxManager_releaseObject(manager, xmtxJ) ; if ( localsol == 'T' ) { /* ----------------------------------------------------------- map the local row indices back into the global row indices ----------------------------------------------------------- */ for ( irow = 0 ; irow < nJ ; irow++ ) { colindJ[irow] = rowind[colindJ[irow]] ; } } } } if ( localsol == 'T' ) { IVfree(colmap) ; } /* fprintf(msgFile, "\n\n SOLUTION") ; DenseMtx_writeForHumanEye(solmtx, msgFile) ; */ return ; }
PetscErrorCode MatSolve_SeqSpooles(Mat A,Vec b,Vec x) { Mat_Spooles *lu = (Mat_Spooles*)A->spptr; PetscScalar *array; DenseMtx *mtxY, *mtxX ; PetscErrorCode ierr; PetscInt irow,neqns=A->cmap->n,nrow=A->rmap->n,*iv; #if defined(PETSC_USE_COMPLEX) double x_real,x_imag; #else double *entX; #endif PetscFunctionBegin; mtxY = DenseMtx_new(); DenseMtx_init(mtxY, lu->options.typeflag, 0, 0, nrow, 1, 1, nrow); /* column major */ ierr = VecGetArray(b,&array);CHKERRQ(ierr); if (lu->options.useQR) { /* copy b to mtxY */ for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, irow, 0, *array++); #else DenseMtx_setComplexEntry(mtxY, irow, 0, PetscRealPart(array[irow]), PetscImaginaryPart(array[irow])); #endif } else { /* copy permuted b to mtxY */ iv = IV_entries(lu->oldToNewIV); for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++); #else DenseMtx_setComplexEntry(mtxY,*iv++,0,PetscRealPart(array[irow]),PetscImaginaryPart(array[irow])); #endif } ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); mtxX = DenseMtx_new(); DenseMtx_init(mtxX, lu->options.typeflag, 0, 0, neqns, 1, 1, neqns); if (lu->options.useQR) { FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n right hand side matrix after permutation");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution matrix in new ordering");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* permute solution into original ordering, then copy to x */ DenseMtx_permuteRows(mtxX, lu->newToOldIV); ierr = VecGetArray(x,&array);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) entX = DenseMtx_entries(mtxX); DVcopy(neqns, array, entX); #else for (irow=0; irow<nrow; irow++){ DenseMtx_complexEntry(mtxX,irow,0,&x_real,&x_imag); array[irow] = x_real+x_imag*PETSC_i; } #endif ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); /* free memory */ DenseMtx_free(mtxX); DenseMtx_free(mtxY); PetscFunctionReturn(0); }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------------------ generate a random matrix and test a matrix-matrix multiply method. the output is a matlab file to test correctness. created -- 98jan29, cca -------------------------------------------------------------------- */ { DenseMtx *X, *Y, *Y2 ; double alpha[2] ; double alphaImag, alphaReal, t1, t2 ; double *zvec ; Drand *drand ; int col, dataType, ii, msglvl, ncolA, nitem, nops, nrhs, nrowA, nrowX, nrowY, nthread, row, seed, storageMode, symflag, transposeflag ; int *colids, *rowids ; InpMtx *A ; FILE *msgFile ; if ( argc != 15 ) { fprintf(stdout, "\n\n %% usage : %s msglvl msgFile symflag storageMode " "\n %% nrow ncol nent nrhs seed alphaReal alphaImag nthread" "\n %% msglvl -- message level" "\n %% msgFile -- message file" "\n %% dataType -- type of matrix entries" "\n %% 1 -- real" "\n %% 2 -- complex" "\n %% symflag -- symmetry flag" "\n %% 0 -- symmetric" "\n %% 1 -- hermitian" "\n %% 2 -- nonsymmetric" "\n %% storageMode -- storage mode" "\n %% 1 -- by rows" "\n %% 2 -- by columns" "\n %% 3 -- by chevrons, (requires nrow = ncol)" "\n %% transpose -- transpose flag" "\n %% 0 -- Y := Y + alpha * A * X" "\n %% 1 -- Y := Y + alpha * A^H * X, nonsymmetric only" "\n %% 2 -- Y := Y + alpha * A^T * X, nonsymmetric only" "\n %% nrowA -- number of rows in A" "\n %% ncolA -- number of columns in A" "\n %% nitem -- number of items" "\n %% nrhs -- number of right hand sides" "\n %% seed -- random number seed" "\n %% alphaReal -- y := y + alpha*A*x" "\n %% alphaImag -- y := y + alpha*A*x" "\n %% nthread -- # of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } dataType = atoi(argv[3]) ; symflag = atoi(argv[4]) ; storageMode = atoi(argv[5]) ; transposeflag = atoi(argv[6]) ; nrowA = atoi(argv[7]) ; ncolA = atoi(argv[8]) ; nitem = atoi(argv[9]) ; nrhs = atoi(argv[10]) ; seed = atoi(argv[11]) ; alphaReal = atof(argv[12]) ; alphaImag = atof(argv[13]) ; nthread = atoi(argv[14]) ; fprintf(msgFile, "\n %% %s " "\n %% msglvl -- %d" "\n %% msgFile -- %s" "\n %% dataType -- %d" "\n %% symflag -- %d" "\n %% storageMode -- %d" "\n %% transposeflag -- %d" "\n %% nrowA -- %d" "\n %% ncolA -- %d" "\n %% nitem -- %d" "\n %% nrhs -- %d" "\n %% seed -- %d" "\n %% alphaReal -- %e" "\n %% alphaImag -- %e" "\n %% nthread -- %d" "\n", argv[0], msglvl, argv[2], dataType, symflag, storageMode, transposeflag, nrowA, ncolA, nitem, nrhs, seed, alphaReal, alphaImag, nthread) ; fflush(msgFile) ; if ( dataType != 1 && dataType != 2 ) { fprintf(stderr, "\n invalid value %d for dataType\n", dataType) ; spoolesFatal(); } if ( symflag != 0 && symflag != 1 && symflag != 2 ) { fprintf(stderr, "\n invalid value %d for symflag\n", symflag) ; spoolesFatal(); } if ( storageMode != 1 && storageMode != 2 && storageMode != 3 ) { fprintf(stderr, "\n invalid value %d for storageMode\n", storageMode) ; spoolesFatal(); } if ( transposeflag < 0 || transposeflag > 2 ) { fprintf(stderr, "\n error, transposeflag = %d, must be 0, 1 or 2", transposeflag) ; spoolesFatal(); } if ( (transposeflag == 1 && symflag != 2) || (transposeflag == 2 && symflag != 2) ) { fprintf(stderr, "\n error, transposeflag = %d, symflag = %d", transposeflag, symflag) ; spoolesFatal(); } if ( transposeflag == 1 && dataType != 2 ) { fprintf(stderr, "\n error, transposeflag = %d, dataType = %d", transposeflag, dataType) ; spoolesFatal(); } if ( symflag == 1 && dataType != 2 ) { fprintf(stderr, "\n symflag = 1 (hermitian), dataType != 2 (complex)") ; spoolesFatal(); } if ( nrowA <= 0 || ncolA <= 0 || nitem <= 0 ) { fprintf(stderr, "\n invalid value: nrow = %d, ncol = %d, nitem = %d", nrowA, ncolA, nitem) ; spoolesFatal(); } if ( symflag < 2 && nrowA != ncolA ) { fprintf(stderr, "\n invalid data: symflag = %d, nrow = %d, ncol = %d", symflag, nrowA, ncolA) ; spoolesFatal(); } alpha[0] = alphaReal ; alpha[1] = alphaImag ; /* ---------------------------- initialize the matrix object ---------------------------- */ A = InpMtx_new() ; InpMtx_init(A, storageMode, dataType, 0, 0) ; drand = Drand_new() ; /* ---------------------------------- generate a vector of nitem triples ---------------------------------- */ rowids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, nrowA) ; Drand_fillIvector(drand, nitem, rowids) ; colids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, ncolA) ; Drand_fillIvector(drand, nitem, colids) ; Drand_setUniform(drand, 0.0, 1.0) ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { zvec = DVinit(nitem, 0.0) ; Drand_fillDvector(drand, nitem, zvec) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { zvec = ZVinit(nitem, 0.0, 0.0) ; Drand_fillDvector(drand, 2*nitem, zvec) ; } /* ----------------------------------- assemble the entries entry by entry ----------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n\n A = zeros(%d,%d) ;", nrowA, ncolA) ; } if ( symflag == 1 ) { /* ---------------- hermitian matrix ---------------- */ for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] == colids[ii] ) { zvec[2*ii+1] = 0.0 ; } if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } else if ( symflag == 0 ) { /* ---------------- symmetric matrix ---------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputRealEntry(A, row, col, zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } } else { /* ------------------- nonsymmetric matrix ------------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputRealEntry(A, rowids[ii], colids[ii], zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputComplexEntry(A, rowids[ii], colids[ii], zvec[2*ii], zvec[2*ii+1]) ; } } } InpMtx_changeStorageMode(A, INPMTX_BY_VECTORS) ; DVfree(zvec) ; if ( symflag == 0 || symflag == 1 ) { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 4*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 16*A->nent*nrhs ; } } else { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 2*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 8*A->nent*nrhs ; } } if ( msglvl > 1 ) { /* ------------------------------------------- write the assembled matrix to a matlab file ------------------------------------------- */ InpMtx_writeForMatlab(A, "A", msgFile) ; if ( symflag == 0 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = A(k,j) ;" "\n end" "\n end", nrowA, ncolA) ; } else if ( symflag == 1 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = ctranspose(A(k,j)) ;" "\n end" "\n end", nrowA, ncolA) ; } } /* ------------------------------- generate dense matrices X and Y ------------------------------- */ if ( transposeflag == 0 ) { nrowX = ncolA ; nrowY = nrowA ; } else { nrowX = nrowA ; nrowY = ncolA ; } X = DenseMtx_new() ; Y = DenseMtx_new() ; Y2 = DenseMtx_new() ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_REAL, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_COMPLEX, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, 2*nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, 2*nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(2*nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, nrhs) ; DenseMtx_writeForMatlab(X, "X", msgFile) ; fprintf(msgFile, "\n Y = zeros(%d,%d) ;", nrowY, nrhs) ; DenseMtx_writeForMatlab(Y, "Y", msgFile) ; } /* -------------------------------------------- perform the matrix-matrix multiply in serial -------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_sym_mmm(A, Y, alpha, X) ; } else if ( symflag == 1 ) { InpMtx_herm_mmm(A, Y, alpha, X) ; } else if ( symflag == 2 ) { InpMtx_nonsym_mmm(A, Y, alpha, X) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_H(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_T(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f serial mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* -------------------------------------------------------- perform the matrix-matrix multiply in multithreaded mode -------------------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_MT_sym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 1 ) { InpMtx_MT_herm_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 2 ) { InpMtx_MT_nonsym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_H(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_T(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f MT mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(A) ; DenseMtx_free(X) ; DenseMtx_free(Y) ; DenseMtx_free(Y2) ; IVfree(rowids) ; IVfree(colids) ; Drand_free(drand) ; fclose(msgFile) ; return(1) ; }
/* ------------------------------------------------------------- purpose --- to compute a matrix-vector multiply y[] = C * x[] where C is the identity, A or B (depending on *pprbtype). *pnrows -- # of rows in x[] *pncols -- # of columns in x[] *pprbtype -- problem type *pprbtype = 1 --> vibration problem, matrix is A *pprbtype = 2 --> buckling problem, matrix is B *pprbtype = 3 --> matrix is identity, y[] = x[] x[] -- vector to be multiplied NOTE: the x[] vector is global, not a portion y[] -- product vector NOTE: the y[] vector is global, not a portion created -- 98aug28, cca & jcp ------------------------------------------------------------- */ void JimMatMulMPI ( int *pnrows, int *pncols, double x[], double y[], int *pprbtype, void *data ) { BridgeMPI *bridge = (BridgeMPI *) data ; int ncols, nent, nrows ; #if MYDEBUG > 0 double t1, t2 ; count_JimMatMul++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(bridge->msgFile) ; #endif nrows = *pnrows ; ncols = *pncols ; nent = nrows*ncols ; if ( *pprbtype == 3 ) { /* -------------------------- ... matrix is the identity -------------------------- */ DVcopy(nent, y, x) ; } else { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jcol, jj, kk, myid, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; /* --------------------------------------------- slide the owned rows of x[] down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { y[kk++] = x[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n inside JimMatMulMPI, calling MatMulMpi" "\n prbtype %d, nrows %d, ncols %d, nowned %d", *pprbtype, *pnrows, *pncols, nowned) ; fflush(bridge->msgFile) ; } MatMulMPI(&nowned, pncols, y, y, pprbtype, data) ; /* ------------------------------------------------- gather all the entries of y[] onto processor zero ------------------------------------------------- */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), y) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n mtx: %d rows x %d columns", mtx->nrow, mtx->ncol) ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n newmtx: %d rows x %d columns", newmtx->nrow, newmtx->ncol) ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { if ( mtx->nrow != neqns || mtx->ncol != ncols ) { fprintf(bridge->msgFile, "\n\n WHOA: mtx->nrows %d, mtx->ncols %d" ", neqns %d, ncols %d", mtx->nrow, mtx->ncol, neqns, ncols) ; exit(-1) ; } DVcopy(neqns*ncols, y, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ MPI_Bcast((void *) y, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n after the broadcast") ; fflush(bridge->msgFile) ; } } MPI_Barrier(bridge->comm) ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimMatMul += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(bridge->msgFile) ; #endif return ; }
/* -------------------------------------------------- purpose -- to solve a linear system (A - sigma*B) sol[] = rhs[] data -- pointer to bridge data object *pnrows -- # of rows in x[] and y[] *pncols -- # of columns in x[] and y[] rhs[] -- vector that holds right hand sides NOTE: the rhs[] vector is global, not a portion sol[] -- vector to hold solutions NOTE: the sol[] vector is global, not a portion note: rhs[] and sol[] can be the same array. on return, *perror holds an error code. created -- 98aug28, cca & jcp -------------------------------------------------- */ void JimSolveMPI ( int *pnrows, int *pncols, double rhs[], double sol[], void *data, int *perror ) { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jj, jcol, kk, myid, ncols = *pncols, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; #if MYDEBUG > 0 double t1, t2 ; count_JimSolve++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(bridge->msgFile) ; #endif MPI_Barrier(bridge->comm) ; /* --------------------------------------------- slide the owned rows of rhs down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { sol[kk++] = rhs[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling SolveMPI()") ; fflush(bridge->msgFile) ; } SolveMPI(&nowned, pncols, sol, sol, data, perror) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from SolveMPI()") ; fflush(bridge->msgFile) ; } /* ------------------------------------------ gather all the entries onto processor zero ------------------------------------------ */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), sol) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling DenseMtx_split()()") ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from DenseMtx_split()()") ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { DVcopy(neqns*ncols, sol, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Bcast((void *) sol, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Barrier(bridge->comm) ; /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimSolve += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(bridge->msgFile) ; #endif return ; }
/*--------------------------------------------------------------------*/ static void InpMtx_MT_mmm ( int flag, InpMtx *A, DenseMtx *Y, double alpha[], DenseMtx *X, int nthread, int msglvl, FILE *msgFile ) { double t1, t2 ; int myid, nent, rc ; MTmvmObj *MTmvmObjs, *obj ; /* ------------------------------- set up the nthread data objects ------------------------------- */ MARKTIME(t1) ; MTmvmObjs = setup(A, Y, alpha, X, nthread) ; MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n %% CPU %8.3f : setup time", t2 - t1) ; } #if THREAD_TYPE == TT_POSIX { pthread_t *tids ; pthread_attr_t attr ; void *status ; /* ##### NOTE: for SGI machines, this command must be present ##### for the thread scheduling to be efficient. ##### this is NOT a POSIX call, but SGI needs it anyway pthread_setconcurrency(nthread) ; */ pthread_attr_init(&attr) ; /* pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ; */ pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS) ; ALLOCATE(tids, pthread_t, nthread) ; MARKTIME(t1) ; for ( myid = 0, obj = MTmvmObjs ; myid < nthread ; myid++, obj++ ) { switch ( flag ) { case NONSYM : rc = pthread_create(&tids[myid], &attr, worker_nonsym_mmm, obj) ; break ; case SYM : rc = pthread_create(&tids[myid], &attr, worker_sym_mmm, obj) ; break ; case HERM : rc = pthread_create(&tids[myid], &attr, worker_herm_mmm, obj) ; break ; case NONSYM_T : rc = pthread_create(&tids[myid], &attr, worker_nonsym_mmm_T, obj); break ; case NONSYM_H : rc = pthread_create(&tids[myid], &attr, worker_nonsym_mmm_H, obj); break ; } if ( rc != 0 ) { fprintf(stderr, "\n fatal error, myid = %d, rc = %d from pthread_create", myid, rc) ; exit(-1) ; } else if ( msglvl > 2 ) { fprintf(stderr, "\n %% thread %d created", myid) ; } } MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n %% CPU %8.3f : thread creation time", t2 - t1) ; } MARKTIME(t1) ; for ( myid = 0 ; myid < nthread ; myid++ ) { pthread_join(tids[myid], &status) ; } MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n %% CPU %8.3f : thread join time", t2 - t1) ; } FREE(tids) ; pthread_attr_destroy(&attr) ; } #endif /* ------------------------------------- accumulate the rhs hand side matrices ------------------------------------- */ MARKTIME(t1) ; nent = Y->nrow * Y->ncol ; for ( myid = 1, obj = MTmvmObjs + 1 ; myid < nthread ; myid++, obj++ ) { if ( INPMTX_IS_REAL_ENTRIES(A) ) { DVadd(nent, DenseMtx_entries(Y), DenseMtx_entries(obj->Y)) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { DVadd(2*nent, DenseMtx_entries(Y), DenseMtx_entries(obj->Y)) ; } } MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n %% CPU %8.3f : time to accumulate rhs", t2 - t1) ; } /* --------------------------- release the data structures --------------------------- */ MARKTIME(t1) ; for ( myid = 0, obj = MTmvmObjs ; myid < nthread ; myid++, obj++ ) { InpMtx_free(obj->A) ; if ( myid > 0 ) { DenseMtx_free(obj->Y) ; } } FREE(MTmvmObjs) ; MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n %% CPU %8.3f : time to release and free data", t2 - t1) ; } return ; }