/* -------------------------------------------------------------------- sort the entries in ivec1[] and ivec2[] into lexicographic order, using dvec[] as a companion vector. then compress out the duplicates and sum the same values of dvec[] return value -- # of compressed entries created -- 97dec18, cca -------------------------------------------------------------------- */ int IV2DVsortUpAndCompress ( int n, int ivec1[], int ivec2[], double dvec[] ) { int first, ierr, ii, key, length, start ; /* --------------- check the input --------------- */ if ( n < 0 || ivec1 == NULL || ivec2 == NULL || dvec == NULL ) { fprintf(stderr, "\n fatal error in IV2DVsortAndCompress(%d,%p,%p,%p)" "\n bad input, n = %d, ivec1 = %p, ivec2 = %p, dvec = %p", n, ivec1, ivec2, dvec, n, ivec1, ivec2, dvec) ; spoolesFatal(); } if ( n == 0 ) { return(0) ; } /* --------------------------------------- sort ivec1[] in ascending order with ivec2[] and dvec[] as companion vectors --------------------------------------- */ IV2DVqsortUp(n, ivec1, ivec2, dvec) ; #if MYDEBUG > 0 fprintf(stdout, "\n ivec1[] after sort up") ; IVfp80(stdout, n, ivec1, 80, &ierr) ; fprintf(stdout, "\n ivec2[] after sort up") ; IVfp80(stdout, n, ivec2, 80, &ierr) ; fprintf(stdout, "\n dvec[] after sort up") ; DVfprintf(stdout, n, dvec) ; #endif first = start = 0 ; key = ivec1[0] ; for ( ii = 1 ; ii < n ; ii++ ) { if ( key != ivec1[ii] ) { length = IVDVsortUpAndCompress(ii - start, ivec2 + start, dvec + start) ; IVfill(length, ivec1 + first, key) ; IVcopy(length, ivec2 + first, ivec2 + start) ; DVcopy(length, dvec + first, dvec + start) ; start = ii ; first += length ; key = ivec1[ii] ; } } length = IVDVsortUpAndCompress(n - start, ivec2 + start, dvec + start) ; IVfill(length, ivec1 + first, key) ; IVcopy(length, ivec2 + first, ivec2 + start) ; DVcopy(length, dvec + first, dvec + start) ; first += length ; return(first) ; }
/* -------------------------------------------------------------------- sort the entries in ivec1[] and ivec2[] into lexicographic order, using zvec[] as a companion vector. then compress out the duplicates and sum the same values of zvec[] return value -- # of compressed entries created -- 98jan28, cca -------------------------------------------------------------------- */ int IV2ZVsortUpAndCompress ( int n, int ivec1[], int ivec2[], double zvec[] ) { int first, ierr, ii, key, length, start ; /* --------------- check the input --------------- */ if ( n < 0 || ivec1 == NULL || ivec2 == NULL || zvec == NULL ) { fprintf(stderr, "\n fatal error in IV2ZVsortAndCompress(%d,%p,%p,%p)" "\n bad input, n = %d, ivec1 = %p, ivec2 = %p, zvec = %p", n, ivec1, ivec2, zvec, n, ivec1, ivec2, zvec) ; spoolesFatal(); } if ( n == 0 ) { return(0) ; } /* --------------------------------------- sort ivec1[] in ascending order with ivec2[] and zvec[] as companion vectors --------------------------------------- */ IV2ZVqsortUp(n, ivec1, ivec2, zvec) ; #if MYDEBUG > 0 fprintf(stdout, "\n\n after IV2ZVqsortUp") ; for ( ii = 0 ; ii < n ; ii++ ) { fprintf(stdout, "\n < %12d, %12d, %12.4e, %12.4e >", ivec1[ii], ivec2[ii], zvec[2*ii], zvec[2*ii+1]) ; } #endif first = start = 0 ; key = ivec1[0] ; for ( ii = 1 ; ii < n ; ii++ ) { if ( key != ivec1[ii] ) { length = IVZVsortUpAndCompress(ii - start, ivec2 + start, zvec + 2*start) ; IVfill(length, ivec1 + first, key) ; IVcopy(length, ivec2 + first, ivec2 + start) ; DVcopy(2*length, zvec + 2*first, zvec + 2*start) ; start = ii ; first += length ; key = ivec1[ii] ; } } length = IVZVsortUpAndCompress(n - start, ivec2 + start, zvec + 2*start) ; IVfill(length, ivec1 + first, key) ; IVcopy(length, ivec2 + first, ivec2 + start) ; DVcopy(2*length, zvec + 2*first, zvec + 2*start) ; first += length ; return(first) ; }
/* -------------------------------------------------------------------- inputComplex a number of (row,column, entry) triples into the matrix created -- 98jan28, cca -------------------------------------------------------------------- */ static void inputTriples ( InpMtx *inpmtx, int ntriples, int rowids[], int colids[], double entries[] ) { int nent ; int *ivec1, *ivec2 ; prepareToAddNewEntries(inpmtx, ntriples) ; nent = inpmtx->nent ; ivec1 = IV_entries(&inpmtx->ivec1IV) ; ivec2 = IV_entries(&inpmtx->ivec2IV) ; IVcopy(ntriples, ivec1 + nent, rowids) ; IVcopy(ntriples, ivec2 + nent, colids) ; IV_setSize(&inpmtx->ivec1IV, nent + ntriples) ; IV_setSize(&inpmtx->ivec2IV, nent + ntriples) ; if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) ; DVcopy(ntriples, dvec + nent, entries) ; DV_setSize(&inpmtx->dvecDV, nent + ntriples) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) ; ZVcopy(ntriples, dvec + 2*nent, entries) ; DV_setSize(&inpmtx->dvecDV, 2*(nent + ntriples)) ; } inpmtx->nent += ntriples ; inpmtx->storageMode = INPMTX_RAW_DATA ; return ; }
/* ----------------------------- input a chevron in the matrix created -- 98jan28, cca ----------------------------- */ static void inputChevron ( InpMtx *inpmtx, int chv, int chvsize, int chvind[], double chvent[] ) { int col, ii, jj, nent, offset, row ; int *ivec1, *ivec2 ; prepareToAddNewEntries(inpmtx, chvsize) ; nent = inpmtx->nent ; ivec1 = IV_entries(&inpmtx->ivec1IV) ; ivec2 = IV_entries(&inpmtx->ivec2IV) ; if ( INPMTX_IS_BY_ROWS(inpmtx) ) { for ( ii = 0, jj = nent ; ii < chvsize ; ii++, jj++ ) { if ( (offset = chvind[ii]) >= 0 ) { row = chv ; col = chv + offset ; } else { col = chv ; row = chv - offset ; } ivec1[jj] = row ; ivec2[jj] = col ; } } else if ( INPMTX_IS_BY_COLUMNS(inpmtx) ) { for ( ii = 0, jj = nent ; ii < chvsize ; ii++, jj++ ) { if ( (offset = chvind[ii]) >= 0 ) { row = chv ; col = chv + offset ; } else { col = chv ; row = chv - offset ; } ivec1[jj] = col ; ivec2[jj] = row ; } } else if ( INPMTX_IS_BY_CHEVRONS(inpmtx) ) { IVfill(chvsize, ivec1 + nent, chv) ; IVcopy(chvsize, ivec2 + nent, chvind) ; } IV_setSize(&inpmtx->ivec1IV, nent + chvsize) ; IV_setSize(&inpmtx->ivec2IV, nent + chvsize) ; if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + nent ; DVcopy(chvsize, dvec, chvent) ; DV_setSize(&inpmtx->dvecDV, nent + chvsize) ; } else if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + 2*nent ; ZVcopy(chvsize, dvec, chvent) ; DV_setSize(&inpmtx->dvecDV, 2*(nent + chvsize)) ; } inpmtx->nent += chvsize ; inpmtx->storageMode = INPMTX_RAW_DATA ; return ; }
/* --------------------------------- input a row in the matrix created -- 98jan28, cca --------------------------------- */ static void inputRow ( InpMtx *inpmtx, int row, int rowsize, int rowind[], double rowent[] ) { int col, ii, jj, nent ; int *ivec1, *ivec2 ; prepareToAddNewEntries(inpmtx, rowsize) ; nent = inpmtx->nent ; ivec1 = IV_entries(&inpmtx->ivec1IV) ; ivec2 = IV_entries(&inpmtx->ivec2IV) ; if ( INPMTX_IS_BY_ROWS(inpmtx) ) { /* row coordinates */ IVfill(rowsize, ivec1 + nent, row) ; IVcopy(rowsize, ivec2 + nent, rowind) ; } else if ( INPMTX_IS_BY_COLUMNS(inpmtx) ) { /* column coordinates */ IVfill(rowsize, ivec2 + nent, row) ; IVcopy(rowsize, ivec1 + nent, rowind) ; } else if ( INPMTX_IS_BY_CHEVRONS(inpmtx) ) { /* chevron coordinates */ for ( ii = 0, jj = nent ; ii < rowsize ; ii++, jj++ ) { col = rowind[ii] ; ivec1[ii] = (row <= col) ? row : col ; ivec2[ii] = col - row ; } } IV_setSize(&inpmtx->ivec1IV, nent + rowsize) ; IV_setSize(&inpmtx->ivec2IV, nent + rowsize) ; /* ----------------- input the entries ----------------- */ if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) ; DVcopy(rowsize, dvec + nent, rowent) ; DV_setSize(&inpmtx->dvecDV, nent + rowsize) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) ; ZVcopy(rowsize, dvec + 2*nent, rowent) ; DV_setSize(&inpmtx->dvecDV, 2*(nent + rowsize)) ; } inpmtx->storageMode = INPMTX_RAW_DATA ; inpmtx->nent += rowsize ; return ; }
/* ------------------------------------ input a complex column in the matrix created -- 98jan28, cca ------------------------------------ */ static void inputColumn ( InpMtx *inpmtx, int col, int colsize, int colind[], double colent[] ) { int ii, jj, nent, row ; int *ivec1, *ivec2 ; prepareToAddNewEntries(inpmtx, colsize) ; nent = inpmtx->nent ; ivec1 = IV_entries(&inpmtx->ivec1IV) ; ivec2 = IV_entries(&inpmtx->ivec2IV) ; if ( INPMTX_IS_BY_ROWS(inpmtx) ) { IVcopy(colsize, ivec1 + nent, colind) ; IVfill(colsize, ivec2 + nent, col) ; } else if ( INPMTX_IS_BY_COLUMNS(inpmtx) ) { IVfill(colsize, ivec1 + nent, col) ; IVcopy(colsize, ivec2 + nent, colind) ; } else if ( INPMTX_IS_BY_CHEVRONS(inpmtx) ) { for ( ii = 0, jj = nent ; ii < colsize ; ii++, jj++ ) { row = colind[jj] ; ivec1[jj] = (row <= col) ? row : col ; ivec2[jj] = col - row ; } } IV_setSize(&inpmtx->ivec1IV, nent + colsize) ; IV_setSize(&inpmtx->ivec2IV, nent + colsize) ; if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + nent ; DVcopy(colsize, dvec, colent) ; DV_setSize(&inpmtx->dvecDV, nent + colsize) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + 2*nent ; ZVcopy(colsize, dvec, colent) ; DV_setSize(&inpmtx->dvecDV, 2*(nent + colsize)) ; } inpmtx->nent = nent + colsize ; inpmtx->storageMode = INPMTX_RAW_DATA ; return ; }
/* ------------------------------ purpose -- to permute a vector y[*] := y[index[*]] created -- 95sep22, cca ------------------------------ */ void DVperm ( int size, double y[], int index[] ) { if ( size > 0 ) { if ( y == NULL || index == NULL ) { fprintf(stderr, "\n fatal error in DVperm, invalid data" "\n size = %d, y = %p, index = %p\n", size, y, index) ; exit(-1) ; } else { double *x ; int i ; x = DVinit2(size) ; DVcopy(size, x, y) ; for ( i = 0 ; i < size ; i++ ) { y[i] = x[index[i]] ; } DVfree(x) ; } } return ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------------------ generate a random matrix and test a matrix-matrix multiply method. the output is a matlab file to test correctness. created -- 98jan29, cca -------------------------------------------------------------------- */ { DenseMtx *X, *Y, *Y2 ; double alpha[2] ; double alphaImag, alphaReal, t1, t2 ; double *zvec ; Drand *drand ; int col, dataType, ii, msglvl, ncolA, nitem, nops, nrhs, nrowA, nrowX, nrowY, nthread, row, seed, storageMode, symflag, transposeflag ; int *colids, *rowids ; InpMtx *A ; FILE *msgFile ; if ( argc != 15 ) { fprintf(stdout, "\n\n %% usage : %s msglvl msgFile symflag storageMode " "\n %% nrow ncol nent nrhs seed alphaReal alphaImag nthread" "\n %% msglvl -- message level" "\n %% msgFile -- message file" "\n %% dataType -- type of matrix entries" "\n %% 1 -- real" "\n %% 2 -- complex" "\n %% symflag -- symmetry flag" "\n %% 0 -- symmetric" "\n %% 1 -- hermitian" "\n %% 2 -- nonsymmetric" "\n %% storageMode -- storage mode" "\n %% 1 -- by rows" "\n %% 2 -- by columns" "\n %% 3 -- by chevrons, (requires nrow = ncol)" "\n %% transpose -- transpose flag" "\n %% 0 -- Y := Y + alpha * A * X" "\n %% 1 -- Y := Y + alpha * A^H * X, nonsymmetric only" "\n %% 2 -- Y := Y + alpha * A^T * X, nonsymmetric only" "\n %% nrowA -- number of rows in A" "\n %% ncolA -- number of columns in A" "\n %% nitem -- number of items" "\n %% nrhs -- number of right hand sides" "\n %% seed -- random number seed" "\n %% alphaReal -- y := y + alpha*A*x" "\n %% alphaImag -- y := y + alpha*A*x" "\n %% nthread -- # of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } dataType = atoi(argv[3]) ; symflag = atoi(argv[4]) ; storageMode = atoi(argv[5]) ; transposeflag = atoi(argv[6]) ; nrowA = atoi(argv[7]) ; ncolA = atoi(argv[8]) ; nitem = atoi(argv[9]) ; nrhs = atoi(argv[10]) ; seed = atoi(argv[11]) ; alphaReal = atof(argv[12]) ; alphaImag = atof(argv[13]) ; nthread = atoi(argv[14]) ; fprintf(msgFile, "\n %% %s " "\n %% msglvl -- %d" "\n %% msgFile -- %s" "\n %% dataType -- %d" "\n %% symflag -- %d" "\n %% storageMode -- %d" "\n %% transposeflag -- %d" "\n %% nrowA -- %d" "\n %% ncolA -- %d" "\n %% nitem -- %d" "\n %% nrhs -- %d" "\n %% seed -- %d" "\n %% alphaReal -- %e" "\n %% alphaImag -- %e" "\n %% nthread -- %d" "\n", argv[0], msglvl, argv[2], dataType, symflag, storageMode, transposeflag, nrowA, ncolA, nitem, nrhs, seed, alphaReal, alphaImag, nthread) ; fflush(msgFile) ; if ( dataType != 1 && dataType != 2 ) { fprintf(stderr, "\n invalid value %d for dataType\n", dataType) ; spoolesFatal(); } if ( symflag != 0 && symflag != 1 && symflag != 2 ) { fprintf(stderr, "\n invalid value %d for symflag\n", symflag) ; spoolesFatal(); } if ( storageMode != 1 && storageMode != 2 && storageMode != 3 ) { fprintf(stderr, "\n invalid value %d for storageMode\n", storageMode) ; spoolesFatal(); } if ( transposeflag < 0 || transposeflag > 2 ) { fprintf(stderr, "\n error, transposeflag = %d, must be 0, 1 or 2", transposeflag) ; spoolesFatal(); } if ( (transposeflag == 1 && symflag != 2) || (transposeflag == 2 && symflag != 2) ) { fprintf(stderr, "\n error, transposeflag = %d, symflag = %d", transposeflag, symflag) ; spoolesFatal(); } if ( transposeflag == 1 && dataType != 2 ) { fprintf(stderr, "\n error, transposeflag = %d, dataType = %d", transposeflag, dataType) ; spoolesFatal(); } if ( symflag == 1 && dataType != 2 ) { fprintf(stderr, "\n symflag = 1 (hermitian), dataType != 2 (complex)") ; spoolesFatal(); } if ( nrowA <= 0 || ncolA <= 0 || nitem <= 0 ) { fprintf(stderr, "\n invalid value: nrow = %d, ncol = %d, nitem = %d", nrowA, ncolA, nitem) ; spoolesFatal(); } if ( symflag < 2 && nrowA != ncolA ) { fprintf(stderr, "\n invalid data: symflag = %d, nrow = %d, ncol = %d", symflag, nrowA, ncolA) ; spoolesFatal(); } alpha[0] = alphaReal ; alpha[1] = alphaImag ; /* ---------------------------- initialize the matrix object ---------------------------- */ A = InpMtx_new() ; InpMtx_init(A, storageMode, dataType, 0, 0) ; drand = Drand_new() ; /* ---------------------------------- generate a vector of nitem triples ---------------------------------- */ rowids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, nrowA) ; Drand_fillIvector(drand, nitem, rowids) ; colids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, ncolA) ; Drand_fillIvector(drand, nitem, colids) ; Drand_setUniform(drand, 0.0, 1.0) ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { zvec = DVinit(nitem, 0.0) ; Drand_fillDvector(drand, nitem, zvec) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { zvec = ZVinit(nitem, 0.0, 0.0) ; Drand_fillDvector(drand, 2*nitem, zvec) ; } /* ----------------------------------- assemble the entries entry by entry ----------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n\n A = zeros(%d,%d) ;", nrowA, ncolA) ; } if ( symflag == 1 ) { /* ---------------- hermitian matrix ---------------- */ for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] == colids[ii] ) { zvec[2*ii+1] = 0.0 ; } if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } else if ( symflag == 0 ) { /* ---------------- symmetric matrix ---------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputRealEntry(A, row, col, zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } } else { /* ------------------- nonsymmetric matrix ------------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputRealEntry(A, rowids[ii], colids[ii], zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputComplexEntry(A, rowids[ii], colids[ii], zvec[2*ii], zvec[2*ii+1]) ; } } } InpMtx_changeStorageMode(A, INPMTX_BY_VECTORS) ; DVfree(zvec) ; if ( symflag == 0 || symflag == 1 ) { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 4*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 16*A->nent*nrhs ; } } else { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 2*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 8*A->nent*nrhs ; } } if ( msglvl > 1 ) { /* ------------------------------------------- write the assembled matrix to a matlab file ------------------------------------------- */ InpMtx_writeForMatlab(A, "A", msgFile) ; if ( symflag == 0 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = A(k,j) ;" "\n end" "\n end", nrowA, ncolA) ; } else if ( symflag == 1 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = ctranspose(A(k,j)) ;" "\n end" "\n end", nrowA, ncolA) ; } } /* ------------------------------- generate dense matrices X and Y ------------------------------- */ if ( transposeflag == 0 ) { nrowX = ncolA ; nrowY = nrowA ; } else { nrowX = nrowA ; nrowY = ncolA ; } X = DenseMtx_new() ; Y = DenseMtx_new() ; Y2 = DenseMtx_new() ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_REAL, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_COMPLEX, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, 2*nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, 2*nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(2*nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, nrhs) ; DenseMtx_writeForMatlab(X, "X", msgFile) ; fprintf(msgFile, "\n Y = zeros(%d,%d) ;", nrowY, nrhs) ; DenseMtx_writeForMatlab(Y, "Y", msgFile) ; } /* -------------------------------------------- perform the matrix-matrix multiply in serial -------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_sym_mmm(A, Y, alpha, X) ; } else if ( symflag == 1 ) { InpMtx_herm_mmm(A, Y, alpha, X) ; } else if ( symflag == 2 ) { InpMtx_nonsym_mmm(A, Y, alpha, X) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_H(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_T(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f serial mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* -------------------------------------------------------- perform the matrix-matrix multiply in multithreaded mode -------------------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_MT_sym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 1 ) { InpMtx_MT_herm_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 2 ) { InpMtx_MT_nonsym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_H(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_T(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f MT mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(A) ; DenseMtx_free(X) ; DenseMtx_free(Y) ; DenseMtx_free(Y2) ; IVfree(rowids) ; IVfree(colids) ; Drand_free(drand) ; fclose(msgFile) ; return(1) ; }
/* ------------------------------------------------------------- purpose --- to compute a matrix-vector multiply y[] = C * x[] where C is the identity, A or B (depending on *pprbtype). *pnrows -- # of rows in x[] *pncols -- # of columns in x[] *pprbtype -- problem type *pprbtype = 1 --> vibration problem, matrix is A *pprbtype = 2 --> buckling problem, matrix is B *pprbtype = 3 --> matrix is identity, y[] = x[] x[] -- vector to be multiplied NOTE: the x[] vector is global, not a portion y[] -- product vector NOTE: the y[] vector is global, not a portion created -- 98aug28, cca & jcp ------------------------------------------------------------- */ void JimMatMulMPI ( int *pnrows, int *pncols, double x[], double y[], int *pprbtype, void *data ) { BridgeMPI *bridge = (BridgeMPI *) data ; int ncols, nent, nrows ; #if MYDEBUG > 0 double t1, t2 ; count_JimMatMul++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(bridge->msgFile) ; #endif nrows = *pnrows ; ncols = *pncols ; nent = nrows*ncols ; if ( *pprbtype == 3 ) { /* -------------------------- ... matrix is the identity -------------------------- */ DVcopy(nent, y, x) ; } else { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jcol, jj, kk, myid, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; /* --------------------------------------------- slide the owned rows of x[] down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { y[kk++] = x[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n inside JimMatMulMPI, calling MatMulMpi" "\n prbtype %d, nrows %d, ncols %d, nowned %d", *pprbtype, *pnrows, *pncols, nowned) ; fflush(bridge->msgFile) ; } MatMulMPI(&nowned, pncols, y, y, pprbtype, data) ; /* ------------------------------------------------- gather all the entries of y[] onto processor zero ------------------------------------------------- */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), y) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n mtx: %d rows x %d columns", mtx->nrow, mtx->ncol) ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n newmtx: %d rows x %d columns", newmtx->nrow, newmtx->ncol) ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { if ( mtx->nrow != neqns || mtx->ncol != ncols ) { fprintf(bridge->msgFile, "\n\n WHOA: mtx->nrows %d, mtx->ncols %d" ", neqns %d, ncols %d", mtx->nrow, mtx->ncol, neqns, ncols) ; exit(-1) ; } DVcopy(neqns*ncols, y, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ MPI_Bcast((void *) y, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n after the broadcast") ; fflush(bridge->msgFile) ; } } MPI_Barrier(bridge->comm) ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimMatMul += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(bridge->msgFile) ; #endif return ; }
/* ---------------------------------- set the maximum size of the vector created -- 96dec08, cca ---------------------------------- */ void DV_setMaxsize ( DV *dv, int newmaxsize ) { /* --------------- check the input --------------- */ if ( dv == NULL || newmaxsize < 0 ) { fprintf(stderr, "\n fatal error in DV_setMaxsize(%p,%d)" "\n bad input\n", dv, newmaxsize) ; spoolesFatal(); } if ( dv->maxsize > 0 && dv->owned == 0 ) { fprintf(stderr, "\n fatal error in DV_setMaxsize(%p,%d)" "\n dv->maxsize = %d, dv->owned = %d\n", dv, newmaxsize, dv->maxsize, dv->owned) ; spoolesFatal(); } if ( dv->maxsize != newmaxsize ) { /* ----------------------------------- allocate new storage for the vector ----------------------------------- */ double *vec ; /* vec = DVinit(newmaxsize, 0.0) ; */ vec = DVinit2(newmaxsize) ; if ( dv->size > 0 ) { /* --------------------------------- copy old entries into new entries --------------------------------- */ if ( dv->vec == NULL ) { fprintf(stderr, "\n fatal error in DV_setMaxsize(%p,%d)" "\n dv->size = %d, dv->vec is NULL\n", dv, newmaxsize, dv->size) ; spoolesFatal(); } if ( dv->size <= newmaxsize ) { DVcopy(dv->size, vec, dv->vec) ; } else { /* ----------------------- note, data is truncated ----------------------- */ DVcopy(newmaxsize, vec, dv->vec) ; dv->size = newmaxsize ; } } if ( dv->vec != NULL ) { /* ---------------- free old entries ---------------- */ DVfree(dv->vec) ; } /* ---------- set fields ---------- */ dv->maxsize = newmaxsize ; dv->owned = 1 ; dv->vec = vec ; } return ; }
/* ------------------------------------------------- sort the columns of the matrix in ascending order of the colids[] vector. on return, colids is in asending order. return value is the number of column swaps made. created -- 98apr15, cca ------------------------------------------------- */ int A2_sortColumnsUp ( A2 *mtx, int ncol, int colids[] ) { int ii, mincol, mincolid, nswap, target ; /* --------------- check the input --------------- */ if ( mtx == NULL || mtx->n2 < ncol || ncol < 0 || colids == NULL ) { fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)" "\n bad input\n", mtx, ncol, colids) ; if ( mtx != NULL ) { A2_writeStats(mtx, stderr) ; } exit(-1) ; } if ( ! (A2_IS_REAL(mtx) || A2_IS_COMPLEX(mtx)) ) { fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)" "\n bad type %d, must be SPOOLES_REAL or SPOOLES_COMPLEX\n", mtx, ncol, colids, mtx->type) ; exit(-1) ; } nswap = 0 ; if ( mtx->inc2 == 1 ) { double *dvtmp ; int irow, nrow ; int *ivtmp ; /* --------------------------------------------------- matrix is stored by rows, so permute each row --------------------------------------------------- */ ivtmp = IVinit(ncol, -1) ; if ( A2_IS_REAL(mtx) ) { dvtmp = DVinit(ncol, 0.0) ; } else if ( A2_IS_COMPLEX(mtx) ) { dvtmp = DVinit(2*ncol, 0.0) ; } IVramp(ncol, ivtmp, 0, 1) ; IV2qsortUp(ncol, colids, ivtmp) ; nrow = mtx->n1 ; for ( irow = 0 ; irow < nrow ; irow++ ) { if ( A2_IS_REAL(mtx) ) { DVcopy(ncol, dvtmp, A2_row(mtx, irow)) ; DVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ; } else if ( A2_IS_COMPLEX(mtx) ) { ZVcopy(ncol, dvtmp, A2_row(mtx, irow)) ; ZVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ; } } IVfree(ivtmp) ; DVfree(dvtmp) ; } else { /* ---------------------------------------- use a simple insertion sort to swap cols ---------------------------------------- */ for ( target = 0 ; target < ncol ; target++ ) { mincol = target ; mincolid = colids[target] ; for ( ii = target + 1 ; ii < ncol ; ii++ ) { if ( mincolid > colids[ii] ) { mincol = ii ; mincolid = colids[ii] ; } } if ( mincol != target ) { colids[mincol] = colids[target] ; colids[target] = mincolid ; A2_swapColumns(mtx, target, mincol) ; nswap++ ; } } } return(nswap) ; }
/* ---------------------------------------------------------------- purpose -- for each U_{J,bnd{J}} matrix, remove from hash table, split into their U_{J,K} submatrices and insert into the hash table. created -- 98may04, cca ---------------------------------------------------------------- */ void FrontMtx_splitUpperMatrices ( FrontMtx *frontmtx, int msglvl, FILE *msgFile ) { SubMtx *mtxUJ, *mtxUJJ, *mtxUJK ; SubMtxManager *manager ; double *entUJ, *entUJK ; int count, first, ii, inc1, inc2, jcol, jj, J, K, nbytes, ncolJ, ncolUJ, ncolUJK, nentUJ, nentUJK, neqns, nfront, nJ, nrowUJ, nrowUJK, offset, v ; int *colindJ, *colindUJ, *colindUJK, *colmap, *indicesUJ, *indicesUJK, *locmap, *rowindUJ, *rowindUJK, *sizesUJ, *sizesUJK ; I2Ohash *upperhash ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_splitUpperMatrices(%p,%d,%p)" "\n bad input\n", frontmtx, msglvl, msgFile) ; spoolesFatal(); } nfront = FrontMtx_nfront(frontmtx) ; neqns = FrontMtx_neqns(frontmtx) ; upperhash = frontmtx->upperhash ; manager = frontmtx->manager ; /* ----------------------------------- construct the column and local maps ----------------------------------- */ colmap = IVinit(neqns, -1) ; locmap = IVinit(neqns, -1) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; if ( ncolJ > 0 && colindJ != NULL ) { for ( ii = 0 ; ii < nJ ; ii++ ) { v = colindJ[ii] ; colmap[v] = J ; locmap[v] = ii ; } } } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n colmap[]") ; IVfprintf(msgFile, neqns, colmap) ; fprintf(msgFile, "\n\n locmap[]") ; IVfprintf(msgFile, neqns, locmap) ; fflush(msgFile) ; } /* --------------------------------------------- move the U_{J,J} matrices into the hash table --------------------------------------------- */ for ( J = 0 ; J < nfront ; J++ ) { if ( (mtxUJJ = FrontMtx_upperMtx(frontmtx, J, J)) != NULL ) { I2Ohash_insert(frontmtx->upperhash, J, J, mtxUJJ) ; } } /* ------------------------------------------------------------ now split the U_{J,bnd{J}} matrices into U_{J,K} matrices. note: columns of U_{J,bnd{J}} are assumed to be in ascending order with respect to the column ordering of the matrix. ------------------------------------------------------------ */ for ( J = 0 ; J < nfront ; J++ ) { mtxUJ = FrontMtx_upperMtx(frontmtx, J, nfront) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ### J = %d, mtxUJ = %p", J, mtxUJ) ; fflush(msgFile) ; } if ( mtxUJ != NULL ) { if ( msglvl > 2 ) { SubMtx_writeForHumanEye(mtxUJ, msgFile) ; fflush(msgFile) ; } SubMtx_columnIndices(mtxUJ, &ncolUJ, &colindUJ) ; SubMtx_rowIndices(mtxUJ, &nrowUJ, &rowindUJ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n column indices for J") ; IVfprintf(msgFile, ncolUJ, colindUJ) ; fprintf(msgFile, "\n row indices for UJ") ; IVfprintf(msgFile, nrowUJ, rowindUJ) ; fflush(msgFile) ; } if ( (K = colmap[colindUJ[0]]) == colmap[colindUJ[ncolUJ-1]] ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n front %d supports only %d", J, K) ; fflush(msgFile) ; } /* ------------------------------------------------- U_{J,bnd{J}} is one submatrix, bnd{J} \subseteq K set row and column indices and change column id ------------------------------------------------- */ IVramp(nrowUJ, rowindUJ, 0, 1) ; for ( ii = 0 ; ii < ncolUJ ; ii++ ) { colindUJ[ii] = locmap[colindUJ[ii]] ; } SubMtx_setFields(mtxUJ, mtxUJ->type, mtxUJ->mode, J, K, mtxUJ->nrow, mtxUJ->ncol, mtxUJ->nent) ; /* mtxUJ->colid = K ; */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting U(%d,%d) ", J, K) ; SubMtx_writeForHumanEye(mtxUJ, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(upperhash, J, K, (void *) mtxUJ) ; } else { /* ----------------------------------- split U_{J,bnd{J}} into submatrices ----------------------------------- */ nJ = FrontMtx_frontSize(frontmtx, J) ; if ( SUBMTX_IS_DENSE_COLUMNS(mtxUJ) ) { SubMtx_denseInfo(mtxUJ, &nrowUJ, &ncolUJ, &inc1, &inc2, &entUJ) ; } else if ( SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { SubMtx_sparseColumnsInfo(mtxUJ, &ncolUJ, &nentUJ, &sizesUJ, &indicesUJ, &entUJ) ; offset = 0 ; count = sizesUJ[0] ; } first = 0 ; K = colmap[colindUJ[0]] ; for ( jcol = 1 ; jcol <= ncolUJ ; jcol++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n jcol = %d", jcol) ; if ( jcol < ncolUJ ) { fprintf(msgFile, ", colmap[%d] = %d", colindUJ[jcol], colmap[colindUJ[jcol]]); } fflush(msgFile) ; } if ( jcol == ncolUJ || K != colmap[colindUJ[jcol]] ) { ncolUJK = jcol - first ; if ( SUBMTX_IS_DENSE_COLUMNS(mtxUJ) ) { nentUJK = nJ*ncolUJK ; } else if ( SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { if ( count == 0 ) { goto no_entries ; } nentUJK = count ; } nbytes = SubMtx_nbytesNeeded(mtxUJ->type, mtxUJ->mode, nJ, ncolUJK, nentUJK) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n ncolUJK %d, nentUJK %d, nbytes %d", ncolUJK, nentUJK, nbytes) ; fflush(msgFile) ; } mtxUJK = SubMtxManager_newObjectOfSizeNbytes(manager, nbytes) ; SubMtx_init(mtxUJK, mtxUJ->type, mtxUJ->mode, J, K, nJ, ncolUJK, nentUJK) ; if ( SUBMTX_IS_DENSE_COLUMNS(mtxUJ) ) { SubMtx_denseInfo(mtxUJK, &nrowUJK, &ncolUJK, &inc1, &inc2, &entUJK) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentUJK, entUJK, entUJ + first*nJ) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentUJK, entUJK, entUJ + 2*first*nJ) ; } } else if ( SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { SubMtx_sparseColumnsInfo(mtxUJK, &ncolUJK, &nentUJK, &sizesUJK, &indicesUJK, &entUJK) ; IVcopy(ncolUJK, sizesUJK, sizesUJ + first) ; IVcopy(nentUJK, indicesUJK, indicesUJ + offset) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentUJK, entUJK, entUJ + offset) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentUJK, entUJK, entUJ + 2*offset) ; } count = 0 ; offset += nentUJK ; } /* ------------------------------------- initialize the row and column indices ------------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n setting row and column indices"); fflush(msgFile) ; } SubMtx_rowIndices(mtxUJK, &nrowUJK, &rowindUJK) ; IVramp(nJ, rowindUJK, 0, 1) ; SubMtx_columnIndices(mtxUJK, &ncolUJK, &colindUJK) ; for ( ii = 0, jj = first ; ii < ncolUJK ; ii++, jj++ ) { colindUJK[ii] = locmap[colindUJ[jj]] ; } /* ---------------------------------- insert U_{J,K} into the hash table ---------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting U(%d,%d) ", J, K) ; SubMtx_writeForHumanEye(mtxUJK, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(upperhash, J, K, (void *) mtxUJK) ; /* ----------------------------------- we jump to here if there were no entries to be stored in the matrix. ----------------------------------- */ no_entries : /* ---------------------------------------------------- reset first and K to new first location and front id ---------------------------------------------------- */ first = jcol ; if ( jcol < ncolUJ ) { K = colmap[colindUJ[jcol]] ; } } if ( jcol < ncolUJ && SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { count += sizesUJ[jcol] ; } } /* -------------------------------------------- give U_{J,bnd{J}} back to the matrix manager -------------------------------------------- */ SubMtxManager_releaseObject(manager, mtxUJ) ; } } } /* ------------------------ free the working storage ------------------------ */ IVfree(colmap) ; IVfree(locmap) ; return ; }
/* ---------------------------------- set the maximum size of the vector created -- 98jan22, cca ---------------------------------- */ void ZV_setMaxsize ( ZV *zv, int newmaxsize ) { /* --------------- check the input --------------- */ if ( zv == NULL || newmaxsize < 0 ) { fprintf(stderr, "\n fatal error in ZV_setMaxsize(%p,%d)" "\n bad input\n", zv, newmaxsize) ; exit(-1) ; } if ( zv->maxsize > 0 && zv->owned == 0 ) { fprintf(stderr, "\n fatal error in ZV_setMaxsize(%p,%d)" "\n zv->maxsize = %d, zv->owned = %d\n", zv, newmaxsize, zv->maxsize, zv->owned) ; exit(-1) ; } if ( zv->maxsize != newmaxsize ) { /* ----------------------------------- allocate new storage for the vector ----------------------------------- */ double *vec = DVinit(2*newmaxsize, 0.0) ; if ( zv->size > 0 ) { /* --------------------------------- copy old entries into new entries --------------------------------- */ if ( zv->vec == NULL ) { fprintf(stderr, "\n fatal error in ZV_setMaxsize(%p,%d)" "\n zv->size = %d, zv->vec is NULL\n", zv, newmaxsize, zv->size) ; exit(-1) ; } if ( zv->size <= newmaxsize ) { DVcopy(2*zv->size, vec, zv->vec) ; } else { /* ----------------------- note, data is truncated ----------------------- */ DVcopy(2*newmaxsize, vec, zv->vec) ; zv->size = newmaxsize ; } } if ( zv->vec != NULL ) { /* ---------------- free old entries ---------------- */ DVfree(zv->vec) ; } /* ---------- set fields ---------- */ zv->maxsize = newmaxsize ; zv->owned = 1 ; zv->vec = vec ; } return ; }
/* -------------------------------------------------- purpose -- to solve a linear system (A - sigma*B) sol[] = rhs[] data -- pointer to bridge data object *pnrows -- # of rows in x[] and y[] *pncols -- # of columns in x[] and y[] rhs[] -- vector that holds right hand sides NOTE: the rhs[] vector is global, not a portion sol[] -- vector to hold solutions NOTE: the sol[] vector is global, not a portion note: rhs[] and sol[] can be the same array. on return, *perror holds an error code. created -- 98aug28, cca & jcp -------------------------------------------------- */ void JimSolveMPI ( int *pnrows, int *pncols, double rhs[], double sol[], void *data, int *perror ) { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jj, jcol, kk, myid, ncols = *pncols, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; #if MYDEBUG > 0 double t1, t2 ; count_JimSolve++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(bridge->msgFile) ; #endif MPI_Barrier(bridge->comm) ; /* --------------------------------------------- slide the owned rows of rhs down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { sol[kk++] = rhs[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling SolveMPI()") ; fflush(bridge->msgFile) ; } SolveMPI(&nowned, pncols, sol, sol, data, perror) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from SolveMPI()") ; fflush(bridge->msgFile) ; } /* ------------------------------------------ gather all the entries onto processor zero ------------------------------------------ */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), sol) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling DenseMtx_split()()") ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from DenseMtx_split()()") ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { DVcopy(neqns*ncols, sol, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Bcast((void *) sol, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Barrier(bridge->comm) ; /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimSolve += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(bridge->msgFile) ; #endif return ; }
PetscErrorCode MatFactorNumeric_SeqSpooles(Mat F,Mat A,const MatFactorInfo *info) { Mat_Spooles *lu = (Mat_Spooles*)(F)->spptr; ChvManager *chvmanager ; Chv *rootchv ; IVL *adjIVL; PetscErrorCode ierr; PetscInt nz,nrow=A->rmap->n,irow,nedges,neqns=A->cmap->n,*ai,*aj,i,*diag=0,fierr; PetscScalar *av; double cputotal,facops; #if defined(PETSC_USE_COMPLEX) PetscInt nz_row,*aj_tmp; PetscScalar *av_tmp; #else PetscInt *ivec1,*ivec2,j; double *dvec; #endif PetscBool isSeqAIJ,isMPIAIJ; PetscFunctionBegin; if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */ (F)->ops->solve = MatSolve_SeqSpooles; (F)->assembled = PETSC_TRUE; /* set Spooles options */ ierr = SetSpoolesOptions(A, &lu->options);CHKERRQ(ierr); lu->mtxA = InpMtx_new(); } /* copy A to Spooles' InpMtx object */ ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isMPIAIJ);CHKERRQ(ierr); if (isSeqAIJ){ Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; ai=mat->i; aj=mat->j; av=mat->a; if (lu->options.symflag == SPOOLES_NONSYMMETRIC) { nz=mat->nz; } else { /* SPOOLES_SYMMETRIC || SPOOLES_HERMITIAN */ nz=(mat->nz + A->rmap->n)/2; diag=mat->diag; } } else { /* A is SBAIJ */ Mat_SeqSBAIJ *mat = (Mat_SeqSBAIJ*)A->data; ai=mat->i; aj=mat->j; av=mat->a; nz=mat->nz; } InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0); #if defined(PETSC_USE_COMPLEX) for (irow=0; irow<nrow; irow++) { if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !(isSeqAIJ || isMPIAIJ)){ nz_row = ai[irow+1] - ai[irow]; aj_tmp = aj + ai[irow]; av_tmp = av + ai[irow]; } else { nz_row = ai[irow+1] - diag[irow]; aj_tmp = aj + diag[irow]; av_tmp = av + diag[irow]; } for (i=0; i<nz_row; i++){ InpMtx_inputComplexEntry(lu->mtxA, irow, *aj_tmp++,PetscRealPart(*av_tmp),PetscImaginaryPart(*av_tmp)); av_tmp++; } } #else ivec1 = InpMtx_ivec1(lu->mtxA); ivec2 = InpMtx_ivec2(lu->mtxA); dvec = InpMtx_dvec(lu->mtxA); if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !isSeqAIJ){ for (irow = 0; irow < nrow; irow++){ for (i = ai[irow]; i<ai[irow+1]; i++) ivec1[i] = irow; } IVcopy(nz, ivec2, aj); DVcopy(nz, dvec, av); } else { nz = 0; for (irow = 0; irow < nrow; irow++){ for (j = diag[irow]; j<ai[irow+1]; j++) { ivec1[nz] = irow; ivec2[nz] = aj[j]; dvec[nz] = av[j]; nz++; } } } InpMtx_inputRealTriples(lu->mtxA, nz, ivec1, ivec2, dvec); #endif InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); if ( lu->options.msglvl > 0 ) { int err; printf("\n\n input matrix"); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */ /*--------------------------------------------------- find a low-fill ordering (1) create the Graph object (2) order the graph -------------------------------------------------------*/ if (lu->options.useQR){ adjIVL = InpMtx_adjForATA(lu->mtxA); } else { adjIVL = InpMtx_fullAdjacency(lu->mtxA); } nedges = IVL_tsize(adjIVL); lu->graph = Graph_new(); Graph_init2(lu->graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL); if ( lu->options.msglvl > 2 ) { int err; if (lu->options.useQR){ ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of A^T A");CHKERRQ(ierr); } else { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");CHKERRQ(ierr); } Graph_writeForHumanEye(lu->graph, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } switch (lu->options.ordering) { case 0: lu->frontETree = orderViaBestOfNDandMS(lu->graph, lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize, lu->options.seed, lu->options.msglvl, lu->options.msgFile); break; case 1: lu->frontETree = orderViaMMD(lu->graph,lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; case 2: lu->frontETree = orderViaMS(lu->graph, lu->options.maxdomainsize, lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; case 3: lu->frontETree = orderViaND(lu->graph, lu->options.maxdomainsize, lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering"); } if ( lu->options.msglvl > 0 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");CHKERRQ(ierr); ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* get the permutation, permute the front tree */ lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree); lu->oldToNew = IV_entries(lu->oldToNewIV); lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree); if (!lu->options.useQR) ETree_permuteVertices(lu->frontETree, lu->oldToNewIV); /* permute the matrix */ if (lu->options.useQR){ InpMtx_permute(lu->mtxA, NULL, lu->oldToNew); } else { InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew); if ( lu->options.symflag == SPOOLES_SYMMETRIC) { InpMtx_mapToUpperTriangle(lu->mtxA); } #if defined(PETSC_USE_COMPLEX) if ( lu->options.symflag == SPOOLES_HERMITIAN ) { InpMtx_mapToUpperTriangleH(lu->mtxA); } #endif InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS); } InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); /* get symbolic factorization */ if (lu->options.useQR){ lu->symbfacIVL = SymbFac_initFromGraph(lu->frontETree, lu->graph); IVL_overwrite(lu->symbfacIVL, lu->oldToNewIV); IVL_sortUp(lu->symbfacIVL); ETree_permuteVertices(lu->frontETree, lu->oldToNewIV); } else { lu->symbfacIVL = SymbFac_initFromInpMtx(lu->frontETree, lu->mtxA); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n old-to-new permutation vector");CHKERRQ(ierr); IV_writeForHumanEye(lu->oldToNewIV, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n new-to-old permutation vector");CHKERRQ(ierr); IV_writeForHumanEye(lu->newToOldIV, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree after permutation");CHKERRQ(ierr); ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n symbolic factorization");CHKERRQ(ierr); IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } lu->frontmtx = FrontMtx_new(); lu->mtxmanager = SubMtxManager_new(); SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0); } else { /* new num factorization using previously computed symbolic factor */ if (lu->options.pivotingflag) { /* different FrontMtx is required */ FrontMtx_free(lu->frontmtx); lu->frontmtx = FrontMtx_new(); } else { FrontMtx_clearData (lu->frontmtx); } SubMtxManager_free(lu->mtxmanager); lu->mtxmanager = SubMtxManager_new(); SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0); /* permute mtxA */ if (lu->options.useQR){ InpMtx_permute(lu->mtxA, NULL, lu->oldToNew); } else { InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew); if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { InpMtx_mapToUpperTriangle(lu->mtxA); } InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS); } InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); if ( lu->options.msglvl > 2 ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); } } /* end of if( lu->flg == DIFFERENT_NONZERO_PATTERN) */ if (lu->options.useQR){ FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag, FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, 0, NULL, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */ if ( lu->options.patchAndGoFlag == 1 ) { lu->frontmtx->patchinfo = PatchAndGoInfo_new(); PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge, lu->options.storeids, lu->options.storevalues); } else if ( lu->options.patchAndGoFlag == 2 ) { lu->frontmtx->patchinfo = PatchAndGoInfo_new(); PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge, lu->options.storeids, lu->options.storevalues); } } /* numerical factorization */ chvmanager = ChvManager_new(); ChvManager_init(chvmanager, NO_LOCK, 1); DVfill(10, lu->cpus, 0.0); if (lu->options.useQR){ facops = 0.0 ; FrontMtx_QR_factor(lu->frontmtx, lu->mtxA, chvmanager, lu->cpus, &facops, lu->options.msglvl, lu->options.msgFile); if ( lu->options.msglvl > 1 ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n facops = %9.2f", facops);CHKERRQ(ierr); } } else { IVfill(20, lu->stats, 0); rootchv = FrontMtx_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, 0.0, chvmanager, &fierr, lu->cpus,lu->stats,lu->options.msglvl,lu->options.msgFile); if (rootchv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"\n matrix found to be singular"); if (fierr >= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"\n error encountered at front %D", fierr); if(lu->options.FrontMtxInfo){ ierr = PetscPrintf(PETSC_COMM_SELF,"\n %8d pivots, %8d pivot tests, %8d delayed rows and columns\n",lu->stats[0], lu->stats[1], lu->stats[2]);CHKERRQ(ierr); cputotal = lu->cpus[8] ; if ( cputotal > 0.0 ) { ierr = PetscPrintf(PETSC_COMM_SELF, "\n cpus cpus/totaltime" "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f \n", lu->cpus[0], 100.*lu->cpus[0]/cputotal, lu->cpus[1], 100.*lu->cpus[1]/cputotal, lu->cpus[2], 100.*lu->cpus[2]/cputotal, lu->cpus[3], 100.*lu->cpus[3]/cputotal, lu->cpus[4], 100.*lu->cpus[4]/cputotal, lu->cpus[5], 100.*lu->cpus[5]/cputotal, lu->cpus[6], 100.*lu->cpus[6]/cputotal, lu->cpus[7], 100.*lu->cpus[7]/cputotal, cputotal);CHKERRQ(ierr); } } } ChvManager_free(chvmanager); if ( lu->options.msglvl > 0 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr); FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */ if ( lu->options.patchAndGoFlag == 1 ) { if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) { if (lu->options.msglvl > 0 ){ ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr); IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile); } } PatchAndGoInfo_free(lu->frontmtx->patchinfo); } else if ( lu->options.patchAndGoFlag == 2 ) { if (lu->options.msglvl > 0 ){ if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr); IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile); } if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");CHKERRQ(ierr); DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile); } } PatchAndGoInfo_free(lu->frontmtx->patchinfo); } } /* post-process the factorization */ FrontMtx_postProcess(lu->frontmtx, lu->options.msglvl, lu->options.msgFile); if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix after post-processing");CHKERRQ(ierr); FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } lu->flg = SAME_NONZERO_PATTERN; lu->CleanUpSpooles = PETSC_TRUE; PetscFunctionReturn(0); }
/* ---------------------------------------------------------------- purpose -- for each L_{bnd{J},J} matrix, remove from hash table, split into their L_{K,J} submatrices and insert into the hash table. created -- 98may04, cca ---------------------------------------------------------------- */ void FrontMtx_splitLowerMatrices ( FrontMtx *frontmtx, int msglvl, FILE *msgFile ) { SubMtx *mtxLJ, *mtxLJJ, *mtxLKJ ; SubMtxManager *manager ; double *entLJ, *entLKJ ; int count, first, ii, inc1, inc2, irow, jj, J, K, nbytes, ncolLJ, ncolLKJ, nentLJ, nentLKJ, neqns, nfront, nJ, nrowJ, nrowLJ, nrowLKJ, offset, v ; int *colindLJ, *colindLKJ, *rowmap, *indicesLJ, *indicesLKJ, *locmap, *rowindJ, *rowindLJ, *rowindLKJ, *sizesLJ, *sizesLKJ ; I2Ohash *lowerhash ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_splitLowerMatrices(%p,%d,%p)" "\n bad input\n", frontmtx, msglvl, msgFile) ; spoolesFatal(); } nfront = FrontMtx_nfront(frontmtx) ; neqns = FrontMtx_neqns(frontmtx) ; lowerhash = frontmtx->lowerhash ; manager = frontmtx->manager ; /* -------------------------------- construct the row and local maps -------------------------------- */ rowmap = IVinit(neqns, -1) ; locmap = IVinit(neqns, -1) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_rowIndices(frontmtx, J, &nrowJ, &rowindJ) ; if ( nrowJ > 0 && rowindJ != NULL ) { for ( ii = 0 ; ii < nJ ; ii++ ) { v = rowindJ[ii] ; rowmap[v] = J ; locmap[v] = ii ; } } } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rowmap[]") ; IVfprintf(msgFile, neqns, rowmap) ; fprintf(msgFile, "\n\n locmap[]") ; IVfprintf(msgFile, neqns, locmap) ; fflush(msgFile) ; } /* --------------------------------------------- move the L_{J,J} matrices into the hash table --------------------------------------------- */ for ( J = 0 ; J < nfront ; J++ ) { if ( (mtxLJJ = FrontMtx_lowerMtx(frontmtx, J, J)) != NULL ) { I2Ohash_insert(frontmtx->lowerhash, J, J, mtxLJJ) ; } } /* ------------------------------------------------------------ now split the L_{bnd{J},J} matrices into L_{K,J} matrices. note: columns of L_{bnd{J},J} are assumed to be in ascending order with respect to the column ordering of the matrix. ------------------------------------------------------------ */ for ( J = 0 ; J < nfront ; J++ ) { mtxLJ = FrontMtx_lowerMtx(frontmtx, nfront, J) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ### J = %d, mtxLJ = %p", J, mtxLJ) ; fflush(msgFile) ; } if ( mtxLJ != NULL ) { if ( msglvl > 2 ) { SubMtx_writeForHumanEye(mtxLJ, msgFile) ; fflush(msgFile) ; } SubMtx_columnIndices(mtxLJ, &ncolLJ, &colindLJ) ; SubMtx_rowIndices(mtxLJ, &nrowLJ, &rowindLJ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n column indices for J") ; IVfprintf(msgFile, ncolLJ, colindLJ) ; fprintf(msgFile, "\n row indices for LJ") ; IVfprintf(msgFile, nrowLJ, rowindLJ) ; fflush(msgFile) ; } if ( (K = rowmap[rowindLJ[0]]) == rowmap[rowindLJ[nrowLJ-1]] ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n front %d supports only %d", J, K) ; fflush(msgFile) ; } /* ------------------------------------------------- L_{bnd{J},J} is one submatrix, bnd{J} \subseteq K set row and column indices and change column id ------------------------------------------------- */ IVramp(ncolLJ, colindLJ, 0, 1) ; for ( ii = 0 ; ii < nrowLJ ; ii++ ) { rowindLJ[ii] = locmap[rowindLJ[ii]] ; } /* mtxLJ->rowid = K ; */ SubMtx_setFields(mtxLJ, mtxLJ->type, mtxLJ->mode, K, J, mtxLJ->nrow, mtxLJ->ncol, mtxLJ->nent) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting L(%d,%d) ", K, J) ; SubMtx_writeForHumanEye(mtxLJ, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(lowerhash, K, J, (void *) mtxLJ) ; } else { /* ----------------------------------- split L_{bnd{J},J} into submatrices ----------------------------------- */ nJ = FrontMtx_frontSize(frontmtx, J) ; if ( SUBMTX_IS_DENSE_ROWS(mtxLJ) ) { SubMtx_denseInfo(mtxLJ, &nrowLJ, &ncolLJ, &inc1, &inc2, &entLJ) ; } else if ( SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { SubMtx_sparseRowsInfo(mtxLJ, &nrowLJ, &nentLJ, &sizesLJ, &indicesLJ, &entLJ) ; offset = 0 ; count = sizesLJ[0] ; } first = 0 ; K = rowmap[rowindLJ[0]] ; for ( irow = 1 ; irow <= nrowLJ ; irow++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n irow = %d", irow) ; if ( irow < nrowLJ ) { fprintf(msgFile, ", rowmap[%d] = %d", rowindLJ[irow], rowmap[rowindLJ[irow]]); } fflush(msgFile) ; } if ( irow == nrowLJ || K != rowmap[rowindLJ[irow]] ) { nrowLKJ = irow - first ; if ( SUBMTX_IS_DENSE_ROWS(mtxLJ) ) { nentLKJ = nJ*nrowLKJ ; } else if ( SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { if ( count == 0 ) { goto no_entries ; } nentLKJ = count ; } nbytes = SubMtx_nbytesNeeded(mtxLJ->type, mtxLJ->mode, nrowLKJ, nJ, nentLKJ) ; mtxLKJ = SubMtxManager_newObjectOfSizeNbytes(manager, nbytes) ; SubMtx_init(mtxLKJ, mtxLJ->type, mtxLJ->mode, K, J, nrowLKJ, nJ, nentLKJ) ; if ( SUBMTX_IS_DENSE_ROWS(mtxLJ) ) { SubMtx_denseInfo(mtxLKJ, &nrowLKJ, &ncolLKJ, &inc1, &inc2, &entLKJ) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentLKJ, entLKJ, entLJ + first*nJ) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentLKJ, entLKJ, entLJ + 2*first*nJ) ; } } else if ( SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { SubMtx_sparseRowsInfo(mtxLKJ, &nrowLKJ, &nentLKJ, &sizesLKJ, &indicesLKJ, &entLKJ) ; IVcopy(nrowLKJ, sizesLKJ, sizesLJ + first) ; IVcopy(nentLKJ, indicesLKJ, indicesLJ + offset) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentLKJ, entLKJ, entLJ + offset) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentLKJ, entLKJ, entLJ + 2*offset) ; } count = 0 ; offset += nentLKJ ; } /* ------------------------------------- initialize the row and column indices ------------------------------------- */ SubMtx_rowIndices(mtxLKJ, &nrowLKJ, &rowindLKJ) ; for ( ii = 0, jj = first ; ii < nrowLKJ ; ii++, jj++ ) { rowindLKJ[ii] = locmap[rowindLJ[jj]] ; } SubMtx_columnIndices(mtxLKJ, &ncolLKJ, &colindLKJ) ; IVramp(ncolLKJ, colindLKJ, 0, 1) ; /* ---------------------------------- insert L_{K,J} into the hash table ---------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting L(%d,%d) ", K, J) ; SubMtx_writeForHumanEye(mtxLKJ, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(lowerhash, K, J, (void *) mtxLKJ) ; /* ----------------------------------- we jump to here if there were no entries to be stored in the matrix. ----------------------------------- */ no_entries : /* ---------------------------------------------------- reset first and K to new first location and front id ---------------------------------------------------- */ first = irow ; if ( irow < nrowLJ ) { K = rowmap[rowindLJ[irow]] ; } } if ( irow < nrowLJ && SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { count += sizesLJ[irow] ; } } /* -------------------------------------------- give L_{bnd{J},J} back to the matrix manager -------------------------------------------- */ SubMtxManager_releaseObject(manager, mtxLJ) ; } } } /* ------------------------ free the working storage ------------------------ */ IVfree(rowmap) ; IVfree(locmap) ; return ; }
PetscErrorCode MatSolve_SeqSpooles(Mat A,Vec b,Vec x) { Mat_Spooles *lu = (Mat_Spooles*)A->spptr; PetscScalar *array; DenseMtx *mtxY, *mtxX ; PetscErrorCode ierr; PetscInt irow,neqns=A->cmap->n,nrow=A->rmap->n,*iv; #if defined(PETSC_USE_COMPLEX) double x_real,x_imag; #else double *entX; #endif PetscFunctionBegin; mtxY = DenseMtx_new(); DenseMtx_init(mtxY, lu->options.typeflag, 0, 0, nrow, 1, 1, nrow); /* column major */ ierr = VecGetArray(b,&array);CHKERRQ(ierr); if (lu->options.useQR) { /* copy b to mtxY */ for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, irow, 0, *array++); #else DenseMtx_setComplexEntry(mtxY, irow, 0, PetscRealPart(array[irow]), PetscImaginaryPart(array[irow])); #endif } else { /* copy permuted b to mtxY */ iv = IV_entries(lu->oldToNewIV); for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++); #else DenseMtx_setComplexEntry(mtxY,*iv++,0,PetscRealPart(array[irow]),PetscImaginaryPart(array[irow])); #endif } ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); mtxX = DenseMtx_new(); DenseMtx_init(mtxX, lu->options.typeflag, 0, 0, neqns, 1, 1, neqns); if (lu->options.useQR) { FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n right hand side matrix after permutation");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution matrix in new ordering");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* permute solution into original ordering, then copy to x */ DenseMtx_permuteRows(mtxX, lu->newToOldIV); ierr = VecGetArray(x,&array);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) entX = DenseMtx_entries(mtxX); DVcopy(neqns, array, entX); #else for (irow=0; irow<nrow; irow++){ DenseMtx_complexEntry(mtxX,irow,0,&x_real,&x_imag); array[irow] = x_real+x_imag*PETSC_i; } #endif ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); /* free memory */ DenseMtx_free(mtxX); DenseMtx_free(mtxY); PetscFunctionReturn(0); }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ----------------------------- test the SubMtx_solve() method. created -- 98apr15, cca ----------------------------- */ { SubMtx *mtxA, *mtxB, *mtxX ; double idot, rdot, t1, t2 ; double *entB, *entX ; Drand *drand ; FILE *msgFile ; int inc1, inc2, mode, msglvl, ncolA, nentA, nrowA, ncolB, nrowB, ncolX, nrowX, seed, type ; if ( argc != 9 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile type mode nrowA nentA ncolB seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of matrix A" "\n 1 -- real" "\n 2 -- complex" "\n mode -- mode of matrix A" "\n 2 -- sparse stored by rows" "\n 3 -- sparse stored by columns" "\n 5 -- sparse stored by subrows" "\n 6 -- sparse stored by subcolumns" "\n 7 -- diagonal" "\n 8 -- block diagonal symmetric" "\n 9 -- block diagonal hermitian" "\n nrowA -- # of rows in matrix A" "\n nentA -- # of entries in matrix A" "\n ncolB -- # of columns in matrix B" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; spoolesFatal(); } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; mode = atoi(argv[4]) ; nrowA = atoi(argv[5]) ; nentA = atoi(argv[6]) ; ncolB = atoi(argv[7]) ; seed = atoi(argv[8]) ; fprintf(msgFile, "\n %% %s:" "\n %% msglvl = %d" "\n %% msgFile = %s" "\n %% type = %d" "\n %% mode = %d" "\n %% nrowA = %d" "\n %% nentA = %d" "\n %% ncolB = %d" "\n %% seed = %d", argv[0], msglvl, argv[2], type, mode, nrowA, nentA, ncolB, seed) ; ncolA = nrowA ; nrowB = nrowA ; nrowX = nrowA ; ncolX = ncolB ; /* ----------------------------- check for errors in the input ----------------------------- */ if ( nrowA <= 0 || nentA <= 0 || ncolB <= 0 ) { fprintf(stderr, "\n invalid input\n") ; spoolesFatal(); } switch ( type ) { case SPOOLES_REAL : switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : break ; default : fprintf(stderr, "\n invalid mode %d\n", mode) ; spoolesFatal(); } break ; case SPOOLES_COMPLEX : switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : break ; default : fprintf(stderr, "\n invalid mode %d\n", mode) ; spoolesFatal(); } break ; default : fprintf(stderr, "\n invalid type %d\n", type) ; spoolesFatal(); break ; } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setSeed(drand, seed) ; Drand_setNormal(drand, 0.0, 1.0) ; /* ------------------------------ initialize the X SubMtx object ------------------------------ */ MARKTIME(t1) ; mtxX = SubMtx_new() ; SubMtx_initRandom(mtxX, type, SUBMTX_DENSE_COLUMNS, 0, 0, nrowX, ncolX, nrowX*ncolX, ++seed) ; SubMtx_denseInfo(mtxX, &nrowX, &ncolX, &inc1, &inc2, &entX) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize X SubMtx object", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% X SubMtx object") ; fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, ncolX) ; SubMtx_writeForMatlab(mtxX, "X", msgFile) ; fflush(msgFile) ; } /* ------------------------------ initialize the B SubMtx object ------------------------------ */ MARKTIME(t1) ; mtxB = SubMtx_new() ; SubMtx_init(mtxB, type, SUBMTX_DENSE_COLUMNS, 0, 0, nrowB, ncolB, nrowB*ncolB) ; SubMtx_denseInfo(mtxB, &nrowB, &ncolB, &inc1, &inc2, &entB) ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : if ( SUBMTX_IS_REAL(mtxX) ) { DVcopy(nrowB*ncolB, entB, entX) ; } else if ( SUBMTX_IS_COMPLEX(mtxX) ) { ZVcopy(nrowB*ncolB, entB, entX) ; } break ; default : if ( SUBMTX_IS_REAL(mtxX) ) { DVzero(nrowB*ncolB, entB) ; } else if ( SUBMTX_IS_COMPLEX(mtxX) ) { DVzero(2*nrowB*ncolB, entB) ; } break ; } MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize B SubMtx object", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% B SubMtx object") ; fprintf(msgFile, "\n B = zeros(%d,%d) ;", nrowB, ncolB) ; SubMtx_writeForMatlab(mtxB, "B", msgFile) ; fflush(msgFile) ; } /* ------------------------------------- initialize the A matrix SubMtx object ------------------------------------- */ seed++ ; mtxA = SubMtx_new() ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : SubMtx_initRandomLowerTriangle(mtxA, type, mode, 0, 0, nrowA, ncolA, nentA, seed, 1) ; break ; case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : SubMtx_initRandomUpperTriangle(mtxA, type, mode, 0, 0, nrowA, ncolA, nentA, seed, 1) ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : SubMtx_initRandom(mtxA, type, mode, 0, 0, nrowA, ncolA, nentA, seed) ; break ; default : fprintf(stderr, "\n fatal error in test_solve" "\n invalid mode = %d", mode) ; spoolesFatal(); } if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% A SubMtx object") ; fprintf(msgFile, "\n A = zeros(%d,%d) ;", nrowA, ncolA) ; SubMtx_writeForMatlab(mtxA, "A", msgFile) ; fflush(msgFile) ; } /* -------------------------------------------------------- compute B = A * X (for diagonal and block diagonal) or B = (I + A) * X (for lower and upper triangular) -------------------------------------------------------- */ if ( SUBMTX_IS_REAL(mtxA) ) { DV *colDV, *rowDV ; double value, *colX, *rowA, *pBij, *pXij ; int irowA, jcolX ; colDV = DV_new() ; DV_init(colDV, nrowA, NULL) ; colX = DV_entries(colDV) ; rowDV = DV_new() ; DV_init(rowDV, nrowA, NULL) ; rowA = DV_entries(rowDV) ; for ( jcolX = 0 ; jcolX < ncolB ; jcolX++ ) { SubMtx_fillColumnDV(mtxX, jcolX, colDV) ; for ( irowA = 0 ; irowA < nrowA ; irowA++ ) { SubMtx_fillRowDV(mtxA, irowA, rowDV) ; SubMtx_locationOfRealEntry(mtxX, irowA, jcolX, &pXij) ; SubMtx_locationOfRealEntry(mtxB, irowA, jcolX, &pBij) ; value = DVdot(nrowA, rowA, colX) ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : *pBij = *pXij + value ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : *pBij = value ; break ; } } } DV_free(colDV) ; DV_free(rowDV) ; } else if ( SUBMTX_IS_COMPLEX(mtxA) ) { ZV *colZV, *rowZV ; double *colX, *rowA, *pBIij, *pBRij, *pXIij, *pXRij ; int irowA, jcolX ; colZV = ZV_new() ; ZV_init(colZV, nrowA, NULL) ; colX = ZV_entries(colZV) ; rowZV = ZV_new() ; ZV_init(rowZV, nrowA, NULL) ; rowA = ZV_entries(rowZV) ; for ( jcolX = 0 ; jcolX < ncolB ; jcolX++ ) { SubMtx_fillColumnZV(mtxX, jcolX, colZV) ; for ( irowA = 0 ; irowA < nrowA ; irowA++ ) { SubMtx_fillRowZV(mtxA, irowA, rowZV) ; SubMtx_locationOfComplexEntry(mtxX, irowA, jcolX, &pXRij, &pXIij) ; SubMtx_locationOfComplexEntry(mtxB, irowA, jcolX, &pBRij, &pBIij) ; ZVdotU(nrowA, rowA, colX, &rdot, &idot) ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : *pBRij = *pXRij + rdot ; *pBIij = *pXIij + idot ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : *pBRij = rdot ; *pBIij = idot ; break ; } } } ZV_free(colZV) ; ZV_free(rowZV) ; } /* ---------------------- print out the matrices ---------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% X SubMtx object") ; fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, ncolX) ; SubMtx_writeForMatlab(mtxX, "X", msgFile) ; fprintf(msgFile, "\n\n %% A SubMtx object") ; fprintf(msgFile, "\n A = zeros(%d,%d) ;", nrowA, ncolA) ; SubMtx_writeForMatlab(mtxA, "A", msgFile) ; fprintf(msgFile, "\n\n %% B SubMtx object") ; fprintf(msgFile, "\n B = zeros(%d,%d) ;", nrowB, ncolB) ; SubMtx_writeForMatlab(mtxB, "B", msgFile) ; fflush(msgFile) ; } /* ----------------- check with matlab ----------------- */ if ( msglvl > 1 ) { switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : fprintf(msgFile, "\n\n emtx = abs(B - X - A*X) ;" "\n\n condA = cond(eye(%d,%d) + A)" "\n\n maxabsZ = max(max(abs(emtx))) ", nrowA, nrowA) ; fflush(msgFile) ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : fprintf(msgFile, "\n\n emtx = abs(B - A*X) ;" "\n\n condA = cond(A)" "\n\n maxabsZ = max(max(abs(emtx))) ") ; fflush(msgFile) ; break ; } } /* ---------------------------------------- compute the solve DY = B or (I + A)Y = B ---------------------------------------- */ SubMtx_solve(mtxA, mtxB) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% Y SubMtx object") ; fprintf(msgFile, "\n Y = zeros(%d,%d) ;", nrowB, ncolB) ; SubMtx_writeForMatlab(mtxB, "Y", msgFile) ; fprintf(msgFile, "\n\n %% solerror = abs(Y - X) ;" "\n\n solerror = abs(Y - X) ;" "\n\n maxabserror = max(max(solerror)) ") ; fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ SubMtx_free(mtxA) ; SubMtx_free(mtxX) ; SubMtx_free(mtxB) ; Drand_free(drand) ; fprintf(msgFile, "\n") ; return(1) ; }
/* ---------------------------------------------- sort the rows of the matrix in ascending order of the rowids[] vector. on return, rowids is in asending order. return value is the number of row swaps made. created -- 98apr15, cca ---------------------------------------------- */ int A2_sortRowsUp ( A2 *mtx, int nrow, int rowids[] ) { int ii, minrow, minrowid, nswap, target ; /* --------------- check the input --------------- */ if ( mtx == NULL || mtx->n1 < nrow || nrow < 0 || rowids == NULL ) { fprintf(stderr, "\n fatal error in A2_sortRowsUp(%p,%d,%p)" "\n bad input\n", mtx, nrow, rowids) ; if ( mtx != NULL ) { A2_writeStats(mtx, stderr) ; } exit(-1) ; } if ( ! (A2_IS_REAL(mtx) || A2_IS_COMPLEX(mtx)) ) { fprintf(stderr, "\n fatal error in A2_sortRowsUp(%p,%d,%p)" "\n bad type %d, must be SPOOLES_REAL or SPOOLES_COMPLEX\n", mtx, nrow, rowids, mtx->type) ; exit(-1) ; } nswap = 0 ; if ( mtx->inc1 == 1 ) { double *dvtmp ; int jcol, ncol ; int *ivtmp ; /* --------------------------------------------------- matrix is stored by columns, so permute each column --------------------------------------------------- */ ivtmp = IVinit(nrow, -1) ; if ( A2_IS_REAL(mtx) ) { dvtmp = DVinit(nrow, 0.0) ; } else if ( A2_IS_COMPLEX(mtx) ) { dvtmp = DVinit(2*nrow, 0.0) ; } IVramp(nrow, ivtmp, 0, 1) ; IV2qsortUp(nrow, rowids, ivtmp) ; ncol = mtx->n2 ; for ( jcol = 0 ; jcol < ncol ; jcol++ ) { if ( A2_IS_REAL(mtx) ) { DVcopy(nrow, dvtmp, A2_column(mtx, jcol)) ; DVgather(nrow, A2_column(mtx, jcol), dvtmp, ivtmp) ; } else if ( A2_IS_COMPLEX(mtx) ) { ZVcopy(nrow, dvtmp, A2_column(mtx, jcol)) ; ZVgather(nrow, A2_column(mtx, jcol), dvtmp, ivtmp) ; } } IVfree(ivtmp) ; DVfree(dvtmp) ; } else { /* ---------------------------------------- use a simple insertion sort to swap rows ---------------------------------------- */ for ( target = 0 ; target < nrow ; target++ ) { minrow = target ; minrowid = rowids[target] ; for ( ii = target + 1 ; ii < nrow ; ii++ ) { if ( minrowid > rowids[ii] ) { minrow = ii ; minrowid = rowids[ii] ; } } if ( minrow != target ) { rowids[minrow] = rowids[target] ; rowids[target] = minrowid ; A2_swapRows(mtx, target, minrow) ; nswap++ ; } } } return(nswap) ; }
/* ----------------------------------------------------------- A contains the following data from the A = QR factorization A(1:ncolA,1:ncolA) = R A(j+1:nrowA,j) is v_j, the j-th householder vector, where v_j[j] = 1.0 we compute Y = Q^T X when A is real and Y = Q^H X when A is complex NOTE: A, Y and X must be column major. NOTE: Y and X can be the same object, in which case X is overwritten with Y created -- 98dec10, cca ----------------------------------------------------------- */ void A2_applyQT ( A2 *Y, A2 *A, A2 *X, DV *workDV, int msglvl, FILE *msgFile ) { double *betas ; int irowA, jcolA, jcolX, ncolA, ncolX, nrowA ; /* --------------- check the input --------------- */ if ( A == NULL ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n A is NULL\n") ; exit(-1) ; } if ( X == NULL ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n X is NULL\n") ; exit(-1) ; } if ( workDV == NULL ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n workDV is NULL\n") ; exit(-1) ; } if ( msglvl > 0 && msgFile == NULL ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n msglvl > 0 and msgFile is NULL\n") ; exit(-1) ; } nrowA = A2_nrow(A) ; ncolA = A2_ncol(A) ; ncolX = A2_ncol(X) ; if ( nrowA <= 0 ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n nrowA = %d\n", nrowA) ; exit(-1) ; } if ( ncolA <= 0 ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n ncolA = %d\n", nrowA) ; exit(-1) ; } if ( nrowA != A2_nrow(X) ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n nrowA = %d, nrowX = %d\n", nrowA, A2_nrow(X)) ; exit(-1) ; } switch ( A->type ) { case SPOOLES_REAL : case SPOOLES_COMPLEX : break ; default : fprintf(stderr, "\n fatal error in A2_applyQT()" "\n invalid type for A\n") ; exit(-1) ; } if ( A->type != X->type ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n A->type = %d, X->type = %d\n", A->type, X->type) ; exit(-1) ; } if ( A2_inc1(A) != 1 ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n A->inc1 = %d \n", A2_inc1(A)) ; exit(-1) ; } if ( A2_inc1(X) != 1 ) { fprintf(stderr, "\n fatal error in A2_applyQT()" "\n X->inc1 = %d, \n", A2_inc1(X)) ; exit(-1) ; } /* -------------------------------------------------- compute the beta values, beta_j = 2./(V_j^H * V_j) -------------------------------------------------- */ DV_setSize(workDV, ncolA) ; betas = DV_entries(workDV) ; if ( A2_IS_REAL(A) ) { int irowA, jcolA ; double sum ; double *colA ; for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) { sum = 1.0 ; colA = A2_column(A, jcolA) ; for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) { sum += colA[irowA] * colA[irowA] ; } betas[jcolA] = 2./sum ; } } else { double ival, rval, sum ; double *colA ; for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) { sum = 1.0 ; colA = A2_column(A, jcolA) ; for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) { rval = colA[2*irowA] ; ival = colA[2*irowA+1] ; sum += rval*rval + ival*ival ; } betas[jcolA] = 2./sum ; } } /* ------------------------------------------ loop over the number of columns in X and Y ------------------------------------------ */ for ( jcolX = 0 ; jcolX < ncolX ; jcolX++ ) { double *V, *colX, *colY ; int jcolV ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n %% jcolX = %d", jcolX) ; fflush(msgFile) ; } /* ------------------------------- copy X(:,jcolX) into Y(:,jcolX) ------------------------------- */ colY = A2_column(Y, jcolX) ; colX = A2_column(X, jcolX) ; if ( A2_IS_REAL(A) ) { DVcopy(nrowA, colY, colX) ; } else { DVcopy(2*nrowA, colY, colX) ; } for ( jcolV = 0 ; jcolV < ncolA ; jcolV++ ) { double beta ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %% jcolV = %d", jcolV) ; fflush(msgFile) ; } /* ------------------------------------------------------------ update colY = (I - beta_jcolV * V_jcolV * V_jcolV^T)colY = colY - beta_jcolV * V_jcolV * V_jcolV^T * colY = colY - (beta_jcolV * V_jcolV^T * Y) * V_jcolV ------------------------------------------------------------ */ V = A2_column(A, jcolV) ; beta = betas[jcolV] ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %% beta = %12.4e", beta) ; fflush(msgFile) ; } if ( A2_IS_REAL(A) ) { double fac, sum = colY[jcolV] ; int irow ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n %% V[%d] = %12.4e, X[%d] = %12.4e", irow, V[irow], irow, colY[irow]) ; fflush(msgFile) ; } sum += V[irow] * colY[irow] ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n %% sum = %12.4e", sum) ; fflush(msgFile) ; } fac = beta * sum ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %% fac = %12.4e", fac) ; fflush(msgFile) ; } colY[jcolV] -= fac ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { colY[irow] -= fac * V[irow] ; } } else { double rfac, ifac, rsum = colY[2*jcolV], isum = colY[2*jcolV+1] ; int irow ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { double Vi, Vr, Yi, Yr ; Vr = V[2*irow] ; Vi = V[2*irow+1] ; Yr = colY[2*irow] ; Yi = colY[2*irow+1] ; rsum += Vr*Yr + Vi*Yi ; isum += Vr*Yi - Vi*Yr ; } rfac = beta * rsum ; ifac = beta * isum ; colY[2*jcolV] -= rfac ; colY[2*jcolV+1] -= ifac ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { double Vi, Vr ; Vr = V[2*irow] ; Vi = V[2*irow+1] ; colY[2*irow] -= rfac*Vr - ifac*Vi ; colY[2*irow+1] -= rfac*Vi + ifac*Vr ; } } } } return ; }