/* ------------------------------------------------- sort the columns of the matrix in ascending order of the colids[] vector. on return, colids is in asending order. return value is the number of column swaps made. created -- 98apr15, cca ------------------------------------------------- */ int A2_sortColumnsUp ( A2 *mtx, int ncol, int colids[] ) { int ii, mincol, mincolid, nswap, target ; /* --------------- check the input --------------- */ if ( mtx == NULL || mtx->n2 < ncol || ncol < 0 || colids == NULL ) { fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)" "\n bad input\n", mtx, ncol, colids) ; if ( mtx != NULL ) { A2_writeStats(mtx, stderr) ; } exit(-1) ; } if ( ! (A2_IS_REAL(mtx) || A2_IS_COMPLEX(mtx)) ) { fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)" "\n bad type %d, must be SPOOLES_REAL or SPOOLES_COMPLEX\n", mtx, ncol, colids, mtx->type) ; exit(-1) ; } nswap = 0 ; if ( mtx->inc2 == 1 ) { double *dvtmp ; int irow, nrow ; int *ivtmp ; /* --------------------------------------------------- matrix is stored by rows, so permute each row --------------------------------------------------- */ ivtmp = IVinit(ncol, -1) ; if ( A2_IS_REAL(mtx) ) { dvtmp = DVinit(ncol, 0.0) ; } else if ( A2_IS_COMPLEX(mtx) ) { dvtmp = DVinit(2*ncol, 0.0) ; } IVramp(ncol, ivtmp, 0, 1) ; IV2qsortUp(ncol, colids, ivtmp) ; nrow = mtx->n1 ; for ( irow = 0 ; irow < nrow ; irow++ ) { if ( A2_IS_REAL(mtx) ) { DVcopy(ncol, dvtmp, A2_row(mtx, irow)) ; DVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ; } else if ( A2_IS_COMPLEX(mtx) ) { ZVcopy(ncol, dvtmp, A2_row(mtx, irow)) ; ZVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ; } } IVfree(ivtmp) ; DVfree(dvtmp) ; } else { /* ---------------------------------------- use a simple insertion sort to swap cols ---------------------------------------- */ for ( target = 0 ; target < ncol ; target++ ) { mincol = target ; mincolid = colids[target] ; for ( ii = target + 1 ; ii < ncol ; ii++ ) { if ( mincolid > colids[ii] ) { mincol = ii ; mincolid = colids[ii] ; } } if ( mincol != target ) { colids[mincol] = colids[target] ; colids[target] = mincolid ; A2_swapColumns(mtx, target, mincol) ; nswap++ ; } } } return(nswap) ; }
/* --------------------------------- compute A := A - H0 * beta0 * W0 created -- 98may30, cca --------------------------------- */ static double updateA1 ( A2 *mtxA, double H0[], double beta0, double W0[], int msglvl, FILE *msgFile ) { double nops ; int inc1, inc2, ncolA, nrowA ; if ( msglvl > 5 ) { fprintf(msgFile, "\n %% inside updateA1, nrow %d, ncol %d", mtxA->n1, mtxA->n2) ; } nrowA = mtxA->n1 ; ncolA = mtxA->n2 ; inc1 = mtxA->inc1 ; inc2 = mtxA->inc2 ; nops = 0.0 ; if ( A2_IS_REAL(mtxA) ) { int irow, jcol ; if ( inc1 == 1 ) { double alpha[3] ; double *colA0, *colA1, *colA2 ; /* ----------------------------------------- A is column major compute A(:,jcol) -= beta * W0(jcol) * H0 ----------------------------------------- */ for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; colA2 = A2_column(mtxA, jcol+2) ; alpha[0] = -beta0 * W0[jcol] ; alpha[1] = -beta0 * W0[jcol+1] ; alpha[2] = -beta0 * W0[jcol+2] ; DVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ; nops += 6*nrowA ; } if ( jcol == ncolA - 2 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; alpha[0] = -beta0 * W0[jcol] ; alpha[1] = -beta0 * W0[jcol+1] ; DVaxpy21(nrowA, colA0, colA1, alpha, H0) ; nops += 4*nrowA ; } else if ( jcol == ncolA - 1 ) { colA0 = A2_column(mtxA, jcol) ; alpha[0] = -beta0 * W0[jcol] ; DVaxpy11(nrowA, colA0, alpha, H0) ; nops += 2*nrowA ; } } else { double alpha[3] ; double *rowA0, *rowA1, *rowA2 ; /* ----------------------------------------- A is row major compute A(irow,:) -= H0[irow]*beta0*W0(:) ----------------------------------------- */ for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; rowA2 = A2_row(mtxA, irow+2) ; alpha[0] = -beta0 * H0[irow] ; alpha[1] = -beta0 * H0[irow+1] ; alpha[2] = -beta0 * H0[irow+2] ; DVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ; nops += 6*ncolA + 3 ; } if ( irow == nrowA - 2 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; alpha[0] = -beta0 * H0[irow] ; alpha[1] = -beta0 * H0[irow+1] ; DVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ; nops += 4*ncolA + 2 ; } else if ( irow == nrowA - 1 ) { rowA0 = A2_row(mtxA, irow) ; alpha[0] = -beta0 * H0[irow] ; DVaxpy11(ncolA, rowA0, alpha, W0) ; nops += 2*ncolA + 1 ; } } } else if ( A2_IS_COMPLEX(mtxA) ) { int irow, jcol ; if ( inc1 == 1 ) { double alpha[6] ; double *colA0, *colA1, *colA2 ; /* ----------------------------------------- A is column major compute A(:,jcol) -= beta * W0(jcol) * H0 ----------------------------------------- */ for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; colA2 = A2_column(mtxA, jcol+2) ; alpha[0] = -beta0 * W0[2*jcol] ; alpha[1] = -beta0 * W0[2*jcol+1] ; alpha[2] = -beta0 * W0[2*(jcol+1)] ; alpha[3] = -beta0 * W0[2*(jcol+1)+1] ; alpha[4] = -beta0 * W0[2*(jcol+2)] ; alpha[5] = -beta0 * W0[2*(jcol+2)+1] ; ZVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ; nops += 24*nrowA ; } if ( jcol == ncolA - 2 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; alpha[0] = -beta0 * W0[2*jcol] ; alpha[1] = -beta0 * W0[2*jcol+1] ; alpha[2] = -beta0 * W0[2*(jcol+1)] ; alpha[3] = -beta0 * W0[2*(jcol+1)+1] ; ZVaxpy21(nrowA, colA0, colA1, alpha, H0) ; nops += 16*nrowA ; } else if ( jcol == ncolA - 1 ) { colA0 = A2_column(mtxA, jcol) ; alpha[0] = -beta0 * W0[2*jcol] ; alpha[1] = -beta0 * W0[2*jcol+1] ; ZVaxpy11(nrowA, colA0, alpha, H0) ; nops += 8*nrowA ; } } else { double alpha[6] ; double *rowA0, *rowA1, *rowA2 ; /* ----------------------------------------- A is row major compute A(irow,:) -= H0[irow]*beta0*W0(:) ----------------------------------------- */ for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; rowA2 = A2_row(mtxA, irow+2) ; alpha[0] = -beta0 * H0[2*irow] ; alpha[1] = -beta0 * H0[2*irow+1] ; alpha[2] = -beta0 * H0[2*(irow+1)] ; alpha[3] = -beta0 * H0[2*(irow+1)+1] ; alpha[4] = -beta0 * H0[2*(irow+2)] ; alpha[5] = -beta0 * H0[2*(irow+2)+1] ; ZVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ; nops += 24*ncolA + 12 ; } if( irow == nrowA - 2 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; alpha[0] = -beta0 * H0[2*irow] ; alpha[1] = -beta0 * H0[2*irow+1] ; alpha[2] = -beta0 * H0[2*(irow+1)] ; alpha[3] = -beta0 * H0[2*(irow+1)+1] ; ZVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ; nops += 16*ncolA + 8 ; } else if( irow == nrowA - 1 ) { rowA0 = A2_row(mtxA, irow) ; alpha[0] = -beta0 * H0[2*irow] ; alpha[1] = -beta0 * H0[2*irow+1] ; ZVaxpy11(ncolA, rowA0, alpha, W0) ; nops += 8*ncolA + 4 ; } } } return(nops) ; }
/* -------------------------------------------------------------- purpose -- create and return an A2 object that contains rows of A and rows from update matrices of the children. the matrix may not be in staircase form created -- 98may25, cca -------------------------------------------------------------- */ A2 * FrontMtx_QR_assembleFront ( FrontMtx *frontmtx, int J, InpMtx *mtxA, IVL *rowsIVL, int firstnz[], int colmap[], Chv *firstchild, DV *workDV, int msglvl, FILE *msgFile ) { A2 *frontJ ; Chv *chvI ; double *rowI, *rowJ, *rowentA ; int ii, irow, irowA, irowI, jcol, jj, jrow, ncolI, ncolJ, nentA, nrowI, nrowJ, nrowFromA ; int *colindA, *colindI, *colindJ, *map, *rowids, *rowsFromA ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || mtxA == NULL || rowsIVL == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_QR_assembleFront()" "\n bad input\n") ; exit(-1) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n inside FrontMtx_QR_assembleFront(%d)", J) ; fflush(msgFile) ; } /* -------------------------------------------------- set up the map from global to local column indices -------------------------------------------------- */ FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; for ( jcol = 0 ; jcol < ncolJ ; jcol++ ) { colmap[colindJ[jcol]] = jcol ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n front %d's column indices", J) ; IVfprintf(msgFile, ncolJ, colindJ) ; fflush(msgFile) ; } /* ------------------------------------------------- compute the size of the front and map the global indices of the update matrices into local indices ------------------------------------------------- */ IVL_listAndSize(rowsIVL, J, &nrowFromA, &rowsFromA) ; nrowJ = nrowFromA ; if ( msglvl > 3 ) { fprintf(msgFile, "\n %d rows from A", nrowFromA) ; fflush(msgFile) ; } for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) { nrowJ += chvI->nD ; Chv_columnIndices(chvI, &ncolI, &colindI) ; for ( jcol = 0 ; jcol < ncolI ; jcol++ ) { colindI[jcol] = colmap[colindI[jcol]] ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n %d rows from child %d", chvI->nD, chvI->id) ; fflush(msgFile) ; } } /* ---------------------------------------------------------- get workspace for the rowids[nrowJ] and map[nrowJ] vectors ---------------------------------------------------------- */ if ( sizeof(int) == sizeof(double) ) { DV_setSize(workDV, 2*nrowJ) ; } else if ( 2*sizeof(int) == sizeof(double) ) { DV_setSize(workDV, nrowJ) ; } rowids = (int *) DV_entries(workDV) ; map = rowids + nrowJ ; IVramp(nrowJ, rowids, 0, 1) ; IVfill(nrowJ, map, -1) ; /* ----------------------------------------------------------------- get the map from incoming rows to their place in the front matrix ----------------------------------------------------------------- */ for ( irow = jrow = 0 ; irow < nrowFromA ; irow++, jrow++ ) { irowA = rowsFromA[irow] ; map[jrow] = colmap[firstnz[irowA]] ; } for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) { nrowI = chvI->nD ; Chv_columnIndices(chvI, &ncolI, &colindI) ; for ( irow = 0 ; irow < nrowI ; irow++, jrow++ ) { map[jrow] = colindI[irow] ; } } IV2qsortUp(nrowJ, map, rowids) ; for ( irow = 0 ; irow < nrowJ ; irow++ ) { map[rowids[irow]] = irow ; } /* ---------------------------- allocate the A2 front object ---------------------------- */ frontJ = A2_new() ; A2_init(frontJ, frontmtx->type, nrowJ, ncolJ, ncolJ, 1, NULL) ; A2_zero(frontJ) ; /* ------------------------------------ load the original rows of the matrix ------------------------------------ */ for ( jrow = 0 ; jrow < nrowFromA ; jrow++ ) { irowA = rowsFromA[jrow] ; rowJ = A2_row(frontJ, map[jrow]) ; if ( A2_IS_REAL(frontJ) ) { InpMtx_realVector(mtxA, irowA, &nentA, &colindA, &rowentA) ; } else if ( A2_IS_COMPLEX(frontJ) ) { InpMtx_complexVector(mtxA, irowA, &nentA, &colindA, &rowentA) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n loading row %d", irowA) ; fprintf(msgFile, "\n global indices") ; IVfprintf(msgFile, nentA, colindA) ; fflush(msgFile) ; } if ( A2_IS_REAL(frontJ) ) { for ( ii = 0 ; ii < nentA ; ii++ ) { jj = colmap[colindA[ii]] ; rowJ[jj] = rowentA[ii] ; } } else if ( A2_IS_COMPLEX(frontJ) ) { for ( ii = 0 ; ii < nentA ; ii++ ) { jj = colmap[colindA[ii]] ; rowJ[2*jj] = rowentA[2*ii] ; rowJ[2*jj+1] = rowentA[2*ii+1] ; } } } if ( msglvl > 3 ) { fprintf(msgFile, "\n after assembling rows of A") ; A2_writeForHumanEye(frontJ, msgFile) ; fflush(msgFile) ; } /* ---------------------------------- load the updates from the children ---------------------------------- */ for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) { nrowI = chvI->nD ; Chv_columnIndices(chvI, &ncolI, &colindI) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n loading child %d", chvI->id) ; fprintf(msgFile, "\n child's column indices") ; IVfprintf(msgFile, ncolI, colindI) ; Chv_writeForHumanEye(chvI, msgFile) ; fflush(msgFile) ; } rowI = Chv_entries(chvI) ; for ( irowI = 0 ; irowI < nrowI ; irowI++, jrow++ ) { rowJ = A2_row(frontJ, map[jrow]) ; if ( A2_IS_REAL(frontJ) ) { for ( ii = irowI ; ii < ncolI ; ii++ ) { jj = colindI[ii] ; rowJ[jj] = rowI[ii] ; } rowI += ncolI - irowI - 1 ; } else if ( A2_IS_COMPLEX(frontJ) ) { for ( ii = irowI ; ii < ncolI ; ii++ ) { jj = colindI[ii] ; rowJ[2*jj] = rowI[2*ii] ; rowJ[2*jj+1] = rowI[2*ii+1] ; } rowI += 2*(ncolI - irowI - 1) ; } } if ( msglvl > 3 ) { fprintf(msgFile, "\n after assembling child %d", chvI->id) ; A2_writeForHumanEye(frontJ, msgFile) ; fflush(msgFile) ; } } return(frontJ) ; }
/* ----------------------- compute W0 = v^H * A created -- 98may30, cca ----------------------- */ static double computeW1 ( A2 *mtxA, double H0[], double W0[], int msglvl, FILE *msgFile ) { double nops ; int inc1, inc2, ncolA, nrowA ; if ( msglvl > 5 ) { fprintf(msgFile, "\n %% inside computeW1, nrow %d, ncol %d", mtxA->n1, mtxA->n2) ; } nrowA = mtxA->n1 ; ncolA = mtxA->n2 ; inc1 = mtxA->inc1 ; inc2 = mtxA->inc2 ; if ( inc1 != 1 && inc2 != 1 ) { fprintf(stderr, "\n error in computeW1" "\n inc1 = %d, inc2 = %d\n", inc1, inc2) ; exit(-1) ; } nops = 0.0 ; if ( A2_IS_REAL(mtxA) ) { int irow, jcol ; if ( inc1 == 1 ) { double sums[3] ; double *colA0, *colA1, *colA2 ; /* ---------------------------- A is column major, compute W(j) = H0^T * A(*,j) ---------------------------- */ for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; colA2 = A2_column(mtxA, jcol+2) ; DVdot13(nrowA, H0, colA0, colA1, colA2, sums) ; W0[jcol] = sums[0] ; W0[jcol+1] = sums[1] ; W0[jcol+2] = sums[2] ; nops += 6*nrowA ; } if ( jcol == ncolA - 2 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; DVdot12(nrowA, H0, colA0, colA1, sums) ; W0[jcol] = sums[0] ; W0[jcol+1] = sums[1] ; nops += 4*nrowA ; } else if ( jcol == ncolA - 1 ) { colA0 = A2_column(mtxA, jcol) ; DVdot11(nrowA, H0, colA0, sums) ; W0[jcol] = sums[0] ; nops += 2*nrowA ; } } else { double alpha[3] ; double *rowA0, *rowA1, *rowA2 ; /* ------------------------------- A is row major compute W := W + H0(j) * A(j,*) ------------------------------- */ DVzero(ncolA, W0) ; for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; rowA2 = A2_row(mtxA, irow+2) ; alpha[0] = H0[irow] ; alpha[1] = H0[irow+1] ; alpha[2] = H0[irow+2] ; DVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ; nops += 6*ncolA ; } if ( irow == nrowA - 2 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; alpha[0] = H0[irow] ; alpha[1] = H0[irow+1] ; DVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ; nops += 4*ncolA ; } else if ( irow == nrowA - 1 ) { rowA0 = A2_row(mtxA, irow) ; alpha[0] = H0[irow] ; DVaxpy11(ncolA, W0, alpha, rowA0) ; nops += 2*ncolA ; } } } else if ( A2_IS_COMPLEX(mtxA) ) { int irow, jcol ; if ( inc1 == 1 ) { double sums[6] ; double *colA0, *colA1, *colA2 ; /* ---------------------------- A is column major compute W(j) = H0^H * A(*,j) ---------------------------- */ for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; colA2 = A2_column(mtxA, jcol+2) ; ZVdotC13(nrowA, H0, colA0, colA1, colA2, sums) ; W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ; W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ; W0[2*(jcol+2)] = sums[4] ; W0[2*(jcol+2)+1] = sums[5] ; nops += 24*nrowA ; } if ( jcol == ncolA - 2 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; ZVdotC12(nrowA, H0, colA0, colA1, sums) ; W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ; W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ; nops += 16*nrowA ; } else if ( jcol == ncolA - 1 ) { colA0 = A2_column(mtxA, jcol) ; ZVdotC11(nrowA, H0, colA0, sums) ; W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ; nops += 8*nrowA ; } } else { double alpha[6] ; double *rowA0, *rowA1, *rowA2 ; /* --------------------------------- A is row major compute W := W + H0(j)^H * A(j,*) --------------------------------- */ DVzero(2*ncolA, W0) ; for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; rowA2 = A2_row(mtxA, irow+2) ; alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ; alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ; alpha[4] = H0[2*(irow+2)] ; alpha[5] = -H0[2*(irow+2)+1] ; ZVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ; nops += 24*ncolA ; } if ( irow == nrowA - 2 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ; alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ; ZVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ; nops += 16*ncolA ; } else if ( irow == nrowA - 1 ) { rowA0 = A2_row(mtxA, irow) ; alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ; ZVaxpy11(ncolA, W0, alpha, rowA0) ; nops += 8*ncolA ; } } } return(nops) ; }