示例#1
0
文件: sort.c 项目: bialk/SPOOLES
/*
   -------------------------------------------------
   sort the columns of the matrix in ascending order
   of the colids[] vector. on return, colids is
   in asending order. return value is the number
   of column swaps made.

   created -- 98apr15, cca
   -------------------------------------------------
*/
int
A2_sortColumnsUp (
   A2   *mtx,
   int   ncol,
   int   colids[]
) {
int   ii, mincol, mincolid, nswap, target ;
/*
   ---------------
   check the input
   ---------------
*/
if ( mtx == NULL || mtx->n2 < ncol || ncol < 0 || colids == NULL ) {
   fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)"
           "\n bad input\n", mtx, ncol, colids) ;
   if ( mtx != NULL ) {
      A2_writeStats(mtx, stderr) ;
   }
   exit(-1) ;
}
if ( ! (A2_IS_REAL(mtx) || A2_IS_COMPLEX(mtx)) ) {
   fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)"
           "\n bad type %d, must be SPOOLES_REAL or SPOOLES_COMPLEX\n", 
           mtx, ncol, colids, mtx->type) ;
   exit(-1) ;
}
nswap = 0 ;
if ( mtx->inc2 == 1 ) {
   double   *dvtmp ;
   int      irow, nrow ;
   int      *ivtmp ;
/*
   ---------------------------------------------------
   matrix is stored by rows, so permute each row
   ---------------------------------------------------
*/
   ivtmp = IVinit(ncol, -1) ;
   if ( A2_IS_REAL(mtx) ) {
      dvtmp = DVinit(ncol, 0.0) ;
   } else if ( A2_IS_COMPLEX(mtx) ) {
      dvtmp = DVinit(2*ncol, 0.0) ;
   }
   IVramp(ncol, ivtmp, 0, 1) ;
   IV2qsortUp(ncol, colids, ivtmp) ;
   nrow = mtx->n1 ;
   for ( irow = 0 ; irow < nrow ; irow++ ) {
      if ( A2_IS_REAL(mtx) ) {
         DVcopy(ncol, dvtmp, A2_row(mtx, irow)) ;
         DVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ;
      } else if ( A2_IS_COMPLEX(mtx) ) {
         ZVcopy(ncol, dvtmp, A2_row(mtx, irow)) ;
         ZVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ;
      }
   }
   IVfree(ivtmp) ;
   DVfree(dvtmp) ;
} else {
/*
   ----------------------------------------
   use a simple insertion sort to swap cols
   ----------------------------------------
*/
   for ( target = 0 ; target < ncol ; target++ ) {
      mincol   = target ;
      mincolid = colids[target] ;
      for ( ii = target + 1 ; ii < ncol ; ii++ ) {
         if ( mincolid > colids[ii] ) {
            mincol   = ii ;
            mincolid = colids[ii] ;
         }
      }
      if ( mincol != target ) {
         colids[mincol] = colids[target] ;
         colids[target] = mincolid ;
         A2_swapColumns(mtx, target, mincol) ;
         nswap++ ;
      }
   }
}
return(nswap) ; }
示例#2
0
/*
   ---------------------------------
   compute A := A - H0 * beta0 * W0

   created -- 98may30, cca
   ---------------------------------
*/
static double
updateA1 (
   A2       *mtxA,
   double   H0[],
   double   beta0,
   double   W0[],
   int      msglvl,
   FILE     *msgFile
) {
double   nops ;
int      inc1, inc2, ncolA, nrowA ;

if ( msglvl > 5 ) {
   fprintf(msgFile, "\n %% inside updateA1, nrow %d, ncol %d",
           mtxA->n1, mtxA->n2) ;
}

nrowA = mtxA->n1 ;
ncolA = mtxA->n2 ;
inc1  = mtxA->inc1 ;
inc2  = mtxA->inc2 ;
nops  = 0.0 ;
if ( A2_IS_REAL(mtxA) ) {
   int      irow, jcol ;

   if ( inc1 == 1 ) {
      double   alpha[3] ;
      double   *colA0, *colA1, *colA2 ;
/*
      -----------------------------------------
      A is column major
      compute A(:,jcol) -= beta * W0(jcol) * H0
      -----------------------------------------
*/
      for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
         colA0 = A2_column(mtxA, jcol) ;
         colA1 = A2_column(mtxA, jcol+1) ;
         colA2 = A2_column(mtxA, jcol+2) ;
         alpha[0] = -beta0 * W0[jcol] ;
         alpha[1] = -beta0 * W0[jcol+1] ;
         alpha[2] = -beta0 * W0[jcol+2] ;
         DVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ;
         nops += 6*nrowA ;
      }
      if ( jcol == ncolA - 2 ) {
         colA0 = A2_column(mtxA, jcol) ;
         colA1 = A2_column(mtxA, jcol+1) ;
         alpha[0] = -beta0 * W0[jcol] ;
         alpha[1] = -beta0 * W0[jcol+1] ;
         DVaxpy21(nrowA, colA0, colA1, alpha, H0) ;
         nops += 4*nrowA ;
      } else if ( jcol == ncolA - 1 ) {
         colA0 = A2_column(mtxA, jcol) ;
         alpha[0] = -beta0 * W0[jcol] ;
         DVaxpy11(nrowA, colA0, alpha, H0) ;
         nops += 2*nrowA ;
      }
   } else {
      double   alpha[3] ;
      double   *rowA0, *rowA1, *rowA2 ;
/*
      -----------------------------------------
      A is row major
      compute A(irow,:) -= H0[irow]*beta0*W0(:)
      -----------------------------------------
*/
      for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         rowA2 = A2_row(mtxA, irow+2) ;
         alpha[0] = -beta0 * H0[irow] ;
         alpha[1] = -beta0 * H0[irow+1] ;
         alpha[2] = -beta0 * H0[irow+2] ;
         DVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ;
         nops += 6*ncolA + 3 ;
      }
      if ( irow == nrowA - 2 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         alpha[0] = -beta0 * H0[irow] ;
         alpha[1] = -beta0 * H0[irow+1] ;
         DVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ;
         nops += 4*ncolA + 2 ;
      } else if ( irow == nrowA - 1 ) {
         rowA0 = A2_row(mtxA, irow) ;
         alpha[0] = -beta0 * H0[irow] ;
         DVaxpy11(ncolA, rowA0, alpha, W0) ;
         nops += 2*ncolA + 1 ;
      }
   }
} else if ( A2_IS_COMPLEX(mtxA) ) {
   int      irow, jcol ;

   if ( inc1 == 1 ) {
      double   alpha[6] ;
      double   *colA0, *colA1, *colA2 ;
/*
      -----------------------------------------
      A is column major
      compute A(:,jcol) -= beta * W0(jcol) * H0
      -----------------------------------------
*/
      for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
         colA0 = A2_column(mtxA, jcol) ;
         colA1 = A2_column(mtxA, jcol+1) ;
         colA2 = A2_column(mtxA, jcol+2) ;
         alpha[0] = -beta0 * W0[2*jcol] ;
         alpha[1] = -beta0 * W0[2*jcol+1] ;
         alpha[2] = -beta0 * W0[2*(jcol+1)] ;
         alpha[3] = -beta0 * W0[2*(jcol+1)+1] ;
         alpha[4] = -beta0 * W0[2*(jcol+2)] ;
         alpha[5] = -beta0 * W0[2*(jcol+2)+1] ;
         ZVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ;
         nops += 24*nrowA ;
      }
      if ( jcol == ncolA - 2 ) {
         colA0 = A2_column(mtxA, jcol) ;
         colA1 = A2_column(mtxA, jcol+1) ;
         alpha[0] = -beta0 * W0[2*jcol] ;
         alpha[1] = -beta0 * W0[2*jcol+1] ;
         alpha[2] = -beta0 * W0[2*(jcol+1)] ;
         alpha[3] = -beta0 * W0[2*(jcol+1)+1] ;
         ZVaxpy21(nrowA, colA0, colA1, alpha, H0) ;
         nops += 16*nrowA ;
      } else if ( jcol == ncolA - 1 ) {
         colA0 = A2_column(mtxA, jcol) ;
         alpha[0] = -beta0 * W0[2*jcol] ;
         alpha[1] = -beta0 * W0[2*jcol+1] ;
         ZVaxpy11(nrowA, colA0, alpha, H0) ;
         nops += 8*nrowA ;
      }
   } else {
      double   alpha[6] ;
      double   *rowA0, *rowA1, *rowA2 ;
/*
      -----------------------------------------
      A is row major
      compute A(irow,:) -= H0[irow]*beta0*W0(:)
      -----------------------------------------
*/
      for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         rowA2 = A2_row(mtxA, irow+2) ;
         alpha[0] = -beta0 * H0[2*irow] ;
         alpha[1] = -beta0 * H0[2*irow+1] ;
         alpha[2] = -beta0 * H0[2*(irow+1)] ;
         alpha[3] = -beta0 * H0[2*(irow+1)+1] ;
         alpha[4] = -beta0 * H0[2*(irow+2)] ;
         alpha[5] = -beta0 * H0[2*(irow+2)+1] ;
         ZVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ;
         nops += 24*ncolA + 12 ;
      }
      if( irow == nrowA - 2 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         alpha[0] = -beta0 * H0[2*irow] ;
         alpha[1] = -beta0 * H0[2*irow+1] ;
         alpha[2] = -beta0 * H0[2*(irow+1)] ;
         alpha[3] = -beta0 * H0[2*(irow+1)+1] ;
         ZVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ;
         nops += 16*ncolA + 8 ;
      } else if( irow == nrowA - 1 ) {
         rowA0 = A2_row(mtxA, irow) ;
         alpha[0] = -beta0 * H0[2*irow] ;
         alpha[1] = -beta0 * H0[2*irow+1] ;
         ZVaxpy11(ncolA, rowA0, alpha, W0) ;
         nops += 8*ncolA + 4 ;
      }
   }
}
return(nops) ; }
示例#3
0
文件: QRutil.c 项目: damiannz/spooles
/*
   --------------------------------------------------------------
   purpose -- create and return an A2 object that contains rows
              of A and rows from update matrices of the children.
              the matrix may not be in staircase form

   created -- 98may25, cca
   --------------------------------------------------------------
*/
A2 *
FrontMtx_QR_assembleFront (
   FrontMtx   *frontmtx,
   int        J,
   InpMtx     *mtxA,
   IVL        *rowsIVL,
   int        firstnz[],
   int        colmap[],
   Chv        *firstchild,
   DV         *workDV,
   int        msglvl,
   FILE       *msgFile
) {
A2       *frontJ ;
Chv      *chvI ;
double   *rowI, *rowJ, *rowentA ;
int      ii, irow, irowA, irowI, jcol, jj, jrow, ncolI, ncolJ, 
         nentA, nrowI, nrowJ, nrowFromA ;
int      *colindA, *colindI, *colindJ, *map, *rowids, *rowsFromA ;
/*
   ---------------
   check the input
   ---------------
*/
if ( frontmtx == NULL || mtxA == NULL || rowsIVL == NULL
   || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(stderr, "\n fatal error in FrontMtx_QR_assembleFront()"
           "\n bad input\n") ;
   exit(-1) ;
}
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n\n inside FrontMtx_QR_assembleFront(%d)", J) ;
   fflush(msgFile) ;
}
/*
   --------------------------------------------------
   set up the map from global to local column indices
   --------------------------------------------------
*/
FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ;
for ( jcol = 0 ; jcol < ncolJ ; jcol++ ) {
   colmap[colindJ[jcol]] = jcol ;
}
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n front %d's column indices", J) ;
   IVfprintf(msgFile, ncolJ, colindJ) ;
   fflush(msgFile) ;
}
/*
   -------------------------------------------------
   compute the size of the front and map the global 
   indices of the update matrices into local indices
   -------------------------------------------------
*/
IVL_listAndSize(rowsIVL, J, &nrowFromA, &rowsFromA) ;
nrowJ = nrowFromA ;
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n %d rows from A", nrowFromA) ;
   fflush(msgFile) ;
}
for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) {
   nrowJ += chvI->nD ;
   Chv_columnIndices(chvI, &ncolI, &colindI) ;
   for ( jcol = 0 ; jcol < ncolI ; jcol++ ) {
      colindI[jcol] = colmap[colindI[jcol]] ;
   }
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n %d rows from child %d", chvI->nD, chvI->id) ;
      fflush(msgFile) ;
   }
}
/*
   ----------------------------------------------------------
   get workspace for the rowids[nrowJ] and map[nrowJ] vectors
   ----------------------------------------------------------
*/
if ( sizeof(int) == sizeof(double) ) {
   DV_setSize(workDV, 2*nrowJ) ;
} else if ( 2*sizeof(int) == sizeof(double) ) {
   DV_setSize(workDV, nrowJ) ;
}
rowids = (int *) DV_entries(workDV) ;
map    = rowids + nrowJ ;
IVramp(nrowJ, rowids, 0, 1) ;
IVfill(nrowJ, map, -1) ;
/*
   -----------------------------------------------------------------
   get the map from incoming rows to their place in the front matrix
   -----------------------------------------------------------------
*/
for ( irow = jrow = 0 ; irow < nrowFromA ; irow++, jrow++ ) {
   irowA = rowsFromA[irow] ;
   map[jrow] = colmap[firstnz[irowA]] ;
}
for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) {
   nrowI = chvI->nD ;
   Chv_columnIndices(chvI, &ncolI, &colindI) ;
   for ( irow = 0 ; irow < nrowI ; irow++, jrow++ ) {
      map[jrow] = colindI[irow] ;
   }
}
IV2qsortUp(nrowJ, map, rowids) ;
for ( irow = 0 ; irow < nrowJ ; irow++ ) {
   map[rowids[irow]] = irow ;
}
/*
   ----------------------------
   allocate the A2 front object
   ----------------------------
*/
frontJ = A2_new() ;
A2_init(frontJ, frontmtx->type, nrowJ, ncolJ, ncolJ, 1, NULL) ;
A2_zero(frontJ) ;
/*
   ------------------------------------
   load the original rows of the matrix
   ------------------------------------
*/
for ( jrow = 0 ; jrow < nrowFromA ; jrow++ ) {
   irowA = rowsFromA[jrow] ;
   rowJ  = A2_row(frontJ, map[jrow]) ;
   if ( A2_IS_REAL(frontJ) ) {
      InpMtx_realVector(mtxA, irowA, &nentA, &colindA, &rowentA) ;
   } else if ( A2_IS_COMPLEX(frontJ) ) {
      InpMtx_complexVector(mtxA, irowA, &nentA, &colindA, &rowentA) ;
   }
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n loading row %d", irowA) ;
      fprintf(msgFile, "\n global indices") ;
      IVfprintf(msgFile, nentA, colindA) ;
      fflush(msgFile) ;
   }
   if ( A2_IS_REAL(frontJ) ) {
      for ( ii = 0 ; ii < nentA ; ii++ ) {
         jj = colmap[colindA[ii]] ;
         rowJ[jj] = rowentA[ii] ;
      }
   } else if ( A2_IS_COMPLEX(frontJ) ) {
      for ( ii = 0 ; ii < nentA ; ii++ ) {
         jj = colmap[colindA[ii]] ;
         rowJ[2*jj]   = rowentA[2*ii]   ;
         rowJ[2*jj+1] = rowentA[2*ii+1] ;
      }
   }
}
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n after assembling rows of A") ;
   A2_writeForHumanEye(frontJ, msgFile) ;
   fflush(msgFile) ;
}
/*
   ----------------------------------
   load the updates from the children 
   ----------------------------------
*/
for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) {
   nrowI = chvI->nD ;
   Chv_columnIndices(chvI, &ncolI, &colindI) ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n loading child %d", chvI->id) ;
      fprintf(msgFile, "\n child's column indices") ;
      IVfprintf(msgFile, ncolI, colindI) ;
      Chv_writeForHumanEye(chvI, msgFile) ;
      fflush(msgFile) ;
   }
   rowI = Chv_entries(chvI) ;
   for ( irowI = 0 ; irowI < nrowI ; irowI++, jrow++ ) {
      rowJ = A2_row(frontJ, map[jrow]) ;
      if ( A2_IS_REAL(frontJ) ) {
         for ( ii = irowI ; ii < ncolI ; ii++ ) {
            jj = colindI[ii] ;
            rowJ[jj] = rowI[ii] ;
         }
         rowI += ncolI - irowI - 1 ;
      } else if ( A2_IS_COMPLEX(frontJ) ) {
         for ( ii = irowI ; ii < ncolI ; ii++ ) {
            jj = colindI[ii] ;
            rowJ[2*jj]   = rowI[2*ii]   ;
            rowJ[2*jj+1] = rowI[2*ii+1] ;
         }
         rowI += 2*(ncolI - irowI - 1) ;
      }
   }
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n after assembling child %d", chvI->id) ;
      A2_writeForHumanEye(frontJ, msgFile) ;
      fflush(msgFile) ;
   }
}
return(frontJ) ; }
示例#4
0
/*
   -----------------------
   compute W0 = v^H * A

   created -- 98may30, cca
   -----------------------
*/
static double
computeW1 (
   A2       *mtxA,
   double   H0[],
   double   W0[],
   int      msglvl,
   FILE     *msgFile
) {
double   nops ;
int      inc1, inc2, ncolA, nrowA ;

if ( msglvl > 5 ) {
   fprintf(msgFile, "\n %% inside computeW1, nrow %d, ncol %d",
           mtxA->n1, mtxA->n2) ;
}

nrowA = mtxA->n1 ;
ncolA = mtxA->n2 ;
inc1  = mtxA->inc1 ;
inc2  = mtxA->inc2 ;
if ( inc1 != 1 && inc2 != 1 ) {
   fprintf(stderr, "\n error in computeW1"
           "\n inc1 = %d, inc2 = %d\n", inc1, inc2) ;
   exit(-1) ;
}
nops  = 0.0 ;
if ( A2_IS_REAL(mtxA) ) {
   int      irow, jcol ;

   if ( inc1 == 1 ) {
      double   sums[3] ;
      double   *colA0, *colA1, *colA2 ;
/*
      ----------------------------
      A is column major, 
      compute W(j) = H0^T * A(*,j)
      ----------------------------
*/
      for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
         colA0 = A2_column(mtxA, jcol)   ;
         colA1 = A2_column(mtxA, jcol+1) ;
         colA2 = A2_column(mtxA, jcol+2) ;
         DVdot13(nrowA, H0, colA0, colA1, colA2, sums) ;
         W0[jcol]   = sums[0] ;
         W0[jcol+1] = sums[1] ;
         W0[jcol+2] = sums[2] ;
         nops += 6*nrowA ;
      }
      if ( jcol == ncolA - 2 ) {
         colA0 = A2_column(mtxA, jcol)   ;
         colA1 = A2_column(mtxA, jcol+1) ;
         DVdot12(nrowA, H0, colA0, colA1, sums) ;
         W0[jcol]   = sums[0] ;
         W0[jcol+1] = sums[1] ;
         nops += 4*nrowA ;
      } else if ( jcol == ncolA - 1 ) {
         colA0 = A2_column(mtxA, jcol)   ;
         DVdot11(nrowA, H0, colA0, sums) ;
         W0[jcol] = sums[0] ;
         nops += 2*nrowA ;
      }
   } else {
      double   alpha[3] ;
      double   *rowA0, *rowA1, *rowA2 ;
/*
      -------------------------------
      A is row major
      compute W := W + H0(j) * A(j,*)
      -------------------------------
*/
      DVzero(ncolA, W0) ;
      for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         rowA2 = A2_row(mtxA, irow+2) ;
         alpha[0] = H0[irow]   ; 
         alpha[1] = H0[irow+1] ; 
         alpha[2] = H0[irow+2] ; 
         DVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ;
         nops += 6*ncolA ;
      }
      if ( irow == nrowA - 2 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         alpha[0] = H0[irow]   ; 
         alpha[1] = H0[irow+1] ; 
         DVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ;
         nops += 4*ncolA ;
      } else if ( irow == nrowA - 1 ) {
         rowA0 = A2_row(mtxA, irow) ;
         alpha[0] = H0[irow]   ; 
         DVaxpy11(ncolA, W0, alpha, rowA0) ;
         nops += 2*ncolA ;
      }
   }
} else if ( A2_IS_COMPLEX(mtxA) ) {
   int      irow, jcol ;

   if ( inc1 == 1 ) {
      double   sums[6] ;
      double   *colA0, *colA1, *colA2 ;
/*
      ----------------------------
      A is column major
      compute W(j) = H0^H * A(*,j)
      ----------------------------
*/
      for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
         colA0 = A2_column(mtxA, jcol)   ;
         colA1 = A2_column(mtxA, jcol+1) ;
         colA2 = A2_column(mtxA, jcol+2) ;
         ZVdotC13(nrowA, H0, colA0, colA1, colA2, sums) ;
         W0[2*jcol]     = sums[0] ; W0[2*jcol+1]     = sums[1] ;
         W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ;
         W0[2*(jcol+2)] = sums[4] ; W0[2*(jcol+2)+1] = sums[5] ;
         nops += 24*nrowA ;
      }
      if ( jcol == ncolA - 2 ) {
         colA0 = A2_column(mtxA, jcol)   ;
         colA1 = A2_column(mtxA, jcol+1) ;
         ZVdotC12(nrowA, H0, colA0, colA1, sums) ;
         W0[2*jcol]     = sums[0] ; W0[2*jcol+1]     = sums[1] ;
         W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ;
         nops += 16*nrowA ;
      } else if ( jcol == ncolA - 1 ) {
         colA0 = A2_column(mtxA, jcol)   ;
         ZVdotC11(nrowA, H0, colA0, sums) ;
         W0[2*jcol]     = sums[0] ; W0[2*jcol+1]     = sums[1] ;
         nops += 8*nrowA ;
      }
   } else {
      double   alpha[6] ;
      double   *rowA0, *rowA1, *rowA2 ;
/*
      ---------------------------------
      A is row major
      compute W := W + H0(j)^H * A(j,*)
      ---------------------------------
*/
      DVzero(2*ncolA, W0) ;
      for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         rowA2 = A2_row(mtxA, irow+2) ;
         alpha[0] = H0[2*irow]     ; alpha[1] = -H0[2*irow+1]   ; 
         alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ;
         alpha[4] = H0[2*(irow+2)] ; alpha[5] = -H0[2*(irow+2)+1] ;
         ZVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ;
         nops += 24*ncolA ;
      }
      if ( irow == nrowA - 2 ) {
         rowA0 = A2_row(mtxA, irow) ;
         rowA1 = A2_row(mtxA, irow+1) ;
         alpha[0] = H0[2*irow]     ; alpha[1] = -H0[2*irow+1]   ; 
         alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ;
         ZVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ;
         nops += 16*ncolA ;
      } else if ( irow == nrowA - 1 ) {
         rowA0 = A2_row(mtxA, irow) ;
         alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ; 
         ZVaxpy11(ncolA, W0, alpha, rowA0) ;
         nops += 8*ncolA ;
      }
   }
}
return(nops) ; }