Beispiel #1
0
PetscErrorCode MatMultTranspose_SeqBSTRM_5(Mat A,Vec xx,Vec zz)
{
  Mat_SeqBAIJ     *a      = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBSTRM    *sbstrm = (Mat_SeqBSTRM*)A->spptr;
  PetscScalar     zero    = 0.0;
  PetscScalar     *z      = 0;
  PetscScalar     *x,*xb;
  const MatScalar *v1, *v2, *v3, *v4, *v5;
  PetscScalar     x1, x2, x3, x4, x5;
  PetscErrorCode  ierr;
  PetscInt        mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j;
  PetscInt        nonzerorow=0;
  PetscInt        slen;

  PetscFunctionBegin;
  ierr = VecSet(zz,zero);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&z);CHKERRQ(ierr);

  slen = 5*(ai[mbs]-ai[0]);

  v1 = sbstrm->as;
  v2 = v1 + slen;
  v3 = v2 + slen;
  v4 = v3 + slen;
  v5 = v4 + slen;

  xb = x;

  for (i=0; i<mbs; i++) {
    n           = ai[i+1] - ai[i];
    nonzerorow += (n>0);

    x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; xb += 5;
    ib = aj + ai[i];

    PetscPrefetchBlock(ib+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
    PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */


    for (j=0; j<n; j++) {
      cval       = ib[j]*5;
      z[cval]   += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
      z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
      z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
      z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
      z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
      v1        += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
    }
  }
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
  ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #2
0
PetscErrorCode MatMult_SeqSBSTRM_5(Mat A,Vec xx,Vec zz)
{
  Mat_SeqSBAIJ    *a      = (Mat_SeqSBAIJ*)A->data;
  Mat_SeqSBSTRM   *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
  PetscScalar     zero    = 0.0;
  PetscScalar     *z      = 0;
  PetscScalar     *x,*xb;
  const MatScalar *v1, *v2, *v3, *v4, *v5;
  PetscScalar     x1, x2, x3, x4, x5;
  PetscScalar     xr1, xr2, xr3, xr4, xr5;
  PetscScalar     sum1, sum2, sum3, sum4, sum5;
  PetscErrorCode  ierr;
  PetscInt        mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
  PetscInt        nonzerorow=0;
  PetscInt        slen;

  PetscFunctionBegin;
  ierr = VecSet(zz,zero);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&z);CHKERRQ(ierr);

  slen = 5*(ai[mbs]-ai[0]);

  v1 = sbstrm->as;
  v2 = v1 + slen;
  v3 = v2 + slen;
  v4 = v3 + slen;
  v5 = v4 + slen;

  xb = x;

  for (i=0; i<mbs; i++) {
    n  = ai[i+1] - ai[i];

    nonzerorow += (n>0);

    x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];

    sum1 = z[5*i];
    sum2 = z[5*i+1];
    sum3 = z[5*i+2];
    sum4 = z[5*i+3];
    sum5 = z[5*i+4];
    jmin = 0;
    ib   = aj + ai[i];
    if (*ib == i) {     /* (diag of A)*x */
      sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
      sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
      sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
      sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v4[4]*x5;
      sum5 += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;

      v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
      jmin++;
    }

    PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
    PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */

    for (j=jmin; j<n; j++) {
      cval       = ib[j]*5;
      z[cval]   += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
      z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
      z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
      z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
      z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;

      xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3]; xr5 = x[cval+4];

      sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4 + v1[4]*xr5;
      sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4 + v2[4]*xr5;
      sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4 + v3[4]*xr5;
      sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4 + v4[4]*xr5;
      sum5 += v5[0]*xr1 + v5[1]*xr2 + v5[2]*xr3 + v5[3]*xr4 + v5[4]*xr5;

      v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
    }
    z[5*i] = sum1; z[5*i+1] = sum2; z[5*i+2] = sum3; z[5*i+3] = sum4; z[5*i+4] = sum5;
    xb    += 5;
  }
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
  ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #3
0
PetscErrorCode MatMultAdd_SeqBSTRM_5(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*)A->spptr;
  PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
  PetscScalar    *yarray,*zarray;
  MatScalar      *v1,*v2,*v3,*v4,*v5;
  PetscErrorCode ierr;
  PetscInt       mbs     =a->mbs,i,*idx,*ii,j,n,*ridx=NULL;
  PetscBool      usecprow=a->compressedrow.use;
  PetscInt       slen;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
  if (zz != yy) {
    ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
  } else {
    zarray = yarray;
  }


  idx = a->j;
  if (usecprow) {
    if (zz != yy) {
      ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    mbs  = a->compressedrow.nrows;
    ii   = a->compressedrow.i;
    ridx = a->compressedrow.rindex;
  } else {
    ii = a->i;
    y  = yarray;
    z  = zarray;
  }

  slen = 5*(ii[mbs]-ii[0]);
  v1   = bstrm->as;
  v2   = v1 + slen;
  v3   = v2 + slen;
  v4   = v3 + slen;
  v5   = v4 + slen;


  for (i=0; i<mbs; i++) {
    n = ii[1] - ii[0]; ii++;
    if (usecprow) {
      z = zarray + 5*ridx[i];
      y = yarray + 5*ridx[i];
    }
    sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
    PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
    PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */


    for (j=0; j<n; j++) {
      xb    = x + 5*(*idx++);
      x1    = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
      sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
      sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
      sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
      sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
      sum5 += v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
      v1   += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
    }
    z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
    if (!usecprow) {
      z += 5; y += 5;
    }
  }
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
  if (zz != yy) {
    ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}