コード例 #1
0
void
MultiPlasticityLinearSystem::nrStep(const RankTwoTensor & stress, const std::vector<Real> & intnl_old, const std::vector<Real> & intnl, const std::vector<Real> & pm, const RankFourTensor & E_inv, const RankTwoTensor & delta_dp, RankTwoTensor & dstress, std::vector<Real> & dpm, std::vector<Real> & dintnl, const std::vector<bool> & active, std::vector<bool> & deactivated_due_to_ld)
{
  // Calculate RHS and Jacobian
  std::vector<Real> rhs;
  calculateRHS(stress, intnl_old, intnl, pm, delta_dp, rhs, active, true, deactivated_due_to_ld);

  std::vector<std::vector<Real> > jac;
  calculateJacobian(stress, intnl, pm, E_inv, active, deactivated_due_to_ld, jac);


  // prepare for LAPACKgesv_ routine provided by PETSc
  int system_size = rhs.size();

  std::vector<double> a(system_size*system_size);
  // Fill in the a "matrix" by going down columns
  unsigned ind = 0;
  for (int col = 0 ; col < system_size ; ++col)
    for (int row = 0 ; row < system_size ; ++row)
      a[ind++] = jac[row][col];

  int nrhs = 1;
  std::vector<int> ipiv(system_size);
  int info;
  LAPACKgesv_(&system_size, &nrhs, &a[0], &system_size, &ipiv[0], &rhs[0], &system_size, &info);

  if (info != 0)
    mooseError("In solving the linear system in a Newton-Raphson process, the PETSC LAPACK gsev routine returned with error code " << info);



  // Extract the results back to dstress, dpm and dintnl
  std::vector<bool> active_not_deact(_num_surfaces);
  for (unsigned surface = 0 ; surface < _num_surfaces ; ++surface)
    active_not_deact[surface] = (active[surface] && !deactivated_due_to_ld[surface]);

  unsigned int dim = 3;
  ind = 0;

  for (unsigned i = 0 ; i < dim ; ++i)
    for (unsigned j = 0 ; j <= i ; ++j)
      dstress(i, j) = dstress(j, i) = rhs[ind++];
  dpm.assign(_num_surfaces, 0);
  for (unsigned surface = 0 ; surface < _num_surfaces ; ++surface)
    if (active_not_deact[surface])
      dpm[surface] = rhs[ind++];
  dintnl.assign(_num_models, 0);
  for (unsigned model = 0 ; model < _num_models ; ++model)
    if (anyActiveSurfaces(model, active_not_deact))
      dintnl[model] = rhs[ind++];

  mooseAssert(static_cast<int>(ind) == system_size, "Incorrect extracting of changes from NR solution in nrStep");
}
コード例 #2
0
void
FiniteStrainPlasticBase::nrStep(const RankTwoTensor & stress, const std::vector<Real> & intnl_old, const std::vector<Real> & intnl, const std::vector<Real> & pm, const RankFourTensor & E_inv, const RankTwoTensor & delta_dp, RankTwoTensor & dstress, std::vector<Real> & dpm, std::vector<Real> & dintnl)
{
  // Calculate RHS and Jacobian
  std::vector<Real> rhs;
  calculateRHS(stress, intnl_old, intnl, pm, delta_dp, rhs);

  std::vector<std::vector<Real> > jac;
  calculateJacobian(stress, intnl, pm, E_inv, jac);

  // prepare for LAPACKgesv_ routine provided by PETSc (at least since PETSc 3.0.0)
  int system_size = rhs.size();

  std::vector<double> a(system_size*system_size);
  // Fill in the a "matrix" by going down columns
  unsigned ind = 0;
  for (int col = 0 ; col < system_size ; ++col)
    for (int row = 0 ; row < system_size ; ++row)
      a[ind++] = jac[row][col];

  int nrhs = 1;
  std::vector<int> ipiv(system_size);
  int info;
  LAPACKgesv_(&system_size, &nrhs, &a[0], &system_size, &ipiv[0], &rhs[0], &system_size, &info);

  if (info != 0)
    mooseError("In solving the linear system in a Newton-Raphson process, the PETSC LAPACK gsev routine returned with error code " << info);

  // Extract the results back to dstress, dpm and dintnl
  unsigned int dim = 3;
  ind = 0;
  for (unsigned i = 0 ; i < dim ; ++i)
    for (unsigned j = 0 ; j <= i ; ++j)
      dstress(i, j) = dstress(j, i) = rhs[ind++];
  dpm.resize(numberOfYieldFunctions());
  for (unsigned alpha = 0 ; alpha < numberOfYieldFunctions() ; ++alpha)
    dpm[alpha] = rhs[ind++];
  dintnl.resize(numberOfInternalParameters());
  for (unsigned a = 0 ; a < numberOfInternalParameters() ; ++a)
    dintnl[a] = rhs[ind++];
}
コード例 #3
0
ファイル: geo.c プロジェクト: 00liujj/petsc
static PetscErrorCode triangulateAndFormProl(IS  selected_2, /* list of selected local ID, includes selected ghosts */
                                             const PetscInt data_stride,
                                             const PetscReal coords[], /* column vector of local coordinates w/ ghosts */
                                             const PetscInt nselected_1, /* list of selected local ID, includes selected ghosts */
                                             const PetscInt clid_lid_1[],
                                             const PetscCoarsenData *agg_lists_1, /* selected_1 vertices of aggregate unselected vertices */
                                             const PetscInt crsGID[],
                                             const PetscInt bs,
                                             Mat a_Prol, /* prolongation operator (output) */
                                             PetscReal *a_worst_best) /* measure of worst missed fine vertex, 0 is no misses */
{
#if defined(PETSC_HAVE_TRIANGLE)
  PetscErrorCode       ierr;
  PetscInt             jj,tid,tt,idx,nselected_2;
  struct triangulateio in,mid;
  const PetscInt       *selected_idx_2;
  PetscMPIInt          rank,size;
  PetscInt             Istart,Iend,nFineLoc,myFine0;
  int                  kk,nPlotPts,sid;
  MPI_Comm             comm;
  PetscReal            tm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)a_Prol,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = ISGetSize(selected_2, &nselected_2);CHKERRQ(ierr);
  if (nselected_2 == 1 || nselected_2 == 2) { /* 0 happens on idle processors */
    *a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */
  } else *a_worst_best = 0.0;
  ierr = MPI_Allreduce(a_worst_best, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr);
  if (tm > 0.0) {
    *a_worst_best = 100.0;
    PetscFunctionReturn(0);
  }
  ierr     = MatGetOwnershipRange(a_Prol, &Istart, &Iend);CHKERRQ(ierr);
  nFineLoc = (Iend-Istart)/bs; myFine0 = Istart/bs;
  nPlotPts = nFineLoc; /* locals */
  /* traingle */
  /* Define input points - in*/
  in.numberofpoints          = nselected_2;
  in.numberofpointattributes = 0;
  /* get nselected points */
  ierr = PetscMalloc1(2*(nselected_2), &in.pointlist);CHKERRQ(ierr);
  ierr = ISGetIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);

  for (kk=0,sid=0; kk<nselected_2; kk++,sid += 2) {
    PetscInt lid = selected_idx_2[kk];
    in.pointlist[sid]   = coords[lid];
    in.pointlist[sid+1] = coords[data_stride + lid];
    if (lid>=nFineLoc) nPlotPts++;
  }
  if (sid != 2*nselected_2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != 2*nselected_2 %D",sid,nselected_2);

  in.numberofsegments      = 0;
  in.numberofedges         = 0;
  in.numberofholes         = 0;
  in.numberofregions       = 0;
  in.trianglelist          = 0;
  in.segmentmarkerlist     = 0;
  in.pointattributelist    = 0;
  in.pointmarkerlist       = 0;
  in.triangleattributelist = 0;
  in.trianglearealist      = 0;
  in.segmentlist           = 0;
  in.holelist              = 0;
  in.regionlist            = 0;
  in.edgelist              = 0;
  in.edgemarkerlist        = 0;
  in.normlist              = 0;

  /* triangulate */
  mid.pointlist = 0;            /* Not needed if -N switch used. */
  /* Not needed if -N switch used or number of point attributes is zero: */
  mid.pointattributelist = 0;
  mid.pointmarkerlist    = 0; /* Not needed if -N or -B switch used. */
  mid.trianglelist       = 0;    /* Not needed if -E switch used. */
  /* Not needed if -E switch used or number of triangle attributes is zero: */
  mid.triangleattributelist = 0;
  mid.neighborlist          = 0; /* Needed only if -n switch used. */
  /* Needed only if segments are output (-p or -c) and -P not used: */
  mid.segmentlist = 0;
  /* Needed only if segments are output (-p or -c) and -P and -B not used: */
  mid.segmentmarkerlist = 0;
  mid.edgelist          = 0;    /* Needed only if -e switch used. */
  mid.edgemarkerlist    = 0; /* Needed if -e used and -B not used. */
  mid.numberoftriangles = 0;

  /* Triangulate the points.  Switches are chosen to read and write a  */
  /*   PSLG (p), preserve the convex hull (c), number everything from  */
  /*   zero (z), assign a regional attribute to each element (A), and  */
  /*   produce an edge list (e), a Voronoi diagram (v), and a triangle */
  /*   neighbor list (n).                                            */
  if (nselected_2 != 0) { /* inactive processor */
    char args[] = "npczQ"; /* c is needed ? */
    triangulate(args, &in, &mid, (struct triangulateio*) NULL);
    /* output .poly files for 'showme' */
    if (!PETSC_TRUE) {
      static int level = 1;
      FILE       *file; char fname[32];

      sprintf(fname,"C%d_%d.poly",level,rank); file = fopen(fname, "w");
      /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
      fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid += 2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }
      /*One line: <# of segments> <# of boundary markers (0 or 1)> */
      fprintf(file, "%d  %d\n",0,0);
      /*Following lines: <segment #> <endpoint> <endpoint> [boundary marker] */
      /* One line: <# of holes> */
      fprintf(file, "%d\n",0);
      /* Following lines: <hole #> <x> <y> */
      /* Optional line: <# of regional attributes and/or area constraints> */
      /* Optional following lines: <region #> <x> <y> <attribute> <maximum area> */
      fclose(file);

      /* elems */
      sprintf(fname,"C%d_%d.ele",level,rank); file = fopen(fname, "w");
      /* First line: <# of triangles> <nodes per triangle> <# of attributes> */
      fprintf(file, "%d %d %d\n",mid.numberoftriangles,3,0);
      /* Remaining lines: <triangle #> <node> <node> <node> ... [attributes] */
      for (kk=0,sid=0; kk<mid.numberoftriangles; kk++,sid += 3) {
        fprintf(file, "%d %d %d %d\n",kk,mid.trianglelist[sid],mid.trianglelist[sid+1],mid.trianglelist[sid+2]);
      }
      fclose(file);

      sprintf(fname,"C%d_%d.node",level,rank); file = fopen(fname, "w");
      /* First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)> */
      /* fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0); */
      fprintf(file, "%d  %d  %d  %d\n",nPlotPts,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid+=2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }

      sid /= 2;
      for (jj=0; jj<nFineLoc; jj++) {
        PetscBool sel = PETSC_TRUE;
        for (kk=0; kk<nselected_2 && sel; kk++) {
          PetscInt lid = selected_idx_2[kk];
          if (lid == jj) sel = PETSC_FALSE;
        }
        if (sel) fprintf(file, "%d %e %e\n",sid++,coords[jj],coords[data_stride + jj]);
      }
      fclose(file);
      if (sid != nPlotPts) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != nPlotPts %D",sid,nPlotPts);
      level++;
    }
  }
#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventBegin(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr);
#endif
  { /* form P - setup some maps */
    PetscInt clid,mm,*nTri,*node_tri;

    ierr = PetscMalloc1(nselected_2, &node_tri);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected_2, &nTri);CHKERRQ(ierr);

    /* need list of triangles on node */
    for (kk=0; kk<nselected_2; kk++) nTri[kk] = 0;
    for (tid=0,kk=0; tid<mid.numberoftriangles; tid++) {
      for (jj=0; jj<3; jj++) {
        PetscInt cid = mid.trianglelist[kk++];
        if (nTri[cid] == 0) node_tri[cid] = tid;
        nTri[cid]++;
      }
    }
#define EPS 1.e-12
    /* find points and set prolongation */
    for (mm = clid = 0; mm < nFineLoc; mm++) {
      PetscBool ise;
      ierr = PetscCDEmptyAt(agg_lists_1,mm,&ise);CHKERRQ(ierr);
      if (!ise) {
        const PetscInt lid = mm;
        /* for (clid_iterator=0;clid_iterator<nselected_1;clid_iterator++) { */
        PetscScalar  AA[3][3];
        PetscBLASInt N=3,NRHS=1,LDA=3,IPIV[3],LDB=3,INFO;
        PetscCDPos   pos;
        ierr = PetscCDGetHeadPos(agg_lists_1,lid,&pos);CHKERRQ(ierr);
        while (pos) {
          PetscInt flid;
          ierr = PetscLLNGetID(pos, &flid);CHKERRQ(ierr);
          ierr = PetscCDGetNextPos(agg_lists_1,lid,&pos);CHKERRQ(ierr);

          if (flid < nFineLoc) {  /* could be a ghost */
            PetscInt       bestTID = -1; PetscReal best_alpha = 1.e10;
            const PetscInt fgid    = flid + myFine0;
            /* compute shape function for gid */
            const PetscReal fcoord[3] = {coords[flid],coords[data_stride+flid],1.0};
            PetscBool       haveit    =PETSC_FALSE; PetscScalar alpha[3]; PetscInt clids[3];

            /* look for it */
            for (tid = node_tri[clid], jj=0;
                 jj < 5 && !haveit && tid != -1;
                 jj++) {
              for (tt=0; tt<3; tt++) {
                PetscInt cid2 = mid.trianglelist[3*tid + tt];
                PetscInt lid2 = selected_idx_2[cid2];
                AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                clids[tt] = cid2; /* store for interp */
              }

              for (tt=0; tt<3; tt++) alpha[tt] = (PetscScalar)fcoord[tt];

              /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
              PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
              {
                PetscBool have=PETSC_TRUE;  PetscReal lowest=1.e10;
                for (tt = 0, idx = 0; tt < 3; tt++) {
                  if (PetscRealPart(alpha[tt]) > (1.0+EPS) || PetscRealPart(alpha[tt]) < -EPS) have = PETSC_FALSE;
                  if (PetscRealPart(alpha[tt]) < lowest) {
                    lowest = PetscRealPart(alpha[tt]);
                    idx    = tt;
                  }
                }
                haveit = have;
              }
              tid = mid.neighborlist[3*tid + idx];
            }

            if (!haveit) {
              /* brute force */
              for (tid=0; tid<mid.numberoftriangles && !haveit; tid++) {
                for (tt=0; tt<3; tt++) {
                  PetscInt cid2 = mid.trianglelist[3*tid + tt];
                  PetscInt lid2 = selected_idx_2[cid2];
                  AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                  clids[tt] = cid2; /* store for interp */
                }
                for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt];
                /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
                PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
                {
                  PetscBool have=PETSC_TRUE;  PetscReal worst=0.0, v;
                  for (tt=0; tt<3 && have; tt++) {
                    if (PetscRealPart(alpha[tt]) > 1.0+EPS || PetscRealPart(alpha[tt]) < -EPS) have=PETSC_FALSE;
                    if ((v=PetscAbs(PetscRealPart(alpha[tt])-0.5)) > worst) worst = v;
                  }
                  if (worst < best_alpha) {
                    best_alpha = worst; bestTID = tid;
                  }
                  haveit = have;
                }
              }
            }
            if (!haveit) {
              if (best_alpha > *a_worst_best) *a_worst_best = best_alpha;
              /* use best one */
              for (tt=0; tt<3; tt++) {
                PetscInt cid2 = mid.trianglelist[3*bestTID + tt];
                PetscInt lid2 = selected_idx_2[cid2];
                AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                clids[tt] = cid2; /* store for interp */
              }
              for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt];
              /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
              PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
            }

            /* put in row of P */
            for (idx=0; idx<3; idx++) {
              PetscScalar shp = alpha[idx];
              if (PetscAbs(PetscRealPart(shp)) > 1.e-6) {
                PetscInt cgid = crsGID[clids[idx]];
                PetscInt jj   = cgid*bs, ii = fgid*bs; /* need to gloalize */
                for (tt=0; tt < bs; tt++, ii++, jj++) {
                  ierr = MatSetValues(a_Prol,1,&ii,1,&jj,&shp,INSERT_VALUES);CHKERRQ(ierr);
                }
              }
            }
          }
        } /* aggregates iterations */
        clid++;
      } /* a coarse agg */
    } /* for all fine nodes */

    ierr = ISRestoreIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    ierr = PetscFree(node_tri);CHKERRQ(ierr);
    ierr = PetscFree(nTri);CHKERRQ(ierr);
  }
#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventEnd(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr);
#endif
  free(mid.trianglelist);
  free(mid.neighborlist);
  ierr = PetscFree(in.pointlist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  SETERRQ(PetscObjectComm((PetscObject)a_Prol),PETSC_ERR_PLIB,"configure with TRIANGLE to use geometric MG");
#endif
}
コード例 #4
0
PetscErrorCode DSFunction_EXP_NHEP_PADE(DS ds)
{
#if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscBLASInt   n,ld,ld2,*ipiv,info,inc=1;
  PetscInt       j,k,odd;
  const PetscInt p=MAX_PADE;
  PetscReal      c[MAX_PADE+1],s;
  PetscScalar    scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
  PetscScalar    *A,*A2,*Q,*P,*W,*aux;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ld2 = ld*ld;
  ierr = DSAllocateWork_Private(ds,0,ld,ld);CHKERRQ(ierr);
  ipiv = ds->iwork;
  if (!ds->mat[DS_MAT_W]) { ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); }
  if (!ds->mat[DS_MAT_Z]) { ierr = DSAllocateMat_Private(ds,DS_MAT_Z);CHKERRQ(ierr); }
  A  = ds->mat[DS_MAT_A];
  A2 = ds->mat[DS_MAT_Z];
  Q  = ds->mat[DS_MAT_Q];
  P  = ds->mat[DS_MAT_F];
  W  = ds->mat[DS_MAT_W];

  /* Pade' coefficients */
  c[0] = 1.0;
  for (k=1;k<=p;k++) {
    c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
  }

  /* Scaling */
  s = LAPACKlange_("I",&n,&n,A,&ld,ds->rwork);
  if (s>0.5) {
    s = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0)) + 2);
    scale = PetscPowReal(2.0,(-1)*s);
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,A,&inc));
  }

  /* Horner evaluation */
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,A,&ld,A,&ld,&zero,A2,&ld));
  ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(P,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  for (j=0;j<n;j++) {
    Q[j+j*ld] = c[p];
    P[j+j*ld] = c[p-1];
  }

  odd = 1;
  for (k=p-1;k>0;k--) {
    if (odd==1) {
      PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
      aux = Q;
      Q = W;
      W = aux;
      for (j=0;j<n;j++)
        Q[j+j*ld] = Q[j+j*ld] + c[k-1];
    } else {
      PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
      aux = P;
      P = W;
      W = aux;
      for (j=0;j<n;j++)
        P[j+j*ld] = P[j+j*ld] + c[k-1];
    }
    odd = 1-odd;
  }
  if (odd==1) {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A,&ld,&zero,W,&ld));
    aux = Q;
    Q = W;
    W = aux;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
    PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
    for (j=0;j<n;j++)
      P[j+j*ld] = P[j+j*ld] + 1.0;
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
  } else {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A,&ld,&zero,W,&ld));
    aux = P;
    P = W;
    W = aux;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
    PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
    for (j=0;j<n;j++)
      P[j+j*ld] = P[j+j*ld] + 1.0;
  }

  for (k=1;k<=s;k++) {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
    ierr = PetscMemcpy(P,W,ld2*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  if (P!=ds->mat[DS_MAT_F]) {
    ierr = PetscMemcpy(ds->mat[DS_MAT_F],P,ld2*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
#endif
}