Ejemplo n.º 1
0
/*
   KSPGMRESUpdateHessenberg - Applies the stored plane rotations to column
   `it` of the Hessenberg matrix, then (unless a happy breakdown was flagged)
   computes one new rotation and applies it to that column and to the
   right-hand side GRS.

   Input:
     ksp    - solver context; ksp->data is used as the KSP_GMRES work area
     it     - index of the column being updated
     hapend - PETSC_TRUE when the caller detected a happy breakdown

   Output:
     res    - |GRS(it+1)| after the new rotation, or 0.0 on happy breakdown.
              Left untouched when the rotation norm is exactly zero; in that
              case ksp->reason is set to KSP_DIVERGED_NULL instead.
*/
static PetscErrorCode KSPGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
{
  /* NOTE(review): the HH/CC/SS/GRS macros presumably expand against a local
     named `gmres`, so this name must not change - confirm in the header */
  KSP_GMRES   *gmres = (KSP_GMRES*)(ksp->data);
  PetscScalar *col,*cosv,*sinv,*diag,*c_new,*s_new;
  PetscScalar upper,nrm;
  PetscInt    r;

  PetscFunctionBegin;
  col  = HH(0,it);
  cosv = CC(0);
  sinv = SS(0);

  /* Sweep the previously computed rotations down the new column: rotation r
     mixes entries r and r+1 */
  for (r=0; r<it; r++) {
    upper    = col[r];
    col[r]   = PetscConj(cosv[r]) * upper + sinv[r] * col[r+1];
    col[r+1] = cosv[r] * col[r+1] - (sinv[r] * upper);
  }

  /* r now counts the rotations applied; the slots for the new rotation and
     the entry it acts on sit right after them */
  diag  = col  + r;
  c_new = cosv + r;
  s_new = sinv + r;

  if (hapend) {
    /* happy breakdown: HH(it+1,it) is treated as zero, so no further rotation
       is applied and the right-hand side is left as is; the residual is
       reported as exactly zero */
    *res = 0.0;
    PetscFunctionReturn(0);
  }

  /* Build the new rotation from the last two entries of the column */
  nrm = PetscSqrtScalar(PetscConj(diag[0]) * diag[0] + PetscConj(diag[1]) * diag[1]);
  if (nrm == 0.0) {
    ksp->reason = KSP_DIVERGED_NULL;
    PetscFunctionReturn(0);
  }
  *c_new = diag[0] / nrm;
  *s_new = diag[1] / nrm;

  /* Apply it to the right-hand side first (GRS(it+1) needs the old GRS(it)),
     then to the column itself */
  *GRS(it+1) = -(*s_new * *GRS(it));
  *GRS(it)   = PetscConj(*c_new) * *GRS(it);
  diag[0]    = PetscConj(*c_new) * diag[0] + *s_new * diag[1];

  /* the updated residual norm is the magnitude of the last right-hand-side entry */
  *res = PetscAbsScalar(*GRS(it+1));
  PetscFunctionReturn(0);
}
Ejemplo n.º 2
0
/*
   KSPGMRESBuildSoln - Back-solves the upper triangular system held in HH
   against the right-hand side GRS and adds the resulting correction to vs,
   writing the sum into vdest.

   Input:
     nrs   - work array receiving the it+1 back-substitution coefficients
     vs    - solution the correction is added to
     vdest - where the updated solution is stored (may be the same Vec as vs)
     ksp   - solver context; ksp->data is used as the KSP_GMRES work area
     it    - index of the last completed step; a negative value means no
             steps were performed and vs is simply copied to vdest

   If a diagonal entry of HH is exactly zero, ksp->reason is set to
   KSP_DIVERGED_BREAKDOWN, an info message is logged and the function returns
   0 without touching vdest.
*/
static PetscErrorCode KSPGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
{
  PetscScalar    tt;
  PetscErrorCode ierr;
  PetscInt       ii,k,j;
  KSP_GMRES      *gmres = (KSP_GMRES*)(ksp->data);

  PetscFunctionBegin;
  /* Solve for solution vector that minimizes the residual */

  /* If it is < 0, no gmres steps have been performed */
  if (it < 0) {
    ierr = VecCopy(vs,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vs == vdest */
    PetscFunctionReturn(0);
  }

  /* Last unknown first; a zero pivot means the triangular system is singular */
  if (*HH(it,it) == 0.0) {
    ksp->reason = KSP_DIVERGED_BREAKDOWN;

    /* the trailing newline keeps this record from running into the next info message */
    ierr = PetscInfo2(ksp,"Likely your matrix or preconditioner is singular. HH(it,it) is identically zero; it = %D GRS(it) = %G\n",it,PetscAbsScalar(*GRS(it)));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  nrs[it] = *GRS(it) / *HH(it,it);

  /* Back substitution for the remaining unknowns, from row it-1 up to row 0 */
  for (ii=1; ii<=it; ii++) {
    k  = it - ii;
    tt = *GRS(k);
    for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
    if (*HH(k,k) == 0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;

      ierr = PetscInfo1(ksp,"Likely your matrix or preconditioner is singular. HH(k,k) is identically zero; k = %D\n",k);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    nrs[k] = tt / *HH(k,k);
  }

  /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
  ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);
  ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));CHKERRQ(ierr);

  ierr = KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);CHKERRQ(ierr);
  /* add solution to previous solution */
  if (vdest != vs) {
    ierr = VecCopy(vs,vdest);CHKERRQ(ierr);
  }
  ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Ejemplo n.º 3
0
/*
   KSPGMRESCycle - Runs GMRES iterations starting from the vector already
   stored in VEC_VV(0) until a convergence/divergence reason is set, max_k
   iterations have been done in this call, or ksp->its reaches ksp->max_it,
   and then folds the result into ksp->vec_sol.

   Input:
     ksp     - solver context; ksp->data is used as the KSP_GMRES work area

   Output:
     itcount - if non-NULL, receives the number of iterations performed in
               this call (0 when the entry residual is exactly zero)

   NOTE(review): VEC_VV(0) is presumably the initial residual set up by the
   caller - confirm against the call site.
*/
PetscErrorCode KSPGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_GMRES      *gmres = (KSP_GMRES*)(ksp->data);
  PetscReal      res_norm,res,hapbnd,tt;
  PetscErrorCode ierr;
  PetscInt       it     = 0, max_k = gmres->max_k;
  PetscBool      hapend = PETSC_FALSE;

  PetscFunctionBegin;
  /* Normalize the starting vector; its norm is the entry residual and the
     first entry of the right-hand side */
  ierr    = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr);
  res     = res_norm;
  *GRS(0) = res_norm;

  /* check for the convergence */
  ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->rnorm = res;
  ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  gmres->it  = (it - 1);
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  /* Exactly zero residual on entry: report convergence and return before the
     solution-building step below is reached */
  if (!res) {
    if (itcount) *itcount = 0;
    ksp->reason = KSP_CONVERGED_ATOL;
    ierr        = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
    /* the it == 0 residual was already logged and monitored before the loop */
    if (it) {
      ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
      ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
    }
    gmres->it = (it - 1);
    /* request more work vectors when VEC_VV(it+1) would not fit in those allocated so far */
    if (gmres->vv_allocated <= it + VEC_OFFSET + 1) {
      ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr);
    }
    /* form the next direction in VEC_VV(1+it) from VEC_VV(it) */
    ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);CHKERRQ(ierr);

    /* update hessenberg matrix and do Gram-Schmidt */
    ierr = (*gmres->orthog)(ksp,it);CHKERRQ(ierr);

    /* normalize the new vector; tt receives its norm */
    ierr = VecNormalize(VEC_VV(it+1),&tt);CHKERRQ(ierr);

    /* save the magnitude */
    *HH(it+1,it)  = tt;
    *HES(it+1,it) = tt;

    /* check for the happy breakdown: the threshold is |tt / GRS(it)| capped
       at gmres->haptol */
    hapbnd = PetscAbsScalar(tt / *GRS(it));
    if (hapbnd > gmres->haptol) hapbnd = gmres->haptol;
    if (tt < hapbnd) {
      ierr   = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %14.12e tt = %14.12e\n",(double)hapbnd,(double)tt);CHKERRQ(ierr);
      hapend = PETSC_TRUE;
    }
    /* rotate the new column and obtain the updated residual norm in res */
    ierr = KSPGMRESUpdateHessenberg(ksp,it,hapend,&res);CHKERRQ(ierr);

    it++;
    gmres->it = (it-1);   /* For converged */
    ksp->its++;
    ksp->rnorm = res;
    /* KSPGMRESUpdateHessenberg may itself have set a reason (zero rotation norm) */
    if (ksp->reason) break;

    ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

    /* Catch error in happy breakdown and signal convergence and break from loop */
    if (hapend) {
      if (!ksp->reason) {
        /* breakdown was flagged but the convergence test did not fire:
           either raise an error or mark the solve as broken down */
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }
    }
  }

  /* Monitor if we know that we will not return for a restart */
  if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
    ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  }

  if (itcount) *itcount = it;


  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */
  /* Form the solution (or the solution so far); GRS(0) is passed as the
     coefficient work array, so the right-hand side storage is overwritten */
  ierr = KSPGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Ejemplo n.º 4
0
    // Disassembles the single instruction stored at address pc and returns
    // its textual form.
    //
    // vc - machine whose memory is read through ReadDW()
    // pc - address of the instruction word; when the instruction carries a
    //      "big" literal, the following dword is read as well
    //
    // Register operands are printed as %rN and literal operands as
    // zero-padded 8-digit hexadecimal. Opcodes that match no known
    // instruction are printed with the mnemonic "????".
    std::string DisassemblyTR3200 (const VComputer& vc, dword_t pc) {
      // The longest rendering ("STOREW [%r15 + 0xFFFFFFFF], %r15") is 32
      // characters plus the terminator, so a 32-byte buffer silently dropped
      // the final register digit. 64 leaves room to spare.
#define BUF_SIZE (64)
      char buf[BUF_SIZE] = {0};

      dword_t inst = vc.ReadDW(pc); // Fetch
      pc = pc +4;

      dword_t opcode, rd, rn, rs;
      rd = GRD(inst);
      rs = GRS(inst);
      // Decoding begins here
      bool literal = HAVE_LITERAL(inst);
      opcode = GET_OP_CODE(inst);

      // Each branch below first selects the mnemonic and the operand layout,
      // then formats once per layout.
      const char* name = "????";

      if (IS_P3(inst)) {
        // 3 parameter instruction ********************************************
        if (literal) {
          rn = LIT15(inst);
          if (IS_BIG_LITERAL_L15(rn)) { // Next dword is literal value
            rn = vc.ReadDW(pc);
            pc +=4;
          } else if (RN_SIGN_BIT(inst)) { // Negative Literal -> Extend sign
            rn |= 0xFFFF8000;
          }
        } else {
          rn = GRN(inst);
        }

        enum Layout3 { L3_PLAIN, L3_LOAD, L3_STORE };
        Layout3 layout = L3_PLAIN;

        switch (opcode) {
          case P3_OPCODE::AND :    name = "AND";    break;
          case P3_OPCODE::OR :     name = "OR";     break;
          case P3_OPCODE::XOR :    name = "XOR";    break;
          case P3_OPCODE::BITC :   name = "BITC";   break;
          case P3_OPCODE::ADD :    name = "ADD";    break;
          case P3_OPCODE::ADDC :   name = "ADDC";   break;
          case P3_OPCODE::SUB :    name = "SUB";    break;
          case P3_OPCODE::SUBB :   name = "SUBB";   break;
          case P3_OPCODE::RSB :    name = "RSB";    break;
          case P3_OPCODE::RSBB :   name = "RSBB";   break;
          case P3_OPCODE::LLS :    name = "LLS";    break;
          case P3_OPCODE::RLS :    name = "RLS";    break;
          case P3_OPCODE::ARS :    name = "ARS";    break;
          case P3_OPCODE::ROTL :   name = "ROTL";   break;
          case P3_OPCODE::ROTR :   name = "ROTR";   break;
          case P3_OPCODE::MUL :    name = "MUL";    break;
          case P3_OPCODE::SMUL :   name = "SMUL";   break;
          case P3_OPCODE::DIV :    name = "DIV";    break;
          case P3_OPCODE::SDIV :   name = "SDIV";   break;

          case P3_OPCODE::LOAD :   name = "LOAD";   layout = L3_LOAD;  break;
          case P3_OPCODE::LOADW :  name = "LOADW";  layout = L3_LOAD;  break;
          case P3_OPCODE::LOADB :  name = "LOADB";  layout = L3_LOAD;  break;

          case P3_OPCODE::STORE :  name = "STORE";  layout = L3_STORE; break;
          case P3_OPCODE::STOREW : name = "STOREW"; layout = L3_STORE; break;
          case P3_OPCODE::STOREB : name = "STOREB"; layout = L3_STORE; break;

          default:
            break; // unknown opcode: "????" with the plain layout
        }

        if (layout == L3_LOAD) {
          if (literal)
            snprintf(buf, BUF_SIZE, "%s %%r%u, [%%r%u + 0x%08X]", name, rd, rs, rn);
          else
            snprintf(buf, BUF_SIZE, "%s %%r%u, [%%r%u + %%r%u]", name, rd, rs, rn);
        } else if (layout == L3_STORE) {
          // stores print the address first and the source register last
          if (literal)
            snprintf(buf, BUF_SIZE, "%s [%%r%u + 0x%08X], %%r%u", name, rs, rn, rd);
          else
            snprintf(buf, BUF_SIZE, "%s [%%r%u + %%r%u], %%r%u", name, rs, rn, rd);
        } else {
          if (literal)
            snprintf(buf, BUF_SIZE, "%s %%r%u, %%r%u, 0x%08X", name, rd, rs, rn);
          else
            snprintf(buf, BUF_SIZE, "%s %%r%u, %%r%u, %%r%u", name, rd, rs, rn);
        }

      } else if (IS_P2(inst)) {
        // 2 parameter instruction ********************************************
        // Fetch Rn operand
        if (literal) {
          rn = LIT19(inst);
          if (IS_BIG_LITERAL_L19(rn)) { // Next dword is literal value
            rn = vc.ReadDW(pc);
            pc +=4;
          } else if (RN_SIGN_BIT(inst)) { // Negative Literal -> Extend sign
            rn |= 0xFFF80000;
          }
        } else {
          rn = GRS(inst);
        }

        enum Layout2 { L2_PLAIN, L2_LOAD, L2_STORE, L2_JUMP };
        Layout2 layout = L2_PLAIN;

        switch (opcode) {
          case P2_OPCODE::MOV :     name = "MOV";     break;
          case P2_OPCODE::SWP :     name = "SWP";     break;
          case P2_OPCODE::SIGXB :   name = "SIGXB";   break;
          case P2_OPCODE::SIGXW :   name = "SIGXW";   break;
          case P2_OPCODE::NOT :     name = "NOT";     break;

          case P2_OPCODE::LOAD2 :   name = "LOAD";    layout = L2_LOAD;  break;
          case P2_OPCODE::LOADW2 :  name = "LOADW";   layout = L2_LOAD;  break;
          case P2_OPCODE::LOADB2 :  name = "LOADB";   layout = L2_LOAD;  break;

          case P2_OPCODE::STORE2 :  name = "STORE";   layout = L2_STORE; break;
          case P2_OPCODE::STOREW2 : name = "STOREW";  layout = L2_STORE; break;
          case P2_OPCODE::STOREB2 : name = "STOREB";  layout = L2_STORE; break;

          case P2_OPCODE::IFEQ :    name = "IFEQ";    break;
          case P2_OPCODE::IFNEQ :   name = "IFNEQ";   break;
          case P2_OPCODE::IFL :     name = "IFL";     break;
          case P2_OPCODE::IFSL :    name = "IFSL";    break;
          case P2_OPCODE::IFLE :    name = "IFLE";    break;
          case P2_OPCODE::IFSLE :   name = "IFSLE";   break;
          case P2_OPCODE::IFBITS :  name = "IFBITS";  break;
          case P2_OPCODE::IFCLEAR : name = "IFCLEAR"; break;

          case P2_OPCODE::JMP2 :    name = "JMP";     layout = L2_JUMP;  break;
          case P2_OPCODE::CALL2 :   name = "CALL";    layout = L2_JUMP;  break;

          default:
            break; // unknown opcode: "????" with the plain layout
        }

        if (layout == L2_LOAD) {
          if (literal)
            snprintf(buf, BUF_SIZE, "%s %%r%u, [0x%08X]", name, rd, rn);
          else
            snprintf(buf, BUF_SIZE, "%s %%r%u, [%%r%u]", name, rd, rn);
        } else if (layout == L2_STORE) {
          if (literal)
            snprintf(buf, BUF_SIZE, "%s [0x%08X], %%r%u", name, rn, rd);
          else
            snprintf(buf, BUF_SIZE, "%s [%%r%u], %%r%u", name, rn, rd);
        } else if (layout == L2_JUMP) {
          if (literal)
            snprintf(buf, BUF_SIZE, "%s %%r%u + 0x%08X", name, rd, rn);
          else
            snprintf(buf, BUF_SIZE, "%s %%r%u + %%r%u", name, rd, rn);
        } else {
          if (literal)
            snprintf(buf, BUF_SIZE, "%s %%r%u, 0x%08X", name, rd, rn);
          else
            snprintf(buf, BUF_SIZE, "%s %%r%u, %%r%u", name, rd, rn);
        }

      } else if (IS_P1(inst)) {
        // 1 parameter instruction ********************************************
        // Fetch Rn operand
        if (literal) {
          rn = LIT23(inst);
          if (IS_BIG_LITERAL_L23(rn)) { // Next dword is literal value
            rn = vc.ReadDW(pc);
            pc +=4;
          } else if (RN_SIGN_BIT(inst)) { // Negative Literal -> Extend sign
            rn |= 0xFF800000;
          }
        } else {
          rn = GRD(inst);
        }

        // L1_REG_ONLY: the literal form is printed with a '?' after the
        //              mnemonic (XCHGB, XCHGW, GETPC, POP)
        // L1_PC_REL:   operand is printed as an offset from %pc
        // L1_INT:      literal is printed as hexadecimal with an 'h' suffix
        enum Layout1 { L1_PLAIN, L1_REG_ONLY, L1_PC_REL, L1_INT };
        Layout1 layout = L1_PLAIN;

        switch (opcode) {
          case P1_OPCODE::XCHGB : name = "XCHGB"; layout = L1_REG_ONLY; break;
          case P1_OPCODE::XCHGW : name = "XCHGW"; layout = L1_REG_ONLY; break;
          case P1_OPCODE::GETPC : name = "GETPC"; layout = L1_REG_ONLY; break;
          case P1_OPCODE::POP :   name = "POP";   layout = L1_REG_ONLY; break;

          case P1_OPCODE::PUSH :  name = "PUSH";  break;
          case P1_OPCODE::JMP :   name = "JMP";   break;
          case P1_OPCODE::CALL :  name = "CALL";  break;

          case P1_OPCODE::RJMP :  name = "JMP";   layout = L1_PC_REL;   break;
          case P1_OPCODE::RCALL : name = "CALL";  layout = L1_PC_REL;   break;

          case P1_OPCODE::INT :   name = "INT";   layout = L1_INT;      break;

          default:
            break; // unknown opcode: "????" with the plain layout
        }

        if (!literal) {
          // register form differs only for the pc-relative jumps
          if (layout == L1_PC_REL)
            snprintf(buf, BUF_SIZE, "%s %%pc +%%r%u", name, rn);
          else
            snprintf(buf, BUF_SIZE, "%s %%r%u", name, rn);
        } else if (layout == L1_REG_ONLY) {
          snprintf(buf, BUF_SIZE, "%s? 0x%08X", name, rn);
        } else if (layout == L1_PC_REL) {
          snprintf(buf, BUF_SIZE, "%s %%pc +0x%08X", name, rn);
        } else if (layout == L1_INT) {
          snprintf(buf, BUF_SIZE, "%s %08Xh", name, rn);
        } else {
          snprintf(buf, BUF_SIZE, "%s 0x%08X", name, rn);
        }

      } else {
        // 0 parameter instruction ********************************************

        switch (opcode) {
          case NP_OPCODE::SLEEP : name = "SLEEP"; break;
          case NP_OPCODE::RET :   name = "RET";   break;
          case NP_OPCODE::RFI :   name = "RFI";   break;

          default:
            break; // unknown opcode: "????"
        }
        snprintf(buf, BUF_SIZE, "%s", name);
      }


      std::string out(buf);
      return out;

#undef BUF_SIZE
    }
Ejemplo n.º 5
0
/*
   KSPLGMRESUpdateHessenberg - Brings column `it` of the Hessenberg matrix up
   to date: the stored plane rotations are applied to it, and unless a happy
   breakdown was flagged one new rotation is computed and applied to both the
   column and the right-hand side GRS.

   Input:
     ksp    - solver context; ksp->data is used as the KSP_LGMRES work area
     it     - index of the column being updated
     hapend - PETSC_TRUE when the caller detected a happy breakdown

   Output:
     res    - |GRS(it+1)| after the new rotation, or 0.0 on happy breakdown.
              Not written when the rotation norm is exactly zero; ksp->reason
              is set to KSP_DIVERGED_NULL in that case.
*/
static PetscErrorCode KSPLGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
{
  /* NOTE(review): the HH/CC/SS/GRS macros presumably expand against a local
     named `lgmres`, so this name must not change - confirm in the header */
  KSP_LGMRES  *lgmres = (KSP_LGMRES*)(ksp->data);
  PetscScalar *entry,*cosine,*sine;
  PetscScalar above,length;
  PetscInt    remaining;

  PetscFunctionBegin;
  entry  = HH(0,it);   /* top of the column being updated; advancing the
                          pointer walks down that column */
  cosine = CC(0);      /* first stored cosine */
  sine   = SS(0);      /* first stored sine */

  /* Apply the `it` existing rotations, each of the form
     [conj(c)  s ; -s  c], to consecutive pairs of column entries */
  remaining = it;
  while (remaining-- > 0) {
    above    = *entry;
    *entry   = PetscConj(*cosine) * above + *sine * entry[1];
    entry[1] = *cosine * entry[1] - (*sine * above);
    entry++;
    cosine++;
    sine++;
  }
  /* entry now addresses HH(it,it) and entry[1] addresses HH(it+1,it);
     cosine and sine address the slots for the new rotation */

  if (hapend) {
    /* happy breakdown: HH(it+1,it) is treated as zero, therefore no further
       rotation is applied and the right-hand side does not change.  The
       residual is simply reported as zero. */
    *res = 0.0;
    PetscFunctionReturn(0);
  }

  /* compute the new plane rotation */
  length = PetscSqrtScalar(PetscConj(*entry) * *entry + PetscConj(entry[1]) * entry[1]);
  if (length == 0.0) {
    ksp->reason = KSP_DIVERGED_NULL;
    PetscFunctionReturn(0);
  }
  *cosine = *entry / length;      /* new cosine value */
  *sine   = entry[1] / length;    /* new sine value */

  /* apply it to the right-hand side (GRS(it+1) must be formed from the old
     GRS(it)) and then to the column */
  *GRS(it+1) = -(*sine * *GRS(it));
  *GRS(it)   = PetscConj(*cosine) * *GRS(it);
  *entry     = PetscConj(*cosine) * *entry + *sine * entry[1];

  /* residual is the last element (it+1) of the right-hand side */
  *res = PetscAbsScalar(*GRS(it+1));
  PetscFunctionReturn(0);
}
Ejemplo n.º 6
0
/*
   KSPLGMRESBuildSoln - Forms the current LGMRES solution estimate from the
   (already triangularized) Hessenberg system and the stored direction vectors.

   Input Parameters:
     nrs    - array that receives the coefficients of the correction; entries
              0..it are written, so it must hold at least (it+1) scalars.
              KSPLGMRESCycle() passes GRS(0) here, i.e. the right-hand side is
              overwritten by the coefficients.
     vguess - vector the correction is added to
     ksp    - solver context; ksp->data is used as a KSP_LGMRES
     it     - index of the last completed step, i.e. (it+1) steps have been
              performed.  A negative value means that no step was performed.

   Output Parameter:
     vdest  - receives vguess plus the correction.  May be the same vector as
              vguess (KSPLGMRESCycle() passes ksp->vec_sol for both).

   Side effects (only when it >= 0):
     - lgmres->matvecs is increased by the number of Arnoldi steps counted here
     - VEC_TEMP and VEC_TEMP_MATOP are used as work vectors
     - AUG_TEMP receives a copy of the correction as it is BEFORE the
       preconditioner is unwound; KSPLGMRESCycle() uses it as the next
       augmentation vector

   Errors:
     PETSC_ERR_CONV_FAILED if HH(it,it) is exactly zero.  The remaining
     diagonal entries HH(k,k), k < it, are NOT checked before dividing.
*/
static PetscErrorCode KSPLGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
{
  PetscScalar    tt;
  PetscErrorCode ierr;
  PetscInt       ii,k,j;
  KSP_LGMRES     *lgmres = (KSP_LGMRES*)(ksp->data);
  /* LGMRES_MOD */
  PetscInt       it_arnoldi,it_aug;  /* number of Arnoldi steps / augmentation steps among the (it+1) steps */
  PetscInt       jj,spot = 0;        /* spot: slot of the augmentation vector currently being added */

  PetscFunctionBegin;
  /* Solve for solution vector that minimizes the residual */

  /* If it is < 0, no lgmres steps have been performed: the result is just the guess */
  if (it < 0) {
    ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */
    PetscFunctionReturn(0);
  }

  /* so (it+1) lgmres steps HAVE been performed */

  /* LGMRES_MOD - determine if we need to use augvecs for the soln - do not assume that
     this is called after the total its allowed for an approx space */
  if (lgmres->approx_constant) {
    it_arnoldi = lgmres->max_k - lgmres->aug_ct;
  } else {
    it_arnoldi = lgmres->max_k - lgmres->aug_dim;
  }
  if (it_arnoldi >= it+1) {
    /* every step taken so far was an Arnoldi step */
    it_aug     = 0;
    it_arnoldi = it+1;
  } else {
    /* the steps beyond the Arnoldi ones used augmentation vectors */
    it_aug = (it+1) - it_arnoldi;
  }

  /* now it_arnoldi indicates the number of matvecs that took place */
  lgmres->matvecs += it_arnoldi;

  /* solve the upper triangular system - GRS is the right side and HH is
     the upper triangular matrix - put soln in nrs.
     SETERRQ2() returns from this function, so past this line HH(it,it) is
     known to be nonzero and no further test of it is needed. */
  if (*HH(it,it) == 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar(*GRS(it)));
  nrs[it] = *GRS(it) / *HH(it,it);

  /* back substitution for rows it-1 down to 0; GRS(k) is read before nrs[k]
     is written, which keeps this correct when nrs aliases GRS */
  for (ii=1; ii<=it; ii++) {
    k  = it - ii;
    tt = *GRS(k);
    for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
    nrs[k] = tt / *HH(k,k);
  }

  /* Accumulate the correction to the soln of the preconditioned prob. in VEC_TEMP */
  ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP components to 0 */

  /* LGMRES_MOD - if augmenting has happened we need to form the solution
     using the augvecs */
  if (!it_aug) { /* all its are from arnoldi */
    ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));CHKERRQ(ierr);
  } else { /* use aug vecs */
    /* first do regular krylov directions */
    ierr = VecMAXPY(VEC_TEMP,it_arnoldi,nrs,&VEC_VV(0));CHKERRQ(ierr);

    /* now add augmented portions - add contribution of aug vectors one at a time */
    for (ii=0; ii<it_aug; ii++) {
      /* find the slot whose order equals (ii+1); if none matches, spot keeps
         its previous value */
      for (jj=0; jj<lgmres->aug_dim; jj++) {
        if (lgmres->aug_order[jj] == (ii+1)) {
          spot = jj;
          break; /* must have this because there will be duplicates before aug_ct = aug_dim */
        }
      }
      ierr = VecAXPY(VEC_TEMP,nrs[it_arnoldi+ii],AUGVEC(spot));CHKERRQ(ierr);
    }
  }
  /* now VEC_TEMP is what we want to keep for augmenting purposes - grab before the
     preconditioner is "unwound" from right-preconditioning */
  ierr = VecCopy(VEC_TEMP, AUG_TEMP);CHKERRQ(ierr);

  ierr = KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);CHKERRQ(ierr);

  /* add solution to previous solution */
  /* put updated solution into vdest. */
  ierr = VecCopy(vguess,vdest);CHKERRQ(ierr);
  ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Ejemplo n.º 7
0
/*
   KSPLGMRESCycle - Runs one LGMRES cycle (the iterations between two restarts).

   Input Parameter:
     ksp     - solver context; ksp->data is used as a KSP_LGMRES.  On entry
               VEC_VV(0) must hold the initial residual of this cycle.

   Output Parameter:
     itcount - if not NULL, receives the number of directions generated in
               this cycle (0 when the residual norm is zero on entry)

   What the routine does, in order:
     - normalizes VEC_VV(0) and stores its norm in GRS(0)
     - loops while ksp->reason is unset, fewer than it_total directions exist
       and ksp->its < ksp->max_it; each pass produces one new direction (an
       Arnoldi step or a stored A*augvec), orthogonalizes it, updates the
       Hessenberg system and evaluates the convergence test
     - calls KSPLGMRESBuildSoln() to update ksp->vec_sol in place
     - if the cycle ended without a reason being set, ksp->its < max_it and
       aug_dim > 0, stores the new augmentation vector and A times it for use
       after the restart

   ksp->its, ksp->rnorm, ksp->reason and lgmres->it are updated along the way.
*/
PetscErrorCode KSPLGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_LGMRES     *lgmres = (KSP_LGMRES*)(ksp->data);
  PetscReal      res_norm, res;
  PetscReal      hapbnd, tt;
  PetscScalar    tmp;
  PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
  PetscErrorCode ierr;
  PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
  PetscInt       max_k  = lgmres->max_k; /* max approx space size */
  PetscInt       max_it = ksp->max_it;  /* max # of overall iterations for the method */

  /* LGMRES_MOD - new variables */
  PetscInt    aug_dim = lgmres->aug_dim;
  PetscInt    spot    = 0;               /* slot of the augmentation vector being used or replaced */
  PetscInt    order   = 0;               /* 1-based index of the current augmentation step */
  PetscInt    it_arnoldi;                /* number of arnoldi steps to take */
  PetscInt    it_total;                  /* total number of its to take (=approx space size) */
  PetscInt    ii, jj;
  PetscReal   tmp_norm;                  /* norm of the new augmentation vector */
  PetscScalar inv_tmp_norm;              /* 1/tmp_norm, used to scale AUGVEC and A_AUGVEC */
  PetscScalar *avec;                     /* work array (lgmres->hwork) holding H*y */

  PetscFunctionBegin;
  /* Number of pseudo iterations since last restart is the number
     of prestart directions */
  loc_it = 0;

  /* LGMRES_MOD: determine number of arnoldi steps to take */
  /* if approx_constant then we keep the space the same size even if
     we don't have the full number of aug vectors yet */
  if (lgmres->approx_constant) it_arnoldi = max_k - lgmres->aug_ct;
  else it_arnoldi = max_k - aug_dim;

  it_total =  it_arnoldi + lgmres->aug_ct;

  /* initial residual is in VEC_VV(0)  - compute its norm */
  ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
  res  = res_norm;

  /* first entry in right-hand-side of hessenberg system is just
     the initial residual norm */
  *GRS(0) = res_norm;

  /* check for the convergence: an exactly zero residual on entry ends the
     cycle immediately with KSP_CONVERGED_ATOL */
  if (!res) {
    if (itcount) *itcount = 0;
    ksp->reason = KSP_CONVERGED_ATOL;

    ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* scale VEC_VV (the initial residual) to unit norm */
  tmp = 1.0/res_norm; ierr = VecScale(VEC_VV(0),tmp);CHKERRQ(ierr);

  ksp->rnorm = res;


  /* note: (lgmres->it) is always set one less than (loc_it). It is used in
     KSPBUILDSolution_LGMRES, where it is passed to KSPLGMRESBuildSoln.
     Note that when KSPLGMRESBuildSoln is called from this function,
     (loc_it -1) is passed, so the two are equivalent */
  lgmres->it = (loc_it - 1);


  /* MAIN ITERATION LOOP BEGINNING */


  /* keep iterating until we have converged OR generated the max number
     of directions OR reached the max number of iterations for the method */
  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

  while (!ksp->reason && loc_it < it_total && ksp->its < max_it) { /* LGMRES_MOD: changed to it_total */
    ierr       = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
    lgmres->it = (loc_it - 1);
    ierr       = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);

    /* see if more space is needed for work vectors */
    if (lgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
      ierr = KSPLGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
      /* (loc_it+1) is passed in as number of the first vector that should
          be allocated */
    }

    /* LGMRES_MOD: decide whether this is an arnoldi step or an aug step */
    if (loc_it < it_arnoldi) { /* Arnoldi: new direction is the (preconditioned) operator applied to VEC_VV(loc_it) */
      ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(loc_it),VEC_VV(1+loc_it),VEC_TEMP_MATOP);CHKERRQ(ierr);
    } else { /* aug step: new direction is a stored A*augvec */
      order = loc_it - it_arnoldi + 1; /* which aug step */
      /* find the slot with this order; if none matches, spot keeps its previous value */
      for (ii=0; ii<aug_dim; ii++) {
        if (lgmres->aug_order[ii] == order) {
          spot = ii;
          break; /* must have this because there will be duplicates before aug_ct = aug_dim */
        }
      }

      ierr = VecCopy(A_AUGVEC(spot), VEC_VV(1+loc_it));CHKERRQ(ierr);
      /* note: an alternate implementation choice would be to only save the AUGVECS and
        not A_AUGVEC and then apply the PC here to the augvec */
    }

    /* update hessenberg matrix and do Gram-Schmidt - new direction is in
       VEC_VV(1+loc_it) */
    ierr = (*lgmres->orthog)(ksp,loc_it);CHKERRQ(ierr);

    /* new entry in Hessenberg is the 2-norm of our new direction */
    ierr = VecNorm(VEC_VV(loc_it+1),NORM_2,&tt);CHKERRQ(ierr);

    /* store it both in HH (which gets rotated) and in HES (read again below
       when A*augvec is formed) */
    *HH(loc_it+1,loc_it)  = tt;
    *HES(loc_it+1,loc_it) = tt;


    /* check for the happy breakdown: the threshold is the norm relative to
       GRS(loc_it), capped at lgmres->haptol */
    hapbnd = PetscAbsScalar(tt / *GRS(loc_it)); /* GRS(loc_it) contains the res_norm from the last iteration  */
    if (hapbnd > lgmres->haptol) hapbnd = lgmres->haptol;
    if (tt > hapbnd) {
      tmp  = 1.0/tt;
      ierr = VecScale(VEC_VV(loc_it+1),tmp);CHKERRQ(ierr); /* scale new direction by its norm */
    } else {
      ierr   = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);CHKERRQ(ierr);
      hapend = PETSC_TRUE;
    }

    /* Now apply rotations to new col of hessenberg (and right side of system),
       calculate new rotation, and get new residual norm at the same time */
    ierr = KSPLGMRESUpdateHessenberg(ksp,loc_it,hapend,&res);CHKERRQ(ierr);
    /* the update may set ksp->reason; in that case leave before counting this step */
    if (ksp->reason) break;

    loc_it++;
    lgmres->it = (loc_it-1);   /* Add this here in case it has converged */

    ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its++;
    ksp->rnorm = res;
    ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

    /* Catch error in happy breakdown and signal convergence and break from loop:
       a happy breakdown that the convergence test did not accept is either an
       error (errorifnotconverged) or reported as KSP_DIVERGED_BREAKDOWN */
    if (hapend) {
      if (!ksp->reason) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }
    }
  }
  /* END OF ITERATION LOOP */
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);

  /* Monitor if we know that we will not return for a restart */
  if (ksp->reason || ksp->its >= max_it) {
    ierr = KSPMonitor(ksp, ksp->its, res);CHKERRQ(ierr);
  }

  if (itcount) *itcount = loc_it;

  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */

  /* Form the solution (or the solution so far) */
  /* Note: must pass in (loc_it-1) for iteration count so that KSPLGMRESBuildSoln
     properly navigates.  GRS(0) is passed as the coefficient array, so after
     this call the coefficients y overwrite the leading entries of GRS. */

  ierr = KSPLGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);


  /* LGMRES_MOD collect aug vector and A*augvector for future restarts -
     only if we will be restarting (i.e. this cycle performed it_total
     iterations)  */
  if (!ksp->reason && ksp->its < max_it && aug_dim > 0) {

    /* AUG_TEMP contains the new augmentation vector (assigned in  KSPLGMRESBuildSoln) */
    /* choose the slot: the next free one while aug_ct < aug_dim, otherwise
       the slot whose order equals aug_dim is overwritten */
    if (!lgmres->aug_ct) {
      spot = 0;
      lgmres->aug_ct++;
    } else if (lgmres->aug_ct < aug_dim) {
      spot = lgmres->aug_ct;
      lgmres->aug_ct++;
    } else { /* truncate */
      for (ii=0; ii<aug_dim; ii++) {
        if (lgmres->aug_order[ii] == aug_dim) spot = ii;
      }
    }



    ierr = VecCopy(AUG_TEMP, AUGVEC(spot));CHKERRQ(ierr);
    /* need to normalize */
    ierr = VecNorm(AUGVEC(spot), NORM_2, &tmp_norm);CHKERRQ(ierr);

    /* NOTE(review): tmp_norm is not checked for zero before the division */
    inv_tmp_norm = 1.0/tmp_norm;

    ierr = VecScale(AUGVEC(spot),inv_tmp_norm);CHKERRQ(ierr);

    /* set new aug vector to order 1  - move all others back one */
    for (ii=0; ii < aug_dim; ii++) AUG_ORDER(ii)++;
    AUG_ORDER(spot) = 1;

    /* now add the A*aug vector to A_AUGVEC(spot)  - this is independent of preconditioning type */
    /* want V*H*y - y is in GRS, V is in VEC_VV and H is in HES */


    /* first do H+*y; the bound jj <= ii+1 restricts each column to the entries
       on or above the first subdiagonal */
    /* NOTE(review): ii runs up to and including it_total, so column it_total
       of HES and GRS(it_total) are read as well - confirm this is intended */
    avec = lgmres->hwork;
    ierr = PetscMemzero(avec,(it_total+1)*sizeof(*avec));CHKERRQ(ierr);
    for (ii=0; ii < it_total + 1; ii++) {
      for (jj=0; jj <= ii+1 && jj < it_total+1; jj++) {
        avec[jj] += *HES(jj ,ii) * *GRS(ii);
      }
    }

    /* now multiply result by V+ */
    ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);
    ierr = VecMAXPY(VEC_TEMP, it_total+1, avec, &VEC_VV(0));CHKERRQ(ierr); /* answer is in VEC_TEMP */

    /* copy answer to aug location and scale by the same factor used for AUGVEC(spot) */
    ierr = VecCopy(VEC_TEMP,  A_AUGVEC(spot));CHKERRQ(ierr);
    ierr = VecScale(A_AUGVEC(spot),inv_tmp_norm);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}