Beispiel #1
0
/*
 * Insert `key` into the AVL subtree rooted at `n`.
 *
 * Returns the root of the subtree after insertion; this may be a different
 * node than `n` when a rotation was needed. Keys that are not smaller than
 * the current node's key descend to the right, so duplicates land in the
 * right subtree.
 */
static struct avl_node *_avlInsert(struct avl_node *n, int key)
{
    int balance;

    /* Empty slot reached: the new key becomes a fresh node here. */
    if(!n) return _avlNewNode(key);

    if(key >= K(n)) {
        R(n) = _avlInsert(R(n), key);
    }
    else {
        L(n) = _avlInsert(L(n), key);
    }

    /* A child subtree changed, so refresh the cached height of this node. */
    H(n) = MAX(HH(n->left), HH(n->right)) + 1;
    balance = BF(n);

    if(balance < -1) {
        /* RL case: first straighten the right child, then fall into RR. */
        if(key < K(R(n))) {
            R(n) = _rightRotate(R(n));
        }
        return _leftRotate(n);
    }

    if(balance > 1) {
        /* LR case: first straighten the left child, then fall into LL. */
        if(key > K(L(n))) {
            L(n) = _leftRotate(L(n));
        }
        return _rightRotate(n);
    }

    /* Balance factor within [-1, 1]: nothing to rotate. */
    return n;
}
Beispiel #2
0
/*
 * Rotate the subtree rooted at `n` to the left and return its new root
 * (the former right child of `n`). The caller must ensure `n` has a right
 * child; it is dereferenced unconditionally.
 */
static struct avl_node *_leftRotate(struct avl_node *n)
{
    struct avl_node *pivot = R(n);
    struct avl_node *inner = L(pivot);

    /* `n` drops below the pivot and adopts the pivot's former left subtree. */
    L(pivot) = n;
    R(n) = inner;

    /* `n` is now a child of `pivot`, so its height has to be refreshed first. */
    H(n) = MAX(HH(n->left), HH(n->right)) + 1;
    H(pivot) = MAX(HH(pivot->left), HH(pivot->right)) + 1;

    return pivot;
}
Beispiel #3
0
/*
 * Remove `key` from the AVL subtree rooted at `n`.
 *
 * Returns the root of the subtree after removal and rebalancing, or NULL if
 * the subtree became (or already was) empty. A key that is not present
 * leaves the tree contents unchanged.
 */
static struct avl_node *_avl_delete(struct avl_node *n, int key)
{
    if(!n) return NULL;

    if(key < K(n)) L(n) = _avl_delete(L(n), key);
    else if(key > K(n)) R(n) = _avl_delete(R(n), key);
    else { // key == K(n)
        if(NULL == L(n) || NULL == R(n)) {
            struct avl_node *tmp = L(n) ? L(n) : R(n);
            if(tmp) {
                // Single child: pull the child's contents up into n and
                // release the now-redundant child node.
                *n = *tmp;
            }
            else { // n is a leaf
                tmp = n;
                n = NULL;
            }
            free(tmp);
        }
        else { // two children case
            // Replace n's key with its in-order successor, i.e. the smallest
            // key of the RIGHT subtree, then remove that successor from the
            // right subtree. (Assumes _avl_minimum returns the leftmost node
            // of the subtree it is given -- confirm against its definition.)
            struct avl_node *d = _avl_minimum(R(n));
            K(n) = K(d);
            R(n) = _avl_delete(R(n), K(d));
        }

    }
    // The removed node was a leaf: nothing left to rebalance here.
    if(!n) return NULL;

    H(n) = MAX(HH(n->left), HH(n->right)) + 1;

    int bf = BF(n);

    if(bf > 1) {
        if(0 > BF(L(n))) { // LR case
            L(n) = _leftRotate(L(n));
        }
        // else LL case
        return _rightRotate(n);
    }
    if(bf < -1) {
        if(0 < BF(R(n))) { // RL case
            R(n) = _rightRotate(R(n));
        }
        // else RR case
        return _leftRotate(n);
    }

    return n;
}
Beispiel #4
0
/*
   KSPGMRESBuildSoln - back-substitutes the upper triangular system held in HH
   (right-hand side GRS) into nrs, combines the basis vectors VEC_VV with those
   coefficients, unwinds the preconditioner and adds the result to vs, storing
   the sum in vdest.

   If it < 0 no steps were performed and vs is simply copied to vdest. A zero
   diagonal entry sets ksp->reason to KSP_DIVERGED_BREAKDOWN and returns
   without touching vdest.
*/
static PetscErrorCode KSPGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
{
  KSP_GMRES      *gmres = (KSP_GMRES*)(ksp->data);
  PetscErrorCode ierr;
  PetscInt       row,col;
  PetscScalar    acc;

  PetscFunctionBegin;
  /* No GMRES steps performed: the answer is just the starting vector */
  if (it < 0) {
    ierr = VecCopy(vs,vdest);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Last unknown first; a zero pivot means the triangular system is singular */
  if (*HH(it,it) == 0.0) {
    ksp->reason = KSP_DIVERGED_BREAKDOWN;

    ierr = PetscInfo2(ksp,"Likely your matrix or preconditioner is singular. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar(*GRS(it)));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  nrs[it] = *GRS(it) / *HH(it,it);

  /* Remaining unknowns, from row it-1 up to row 0 */
  for (row=it-1; row>=0; row--) {
    acc = *GRS(row);
    for (col=row+1; col<=it; col++) acc -= *HH(row,col) * nrs[col];
    if (*HH(row,row) == 0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;

      ierr = PetscInfo1(ksp,"Likely your matrix or preconditioner is singular. HH(k,k) is identically zero; k = %D",row);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    nrs[row] = acc / *HH(row,row);
  }

  /* VEC_TEMP <- sum_j nrs[j] * VEC_VV(j): correction for the preconditioned problem */
  ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);
  ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));CHKERRQ(ierr);

  ierr = KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);CHKERRQ(ierr);

  /* vdest <- vs + VEC_TEMP (the copy is skipped when both are the same vector) */
  if (vdest != vs) {
    ierr = VecCopy(vs,vdest);CHKERRQ(ierr);
  }
  ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #5
0
/*
   KSPFGMRESBuildSoln - back-substitutes the upper triangular system held in HH
   (right-hand side RS) into nrs, combines the vectors PREVEC with those
   coefficients and stores vguess plus that correction in vdest.

   If it < 0 no steps were performed and vguess is simply copied to vdest.
*/
static PetscErrorCode KSPFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
{
  KSP_FGMRES     *fgmres = (KSP_FGMRES*)(ksp->data);
  PetscErrorCode ierr;
  PetscInt       row,col;
  PetscScalar    acc;

  PetscFunctionBegin;
  /* No FGMRES steps performed: the answer is just the guess */
  if (it < 0) {
    ierr = VecCopy(vguess,vdest);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Last unknown first; a zero pivot yields a zero coefficient */
  if (*HH(it,it) == 0.0) {
    nrs[it] = 0.0;
  } else {
    nrs[it] = *RS(it) / *HH(it,it);
  }

  /* Remaining unknowns, from row it-1 up to row 0 */
  for (row=it-1; row>=0; row--) {
    acc = *RS(row);
    for (col=row+1; col<=it; col++) acc -= *HH(row,col) * nrs[col];
    nrs[row] = acc / *HH(row,row);
  }

  /* VEC_TEMP <- sum_j nrs[j] * PREVEC(j), i.e. built from the preconditioned vectors */
  ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);
  ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr);

  /* vdest <- vguess + VEC_TEMP */
  if (vdest == vguess) {
    /* update the guess in place */
    ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(VEC_TEMP,vdest);CHKERRQ(ierr);
    ierr = VecAXPY(vdest,1.0,vguess);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Beispiel #6
0
/*
   KSPPGMRESBuildSoln - back-substitutes the upper triangular system held in HH
   (right-hand side RS) into nrs, combines the basis vectors VEC_VV with those
   coefficients, unwinds the preconditioner and stores vguess plus that
   correction in vdest.

   If it < 0 no steps were performed and vguess is simply copied to vdest.
*/
static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
{
  KSP_PGMRES     *pgmres = (KSP_PGMRES*)(ksp->data);
  PetscErrorCode ierr;
  PetscInt       row,col;
  PetscScalar    acc;

  PetscFunctionBegin;
  /* No PGMRES steps performed: the answer is just the guess */
  if (it < 0) {
    ierr = VecCopy(vguess,vdest);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Last unknown first; a zero pivot yields a zero coefficient */
  if (*HH(it,it) == 0.0) {
    nrs[it] = 0.0;
  } else {
    nrs[it] = *RS(it) / *HH(it,it);
  }

  /* Remaining unknowns, from row it-1 up to row 0 */
  row = it-1;
  while (row >= 0) {
    acc = *RS(row);
    for (col=row+1; col<=it; col++) acc = acc - *HH(row,col) * nrs[col];
    nrs[row] = acc / *HH(row,row);
    row--;
  }

  /* VEC_TEMP <- sum_j nrs[j] * VEC_VV(j): correction for the preconditioned problem */
  ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr);
  ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));CHKERRQ(ierr);
  ierr = KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);CHKERRQ(ierr);

  /* vdest <- vguess + VEC_TEMP */
  if (vdest != vguess) {
    ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr);
  } else {
    ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Beispiel #7
0
/*
   KSPGMRESUpdateHessenberg - applies the stored plane rotations (CC/SS) to
   column `it` of HH, then, unless `hapend` is set, computes one more rotation,
   applies it to GRS(it)/GRS(it+1) and to the column, and reports
   |GRS(it+1)| through *res.

   With `hapend` set, *res is set to 0.0 and no new rotation is computed.
   If the new rotation would have zero norm, ksp->reason is set to
   KSP_DIVERGED_NULL and the function returns without writing *res.
*/
static PetscErrorCode KSPGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
{
  PetscScalar *hh,*cc,*ss,tt;
  PetscInt    j;
  KSP_GMRES   *gmres = (KSP_GMRES*)(ksp->data);

  PetscFunctionBegin;
  /* hh walks down column `it`; cc/ss walk along the stored rotations in step with it */
  hh = HH(0,it);
  cc = CC(0);
  ss = SS(0);

  /* Apply all the previously computed plane rotations to the new column
     of the Hessenberg matrix */
  for (j=1; j<=it; j++) {
    tt  = *hh;
    *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
    hh++;
    /* cc and ss advance here, after their last use for this rotation */
    *hh = *cc++ * *hh - (*ss++ * tt);
  }

  /*
    compute the new plane rotation, and apply it to:
     1) the right-hand-side of the Hessenberg system
     2) the new column of the Hessenberg matrix
    thus obtaining the updated value of the residual
  */
  if (!hapend) {
    /* after the loop, hh points at entry `it` of the column and hh+1 at entry it+1 */
    tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
    if (tt == 0.0) {
      ksp->reason = KSP_DIVERGED_NULL;
      PetscFunctionReturn(0);
    }
    *cc        = *hh / tt;
    *ss        = *(hh+1) / tt;
    *GRS(it+1) = -(*ss * *GRS(it));
    *GRS(it)   = PetscConj(*cc) * *GRS(it);
    *hh        = PetscConj(*cc) * *hh + *ss * *(hh+1);
    *res       = PetscAbsScalar(*GRS(it+1));
  } else {
    /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
            another rotation matrix (so RH doesn't change).  The new residual is
            always the new sine term times the residual from last time (GRS(it)),
            but now the new sine rotation would be zero...so the residual should
            be zero...so we will multiply "zero" by the last residual.  This might
            not be exactly what we want to do here -could just return "zero". */

    *res = 0.0;
  }
  PetscFunctionReturn(0);
}
Beispiel #8
0
// Propagates a Gaussian wave packet on a uniform radial grid with the
// Crank-based kinetic propagator and writes r*|phi| to "out1Uniform.txt"
// every 10th time step. The norm and its drift (1 - norm) are printed to
// stdout. Returns 0 on success, 1 if the output file cannot be opened.
int main()
{
	
	FILE *out1=fopen("out1Uniform.txt","w");
	if(out1==NULL)
	{
		// Without the output file every fprintf below would dereference NULL.
		perror("out1Uniform.txt");
		return 1;
	}
	
	// Parameters!!
	
	int Nr=1000;
	
	
	HankelMatrix HH(Nr,200.);
	
	waveUniform w;
	w.initialize(HH);
	
	printf("%d\n",w.Nr);
	
	// Initial state: Gaussian centred at r0 with width 0.5
	double r0=100.;
	for(int i=0;i<HH.Nr;i++)
	{
		w.phi[i]=exp(-(w.r[i]-r0)*(w.r[i]-r0)/0.5/0.5);
	}
	
	// Print the norm before and after normalisation
	printf("%e\n",w.norm());
	w.normalize();
	printf("%e\n",w.norm());
	
	
	double dt=0.005;
	w.PrepareCrankArrays(dt);
	for (int ktime=0; ktime<1000; ktime++)
	{
		w.KineticPropCrankUniform(dt);
		
		if((ktime%10)==0)
		{
			for (int i=0; i<HH.Nr; i++)
				fprintf(out1,"%10.17e \n", w.r[i]*abs(w.phi[i]) ); // wave function magnitude multiplied by the radial coordinate
		}
		// Deviation of the norm from 1 after this step
		printf("%e\n",1.-w.norm());
	}
	
	// Flush and release the output file; it was previously left open.
	fclose(out1);
	
	return 0;
}
//using std;
// Minimal driver: sets up the size parameters and constructs a HankelMatrix
// from N and R. Nothing else is computed.
int main()
{
	// Size parameters (50 was the alternative value noted for both counts)
	int N=1024;
	int MM=12;

	double R=.05;

	// Parameters for BLAS: leading dimensions
	int lda=N, ldb=1, ldc=1;

	HankelMatrix HH(N,R);
}
Beispiel #10
0
/*
   KSPGMRESModifiedGramSchmidtOrthogonalization - orthogonalizes VEC_VV(it+1)
   against VEC_VV(0..it) one vector at a time. Each dot product is stored in
   column `it` of HH and copied into the same position of HES before the
   corresponding component is subtracted from VEC_VV(it+1).
*/
PetscErrorCode  KSPGMRESModifiedGramSchmidtOrthogonalization(KSP ksp,PetscInt it)
{
  KSP_GMRES      *gmres = (KSP_GMRES*)(ksp->data);
  PetscErrorCode ierr;
  PetscInt       j;
  PetscScalar    *hcol,*hescol;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(KSP_GMRESOrthogonalization,ksp,0,0,0);CHKERRQ(ierr);

  /* Start of column `it` in the working and the saved Hessenberg matrices */
  hcol   = HH(0,it);
  hescol = HES(0,it);

  for (j=0; j<=it; j++) {
    /* hcol[j] <- (vv(it+1), vv(j)) */
    ierr = VecDot(VEC_VV(it+1),VEC_VV(j),&hcol[j]);CHKERRQ(ierr);
    KSPCheckDot(ksp,hcol[j]);
    hescol[j] = hcol[j];
    /* vv(it+1) <- vv(it+1) - hcol[j] vv(j) */
    ierr = VecAXPY(VEC_VV(it+1),-hcol[j],VEC_VV(j));CHKERRQ(ierr);
  }

  ierr = PetscLogEventEnd(KSP_GMRESOrthogonalization,ksp,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #11
0
// On day-of-week 5, places a pending long entry one PIP above HH(10) when the
// 10-period SMA is above the 40-period SMA, or a pending short entry one PIP
// below LL(10) when it is below -- only if no trade of that direction is open
// or pending. On days 1..4 all positions are closed.
// NOTE(review): day 5 is presumably Friday and 6/7 the weekend -- confirm
// against the platform's dow() numbering.
function tradeOneNightStand() {

        vars Price = series(price());
        vars SMA10 = series(SMA(Price, 10));
        vars SMA40 = series(SMA(Price, 40));

        // A fixed stop (Stop = 3 * 90 * PIP) is left disabled.

        // Entry levels just outside the range reported by HH(10) / LL(10)
        var BuyStop = HH(10) + 1*PIP;
        var SellStop = LL(10) - 1*PIP;

        if (dow() == 5) {
                if (NumOpenLong == 0 && NumPendingLong == 0 && SMA10[0] > SMA40[0]) {
                        enterLong(0,BuyStop);
                }
                else if (NumOpenShort == 0 && NumPendingShort == 0 && SMA10[0] < SMA40[0]) {
                        enterShort(0,SellStop);
                }
        }

        // Close everything on any day other than 5, 6 or 7
        if (!(dow() == 5 || dow() == 6 || dow() == 7)) {
                exitLong();
                exitShort();
        }

}
Beispiel #12
0
/*
 * Run the MD4 compression function over one block.
 *
 * state: the four 32-bit chaining words; updated in place by adding the
 *        result of the three rounds.
 * block: the message block as MD4_BLOCK_SIZE / 4 32-bit words, already
 *        decoded by the caller.
 *
 * The working registers are uint32_t, matching `state` and `block`, so the
 * arithmetic does not depend on the platform's width of `unsigned`.
 */
void md4_process_block(uint32_t state[4], const uint32_t block[MD4_BLOCK_SIZE / 4])
{
    uint32_t a, b, c, d;
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];

    /* Round 1: words in natural order, shifts 3/7/11/19 */
    FF(a, b, c, d, block[0], 3);   /* 1 */
    FF(d, a, b, c, block[1], 7);   /* 2 */
    FF(c, d, a, b, block[2], 11);  /* 3 */
    FF(b, c, d, a, block[3], 19);  /* 4 */
    FF(a, b, c, d, block[4], 3);   /* 5 */
    FF(d, a, b, c, block[5], 7);   /* 6 */
    FF(c, d, a, b, block[6], 11);  /* 7 */
    FF(b, c, d, a, block[7], 19);  /* 8 */
    FF(a, b, c, d, block[8], 3);   /* 9 */
    FF(d, a, b, c, block[9], 7);   /* 10 */
    FF(c, d, a, b, block[10], 11); /* 11 */
    FF(b, c, d, a, block[11], 19); /* 12 */
    FF(a, b, c, d, block[12], 3);  /* 13 */
    FF(d, a, b, c, block[13], 7);  /* 14 */
    FF(c, d, a, b, block[14], 11); /* 15 */
    FF(b, c, d, a, block[15], 19); /* 16 */

    /* Round 2: words taken column-wise (0,4,8,12,...), shifts 3/5/9/13 */
    GG(a, b, c, d, block[0], 3);   /* 17 */
    GG(d, a, b, c, block[4], 5);   /* 18 */
    GG(c, d, a, b, block[8], 9);   /* 19 */
    GG(b, c, d, a, block[12], 13); /* 20 */
    GG(a, b, c, d, block[1], 3);   /* 21 */
    GG(d, a, b, c, block[5], 5);   /* 22 */
    GG(c, d, a, b, block[9], 9);   /* 23 */
    GG(b, c, d, a, block[13], 13); /* 24 */
    GG(a, b, c, d, block[2], 3);   /* 25 */
    GG(d, a, b, c, block[6], 5);   /* 26 */
    GG(c, d, a, b, block[10], 9);  /* 27 */
    GG(b, c, d, a, block[14], 13); /* 28 */
    GG(a, b, c, d, block[3], 3);   /* 29 */
    GG(d, a, b, c, block[7], 5);   /* 30 */
    GG(c, d, a, b, block[11], 9);  /* 31 */
    GG(b, c, d, a, block[15], 13); /* 32 */

    /* Round 3: words in the order 0,8,4,12,2,10,6,14,..., shifts 3/9/11/15 */
    HH(a, b, c, d, block[0], 3);   /* 33 */
    HH(d, a, b, c, block[8], 9);   /* 34 */
    HH(c, d, a, b, block[4], 11);  /* 35 */
    HH(b, c, d, a, block[12], 15); /* 36 */
    HH(a, b, c, d, block[2], 3);   /* 37 */
    HH(d, a, b, c, block[10], 9);  /* 38 */
    HH(c, d, a, b, block[6], 11);  /* 39 */
    HH(b, c, d, a, block[14], 15); /* 40 */
    HH(a, b, c, d, block[1], 3);   /* 41 */
    HH(d, a, b, c, block[9], 9);   /* 42 */
    HH(c, d, a, b, block[5], 11);  /* 43 */
    HH(b, c, d, a, block[13], 15); /* 44 */
    HH(a, b, c, d, block[3], 3);   /* 45 */
    HH(d, a, b, c, block[11], 9);  /* 46 */
    HH(c, d, a, b, block[7], 11);  /* 47 */
    HH(b, c, d, a, block[15], 15); /* 48 */

    /* Feed-forward: add the round output to the incoming chaining value */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
}
Beispiel #13
0
// Writes a "meta-info" object to `w`, built from fixed-offset text fields of
// the `ip_meta` buffer. Each field is copied, stripped of trailing spaces and
// emitted through data_kvp() under its field name.
//
//   w       - output stream
//   ip_meta - source buffer; offsets up to 0x80 + 128 bytes are read
//   first   - selects the separator written before the object:
//             "\n" when true, ",\n" otherwise
void parse_ip_meta(FILE* w, u8* ip_meta, bool first) 
{
	char temp[256];

	const char* chm = (const char*)ip_meta;

	// Copies `len` bytes at offset `ofs` into temp, terminates the string,
	// trims trailing spaces and emits it under the key `name`. Wrapped in
	// do/while(0) so it behaves as a single statement, with its own parameter
	// name `is_first` so it cannot be confused with the function's `first`.
	#define IP_META_FIELD(ofs, len, name, is_first) do { \
		strncpy(temp, chm+(ofs), (len)); \
		temp[(len)]=0; \
		for (int i=(len)-1; i>=0; i--) { if(temp[i]==' ') temp[i]=0; else break; } \
		data_kvp(w, #name, temp, (is_first)); \
	} while(0)
	
	fprintf(w, "%s\"meta-info\": {\n", first?"\n":",\n");

	IP_META_FIELD(0x00,  16, hardwareId, true);
	IP_META_FIELD(0x10,  16, makerId, false);
	
	IP_META_FIELD(0x80, 128, productName, false);
	IP_META_FIELD(0x4A,   6, productVersion, false);
	IP_META_FIELD(0x50,  16, releaseDate, false);
	IP_META_FIELD(0x40,  10, productId, false);
	IP_META_FIELD(0x20,  16, discId, false);
	

	IP_META_FIELD(0x30,   8, areas, false);
	IP_META_FIELD(0x38,   8, peripherals, false);
	
	IP_META_FIELD(0x60,  16, bootfile, false);
	IP_META_FIELD(0x70,  16, publisher, false);

	// The helper macro is local to this function; do not leak it into the
	// rest of the translation unit.
	#undef IP_META_FIELD

	fprintf(w, "\n}");
}
Beispiel #14
0
// Processes one 64-byte block: runs the four MD5 rounds over the current
// chaining value in m_lMD5 and adds the result back into m_lMD5.
//
//   Block - the 64 input bytes
//   error - passed through to ByteToUINT
// NOTE(review): `error` is not inspected after ByteToUINT returns, so the
// rounds run on X regardless of what the conversion reported -- confirm
// whether a failure should abort here.
void MD5::Transform(const uint8_t Block[64], int& error)
{
        // Working copies of the four chaining words
        uint32_t a = m_lMD5[0];
        uint32_t b = m_lMD5[1];
        uint32_t c = m_lMD5[2];
        uint32_t d = m_lMD5[3];

        // Convert the 64 input bytes into sixteen 32-bit words
        uint32_t X[16];
        ByteToUINT( X, Block, 64, error);

        //Round 1 Transformation
        FF (a, b, c, d, X[ 0], MD5_S11, MD5_T01);
        FF (d, a, b, c, X[ 1], MD5_S12, MD5_T02);
        FF (c, d, a, b, X[ 2], MD5_S13, MD5_T03);
        FF (b, c, d, a, X[ 3], MD5_S14, MD5_T04);
        FF (a, b, c, d, X[ 4], MD5_S11, MD5_T05);
        FF (d, a, b, c, X[ 5], MD5_S12, MD5_T06);
        FF (c, d, a, b, X[ 6], MD5_S13, MD5_T07);
        FF (b, c, d, a, X[ 7], MD5_S14, MD5_T08);
        FF (a, b, c, d, X[ 8], MD5_S11, MD5_T09);
        FF (d, a, b, c, X[ 9], MD5_S12, MD5_T10);
        FF (c, d, a, b, X[10], MD5_S13, MD5_T11);
        FF (b, c, d, a, X[11], MD5_S14, MD5_T12);
        FF (a, b, c, d, X[12], MD5_S11, MD5_T13);
        FF (d, a, b, c, X[13], MD5_S12, MD5_T14);
        FF (c, d, a, b, X[14], MD5_S13, MD5_T15);
        FF (b, c, d, a, X[15], MD5_S14, MD5_T16);

        //Round 2 Transformation
        GG (a, b, c, d, X[ 1], MD5_S21, MD5_T17);
        GG (d, a, b, c, X[ 6], MD5_S22, MD5_T18);
        GG (c, d, a, b, X[11], MD5_S23, MD5_T19);
        GG (b, c, d, a, X[ 0], MD5_S24, MD5_T20);
        GG (a, b, c, d, X[ 5], MD5_S21, MD5_T21);
        GG (d, a, b, c, X[10], MD5_S22, MD5_T22);
        GG (c, d, a, b, X[15], MD5_S23, MD5_T23);
        GG (b, c, d, a, X[ 4], MD5_S24, MD5_T24);
        GG (a, b, c, d, X[ 9], MD5_S21, MD5_T25);
        GG (d, a, b, c, X[14], MD5_S22, MD5_T26);
        GG (c, d, a, b, X[ 3], MD5_S23, MD5_T27);
        GG (b, c, d, a, X[ 8], MD5_S24, MD5_T28);
        GG (a, b, c, d, X[13], MD5_S21, MD5_T29);
        GG (d, a, b, c, X[ 2], MD5_S22, MD5_T30);
        GG (c, d, a, b, X[ 7], MD5_S23, MD5_T31);
        GG (b, c, d, a, X[12], MD5_S24, MD5_T32);


        //Round 3 Transformation
        HH (a, b, c, d, X[ 5], MD5_S31, MD5_T33);
        HH (d, a, b, c, X[ 8], MD5_S32, MD5_T34);
        HH (c, d, a, b, X[11], MD5_S33, MD5_T35);
        HH (b, c, d, a, X[14], MD5_S34, MD5_T36);
        HH (a, b, c, d, X[ 1], MD5_S31, MD5_T37);
        HH (d, a, b, c, X[ 4], MD5_S32, MD5_T38);
        HH (c, d, a, b, X[ 7], MD5_S33, MD5_T39);
        HH (b, c, d, a, X[10], MD5_S34, MD5_T40);
        HH (a, b, c, d, X[13], MD5_S31, MD5_T41);
        HH (d, a, b, c, X[ 0], MD5_S32, MD5_T42);
        HH (c, d, a, b, X[ 3], MD5_S33, MD5_T43);
        HH (b, c, d, a, X[ 6], MD5_S34, MD5_T44);
        HH (a, b, c, d, X[ 9], MD5_S31, MD5_T45);
        HH (d, a, b, c, X[12], MD5_S32, MD5_T46);
        HH (c, d, a, b, X[15], MD5_S33, MD5_T47);
        HH (b, c, d, a, X[ 2], MD5_S34, MD5_T48);

        //Round 4 Transformation
        II (a, b, c, d, X[ 0], MD5_S41, MD5_T49);
        II (d, a, b, c, X[ 7], MD5_S42, MD5_T50);
        II (c, d, a, b, X[14], MD5_S43, MD5_T51);
        II (b, c, d, a, X[ 5], MD5_S44, MD5_T52);
        II (a, b, c, d, X[12], MD5_S41, MD5_T53);
        II (d, a, b, c, X[ 3], MD5_S42, MD5_T54);
        II (c, d, a, b, X[10], MD5_S43, MD5_T55);
        II (b, c, d, a, X[ 1], MD5_S44, MD5_T56);
        II (a, b, c, d, X[ 8], MD5_S41, MD5_T57);
        II (d, a, b, c, X[15], MD5_S42, MD5_T58);
        II (c, d, a, b, X[ 6], MD5_S43, MD5_T59);
        II (b, c, d, a, X[13], MD5_S44, MD5_T60);
        II (a, b, c, d, X[ 4], MD5_S41, MD5_T61);
        II (d, a, b, c, X[11], MD5_S42, MD5_T62);
        II (c, d, a, b, X[ 2], MD5_S43, MD5_T63);
        II (b, c, d, a, X[ 9], MD5_S44, MD5_T64);

        // Add this block's result into the running chaining value
        m_lMD5[0] += a;
        m_lMD5[1] += b;
        m_lMD5[2] += c;
        m_lMD5[3] += d;

}
Beispiel #15
0
/*
* MD5 Compression Function
*
* Processes `blocks` consecutive blocks starting at `input`, updating
* `digest` after each one. The working variables A..D are carried across
* blocks and reloaded from the updated digest at the end of each iteration.
*/
void MD5::compress_n(const byte input[], size_t blocks)
   {
   u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3];

   for(size_t i = 0; i != blocks; ++i)
      {
      // Fill the message word buffer M from the current input position
      // (presumably little-endian, going by the helper's name)
      load_le(&M[0], input, M.size());

      // Round 1
      FF(A,B,C,D,M[ 0], 7,0xD76AA478);   FF(D,A,B,C,M[ 1],12,0xE8C7B756);
      FF(C,D,A,B,M[ 2],17,0x242070DB);   FF(B,C,D,A,M[ 3],22,0xC1BDCEEE);
      FF(A,B,C,D,M[ 4], 7,0xF57C0FAF);   FF(D,A,B,C,M[ 5],12,0x4787C62A);
      FF(C,D,A,B,M[ 6],17,0xA8304613);   FF(B,C,D,A,M[ 7],22,0xFD469501);
      FF(A,B,C,D,M[ 8], 7,0x698098D8);   FF(D,A,B,C,M[ 9],12,0x8B44F7AF);
      FF(C,D,A,B,M[10],17,0xFFFF5BB1);   FF(B,C,D,A,M[11],22,0x895CD7BE);
      FF(A,B,C,D,M[12], 7,0x6B901122);   FF(D,A,B,C,M[13],12,0xFD987193);
      FF(C,D,A,B,M[14],17,0xA679438E);   FF(B,C,D,A,M[15],22,0x49B40821);

      // Round 2
      GG(A,B,C,D,M[ 1], 5,0xF61E2562);   GG(D,A,B,C,M[ 6], 9,0xC040B340);
      GG(C,D,A,B,M[11],14,0x265E5A51);   GG(B,C,D,A,M[ 0],20,0xE9B6C7AA);
      GG(A,B,C,D,M[ 5], 5,0xD62F105D);   GG(D,A,B,C,M[10], 9,0x02441453);
      GG(C,D,A,B,M[15],14,0xD8A1E681);   GG(B,C,D,A,M[ 4],20,0xE7D3FBC8);
      GG(A,B,C,D,M[ 9], 5,0x21E1CDE6);   GG(D,A,B,C,M[14], 9,0xC33707D6);
      GG(C,D,A,B,M[ 3],14,0xF4D50D87);   GG(B,C,D,A,M[ 8],20,0x455A14ED);
      GG(A,B,C,D,M[13], 5,0xA9E3E905);   GG(D,A,B,C,M[ 2], 9,0xFCEFA3F8);
      GG(C,D,A,B,M[ 7],14,0x676F02D9);   GG(B,C,D,A,M[12],20,0x8D2A4C8A);

      // Round 3
      HH(A,B,C,D,M[ 5], 4,0xFFFA3942);   HH(D,A,B,C,M[ 8],11,0x8771F681);
      HH(C,D,A,B,M[11],16,0x6D9D6122);   HH(B,C,D,A,M[14],23,0xFDE5380C);
      HH(A,B,C,D,M[ 1], 4,0xA4BEEA44);   HH(D,A,B,C,M[ 4],11,0x4BDECFA9);
      HH(C,D,A,B,M[ 7],16,0xF6BB4B60);   HH(B,C,D,A,M[10],23,0xBEBFBC70);
      HH(A,B,C,D,M[13], 4,0x289B7EC6);   HH(D,A,B,C,M[ 0],11,0xEAA127FA);
      HH(C,D,A,B,M[ 3],16,0xD4EF3085);   HH(B,C,D,A,M[ 6],23,0x04881D05);
      HH(A,B,C,D,M[ 9], 4,0xD9D4D039);   HH(D,A,B,C,M[12],11,0xE6DB99E5);
      HH(C,D,A,B,M[15],16,0x1FA27CF8);   HH(B,C,D,A,M[ 2],23,0xC4AC5665);

      // Round 4
      II(A,B,C,D,M[ 0], 6,0xF4292244);   II(D,A,B,C,M[ 7],10,0x432AFF97);
      II(C,D,A,B,M[14],15,0xAB9423A7);   II(B,C,D,A,M[ 5],21,0xFC93A039);
      II(A,B,C,D,M[12], 6,0x655B59C3);   II(D,A,B,C,M[ 3],10,0x8F0CCC92);
      II(C,D,A,B,M[10],15,0xFFEFF47D);   II(B,C,D,A,M[ 1],21,0x85845DD1);
      II(A,B,C,D,M[ 8], 6,0x6FA87E4F);   II(D,A,B,C,M[15],10,0xFE2CE6E0);
      II(C,D,A,B,M[ 6],15,0xA3014314);   II(B,C,D,A,M[13],21,0x4E0811A1);
      II(A,B,C,D,M[ 4], 6,0xF7537E82);   II(D,A,B,C,M[11],10,0xBD3AF235);
      II(C,D,A,B,M[ 2],15,0x2AD7D2BB);   II(B,C,D,A,M[ 9],21,0xEB86D391);

      // Fold the round output into the digest and reload the working
      // variables from it, ready for the next block
      A = (digest[0] += A);
      B = (digest[1] += B);
      C = (digest[2] += C);
      D = (digest[3] += D);

      // Advance to the next block
      input += hash_block_size();
      }
   }
Beispiel #16
0
/*
   KSPPGMRESUpdateHessenberg - saves column `it` of HH into HES, tests for the
   happy breakdown, applies the stored plane rotations to the column, and
   (unless *hapend is set) computes and applies one more rotation.

.  it - column of the Hessenberg that is complete, PGMRES is actually computing two columns ahead of this
.  hapend - in/out flag; set to PETSC_TRUE here when the happy breakdown test fires, never cleared
.  res - receives |RS(it+1)| after the new rotation, or 0.0 when *hapend is set; left untouched
         on the KSP_DIVERGED_NULL early return
 */
static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
{
  PetscScalar    *hh,*cc,*ss,*rs;
  PetscInt       j;
  PetscReal      hapbnd;
  KSP_PGMRES     *pgmres = (KSP_PGMRES*)(ksp->data);
  PetscErrorCode ierr;

  PetscFunctionBegin;
  hh = HH(0,it);   /* pointer to beginning of column to update */
  cc = CC(0);      /* beginning of cosine rotations */
  ss = SS(0);      /* beginning of sine rotations */
  rs = RS(0);      /* right hand side of least squares system */

  /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
  for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];

  /* check for the happy breakdown */
  /* NOTE(review): rs[it] is used as a divisor with no zero check visible in this function */
  hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pgmres->haptol);
  if (PetscAbsScalar(hh[it+1]) < hapbnd) {
    ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
    *hapend = PETSC_TRUE;
  }

  /* Apply all the previously computed plane rotations to the new column
     of the Hessenberg matrix */
  /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
     and some refs have [c   s ; -conj(s)  c] (don't be confused!) */

  for (j=0; j<it; j++) {
    /* keep the old hh[j]; it is needed for the second row after hh[j] is overwritten */
    PetscScalar hhj = hh[j];
    hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
    hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
  }

  /*
    compute the new plane rotation, and apply it to:
     1) the right-hand-side of the Hessenberg system (RS)
        note: it affects RS(it) and RS(it+1)
     2) the new column of the Hessenberg matrix
        note: it affects HH(it,it) which is currently pointed to
        by hh and HH(it+1, it) (*(hh+1))
    thus obtaining the updated value of the residual...
  */

  /* compute new plane rotation */

  if (!*hapend) {
    PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
    if (delta == 0.0) {
      /* both entries are zero: no rotation can be formed */
      ksp->reason = KSP_DIVERGED_NULL;
      PetscFunctionReturn(0);
    }

    cc[it] = hh[it] / delta;    /* new cosine value */
    ss[it] = hh[it+1] / delta;  /* new sine value */

    hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
    /* rs[it+1] must be computed before rs[it] is overwritten */
    rs[it+1] = -ss[it]*rs[it];
    rs[it]   = PetscConj(cc[it])*rs[it];
    *res     = PetscAbsScalar(rs[it+1]);
  } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
            another rotation matrix (so RH doesn't change).  The new residual is
            always the new sine term times the residual from last time (RS(it)),
            but now the new sine rotation would be zero...so the residual should
            be zero...so we will multiply "zero" by the last residual.  This might
            not be exactly what we want to do here -could just return "zero". */

    *res = 0.0;
  }
  PetscFunctionReturn(0);
}
Beispiel #17
0
/*
   KSPPGMRESCycle - runs one cycle of pipelined GMRES and updates
   ksp->vec_sol with the solution built from the completed columns.

   The reductions (VecMDot / VecNorm) started in one pass of the loop are
   collected in a later pass, so at loop index `it` the Hessenberg update is
   performed for column it-2.

.  itcount - if non-NULL, receives it-1 on exit (0 on the early zero-residual return)
.  ksp - the solver context; VEC_VV(0) is normalized on entry and its norm used as
         the initial residual
*/
static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_PGMRES     *pgmres = (KSP_PGMRES*)(ksp->data);
  PetscReal      res_norm,res,newnorm;
  PetscErrorCode ierr;
  PetscInt       it     = 0,j,k;
  PetscBool      hapend = PETSC_FALSE;

  PetscFunctionBegin;
  if (itcount) *itcount = 0;
  /* Normalize the starting vector; its norm is the initial residual and RS(0) */
  ierr   = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr);
  res    = res_norm;
  *RS(0) = res_norm;

  /* check for the convergence */
  ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->rnorm = res;
  ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  pgmres->it = it-2;
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  if (!res) {
    ksp->reason = KSP_CONVERGED_ATOL;
    ierr        = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  for (; !ksp->reason; it++) {
    Vec Zcur,Znext;
    /* Grow the vector storage when the next basis vector would not fit */
    if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) {
      ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr);
    }
    /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */
    Zcur  = VEC_VV(it);         /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */
    Znext = VEC_VV(it+1);       /* This iteration will compute Znext, update with a deferred correction once we know how
                                 * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */

    if (it < pgmres->max_k+1 && ksp->its+1 < PetscMax(2,ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */
      ierr = KSP_PCApplyBAorAB(ksp,Zcur,Znext,VEC_TEMP_MATOP);CHKERRQ(ierr);
    }

    if (it > 1) {               /* Complete the pending reduction */
      ierr           = VecNormEnd(VEC_VV(it-1),NORM_2,&newnorm);CHKERRQ(ierr);
      *HH(it-1,it-2) = newnorm;
    }
    if (it > 0) {               /* Finish the reduction computing the latest column of H */
      ierr = VecMDotEnd(Zcur,it,&(VEC_VV(0)),HH(0,it-1));CHKERRQ(ierr);
    }

    if (it > 1) {
      /* normalize the base vector from two iterations ago, basis is complete up to here */
      ierr = VecScale(VEC_VV(it-1),1./ *HH(it-1,it-2));CHKERRQ(ierr);

      ierr       = KSPPGMRESUpdateHessenberg(ksp,it-2,&hapend,&res);CHKERRQ(ierr);
      pgmres->it = it-2;
      ksp->its++;
      ksp->rnorm = res;

      ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
      if (it < pgmres->max_k+1 || ksp->reason || ksp->its == ksp->max_it) {  /* Monitor if we are done or still iterating, but not before a restart. */
        ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
        ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
      }
      if (ksp->reason) break;
      /* Catch error in happy breakdown and signal convergence and break from loop */
      if (hapend) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }

      /* Leave the loop once the restart length or the iteration limit is reached */
      if (!(it < pgmres->max_k+1 && ksp->its < ksp->max_it)) break;

      /* The it-2 column of H was not scaled when we computed Zcur, apply correction */
      ierr = VecScale(Zcur,1./ *HH(it-1,it-2));CHKERRQ(ierr);
      /* And Znext computed in this iteration was computed using the under-scaled Zcur */
      ierr = VecScale(Znext,1./ *HH(it-1,it-2));CHKERRQ(ierr);

      /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */
      for (k=0; k<it; k++) *HH(k,it-1) /= *HH(it-1,it-2);
      /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This
       * column is complete except for HH(it,it-1) which we won't know until the next iteration. */
      *HH(it-1,it-1) /= *HH(it-1,it-2);
    }

    if (it > 0) {
      PetscScalar *work;
      /* Scratch array of max_k+2 scalars, allocated on first use and kept on pgmres */
      if (!pgmres->orthogwork) {ierr = PetscMalloc((pgmres->max_k + 2)*sizeof(PetscScalar),&pgmres->orthogwork);CHKERRQ(ierr);}
      work = pgmres->orthogwork;
      /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is
       *
       *   Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1]
       *
       * where
       *
       *   Z[j] = sum_{k=0}^j V[k] * H[k,j-1]
       *
       * substituting
       *
       *   Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1]
       *
       * rearranging the iteration space from row-column to column-row
       *
       *   Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1]
       *
       * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already
       * been transformed to upper triangular form.
       */
      for (k=0; k<it+1; k++) {
        work[k] = 0;
        for (j=PetscMax(0,k-1); j<it-1; j++) work[k] -= *HES(k,j) * *HH(j,it-1);
      }
      ierr = VecMAXPY(Znext,it+1,work,&VEC_VV(0));CHKERRQ(ierr);
      ierr = VecAXPY(Znext,-*HH(it-1,it-1),Zcur);CHKERRQ(ierr);

      /* Orthogonalize Zcur against existing basis vectors. */
      for (k=0; k<it; k++) work[k] = -*HH(k,it-1);
      ierr = VecMAXPY(Zcur,it,work,&VEC_VV(0));CHKERRQ(ierr);
      /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */
      /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */
      ierr = VecNormBegin(VEC_VV(it),NORM_2,&newnorm);CHKERRQ(ierr);
    }

    /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */
    ierr = VecMDotBegin(Znext,it+1,&VEC_VV(0),HH(0,it));CHKERRQ(ierr);

    /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */
    ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));CHKERRQ(ierr);
  }

  if (itcount) *itcount = it-1; /* Number of iterations actually completed. */

  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */
  /* Form the solution (or the solution so far) */
  ierr = KSPPGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #18
0
/**
 * Restarted GMRES solver for the single-precision complex system A x = b.
 *
 * The iterate x is reset to zero on entry, so any initial guess stored in
 * x->dval is discarded.  The Krylov basis is orthogonalised with modified
 * Gram-Schmidt.  After every restart cycle the solution is updated with
 * magma_cgemv and the true residual b - A x is recomputed.
 *
 * @param A           system matrix (A.num_rows gives the problem size)
 * @param b           right-hand side, read through b.dval
 * @param x           solution vector, overwritten through x->dval
 * @param solver_par  in:  restart, maxiter, epsilon, verbose;
 *                    out: solver, numiter, info, init_res, iter_res,
 *                    final_res, runtime and, when verbose > 0,
 *                    res_vec / timing
 * @param queue       queue handed to the sparse kernels and the timers
 *
 * @return MAGMA_ERR_HOST_ALLOC or MAGMA_ERR_DEVICE_ALLOC when a workspace
 *         allocation fails, MAGMA_SUCCESS otherwise.  The convergence
 *         verdict is reported in solver_par->info, not in the return value.
 */
extern "C" magma_int_t
magma_cgmres(
    magma_c_sparse_matrix A, 
    magma_c_vector b, 
    magma_c_vector *x,  
    magma_c_solver_par *solver_par,
    magma_queue_t queue )
{
    // remember the caller's kernel stream so it can be restored on every
    // exit path
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );
    
    magma_int_t stat_cpu = 0, stat_dev = 0;
    // prepare solver feedback
    solver_par->solver = Magma_GMRES;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS;

    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE, 
                                                c_mone = MAGMA_C_NEG_ONE;
    magma_int_t dofs = A.num_rows;
    magma_int_t i, j, k, m = 0;
    magma_int_t restart = min( dofs-1, solver_par->restart );
    magma_int_t ldh = restart+1;
    float nom, rNorm, RNorm, nom0, betanom, r0 = 0.;

    // CPU workspace
    // The pointers start out as NULL so that the failure path below never
    // hands an indeterminate value to magma_free_pinned().
    magmaFloatComplex *H = NULL, *HH = NULL, *y = NULL, *h1 = NULL;
    stat_cpu += magma_cmalloc_pinned( &H, (ldh+1)*ldh );
    stat_cpu += magma_cmalloc_pinned( &y, ldh );
    stat_cpu += magma_cmalloc_pinned( &HH, ldh*ldh );
    stat_cpu += magma_cmalloc_pinned( &h1, ldh );
    if( stat_cpu != 0){
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_ERR_HOST_ALLOC;
    }

    // GPU workspace
    magma_c_vector r, q, q_t;
    magma_c_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_c_vinit( &q, Magma_DEV, dofs*(ldh+1), c_zero, queue );
    // q_t owns no memory: it is a one-column view that is re-pointed at the
    // current basis vector inside the iteration.
    q_t.memory_location = Magma_DEV; 
    q_t.dval = NULL; 
    q_t.num_rows = q_t.nnz = dofs; q_t.num_cols = 1;

    magmaFloatComplex *dy = NULL, *dH = NULL;
    stat_dev += magma_cmalloc( &dy, ldh );
    stat_dev += magma_cmalloc( &dH, (ldh+1)*ldh );
    if( stat_dev != 0){
        // Release each buffer exactly once, including the vectors r and q
        // that were initialised just above.
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        magma_free( dH );
        magma_free( dy );
        magma_c_vfree(&r, queue );
        magma_c_vfree(&q, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_ERR_DEVICE_ALLOC;
    }

    // GPU stream
    // NOTE(review): the streams and the event are only referenced by the
    // disabled classical Gram-Schmidt variants; they are still created and
    // destroyed here to keep the original side effects.
    magma_queue_t stream[2];
    magma_event_t event[1];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    magma_event_create( &event[0] );

    magma_cscal( dofs, c_zero, x->dval, 1 );              //  x = 0
    magma_ccopy( dofs, b.dval, 1, r.dval, 1 );             //  r = b
    nom0 = betanom = magma_scnrm2( dofs, r.dval, 1 );     //  nom0= || r||
    nom = nom0  * nom0;
    solver_par->init_res = nom0;
    H(1,0) = MAGMA_C_MAKE( nom0, 0. ); 
    magma_csetvector(1, &H(1,0), 1, &dH(1,0), 1);

    // relative tolerance, falling back to epsilon itself when the scaled
    // value drops below ATOLERANCE
    if ( (r0 = nom0 * solver_par->epsilon ) < ATOLERANCE ){ 
        r0 = solver_par->epsilon;
    }
    // NOTE(review): this compares the squared norm against r0, whereas the
    // loop exit below compares the plain norm; kept as in the original.
    if ( nom < r0 ) {
        // Already converged: release everything acquired so far instead of
        // returning with the workspace, streams and event still live.
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        magma_free( dy );
        magma_free( dH );
        magma_c_vfree(&r, queue );
        magma_c_vfree(&q, queue );
        magma_queue_destroy( stream[0] );
        magma_queue_destroy( stream[1] );
        magma_event_destroy( event[0] );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_SUCCESS;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    // start iteration: one pass of the outer loop is one restart cycle
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ) {

        for(k=1; k<=restart; k++) {

            magma_ccopy(dofs, r.dval, 1, q(k-1), 1);       //  q[0]    = 1.0/||r||
            magma_cscal(dofs, 1./H(k,k-1), q(k-1), 1);    //  (to be fused)

            q_t.dval = q(k-1);
            magma_c_spmv( c_one, A, q_t, c_zero, r, queue ); //  r = A q[k] 

            // modified Gram-Schmidt (the classical and fused variants that
            // used to live here as commented-out code have been removed)
            for (i=1; i<=k; i++) {
                H(i,k) =magma_cdotc(dofs, q(i-1), 1, r.dval, 1);            
                    //  H(i,k) = q[i] . r
                magma_caxpy(dofs,-H(i,k), q(i-1), 1, r.dval, 1);            
                    //  r = r - H(i,k) q[i]
            }
            H(k+1,k) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); // H(k+1,k) = ||r|| 

            /*     Minimization of  || b-Ax ||  in H_k       */ 
            for (i=1; i<=k; i++) {
                HH(k,i) = magma_cblas_cdotc( i+1, &H(1,k), 1, &H(1,i), 1 );
            }
            h1[k] = H(1,k)*H(1,0); 
            if (k != 1) {
                for (i=1; i<k; i++) {
                    HH(k,i) = HH(k,i)/HH(i,i);//
                    for (m=i+1; m<=k; m++) {
                        HH(k,m) -= HH(k,i) * HH(m,i) * HH(i,i);
                    }
                    h1[k] -= h1[i] * HH(k,i);   
                }    
            }
            y[k] = h1[k]/HH(k,k); 
            if (k != 1)  
                for (i=k-1; i>=1; i--) {
                    y[i] = h1[i]/HH(i,i);
                    for (j=i+1; j<=k; j++)
                        y[i] -= y[j] * HH(j,i);
                }                    
            m = k;
            rNorm = fabs(MAGMA_C_REAL(H(k+1,k)));
        }/*     Minimization done       */ 
        // compute solution approximation
        magma_csetmatrix(m, 1, y+1, m, dy, m );
        magma_cgemv(MagmaNoTrans, dofs, m, c_one, q(0), dofs, dy, 1, 
                                                    c_one, x->dval, 1); 

        // compute residual
        magma_c_spmv( c_mone, A, *x, c_zero, r, queue );      //  r = - A * x
        magma_caxpy(dofs, c_one, b.dval, 1, r.dval, 1);  //  r = r + b
        H(1,0) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); 
                                            //  RNorm = H[1][0] = || r ||
        RNorm = MAGMA_C_REAL( H(1,0) );
        betanom = fabs(RNorm);  

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if (  betanom  < r0 ) {
            break;
        } 
    }

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    magma_cresidual( A, b, *x, &residual, queue );
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    // classify the outcome: converged before maxiter, improved but not
    // converged, or no improvement over the initial residual
    if ( solver_par->numiter < solver_par->maxiter) {
        solver_par->info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_SLOW_CONVERGENCE;
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_DIVERGENCE;
    }
    // free pinned memory
    magma_free_pinned( H );
    magma_free_pinned( y );
    magma_free_pinned( HH );
    magma_free_pinned( h1 );
    // free GPU memory
    magma_free(dy); 
    if (dH != NULL ) magma_free(dH); 
    magma_c_vfree(&r, queue );
    magma_c_vfree(&q, queue );

    // free GPU streams and events
    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_event_destroy( event[0] );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_cgmres */
Beispiel #19
0
/*
   KSPGMRESCycle - runs one GMRES cycle: at most gmres->max_k Krylov
   directions, stopping earlier when ksp->reason becomes non-zero or
   ksp->its reaches ksp->max_it.

   Input/output:
     itcount - if non-NULL, receives the number of directions generated in
               this cycle (0 when the residual norm is zero on entry)
     ksp     - the solver context; ksp->its, ksp->rnorm and ksp->reason are
               updated, and ksp->vec_sol is updated through
               KSPGMRESBuildSoln before returning

   Returns 0 on success; any non-zero code from a callee is propagated
   through CHKERRQ.
 */
PetscErrorCode KSPGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_GMRES      *gmres = (KSP_GMRES*)(ksp->data);
  PetscReal      res_norm,res,hapbnd,tt;
  PetscErrorCode ierr;
  PetscInt       it     = 0, max_k = gmres->max_k;
  PetscBool      hapend = PETSC_FALSE;

  PetscFunctionBegin;
  /* normalise the first basis vector and keep its norm as the starting
     residual norm and as the first right-hand-side entry (GRS(0)) */
  ierr    = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr);
  res     = res_norm;
  *GRS(0) = res_norm;

  /* check for the convergence */
  ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->rnorm = res;
  ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  gmres->it  = (it - 1);
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  /* an exactly zero residual norm ends the cycle before any direction is
     generated */
  if (!res) {
    if (itcount) *itcount = 0;
    ksp->reason = KSP_CONVERGED_ATOL;
    ierr        = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
    /* the residual for it == 0 was already logged and monitored above */
    if (it) {
      ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
      ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
    }
    gmres->it = (it - 1);
    /* request more work vectors when the next direction would not fit */
    if (gmres->vv_allocated <= it + VEC_OFFSET + 1) {
      ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr);
    }
    /* apply the preconditioned operator to VEC_VV(it); result goes to
       VEC_VV(1+it) */
    ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);CHKERRQ(ierr);

    /* update hessenberg matrix and do Gram-Schmidt */
    ierr = (*gmres->orthog)(ksp,it);CHKERRQ(ierr);

    /* vv(i+1) . vv(i+1) */
    ierr = VecNormalize(VEC_VV(it+1),&tt);CHKERRQ(ierr);

    /* save the magnitude */
    *HH(it+1,it)  = tt;
    *HES(it+1,it) = tt;

    /* check for the happy breakdown */
    /* the threshold is tt relative to GRS(it), capped at gmres->haptol */
    hapbnd = PetscAbsScalar(tt / *GRS(it));
    if (hapbnd > gmres->haptol) hapbnd = gmres->haptol;
    if (tt < hapbnd) {
      ierr   = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %14.12e tt = %14.12e\n",(double)hapbnd,(double)tt);CHKERRQ(ierr);
      hapend = PETSC_TRUE;
    }
    /* updates the Hessenberg data for column `it` and writes the new
       residual norm into res */
    ierr = KSPGMRESUpdateHessenberg(ksp,it,hapend,&res);CHKERRQ(ierr);

    it++;
    gmres->it = (it-1);   /* For converged */
    ksp->its++;
    ksp->rnorm = res;
    /* a reason may already have been set while updating the Hessenberg */
    if (ksp->reason) break;

    ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

    /* Catch error in happy breakdown and signal convergence and break from loop */
    if (hapend) {
      if (!ksp->reason) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }
    }
  }

  /* Monitor if we know that we will not return for a restart */
  if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
    ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  }

  if (itcount) *itcount = it;


  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */
  /* Form the solution (or the solution so far) */
  /* it-1 is the index of the last direction generated in this cycle */
  ierr = KSPGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #20
0
/*
   Compression step for one input block: reads sixteen 32-bit words from
   buf (offsets 0..60) and folds them into md->rmd128.state[0..3].

   md  - hash state; only md->rmd128.state is read and written here
   buf - input block; 64 bytes are consumed

   Always returns CRYPT_OK.

   NOTE(review): the #endif directly after the signature closes a
   preprocessor conditional that opens outside this excerpt.
 */
static int  rmd128_compress(hash_state *md, unsigned char *buf)
#endif
{
   ulong32 aa,bb,cc,dd,aaa,bbb,ccc,ddd,X[16];
   int i;
   
   /* load words X */
   /* LOAD32L is defined elsewhere; presumably a little-endian 32-bit load */
   for (i = 0; i < 16; i++){
      LOAD32L(X[i], buf + (4 * i));
   }

   /* load state */
   /* two working copies: aa..dd for the first line of rounds, aaa..ddd for
      the parallel line */
   aa = aaa = md->rmd128.state[0];
   bb = bbb = md->rmd128.state[1];
   cc = ccc = md->rmd128.state[2];
   dd = ddd = md->rmd128.state[3];

   /* round 1 */
   FF(aa, bb, cc, dd, X[ 0], 11);
   FF(dd, aa, bb, cc, X[ 1], 14);
   FF(cc, dd, aa, bb, X[ 2], 15);
   FF(bb, cc, dd, aa, X[ 3], 12);
   FF(aa, bb, cc, dd, X[ 4],  5);
   FF(dd, aa, bb, cc, X[ 5],  8);
   FF(cc, dd, aa, bb, X[ 6],  7);
   FF(bb, cc, dd, aa, X[ 7],  9);
   FF(aa, bb, cc, dd, X[ 8], 11);
   FF(dd, aa, bb, cc, X[ 9], 13);
   FF(cc, dd, aa, bb, X[10], 14);
   FF(bb, cc, dd, aa, X[11], 15);
   FF(aa, bb, cc, dd, X[12],  6);
   FF(dd, aa, bb, cc, X[13],  7);
   FF(cc, dd, aa, bb, X[14],  9);
   FF(bb, cc, dd, aa, X[15],  8);
                             
   /* round 2 */
   GG(aa, bb, cc, dd, X[ 7],  7);
   GG(dd, aa, bb, cc, X[ 4],  6);
   GG(cc, dd, aa, bb, X[13],  8);
   GG(bb, cc, dd, aa, X[ 1], 13);
   GG(aa, bb, cc, dd, X[10], 11);
   GG(dd, aa, bb, cc, X[ 6],  9);
   GG(cc, dd, aa, bb, X[15],  7);
   GG(bb, cc, dd, aa, X[ 3], 15);
   GG(aa, bb, cc, dd, X[12],  7);
   GG(dd, aa, bb, cc, X[ 0], 12);
   GG(cc, dd, aa, bb, X[ 9], 15);
   GG(bb, cc, dd, aa, X[ 5],  9);
   GG(aa, bb, cc, dd, X[ 2], 11);
   GG(dd, aa, bb, cc, X[14],  7);
   GG(cc, dd, aa, bb, X[11], 13);
   GG(bb, cc, dd, aa, X[ 8], 12);

   /* round 3 */
   HH(aa, bb, cc, dd, X[ 3], 11);
   HH(dd, aa, bb, cc, X[10], 13);
   HH(cc, dd, aa, bb, X[14],  6);
   HH(bb, cc, dd, aa, X[ 4],  7);
   HH(aa, bb, cc, dd, X[ 9], 14);
   HH(dd, aa, bb, cc, X[15],  9);
   HH(cc, dd, aa, bb, X[ 8], 13);
   HH(bb, cc, dd, aa, X[ 1], 15);
   HH(aa, bb, cc, dd, X[ 2], 14);
   HH(dd, aa, bb, cc, X[ 7],  8);
   HH(cc, dd, aa, bb, X[ 0], 13);
   HH(bb, cc, dd, aa, X[ 6],  6);
   HH(aa, bb, cc, dd, X[13],  5);
   HH(dd, aa, bb, cc, X[11], 12);
   HH(cc, dd, aa, bb, X[ 5],  7);
   HH(bb, cc, dd, aa, X[12],  5);

   /* round 4 */
   II(aa, bb, cc, dd, X[ 1], 11);
   II(dd, aa, bb, cc, X[ 9], 12);
   II(cc, dd, aa, bb, X[11], 14);
   II(bb, cc, dd, aa, X[10], 15);
   II(aa, bb, cc, dd, X[ 0], 14);
   II(dd, aa, bb, cc, X[ 8], 15);
   II(cc, dd, aa, bb, X[12],  9);
   II(bb, cc, dd, aa, X[ 4],  8);
   II(aa, bb, cc, dd, X[13],  9);
   II(dd, aa, bb, cc, X[ 3], 14);
   II(cc, dd, aa, bb, X[ 7],  5);
   II(bb, cc, dd, aa, X[15],  6);
   II(aa, bb, cc, dd, X[14],  8);
   II(dd, aa, bb, cc, X[ 5],  6);
   II(cc, dd, aa, bb, X[ 6],  5);
   II(bb, cc, dd, aa, X[ 2], 12);

   /* parallel round 1 */
   III(aaa, bbb, ccc, ddd, X[ 5],  8); 
   III(ddd, aaa, bbb, ccc, X[14],  9);
   III(ccc, ddd, aaa, bbb, X[ 7],  9);
   III(bbb, ccc, ddd, aaa, X[ 0], 11);
   III(aaa, bbb, ccc, ddd, X[ 9], 13);
   III(ddd, aaa, bbb, ccc, X[ 2], 15);
   III(ccc, ddd, aaa, bbb, X[11], 15);
   III(bbb, ccc, ddd, aaa, X[ 4],  5);
   III(aaa, bbb, ccc, ddd, X[13],  7);
   III(ddd, aaa, bbb, ccc, X[ 6],  7);
   III(ccc, ddd, aaa, bbb, X[15],  8);
   III(bbb, ccc, ddd, aaa, X[ 8], 11);
   III(aaa, bbb, ccc, ddd, X[ 1], 14);
   III(ddd, aaa, bbb, ccc, X[10], 14);
   III(ccc, ddd, aaa, bbb, X[ 3], 12);
   III(bbb, ccc, ddd, aaa, X[12],  6);

   /* parallel round 2 */
   HHH(aaa, bbb, ccc, ddd, X[ 6],  9);
   HHH(ddd, aaa, bbb, ccc, X[11], 13);
   HHH(ccc, ddd, aaa, bbb, X[ 3], 15);
   HHH(bbb, ccc, ddd, aaa, X[ 7],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 0], 12);
   HHH(ddd, aaa, bbb, ccc, X[13],  8);
   HHH(ccc, ddd, aaa, bbb, X[ 5],  9);
   HHH(bbb, ccc, ddd, aaa, X[10], 11);
   HHH(aaa, bbb, ccc, ddd, X[14],  7);
   HHH(ddd, aaa, bbb, ccc, X[15],  7);
   HHH(ccc, ddd, aaa, bbb, X[ 8], 12);
   HHH(bbb, ccc, ddd, aaa, X[12],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 4],  6);
   HHH(ddd, aaa, bbb, ccc, X[ 9], 15);
   HHH(ccc, ddd, aaa, bbb, X[ 1], 13);
   HHH(bbb, ccc, ddd, aaa, X[ 2], 11);

   /* parallel round 3 */   
   GGG(aaa, bbb, ccc, ddd, X[15],  9);
   GGG(ddd, aaa, bbb, ccc, X[ 5],  7);
   GGG(ccc, ddd, aaa, bbb, X[ 1], 15);
   GGG(bbb, ccc, ddd, aaa, X[ 3], 11);
   GGG(aaa, bbb, ccc, ddd, X[ 7],  8);
   GGG(ddd, aaa, bbb, ccc, X[14],  6);
   GGG(ccc, ddd, aaa, bbb, X[ 6],  6);
   GGG(bbb, ccc, ddd, aaa, X[ 9], 14);
   GGG(aaa, bbb, ccc, ddd, X[11], 12);
   GGG(ddd, aaa, bbb, ccc, X[ 8], 13);
   GGG(ccc, ddd, aaa, bbb, X[12],  5);
   GGG(bbb, ccc, ddd, aaa, X[ 2], 14);
   GGG(aaa, bbb, ccc, ddd, X[10], 13);
   GGG(ddd, aaa, bbb, ccc, X[ 0], 13);
   GGG(ccc, ddd, aaa, bbb, X[ 4],  7);
   GGG(bbb, ccc, ddd, aaa, X[13],  5);

   /* parallel round 4 */
   FFF(aaa, bbb, ccc, ddd, X[ 8], 15);
   FFF(ddd, aaa, bbb, ccc, X[ 6],  5);
   FFF(ccc, ddd, aaa, bbb, X[ 4],  8);
   FFF(bbb, ccc, ddd, aaa, X[ 1], 11);
   FFF(aaa, bbb, ccc, ddd, X[ 3], 14);
   FFF(ddd, aaa, bbb, ccc, X[11], 14);
   FFF(ccc, ddd, aaa, bbb, X[15],  6);
   FFF(bbb, ccc, ddd, aaa, X[ 0], 14);
   FFF(aaa, bbb, ccc, ddd, X[ 5],  6);
   FFF(ddd, aaa, bbb, ccc, X[12],  9);
   FFF(ccc, ddd, aaa, bbb, X[ 2], 12);
   FFF(bbb, ccc, ddd, aaa, X[13],  9);
   FFF(aaa, bbb, ccc, ddd, X[ 9], 12);
   FFF(ddd, aaa, bbb, ccc, X[ 7],  5);
   FFF(ccc, ddd, aaa, bbb, X[10], 15);
   FFF(bbb, ccc, ddd, aaa, X[14],  8);

   /* combine results */
   /* ddd temporarily holds the new state[0]; it is stored last because the
      old state[0] is still needed for the new state[3] */
   ddd += cc + md->rmd128.state[1];               /* final result for MDbuf[0] */
   md->rmd128.state[1] = md->rmd128.state[2] + dd + aaa;
   md->rmd128.state[2] = md->rmd128.state[3] + aa + bbb;
   md->rmd128.state[3] = md->rmd128.state[0] + bb + ccc;
   md->rmd128.state[0] = ddd;

   return CRYPT_OK;
}
/*
 * Battleship driver: prints the course banner, lets player 1 and then
 * player 2 play a full session on a freshly reset board, and finally
 * declares as winner the player who needed fewer shots.
 *
 * Menu options 1 and 2 are interactive sessions that differ only in the
 * routine that fills the ship board (HH or CH); option 3 fires at random
 * cells until every ship position counted by cont() has been hit.
 */
int main()
{
    printf("\a\t\tIFPB - Campus Joao Pessoa\n");
    printf("\t\tCurso Superior de Engenharia Eletrica\n");
    printf("\t\tDisciplina de Algoritmos e Logica de Programacao\n");
    printf("\t\tProfessor: Erick\n");
    printf("\t\tProva 2: Parte 2, Jogo de Batalha Naval\n");
    printf("\t\tGrupo: Paulo Felipe, Josivaldo Gomes e Marcello Aires\n\n");
    printf("\n--------------------------------------------------------------------------------\n");

    char M[N][N], MB[N][N];   /* M: board shown to the player, MB: ship board */
    int ME[N][N]={};
    int opcao, rodada;

    GerarEspiral(ME);

    /* ---------------- player 1 ---------------- */
    ZeraT(M, MB);
    opcao = Menu();
    if(opcao==1 || opcao==2)
    {
        /* the two interactive modes share everything but the board setup */
        if(opcao==1)
            HH(MB);
        else
            CH(MB);
        contpos = cont(MB);
        printf("\nSecao de Jogadas do Jogador 1\n\n");
        rodada = 0;
        while(rodada<N*N)
        {
            Imprimir(M);
            Jogada(M, MB, 0);
            printf("\nQuantidade de Jogadas: %d\nQuantidade de Acertos: %d\nQuantidade Total de Posicoes de Barco: %d\n\n", contjog1, cont1, contpos);
            if(contpos == cont1)
                break;
            rodada++;
        }
        Imprimir(M);
        printf("\n\nSECAO DE JOGADAS DO JOGADOR 1 TERMINADA\n\n");
    }
    else if(opcao==3)
    {
        int X, Y;
        HC(MB, M);
        contpos = cont(MB);
        for(rodada=0; rodada<N*N; rodada++)
        {
            /* keep drawing coordinates (each in 0..N-1) until an untouched
               water or ship cell comes up */
            for(;;)
            {
                X = (rand()%N);
                Y = (rand()%N);
                if(M[X][Y]!='~')
                    continue;
                if(MB[X][Y]=='~')
                {
                    M[X][Y] = '*';
                    contjog1++;
                    break;
                }
                if(MB[X][Y]=='B')
                {
                    M[X][Y] = 'X';
                    contjog1++;
                    cont1++;
                    rodada += AcertouNavio(M, MB, X, Y, 0);
                    break;
                }
            }
            ImprimirAux(0);
            Imprimir(M);
            if(cont1==contpos)
            {
                printf("\n\nSECAO DE JOGADAS DO JOGADOR 1 TERMINADA\n\n");
                break;
            }
        }
    }

    /* ---------------- player 2 ---------------- */
    ZeraT(M, MB);
    opcao = Menu();
    if(opcao==1 || opcao==2)
    {
        if(opcao==1)
            HH(MB);
        else
            CH(MB);
        contpos = cont(MB);
        printf("\nSecao de Jogadas do Jogador 2\n\n");
        rodada = 0;
        while(rodada<N*N)
        {
            Imprimir(M);
            Jogada(M, MB, 1);
            printf("\nQuantidade de Jogadas: %d\nQuantidade de Acertos: %d\nQuantidade Total de Posicoes de Barco: %d\n\n", contjog2, cont2, contpos);
            if(contpos == cont2)
                break;
            rodada++;
        }
        Imprimir(M);
        printf("\n\nSECAO DE JOGADAS DO JOGADOR 2 TERMINADA\n\n");
    }
    else if(opcao==3)
    {
        int X, Y;
        HC(MB, M);
        contpos = cont(MB);
        for(rodada=0; rodada<N*N; rodada++)
        {
            for(;;)
            {
                X = (rand()%N);
                Y = (rand()%N);
                if(M[X][Y]!='~')
                    continue;
                if(MB[X][Y]=='~')
                {
                    M[X][Y] = '*';
                    contjog2++;
                    break;
                }
                if(MB[X][Y]=='B')
                {
                    M[X][Y] = 'X';
                    contjog2++;
                    cont2++;
                    rodada += AcertouNavio(M, MB, X, Y, 1);
                    break;
                }
            }
            ImprimirAux(1);
            Imprimir(M);
            if(cont2==contpos)
            {
                printf("\n\nSECAO DE JOGADAS DO JOGADOR 2 TERMINADA\n\n");
                break;
            }
        }
    }

    /* fewer shots wins; equal shot counts are a draw */
    if(contjog1==contjog2)
        printf("\nJOGO EMPATADO!!!      %d x %d\n\n", contjog1, contjog2);
    else if(contjog1<contjog2)
        printf("\nJOGADOR 1 VENCEU!!!   %d x %d\n\n", contjog1, contjog2);
    else
        printf("\nJOGADOR 2 VENCEU!!!   %d x %d\n\n", contjog2, contjog1);
    return 0;
}
Beispiel #22
0
// Processes one 64-byte block: decodes `input` into sixteen 32-bit words,
// runs the four 16-step rounds on working copies of state[0..3], and adds
// the results back into state[0..3].
//
// input - block to absorb; bytes input[0] .. input[63] are read
//
// The step macros FF/GG/HH/II are defined outside this excerpt;
// presumably each one updates its first argument in place.
void MD5::transform(unsigned char *input) {
	unsigned int a,b,c,d,x[16];
 	a=state[0];
  	b=state[1];
   	c=state[2];
    d=state[3];
    // byte to dword
    // each word is assembled from four consecutive bytes, lowest-order
    // byte first
    for (int i=0;i<16;i++) {
    	int j=i<<2;
    	unsigned int w1,w2,w3,w4;
    	w1=input[j];
    	w2=input[j+1];
    	w3=input[j+2];
    	w4=input[j+3];
    	x[i]=((unsigned int)w1)|(((unsigned int)w2)<<8)|
     		 (((unsigned int)w3)<<16)|(((unsigned int)w4)<<24);
    }
// Sxy is the sixth argument (presumably the rotate amount) of the step
// macros: x is the round number, y the position within each group of
// four steps.
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21

 /* Round 1*/
	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
  	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
  	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
  	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
  	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
  	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
  	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
 	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
  	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
  	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
  	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
  	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
  	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
 /* Round 2 */
  	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
  	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
  	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
  	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
  	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
  	GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
  	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
  	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
  	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
  	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
  	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
  	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
  	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
  	GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
  	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
  	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
 /* Round 3 */
	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
	HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
 /* Round 4 */
  	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
  	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
  	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
  	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
  	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
  	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
  	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
  	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
  	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
  	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
  	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
  	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
  	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
  	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
  	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
  	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
	// fold the working values back into the running state
	state[0]+=a;
	state[1]+=b;
	state[2]+=c;
	state[3]+=d;
}
Beispiel #23
0
/*
   KSPFGMRESCycle - runs one flexible-GMRES cycle: at most fgmres->max_k
   Krylov directions, stopping earlier when ksp->reason becomes non-zero or
   ksp->its reaches ksp->max_it.

   Input/output:
     itcount - if non-NULL, receives the number of directions generated in
               this cycle (0 when convergence is detected on entry)
     ksp     - the solver context; ksp->its, ksp->rnorm and ksp->reason are
               updated, and ksp->vec_sol is updated through
               KSPFGMRESBuildSoln before returning

   Returns 0 on success; any non-zero code from a callee, including the
   user's modifypc callback, is propagated through CHKERRQ.
 */
PetscErrorCode KSPFGMRESCycle(PetscInt *itcount,KSP ksp)
{

  KSP_FGMRES     *fgmres = (KSP_FGMRES*)(ksp->data);
  PetscReal      res_norm;
  PetscReal      hapbnd,tt;
  PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
  PetscErrorCode ierr;
  PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
  PetscInt       max_k = fgmres->max_k; /* max # of directions Krylov space */
  Mat            Amat,Pmat;
  MatStructure   pflag;

  PetscFunctionBegin;
  /* Number of pseudo iterations since last restart is the number
     of prestart directions */
  loc_it = 0;

  /* note: (fgmres->it) is always set one less than (loc_it) It is used in
     KSPBUILDSolution_FGMRES, where it is passed to KSPFGMRESBuildSoln.
     Note that when KSPFGMRESBuildSoln is called from this function,
     (loc_it -1) is passed, so the two are equivalent */
  fgmres->it = (loc_it - 1);

  /* initial residual is in VEC_VV(0)  - compute its norm*/
  ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);

  /* first entry in right-hand-side of hessenberg system is just
     the initial residual norm */
  *RS(0) = res_norm;

  ksp->rnorm = res_norm;
  ierr       = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
  ierr       = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);

  /* check for the convergence - maybe the current guess is good enough */
  ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    if (itcount) *itcount = 0;
    PetscFunctionReturn(0);
  }

  /* scale VEC_VV (the initial residual) */
  ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr);

  /* MAIN ITERATION LOOP BEGINNING*/
  /* keep iterating until we have converged OR generated the max number
     of directions OR reached the max number of iterations for the method */
  while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
    /* the residual for loc_it == 0 was already logged and monitored above */
    if (loc_it) {
      ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
      ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
    }
    fgmres->it = (loc_it - 1);

    /* see if more space is needed for work vectors */
    if (fgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
      ierr = KSPFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
      /* (loc_it+1) is passed in as number of the first vector that should
         be allocated */
    }

    /* CHANGE THE PRECONDITIONER? */
    /* ModifyPC is the callback function that can be used to
       change the PC or its attributes before its applied.  Its error code
       is checked like every other call in this routine so that a failing
       callback aborts the cycle instead of being silently ignored. */
    ierr = (*fgmres->modifypc)(ksp,ksp->its,loc_it,res_norm,fgmres->modifyctx);CHKERRQ(ierr);


    /* apply PRECONDITIONER to direction vector and store with
       preconditioned vectors in prevec */
    ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr);

    ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr);
    /* Multiply preconditioned vector by operator - put in VEC_VV(loc_it+1) */
    ierr = MatMult(Amat,PREVEC(loc_it),VEC_VV(1+loc_it));CHKERRQ(ierr);


    /* update hessenberg matrix and do Gram-Schmidt - new direction is in
       VEC_VV(1+loc_it)*/
    ierr = (*fgmres->orthog)(ksp,loc_it);CHKERRQ(ierr);

    /* new entry in hessenburg is the 2-norm of our new direction */
    ierr = VecNorm(VEC_VV(loc_it+1),NORM_2,&tt);CHKERRQ(ierr);

    *HH(loc_it+1,loc_it)  = tt;
    *HES(loc_it+1,loc_it) = tt;

    /* Happy Breakdown Check */
    hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
    /* RS(loc_it) contains the res_norm from the last iteration  */
    hapbnd = PetscMin(fgmres->haptol,hapbnd);
    if (tt > hapbnd) {
      /* scale new direction by its norm */
      ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr);
    } else {
      /* This happens when the solution is exactly reached. */
      /* So there is no new direction... */
      ierr   = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);     /* set VEC_TEMP to 0 */
      hapend = PETSC_TRUE;
    }
    /* note that for FGMRES we could get HES(loc_it+1, loc_it)  = 0 and the
       current solution would not be exact if HES was singular.  Note that
       HH non-singular implies that HES is no singular, and HES is guaranteed
       to be nonsingular when PREVECS are linearly independent and A is
       nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
       of HES). So we should really add a check to verify that HES is nonsingular.*/


    /* Now apply rotations to new col of hessenberg (and right side of system),
       calculate new rotation, and get new residual norm at the same time*/
    ierr = KSPFGMRESUpdateHessenberg(ksp,loc_it,hapend,&res_norm);CHKERRQ(ierr);
    if (ksp->reason) break;

    loc_it++;
    fgmres->it = (loc_it-1);   /* Add this here in case it has converged */

    ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its++;
    ksp->rnorm = res_norm;
    ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

    /* Catch error in happy breakdown and signal convergence and break from loop */
    if (hapend) {
      if (!ksp->reason) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res_norm);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }
    }
  }
  /* END OF ITERATION LOOP */
  ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);

  /*
     Monitor if we know that we will not return for a restart */
  if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
    ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
  }

  if (itcount) *itcount = loc_it;

  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */

  /* Form the solution (or the solution so far) */
  /* Note: must pass in (loc_it-1) for iteration count so that KSPFGMRESBuildSoln
     properly navigates */

  ierr = KSPFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #24
0
/**
 @brief	md5 basic transformation. Transforms state based on block.

 The 64-byte block is unpacked into sixteen 32-bit words with md5_decode(),
 the four 16-step rounds (FF, GG, HH, II) are run over a working copy of the
 state, and the result is added back into state.

 @param	state	four-word chaining value; updated in place.
 @param	block	the 64 input bytes to absorb.
 */
static void md5_transform(uint32_t state[4], uint8_t block[64])
{
	uint32_t a = state[0];
	uint32_t b = state[1];
	uint32_t c = state[2];
	uint32_t d = state[3];
	uint32_t x[16];

	md5_decode(x, block, 64);

	// Round 1
	a = FF(a, b, c, d, x[0],  S11, 0xd76aa478); // 1
	d = FF(d, a, b, c, x[1],  S12, 0xe8c7b756); // 2
	c = FF(c, d, a, b, x[2],  S13, 0x242070db); // 3
	b = FF(b, c, d, a, x[3],  S14, 0xc1bdceee); // 4
	a = FF(a, b, c, d, x[4],  S11, 0xf57c0faf); // 5
	d = FF(d, a, b, c, x[5],  S12, 0x4787c62a); // 6
	c = FF(c, d, a, b, x[6],  S13, 0xa8304613); // 7
	b = FF(b, c, d, a, x[7],  S14, 0xfd469501); // 8
	a = FF(a, b, c, d, x[8],  S11, 0x698098d8); // 9
	d = FF(d, a, b, c, x[9],  S12, 0x8b44f7af); // 10
	c = FF(c, d, a, b, x[10], S13, 0xffff5bb1); // 11
	b = FF(b, c, d, a, x[11], S14, 0x895cd7be); // 12
	a = FF(a, b, c, d, x[12], S11, 0x6b901122); // 13
	d = FF(d, a, b, c, x[13], S12, 0xfd987193); // 14
	c = FF(c, d, a, b, x[14], S13, 0xa679438e); // 15
	b = FF(b, c, d, a, x[15], S14, 0x49b40821); // 16

	// Round 2
	a = GG(a, b, c, d, x[1],  S21, 0xf61e2562); // 17
	d = GG(d, a, b, c, x[6],  S22, 0xc040b340); // 18
	c = GG(c, d, a, b, x[11], S23, 0x265e5a51); // 19
	b = GG(b, c, d, a, x[0],  S24, 0xe9b6c7aa); // 20
	a = GG(a, b, c, d, x[5],  S21, 0xd62f105d); // 21
	d = GG(d, a, b, c, x[10], S22, 0x2441453);  // 22
	c = GG(c, d, a, b, x[15], S23, 0xd8a1e681); // 23
	b = GG(b, c, d, a, x[4],  S24, 0xe7d3fbc8); // 24
	a = GG(a, b, c, d, x[9],  S21, 0x21e1cde6); // 25
	d = GG(d, a, b, c, x[14], S22, 0xc33707d6); // 26
	c = GG(c, d, a, b, x[3],  S23, 0xf4d50d87); // 27
	b = GG(b, c, d, a, x[8],  S24, 0x455a14ed); // 28
	a = GG(a, b, c, d, x[13], S21, 0xa9e3e905); // 29
	d = GG(d, a, b, c, x[2],  S22, 0xfcefa3f8); // 30
	c = GG(c, d, a, b, x[7],  S23, 0x676f02d9); // 31
	b = GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); // 32

	// Round 3
	a = HH(a, b, c, d, x[5],  S31, 0xfffa3942); // 33
	d = HH(d, a, b, c, x[8],  S32, 0x8771f681); // 34
	c = HH(c, d, a, b, x[11], S33, 0x6d9d6122); // 35
	b = HH(b, c, d, a, x[14], S34, 0xfde5380c); // 36
	a = HH(a, b, c, d, x[1],  S31, 0xa4beea44); // 37
	d = HH(d, a, b, c, x[4],  S32, 0x4bdecfa9); // 38
	c = HH(c, d, a, b, x[7],  S33, 0xf6bb4b60); // 39
	b = HH(b, c, d, a, x[10], S34, 0xbebfbc70); // 40
	a = HH(a, b, c, d, x[13], S31, 0x289b7ec6); // 41
	d = HH(d, a, b, c, x[0],  S32, 0xeaa127fa); // 42
	c = HH(c, d, a, b, x[3],  S33, 0xd4ef3085); // 43
	b = HH(b, c, d, a, x[6],  S34, 0x4881d05);  // 44
	a = HH(a, b, c, d, x[9],  S31, 0xd9d4d039); // 45
	d = HH(d, a, b, c, x[12], S32, 0xe6db99e5); // 46
	c = HH(c, d, a, b, x[15], S33, 0x1fa27cf8); // 47
	b = HH(b, c, d, a, x[2],  S34, 0xc4ac5665); // 48

	// Round 4
	a = II(a, b, c, d, x[0],  S41, 0xf4292244); // 49
	d = II(d, a, b, c, x[7],  S42, 0x432aff97); // 50
	c = II(c, d, a, b, x[14], S43, 0xab9423a7); // 51
	b = II(b, c, d, a, x[5],  S44, 0xfc93a039); // 52
	a = II(a, b, c, d, x[12], S41, 0x655b59c3); // 53
	d = II(d, a, b, c, x[3],  S42, 0x8f0ccc92); // 54
	c = II(c, d, a, b, x[10], S43, 0xffeff47d); // 55
	b = II(b, c, d, a, x[1],  S44, 0x85845dd1); // 56
	a = II(a, b, c, d, x[8],  S41, 0x6fa87e4f); // 57
	d = II(d, a, b, c, x[15], S42, 0xfe2ce6e0); // 58
	c = II(c, d, a, b, x[6],  S43, 0xa3014314); // 59
	b = II(b, c, d, a, x[13], S44, 0x4e0811a1); // 60
	a = II(a, b, c, d, x[4],  S41, 0xf7537e82); // 61
	d = II(d, a, b, c, x[11], S42, 0xbd3af235); // 62
	c = II(c, d, a, b, x[2],  S43, 0x2ad7d2bb); // 63
	b = II(b, c, d, a, x[9],  S44, 0xeb86d391); // 64

	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;

	// Zero sensitive information. x is never read again, so a plain memset()
	// here is a dead store that an optimizing compiler is allowed to drop;
	// storing through a volatile-qualified pointer keeps the wipe in place.
	volatile uint32_t *wipe = x;
	for (unsigned int i = 0; i < sizeof(x) / sizeof(x[0]); i++) {
		wipe[i] = 0;
	}
}
Beispiel #25
0
// MD5 digest of an in-memory buffer.
//
// str  - bytes to hash.
// size - number of bytes in str; 0 means "use strlen(str)".
// Returns the four state words (a, b, c, d) after the last block.
//
// The message is copied into a scratch buffer and padded with 0x80, zero
// bytes and the bit length (low word size<<3, high word size>>29), then
// processed 64 bytes at a time. The length field and the message words are
// encoded/decoded byte by byte in little-endian order, so the result does not
// depend on the host byte order or on the alignment of the scratch buffer.
MD5VAL md5(char * str, unsigned int size)
{
	if(size==0)
		size=strlen(str);
	unsigned int m=size%64;
	unsigned int lm=size-m;  // length of the whole 64-byte blocks
	unsigned int ln;  // length of the data after padding
	if(m<56)
		ln=lm+64;
	else
		ln=lm+128;  // no room for 0x80 + length in the last block: add one more
	unsigned char * strw=new unsigned char[ln];
	unsigned int i,k;
	// Copy the input into the scratch buffer strw
	for(i=0;i<size;i++)
		strw[i]=(unsigned char)str[i];
	// Padding: one 0x80 byte, then zeros up to the 8-byte length field
	strw[i++]=0x80;
	for(;i<ln-8;i++)
		strw[i]=0x00;
	// Append the length in bits as two little-endian 32-bit words
	unsigned int bitsLo=size<<3;
	unsigned int bitsHi=size>>29;
	for(k=0;k<4;k++)
	{
		strw[i+k]=(unsigned char)(bitsLo>>(8*k));
		strw[i+4+k]=(unsigned char)(bitsHi>>(8*k));
	}
	// Initialize the MD5 state
	MD5VAL val={0x67452301,0xefcdab89,0x98badcfe,0x10325476};
	unsigned int &a=val.a, &b=val.b, &c=val.c, &d=val.d;
	unsigned int aa,bb,cc,dd;
	unsigned int x[16];
	for(i=0;i<ln;i+=64)
	{
		// Decode the 64-byte block into sixteen little-endian words
		const unsigned char * p=strw+i;
		for(k=0;k<16;k++,p+=4)
			x[k]=(unsigned int)p[0]|((unsigned int)p[1]<<8)|((unsigned int)p[2]<<16)|((unsigned int)p[3]<<24);
		// Save the values
		aa=a; bb=b; cc=c; dd=d;
		// Round 1
		FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
		FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
		FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
		FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
		FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
		FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
		FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
		FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
		FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
		FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
		FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
		FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
		FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
		FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
		FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
		FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
		// Round 2
		GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
		GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
		GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
		GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
		GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
		GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
		GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
		GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
		GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
		GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
		GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
		GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
		GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
		GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
		GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
		GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
		// Round 3
		HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
		HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
		HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
		HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
		HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
		HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
		HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
		HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
		HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
		HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
		HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
		HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
		HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
		HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
		HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
		HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
		// Round 4
		II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
		II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
		II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
		II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
		II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
		II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
		II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
		II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
		II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
		II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
		II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
		II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
		II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
		II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
		II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
		II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
		// Add the original values
		a+=aa;
		b+=bb;
		c+=cc;
		d+=dd;
	}
	delete[] strw;
	return val;
}
Beispiel #26
0
// MD5 digest of a file.
//
// fpin - stream to read; it is consumed from its current position in
//        BUFFER_SIZE-byte chunks until fread() returns a short count.
// Returns the four state words (a, b, c, d) after the last block.
//
// The chunk buffer is the file-level Buffer, allocated on first use with 64
// spare bytes for the final padding and not released here, so calls share it.
// The running byte count is kept in 64 bits so the appended bit length stays
// correct for inputs of 4 GiB and more, and both the length field and the
// message words are encoded/decoded byte by byte in little-endian order so
// the result does not depend on the host byte order.
// NOTE(review): the block loop assumes BUFFER_SIZE is a multiple of 64 - confirm.
MD5VAL md5File(FILE * fpin)
{
	if(!Buffer)
		Buffer=new char[BUFFER_SIZE+64];
	unsigned char * buf=(unsigned char *)Buffer;
	MD5VAL val={0x67452301,0xefcdab89,0x98badcfe,0x10325476};
	unsigned int &a=val.a, &b=val.b, &c=val.c, &d=val.d;
	unsigned int aa,bb,cc,dd;
	unsigned int j,k,count,co;
	unsigned int x[16];
	unsigned long long total=0;  // bytes read so far
	do
	{
		count=fread(buf,1,BUFFER_SIZE,fpin);
		total+=count;
		if(count==BUFFER_SIZE)
			co=BUFFER_SIZE;
		else
		{
			// Short read: this is the last chunk, so pad it.
			j=count;
			buf[j++]=0x80;
			for(;j%64!=56;j++)
				buf[j]=0x00;
			// Append the total length in bits as 8 little-endian bytes
			unsigned long long bits=total<<3;
			for(k=0;k<8;k++)
				buf[j++]=(unsigned char)(bits>>(8*k));
			co=j;
		}
		for(j=0;j<co;j+=64)
		{
			// Decode the 64-byte block into sixteen little-endian words
			const unsigned char * p=buf+j;
			for(k=0;k<16;k++,p+=4)
				x[k]=(unsigned int)p[0]|((unsigned int)p[1]<<8)|((unsigned int)p[2]<<16)|((unsigned int)p[3]<<24);
			// Save the values
			aa=a; bb=b; cc=c; dd=d;
			// Round 1
			FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
			FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
			FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
			FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
			FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
			FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
			FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
			FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
			FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
			FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
			FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
			FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
			FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
			FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
			FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
			FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
			// Round 2
			GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
			GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
			GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
			GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
			GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
			GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
			GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
			GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
			GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
			GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
			GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
			GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
			GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
			GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
			GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
			GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
			// Round 3
			HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
			HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
			HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
			HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
			HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
			HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
			HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
			HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
			HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
			HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
			HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
			HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
			HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
			HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
			HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
			HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
			// Round 4
			II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
			II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
			II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
			II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
			II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
			II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
			II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
			II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
			II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
			II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
			II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
			II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
			II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
			II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
			II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
			II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
			// Add the original values
			a+=aa;
			b+=bb;
			c+=cc;
			d+=dd;
		}

	} while(count==BUFFER_SIZE);
	return val;
}
Beispiel #27
0
/*
   Folds column `it` of the Hessenberg matrix into the rotated system:
   applies the `it` stored plane rotations to the column, then (unless this
   is a happy breakdown) builds one more rotation from HH(it,it) and
   HH(it+1,it), applies it to the column and to RS(it)/RS(it+1), and reports
   |RS(it+1)| through *res.

   ksp    - solver context; ksp->reason is set to KSP_DIVERGED_NULL when the
            norm used to build the new rotation is zero (in that case *res is
            left untouched).
   it     - index of the column being updated.
   hapend - on a happy breakdown no new rotation is formed and *res is 0.
   res    - output: the updated residual norm.

   The rotation convention is [conj(c)  s ; -s  c].
*/
static PetscErrorCode KSPFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
{
  /* not referenced by name below; presumably the HH/CC/SS/RS accessors expand against it */
  KSP_FGMRES  *fgmres = (KSP_FGMRES*)(ksp->data);
  PetscScalar *col,*cosv,*sinv;
  PetscScalar upper,nrm;
  PetscInt    k;

  PetscFunctionBegin;
  col  = HH(0,it);   /* top of the column being updated */
  cosv = CC(0);      /* stored cosines, one per earlier rotation */
  sinv = SS(0);      /* stored sines, one per earlier rotation */

  /* Rotation k mixes rows k and k+1 of the column. */
  for (k=0; k<it; k++) {
    upper    = col[k];
    col[k]   = PetscConj(cosv[k]) * upper + sinv[k] * col[k+1];
    col[k+1] = cosv[k] * col[k+1] - (sinv[k] * upper);
  }

  if (hapend) {
    /* Happy breakdown: no further rotation is applied and the residual is
       reported as zero. */
    *res = 0.0;
    PetscFunctionReturn(0);
  }

  /* Build the new rotation from the two trailing entries of the column. */
  nrm = PetscSqrtScalar(PetscConj(col[it]) * col[it] + PetscConj(col[it+1]) * col[it+1]);
  if (nrm == 0.0) {
    ksp->reason = KSP_DIVERGED_NULL;
    PetscFunctionReturn(0);
  }

  cosv[it] = col[it] / nrm;      /* new cosine */
  sinv[it] = col[it+1] / nrm;    /* new sine */

  /* Apply it to the right-hand side (rows it and it+1) and to the column. */
  *RS(it+1) = -(sinv[it] * *RS(it));
  *RS(it)   = PetscConj(cosv[it]) * *RS(it);
  col[it]   = PetscConj(cosv[it]) * col[it] + sinv[it] * col[it+1];

  /* The residual norm is the magnitude of the last right-hand-side entry. */
  *res = PetscAbsScalar(*RS(it+1));
  PetscFunctionReturn(0);
}
Beispiel #28
0
/*
   rmd256 compression function: mixes one input block into the eight-word
   chaining state md->rmd256.state[0..7].

   md  - hash state; rmd256.state is read at the start and updated at the end.
   buf - input block; sixteen 4-byte words are read from it (64 bytes).

   Two lines of four words each are run side by side: aa..dd through
   FF/GG/HH/II and aaa..ddd through III/HHH/GGG/FFF. After every pair of
   rounds one word is exchanged between the two lines (aa, then bb, cc, dd).

   Always returns CRYPT_OK.
*/
static int  rmd256_compress(hash_state *md, unsigned char *buf)
#endif /* closes a preprocessor conditional opened above this definition (not visible here) */
{
   ulong32 aa,bb,cc,dd,aaa,bbb,ccc,ddd,tmp,X[16];
   int i;

   /* load words X */
   for (i = 0; i < 16; i++){
      LOAD32L(X[i], buf + (4 * i));
   }

   /* load state */
   aa = md->rmd256.state[0];
   bb = md->rmd256.state[1];
   cc = md->rmd256.state[2];
   dd = md->rmd256.state[3];
   aaa = md->rmd256.state[4];
   bbb = md->rmd256.state[5];
   ccc = md->rmd256.state[6];
   ddd = md->rmd256.state[7];

   /* round 1 */
   FF(aa, bb, cc, dd, X[ 0], 11);
   FF(dd, aa, bb, cc, X[ 1], 14);
   FF(cc, dd, aa, bb, X[ 2], 15);
   FF(bb, cc, dd, aa, X[ 3], 12);
   FF(aa, bb, cc, dd, X[ 4],  5);
   FF(dd, aa, bb, cc, X[ 5],  8);
   FF(cc, dd, aa, bb, X[ 6],  7);
   FF(bb, cc, dd, aa, X[ 7],  9);
   FF(aa, bb, cc, dd, X[ 8], 11);
   FF(dd, aa, bb, cc, X[ 9], 13);
   FF(cc, dd, aa, bb, X[10], 14);
   FF(bb, cc, dd, aa, X[11], 15);
   FF(aa, bb, cc, dd, X[12],  6);
   FF(dd, aa, bb, cc, X[13],  7);
   FF(cc, dd, aa, bb, X[14],  9);
   FF(bb, cc, dd, aa, X[15],  8);

   /* parallel round 1 */
   III(aaa, bbb, ccc, ddd, X[ 5],  8);
   III(ddd, aaa, bbb, ccc, X[14],  9);
   III(ccc, ddd, aaa, bbb, X[ 7],  9);
   III(bbb, ccc, ddd, aaa, X[ 0], 11);
   III(aaa, bbb, ccc, ddd, X[ 9], 13);
   III(ddd, aaa, bbb, ccc, X[ 2], 15);
   III(ccc, ddd, aaa, bbb, X[11], 15);
   III(bbb, ccc, ddd, aaa, X[ 4],  5);
   III(aaa, bbb, ccc, ddd, X[13],  7);
   III(ddd, aaa, bbb, ccc, X[ 6],  7);
   III(ccc, ddd, aaa, bbb, X[15],  8);
   III(bbb, ccc, ddd, aaa, X[ 8], 11);
   III(aaa, bbb, ccc, ddd, X[ 1], 14);
   III(ddd, aaa, bbb, ccc, X[10], 14);
   III(ccc, ddd, aaa, bbb, X[ 3], 12);
   III(bbb, ccc, ddd, aaa, X[12],  6);

   /* exchange the first word between the two lines */
   tmp = aa; aa = aaa; aaa = tmp;

   /* round 2 */
   GG(aa, bb, cc, dd, X[ 7],  7);
   GG(dd, aa, bb, cc, X[ 4],  6);
   GG(cc, dd, aa, bb, X[13],  8);
   GG(bb, cc, dd, aa, X[ 1], 13);
   GG(aa, bb, cc, dd, X[10], 11);
   GG(dd, aa, bb, cc, X[ 6],  9);
   GG(cc, dd, aa, bb, X[15],  7);
   GG(bb, cc, dd, aa, X[ 3], 15);
   GG(aa, bb, cc, dd, X[12],  7);
   GG(dd, aa, bb, cc, X[ 0], 12);
   GG(cc, dd, aa, bb, X[ 9], 15);
   GG(bb, cc, dd, aa, X[ 5],  9);
   GG(aa, bb, cc, dd, X[ 2], 11);
   GG(dd, aa, bb, cc, X[14],  7);
   GG(cc, dd, aa, bb, X[11], 13);
   GG(bb, cc, dd, aa, X[ 8], 12);

   /* parallel round 2 */
   HHH(aaa, bbb, ccc, ddd, X[ 6],  9);
   HHH(ddd, aaa, bbb, ccc, X[11], 13);
   HHH(ccc, ddd, aaa, bbb, X[ 3], 15);
   HHH(bbb, ccc, ddd, aaa, X[ 7],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 0], 12);
   HHH(ddd, aaa, bbb, ccc, X[13],  8);
   HHH(ccc, ddd, aaa, bbb, X[ 5],  9);
   HHH(bbb, ccc, ddd, aaa, X[10], 11);
   HHH(aaa, bbb, ccc, ddd, X[14],  7);
   HHH(ddd, aaa, bbb, ccc, X[15],  7);
   HHH(ccc, ddd, aaa, bbb, X[ 8], 12);
   HHH(bbb, ccc, ddd, aaa, X[12],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 4],  6);
   HHH(ddd, aaa, bbb, ccc, X[ 9], 15);
   HHH(ccc, ddd, aaa, bbb, X[ 1], 13);
   HHH(bbb, ccc, ddd, aaa, X[ 2], 11);

   /* exchange the second word between the two lines */
   tmp = bb; bb = bbb; bbb = tmp;

   /* round 3 */
   HH(aa, bb, cc, dd, X[ 3], 11);
   HH(dd, aa, bb, cc, X[10], 13);
   HH(cc, dd, aa, bb, X[14],  6);
   HH(bb, cc, dd, aa, X[ 4],  7);
   HH(aa, bb, cc, dd, X[ 9], 14);
   HH(dd, aa, bb, cc, X[15],  9);
   HH(cc, dd, aa, bb, X[ 8], 13);
   HH(bb, cc, dd, aa, X[ 1], 15);
   HH(aa, bb, cc, dd, X[ 2], 14);
   HH(dd, aa, bb, cc, X[ 7],  8);
   HH(cc, dd, aa, bb, X[ 0], 13);
   HH(bb, cc, dd, aa, X[ 6],  6);
   HH(aa, bb, cc, dd, X[13],  5);
   HH(dd, aa, bb, cc, X[11], 12);
   HH(cc, dd, aa, bb, X[ 5],  7);
   HH(bb, cc, dd, aa, X[12],  5);

   /* parallel round 3 */
   GGG(aaa, bbb, ccc, ddd, X[15],  9);
   GGG(ddd, aaa, bbb, ccc, X[ 5],  7);
   GGG(ccc, ddd, aaa, bbb, X[ 1], 15);
   GGG(bbb, ccc, ddd, aaa, X[ 3], 11);
   GGG(aaa, bbb, ccc, ddd, X[ 7],  8);
   GGG(ddd, aaa, bbb, ccc, X[14],  6);
   GGG(ccc, ddd, aaa, bbb, X[ 6],  6);
   GGG(bbb, ccc, ddd, aaa, X[ 9], 14);
   GGG(aaa, bbb, ccc, ddd, X[11], 12);
   GGG(ddd, aaa, bbb, ccc, X[ 8], 13);
   GGG(ccc, ddd, aaa, bbb, X[12],  5);
   GGG(bbb, ccc, ddd, aaa, X[ 2], 14);
   GGG(aaa, bbb, ccc, ddd, X[10], 13);
   GGG(ddd, aaa, bbb, ccc, X[ 0], 13);
   GGG(ccc, ddd, aaa, bbb, X[ 4],  7);
   GGG(bbb, ccc, ddd, aaa, X[13],  5);

   /* exchange the third word between the two lines */
   tmp = cc; cc = ccc; ccc = tmp;

   /* round 4 */
   II(aa, bb, cc, dd, X[ 1], 11);
   II(dd, aa, bb, cc, X[ 9], 12);
   II(cc, dd, aa, bb, X[11], 14);
   II(bb, cc, dd, aa, X[10], 15);
   II(aa, bb, cc, dd, X[ 0], 14);
   II(dd, aa, bb, cc, X[ 8], 15);
   II(cc, dd, aa, bb, X[12],  9);
   II(bb, cc, dd, aa, X[ 4],  8);
   II(aa, bb, cc, dd, X[13],  9);
   II(dd, aa, bb, cc, X[ 3], 14);
   II(cc, dd, aa, bb, X[ 7],  5);
   II(bb, cc, dd, aa, X[15],  6);
   II(aa, bb, cc, dd, X[14],  8);
   II(dd, aa, bb, cc, X[ 5],  6);
   II(cc, dd, aa, bb, X[ 6],  5);
   II(bb, cc, dd, aa, X[ 2], 12);

   /* parallel round 4 */
   FFF(aaa, bbb, ccc, ddd, X[ 8], 15);
   FFF(ddd, aaa, bbb, ccc, X[ 6],  5);
   FFF(ccc, ddd, aaa, bbb, X[ 4],  8);
   FFF(bbb, ccc, ddd, aaa, X[ 1], 11);
   FFF(aaa, bbb, ccc, ddd, X[ 3], 14);
   FFF(ddd, aaa, bbb, ccc, X[11], 14);
   FFF(ccc, ddd, aaa, bbb, X[15],  6);
   FFF(bbb, ccc, ddd, aaa, X[ 0], 14);
   FFF(aaa, bbb, ccc, ddd, X[ 5],  6);
   FFF(ddd, aaa, bbb, ccc, X[12],  9);
   FFF(ccc, ddd, aaa, bbb, X[ 2], 12);
   FFF(bbb, ccc, ddd, aaa, X[13],  9);
   FFF(aaa, bbb, ccc, ddd, X[ 9], 12);
   FFF(ddd, aaa, bbb, ccc, X[ 7],  5);
   FFF(ccc, ddd, aaa, bbb, X[10], 15);
   FFF(bbb, ccc, ddd, aaa, X[14],  8);

   /* exchange the fourth word between the two lines */
   tmp = dd; dd = ddd; ddd = tmp;

   /* combine results */
   md->rmd256.state[0] += aa;
   md->rmd256.state[1] += bb;
   md->rmd256.state[2] += cc;
   md->rmd256.state[3] += dd;
   md->rmd256.state[4] += aaa;
   md->rmd256.state[5] += bbb;
   md->rmd256.state[6] += ccc;
   md->rmd256.state[7] += ddd;

   return CRYPT_OK;
}
Beispiel #29
0
/*
   md4 compression function: mixes one input block into the four-word
   chaining state md->md4.state[0..3].

   md  - hash state; md4.state is read at the start and updated at the end.
   buf - input block; sixteen 4-byte words are read from it (64 bytes).

   Three rounds of sixteen steps (FF, GG, HH) are run over a working copy of
   the state, which is then added back into md->md4.state.

   Always returns CRYPT_OK.
*/
static int  md4_compress(hash_state *md, unsigned char *buf)
#endif /* closes a preprocessor conditional opened above this definition (not visible here) */
{
    ulong32 x[16], a, b, c, d;
    int i;

    /* copy state */
    a = md->md4.state[0];
    b = md->md4.state[1];
    c = md->md4.state[2];
    d = md->md4.state[3];

    /* load the input block into the sixteen words x[0..15] */
    for (i = 0; i < 16; i++) {
        LOAD32L(x[i], buf + (4*i));
    }

    /* Round 1 */
    FF (a, b, c, d, x[ 0], S11); /* 1 */
    FF (d, a, b, c, x[ 1], S12); /* 2 */
    FF (c, d, a, b, x[ 2], S13); /* 3 */
    FF (b, c, d, a, x[ 3], S14); /* 4 */
    FF (a, b, c, d, x[ 4], S11); /* 5 */
    FF (d, a, b, c, x[ 5], S12); /* 6 */
    FF (c, d, a, b, x[ 6], S13); /* 7 */
    FF (b, c, d, a, x[ 7], S14); /* 8 */
    FF (a, b, c, d, x[ 8], S11); /* 9 */
    FF (d, a, b, c, x[ 9], S12); /* 10 */
    FF (c, d, a, b, x[10], S13); /* 11 */
    FF (b, c, d, a, x[11], S14); /* 12 */
    FF (a, b, c, d, x[12], S11); /* 13 */
    FF (d, a, b, c, x[13], S12); /* 14 */
    FF (c, d, a, b, x[14], S13); /* 15 */
    FF (b, c, d, a, x[15], S14); /* 16 */

    /* Round 2 */
    GG (a, b, c, d, x[ 0], S21); /* 17 */
    GG (d, a, b, c, x[ 4], S22); /* 18 */
    GG (c, d, a, b, x[ 8], S23); /* 19 */
    GG (b, c, d, a, x[12], S24); /* 20 */
    GG (a, b, c, d, x[ 1], S21); /* 21 */
    GG (d, a, b, c, x[ 5], S22); /* 22 */
    GG (c, d, a, b, x[ 9], S23); /* 23 */
    GG (b, c, d, a, x[13], S24); /* 24 */
    GG (a, b, c, d, x[ 2], S21); /* 25 */
    GG (d, a, b, c, x[ 6], S22); /* 26 */
    GG (c, d, a, b, x[10], S23); /* 27 */
    GG (b, c, d, a, x[14], S24); /* 28 */
    GG (a, b, c, d, x[ 3], S21); /* 29 */
    GG (d, a, b, c, x[ 7], S22); /* 30 */
    GG (c, d, a, b, x[11], S23); /* 31 */
    GG (b, c, d, a, x[15], S24); /* 32 */

    /* Round 3 */
    HH (a, b, c, d, x[ 0], S31); /* 33 */
    HH (d, a, b, c, x[ 8], S32); /* 34 */
    HH (c, d, a, b, x[ 4], S33); /* 35 */
    HH (b, c, d, a, x[12], S34); /* 36 */
    HH (a, b, c, d, x[ 2], S31); /* 37 */
    HH (d, a, b, c, x[10], S32); /* 38 */
    HH (c, d, a, b, x[ 6], S33); /* 39 */
    HH (b, c, d, a, x[14], S34); /* 40 */
    HH (a, b, c, d, x[ 1], S31); /* 41 */
    HH (d, a, b, c, x[ 9], S32); /* 42 */
    HH (c, d, a, b, x[ 5], S33); /* 43 */
    HH (b, c, d, a, x[13], S34); /* 44 */
    HH (a, b, c, d, x[ 3], S31); /* 45 */
    HH (d, a, b, c, x[11], S32); /* 46 */
    HH (c, d, a, b, x[ 7], S33); /* 47 */
    HH (b, c, d, a, x[15], S34); /* 48 */


    /* Update our state */
    md->md4.state[0] = md->md4.state[0] + a;
    md->md4.state[1] = md->md4.state[1] + b;
    md->md4.state[2] = md->md4.state[2] + c;
    md->md4.state[3] = md->md4.state[3] + d;

    return CRYPT_OK;
}
Beispiel #30
0
// Transform applied to each 64-byte block.
//
// Copies the four state words into a, b, c, d, fills x[0..15] from the block
// through decode(block, x, 64), runs the four 16-step rounds (FF, GG, HH, II)
// and adds the working values back into state[0..3].
//
// block - the 64 input bytes to absorb; not modified here.
void MD5::encryptWithBlock(const uint8_t block[64]) {
    
	uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
    
	// presumably unpacks the 64 input bytes into the sixteen words of x - confirm against decode()
	decode(block, x, 64);
    
	/* Round 1 */
	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
    
	/* Round 2 */
	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
	GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
	GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
    
	/* Round 3 */
	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
	HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
    
	/* Round 4 */
	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
    
	// Fold the working values back into the chaining state.
	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
}