static struct avl_node *_avlInsert(struct avl_node *n, int key) { if(!n) return _avlNewNode(key); if(key < K(n)) { L(n) = _avlInsert(L(n), key); } else { R(n) = _avlInsert(R(n), key); } H(n) = MAX(HH(n->left), HH(n->right)) + 1; int bf = BF(n); if(bf > 1) { if(key > K(L(n))) { // LR case L(n) = _leftRotate(L(n)); } // else LL case return _rightRotate(n); } if(bf < -1) { if(key < K(R(n))) { // RL case R(n) = _rightRotate(R(n)); } // else RR case return _leftRotate(n); } return n; }
static struct avl_node *_leftRotate(struct avl_node *n) { struct avl_node *t = R(n); R(n) = L(t); L(t) = n; // Calculate H(n) first H(n) = MAX(HH(n->left), HH(n->right)) + 1; H(t) = MAX(HH(t->left), HH(t->right)) + 1; return t; }
static struct avl_node *_avl_delete(struct avl_node *n, int key) { if(!n) return NULL; if(key < K(n)) L(n) = _avl_delete(L(n), key); else if(key > K(n)) R(n) = _avl_delete(R(n), key); else { // key == K(n) if(NULL == L(n) || NULL == R(n)) { struct avl_node *tmp = L(n) ? L(n) : R(n); if(tmp) { *n = *tmp; // copy contents of child to n } else { // n is leaf tmp = n; n = NULL; } free(tmp); } else { // two children case struct avl_node *d = _avl_minimum(n); K(n) = K(d); R(n) = _avl_delete(d, key); } } // no child case if(!n) return NULL; H(n) = MAX(HH(n->left), HH(n->right)) + 1; int bf = BF(n); if(bf > 1) { if(0 > BF(L(n))) { // LR case L(n) = _leftRotate(L(n)); } // else LL case return _rightRotate(n); } if(bf < -1) { if(0 < BF(R(n))) { // RL case R(n) = _rightRotate(R(n)); } // else RR case return _leftRotate(n); } return n; }
static PetscErrorCode KSPGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it) { PetscScalar tt; PetscErrorCode ierr; PetscInt ii,k,j; KSP_GMRES *gmres = (KSP_GMRES*)(ksp->data); PetscFunctionBegin; /* Solve for solution vector that minimizes the residual */ /* If it is < 0, no gmres steps have been performed */ if (it < 0) { ierr = VecCopy(vs,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exists immediately if vguess == vdest */ PetscFunctionReturn(0); } if (*HH(it,it) != 0.0) { nrs[it] = *GRS(it) / *HH(it,it); } else { ksp->reason = KSP_DIVERGED_BREAKDOWN; ierr = PetscInfo2(ksp,"Likely your matrix or preconditioner is singular. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar(*GRS(it)));CHKERRQ(ierr); PetscFunctionReturn(0); } for (ii=1; ii<=it; ii++) { k = it - ii; tt = *GRS(k); for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j]; if (*HH(k,k) == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; ierr = PetscInfo1(ksp,"Likely your matrix or preconditioner is singular. HH(k,k) is identically zero; k = %D",k);CHKERRQ(ierr); PetscFunctionReturn(0); } nrs[k] = tt / *HH(k,k); } /* Accumulate the correction to the solution of the preconditioned problem in TEMP */ ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));CHKERRQ(ierr); ierr = KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);CHKERRQ(ierr); /* add solution to previous solution */ if (vdest != vs) { ierr = VecCopy(vs,vdest);CHKERRQ(ierr); } ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) { PetscScalar tt; PetscErrorCode ierr; PetscInt ii,k,j; KSP_FGMRES *fgmres = (KSP_FGMRES*)(ksp->data); PetscFunctionBegin; /* Solve for solution vector that minimizes the residual */ /* If it is < 0, no fgmres steps have been performed */ if (it < 0) { ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exists immediately if vguess == vdest */ PetscFunctionReturn(0); } /* so fgmres steps HAVE been performed */ /* solve the upper triangular system - RS is the right side and HH is the upper triangular matrix - put soln in nrs */ if (*HH(it,it) != 0.0) { nrs[it] = *RS(it) / *HH(it,it); } else { nrs[it] = 0.0; } for (ii=1; ii<=it; ii++) { k = it - ii; tt = *RS(k); for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j]; nrs[k] = tt / *HH(k,k); } /* Accumulate the correction to the soln of the preconditioned prob. in VEC_TEMP - note that we use the preconditioned vectors */ ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP components to 0 */ ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr); /* put updated solution into vdest.*/ if (vdest != vguess) { ierr = VecCopy(VEC_TEMP,vdest);CHKERRQ(ierr); ierr = VecAXPY(vdest,1.0,vguess);CHKERRQ(ierr); } else { /* replace guess with solution */ ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); } PetscFunctionReturn(0); }
static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) { PetscScalar tt; PetscErrorCode ierr; PetscInt k,j; KSP_PGMRES *pgmres = (KSP_PGMRES*)(ksp->data); PetscFunctionBegin; /* Solve for solution vector that minimizes the residual */ if (it < 0) { /* no pgmres steps have been performed */ ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */ PetscFunctionReturn(0); } /* solve the upper triangular system - RS is the right side and HH is the upper triangular matrix - put soln in nrs */ if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); else nrs[it] = 0.0; for (k=it-1; k>=0; k--) { tt = *RS(k); for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; nrs[k] = tt / *HH(k,k); } /* Accumulate the correction to the solution of the preconditioned problem in TEMP */ ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr); ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));CHKERRQ(ierr); ierr = KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);CHKERRQ(ierr); /* add solution to previous solution */ if (vdest == vguess) { ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); } else { ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr); } PetscFunctionReturn(0); }
static PetscErrorCode KSPGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res) { PetscScalar *hh,*cc,*ss,tt; PetscInt j; KSP_GMRES *gmres = (KSP_GMRES*)(ksp->data); PetscFunctionBegin; hh = HH(0,it); cc = CC(0); ss = SS(0); /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ for (j=1; j<=it; j++) { tt = *hh; *hh = PetscConj(*cc) * tt + *ss * *(hh+1); hh++; *hh = *cc++ * *hh - (*ss++ * tt); } /* compute the new plane rotation, and apply it to: 1) the right-hand-side of the Hessenberg system 2) the new column of the Hessenberg matrix thus obtaining the updated value of the residual */ if (!hapend) { tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1)); if (tt == 0.0) { ksp->reason = KSP_DIVERGED_NULL; PetscFunctionReturn(0); } *cc = *hh / tt; *ss = *(hh+1) / tt; *GRS(it+1) = -(*ss * *GRS(it)); *GRS(it) = PetscConj(*cc) * *GRS(it); *hh = PetscConj(*cc) * *hh + *ss * *(hh+1); *res = PetscAbsScalar(*GRS(it+1)); } else { /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply another rotation matrix (so RH doesn't change). The new residual is always the new sine term times the residual from last time (GRS(it)), but now the new sine rotation would be zero...so the residual should be zero...so we will multiply "zero" by the last residual. This might not be exactly what we want to do here -could just return "zero". */ *res = 0.0; } PetscFunctionReturn(0); }
int main() { FILE *out1; out1=fopen("out1Uniform.txt","w"); // Parameters!! int Nr=1000; int Nt=1; HankelMatrix HH(Nr,200.); waveUniform w; w.initialize(HH); printf("%d\n",w.Nr); double r0=100.; for(int i=0;i<HH.Nr;i++) { w.phi[i]=exp(-(w.r[i]-r0)*(w.r[i]-r0)/0.5/0.5); } printf("%e\n",w.norm()); w.normalize(); printf("%e\n",w.norm()); double dt=0.005; w.PrepareCrankArrays(dt); for (int ktime=0; ktime<1000; ktime++) { w.KineticPropCrankUniform(dt); if((ktime%10)==0) { for (int i=0; i<HH.Nr; i++) fprintf(out1,"%10.17e \n", w.r[i]*abs(w.phi[i]) ); //Save wave function multiply by rho axis } printf("%e\n",1.-w.norm()); } }
//using std; int main() { //complex I=complex(0.,1.); int N=1024;//50; int MM=12;//50; double R=.05; //Parametros para blas int lda=N; int ldb=1; int ldc=1; HankelMatrix HH(N,R); }
PetscErrorCode KSPGMRESModifiedGramSchmidtOrthogonalization(KSP ksp,PetscInt it) { KSP_GMRES *gmres = (KSP_GMRES*)(ksp->data); PetscErrorCode ierr; PetscInt j; PetscScalar *hh,*hes; PetscFunctionBegin; ierr = PetscLogEventBegin(KSP_GMRESOrthogonalization,ksp,0,0,0);CHKERRQ(ierr); /* update Hessenberg matrix and do Gram-Schmidt */ hh = HH(0,it); hes = HES(0,it); for (j=0; j<=it; j++) { /* (vv(it+1), vv(j)) */ ierr = VecDot(VEC_VV(it+1),VEC_VV(j),hh);CHKERRQ(ierr); KSPCheckDot(ksp,*hh); *hes++ = *hh; /* vv(it+1) <- vv(it+1) - hh[it+1][j] vv(j) */ ierr = VecAXPY(VEC_VV(it+1),-(*hh++),VEC_VV(j));CHKERRQ(ierr); } ierr = PetscLogEventEnd(KSP_GMRESOrthogonalization,ksp,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
function tradeOneNightStand() { vars Price = series(price()); vars SMA10 = series(SMA(Price, 10)); vars SMA40 = series(SMA(Price, 40)); //Stop = 3 * 90 * PIP; var BuyStop,SellStop; BuyStop = HH(10) + 1*PIP; SellStop = LL(10) - 1*PIP; if (dow() == 5 && NumOpenLong == 0 && NumPendingLong == 0 && SMA10[0] > SMA40[0]) enterLong(0,BuyStop); else if (dow() == 5 && NumOpenShort == 0 && NumPendingShort == 0 && SMA10[0] < SMA40[0]) enterShort(0,SellStop); if (dow() != 5 && dow() != 6 && dow() != 7) { exitLong(); exitShort(); } }
void md4_process_block(uint32_t state[4], const uint32_t block[MD4_BLOCK_SIZE / 4]) { unsigned a, b, c, d; a = state[0]; b = state[1]; c = state[2]; d = state[3]; FF(a, b, c, d, block[0], 3); /* 1 */ FF(d, a, b, c, block[1], 7); /* 2 */ FF(c, d, a, b, block[2], 11); /* 3 */ FF(b, c, d, a, block[3], 19); /* 4 */ FF(a, b, c, d, block[4], 3); /* 5 */ FF(d, a, b, c, block[5], 7); /* 6 */ FF(c, d, a, b, block[6], 11); /* 7 */ FF(b, c, d, a, block[7], 19); /* 8 */ FF(a, b, c, d, block[8], 3); /* 9 */ FF(d, a, b, c, block[9], 7); /* 10 */ FF(c, d, a, b, block[10], 11); /* 11 */ FF(b, c, d, a, block[11], 19); /* 12 */ FF(a, b, c, d, block[12], 3); /* 13 */ FF(d, a, b, c, block[13], 7); /* 14 */ FF(c, d, a, b, block[14], 11); /* 15 */ FF(b, c, d, a, block[15], 19); /* 16 */ GG(a, b, c, d, block[0], 3); /* 17 */ GG(d, a, b, c, block[4], 5); /* 18 */ GG(c, d, a, b, block[8], 9); /* 19 */ GG(b, c, d, a, block[12], 13); /* 20 */ GG(a, b, c, d, block[1], 3); /* 21 */ GG(d, a, b, c, block[5], 5); /* 22 */ GG(c, d, a, b, block[9], 9); /* 23 */ GG(b, c, d, a, block[13], 13); /* 24 */ GG(a, b, c, d, block[2], 3); /* 25 */ GG(d, a, b, c, block[6], 5); /* 26 */ GG(c, d, a, b, block[10], 9); /* 27 */ GG(b, c, d, a, block[14], 13); /* 28 */ GG(a, b, c, d, block[3], 3); /* 29 */ GG(d, a, b, c, block[7], 5); /* 30 */ GG(c, d, a, b, block[11], 9); /* 31 */ GG(b, c, d, a, block[15], 13); /* 32 */ HH(a, b, c, d, block[0], 3); /* 33 */ HH(d, a, b, c, block[8], 9); /* 34 */ HH(c, d, a, b, block[4], 11); /* 35 */ HH(b, c, d, a, block[12], 15); /* 36 */ HH(a, b, c, d, block[2], 3); /* 37 */ HH(d, a, b, c, block[10], 9); /* 38 */ HH(c, d, a, b, block[6], 11); /* 39 */ HH(b, c, d, a, block[14], 15); /* 40 */ HH(a, b, c, d, block[1], 3); /* 41 */ HH(d, a, b, c, block[9], 9); /* 42 */ HH(c, d, a, b, block[5], 11); /* 43 */ HH(b, c, d, a, block[13], 15); /* 44 */ HH(a, b, c, d, block[3], 3); /* 45 */ HH(d, a, b, c, block[11], 9); /* 46 */ HH(c, d, a, b, block[7], 11); /* 47 */ HH(b, c, d, a, block[15], 15); /* 48 */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; }
void parse_ip_meta(FILE* w, u8* ip_meta, bool first) { char temp[256]; const char* chm = (const char*)ip_meta; #define HH(ofs, len, name, first) { strncpy(temp, chm+ofs, len); temp[len]=0; for (int i=len-1; i>=0; i--) if(temp[i]==' ') temp[i]=0; else break; data_kvp(w, #name, temp, first); } fprintf(w, "%s\"meta-info\": {\n", first?"\n":",\n"); //data_kvp("type", "\"meta-info\""); HH(0x00, 16, hardwareId, true); HH(0x10, 16, makerId, false); HH(0x80, 128, productName, false); HH(0x4A, 6, productVersion, false); HH(0x50, 16, releaseDate, false); HH(0x40, 10, productId, false); HH(0x20, 16, discId, false); HH(0x30, 8, areas, false); HH(0x38, 8, peripherals, false); HH(0x60, 16, bootfile, false); HH(0x70, 16, publisher, false); fprintf(w, "\n}"); }
void MD5::Transform(const uint8_t Block[64], int& error) { uint32_t a = m_lMD5[0]; uint32_t b = m_lMD5[1]; uint32_t c = m_lMD5[2]; uint32_t d = m_lMD5[3]; uint32_t X[16]; ByteToUINT( X, Block, 64, error); //Round 1 Transformation FF (a, b, c, d, X[ 0], MD5_S11, MD5_T01); FF (d, a, b, c, X[ 1], MD5_S12, MD5_T02); FF (c, d, a, b, X[ 2], MD5_S13, MD5_T03); FF (b, c, d, a, X[ 3], MD5_S14, MD5_T04); FF (a, b, c, d, X[ 4], MD5_S11, MD5_T05); FF (d, a, b, c, X[ 5], MD5_S12, MD5_T06); FF (c, d, a, b, X[ 6], MD5_S13, MD5_T07); FF (b, c, d, a, X[ 7], MD5_S14, MD5_T08); FF (a, b, c, d, X[ 8], MD5_S11, MD5_T09); FF (d, a, b, c, X[ 9], MD5_S12, MD5_T10); FF (c, d, a, b, X[10], MD5_S13, MD5_T11); FF (b, c, d, a, X[11], MD5_S14, MD5_T12); FF (a, b, c, d, X[12], MD5_S11, MD5_T13); FF (d, a, b, c, X[13], MD5_S12, MD5_T14); FF (c, d, a, b, X[14], MD5_S13, MD5_T15); FF (b, c, d, a, X[15], MD5_S14, MD5_T16); //Round 2 Transformation GG (a, b, c, d, X[ 1], MD5_S21, MD5_T17); GG (d, a, b, c, X[ 6], MD5_S22, MD5_T18); GG (c, d, a, b, X[11], MD5_S23, MD5_T19); GG (b, c, d, a, X[ 0], MD5_S24, MD5_T20); GG (a, b, c, d, X[ 5], MD5_S21, MD5_T21); GG (d, a, b, c, X[10], MD5_S22, MD5_T22); GG (c, d, a, b, X[15], MD5_S23, MD5_T23); GG (b, c, d, a, X[ 4], MD5_S24, MD5_T24); GG (a, b, c, d, X[ 9], MD5_S21, MD5_T25); GG (d, a, b, c, X[14], MD5_S22, MD5_T26); GG (c, d, a, b, X[ 3], MD5_S23, MD5_T27); GG (b, c, d, a, X[ 8], MD5_S24, MD5_T28); GG (a, b, c, d, X[13], MD5_S21, MD5_T29); GG (d, a, b, c, X[ 2], MD5_S22, MD5_T30); GG (c, d, a, b, X[ 7], MD5_S23, MD5_T31); GG (b, c, d, a, X[12], MD5_S24, MD5_T32); //Round 3 Transformation HH (a, b, c, d, X[ 5], MD5_S31, MD5_T33); HH (d, a, b, c, X[ 8], MD5_S32, MD5_T34); HH (c, d, a, b, X[11], MD5_S33, MD5_T35); HH (b, c, d, a, X[14], MD5_S34, MD5_T36); HH (a, b, c, d, X[ 1], MD5_S31, MD5_T37); HH (d, a, b, c, X[ 4], MD5_S32, MD5_T38); HH (c, d, a, b, X[ 7], MD5_S33, MD5_T39); HH (b, c, d, a, X[10], MD5_S34, MD5_T40); HH (a, b, c, d, X[13], MD5_S31, MD5_T41); HH (d, a, b, c, X[ 0], MD5_S32, MD5_T42); HH (c, d, a, b, X[ 3], MD5_S33, MD5_T43); HH (b, c, d, a, X[ 6], MD5_S34, MD5_T44); HH (a, b, c, d, X[ 9], MD5_S31, MD5_T45); HH (d, a, b, c, X[12], MD5_S32, MD5_T46); HH (c, d, a, b, X[15], MD5_S33, MD5_T47); HH (b, c, d, a, X[ 2], MD5_S34, MD5_T48); //Round 4 Transformation II (a, b, c, d, X[ 0], MD5_S41, MD5_T49); II (d, a, b, c, X[ 7], MD5_S42, MD5_T50); II (c, d, a, b, X[14], MD5_S43, MD5_T51); II (b, c, d, a, X[ 5], MD5_S44, MD5_T52); II (a, b, c, d, X[12], MD5_S41, MD5_T53); II (d, a, b, c, X[ 3], MD5_S42, MD5_T54); II (c, d, a, b, X[10], MD5_S43, MD5_T55); II (b, c, d, a, X[ 1], MD5_S44, MD5_T56); II (a, b, c, d, X[ 8], MD5_S41, MD5_T57); II (d, a, b, c, X[15], MD5_S42, MD5_T58); II (c, d, a, b, X[ 6], MD5_S43, MD5_T59); II (b, c, d, a, X[13], MD5_S44, MD5_T60); II (a, b, c, d, X[ 4], MD5_S41, MD5_T61); II (d, a, b, c, X[11], MD5_S42, MD5_T62); II (c, d, a, b, X[ 2], MD5_S43, MD5_T63); II (b, c, d, a, X[ 9], MD5_S44, MD5_T64); m_lMD5[0] += a; m_lMD5[1] += b; m_lMD5[2] += c; m_lMD5[3] += d; }
/* * MD5 Compression Function */ void MD5::compress_n(const byte input[], size_t blocks) { u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3]; for(size_t i = 0; i != blocks; ++i) { load_le(&M[0], input, M.size()); FF(A,B,C,D,M[ 0], 7,0xD76AA478); FF(D,A,B,C,M[ 1],12,0xE8C7B756); FF(C,D,A,B,M[ 2],17,0x242070DB); FF(B,C,D,A,M[ 3],22,0xC1BDCEEE); FF(A,B,C,D,M[ 4], 7,0xF57C0FAF); FF(D,A,B,C,M[ 5],12,0x4787C62A); FF(C,D,A,B,M[ 6],17,0xA8304613); FF(B,C,D,A,M[ 7],22,0xFD469501); FF(A,B,C,D,M[ 8], 7,0x698098D8); FF(D,A,B,C,M[ 9],12,0x8B44F7AF); FF(C,D,A,B,M[10],17,0xFFFF5BB1); FF(B,C,D,A,M[11],22,0x895CD7BE); FF(A,B,C,D,M[12], 7,0x6B901122); FF(D,A,B,C,M[13],12,0xFD987193); FF(C,D,A,B,M[14],17,0xA679438E); FF(B,C,D,A,M[15],22,0x49B40821); GG(A,B,C,D,M[ 1], 5,0xF61E2562); GG(D,A,B,C,M[ 6], 9,0xC040B340); GG(C,D,A,B,M[11],14,0x265E5A51); GG(B,C,D,A,M[ 0],20,0xE9B6C7AA); GG(A,B,C,D,M[ 5], 5,0xD62F105D); GG(D,A,B,C,M[10], 9,0x02441453); GG(C,D,A,B,M[15],14,0xD8A1E681); GG(B,C,D,A,M[ 4],20,0xE7D3FBC8); GG(A,B,C,D,M[ 9], 5,0x21E1CDE6); GG(D,A,B,C,M[14], 9,0xC33707D6); GG(C,D,A,B,M[ 3],14,0xF4D50D87); GG(B,C,D,A,M[ 8],20,0x455A14ED); GG(A,B,C,D,M[13], 5,0xA9E3E905); GG(D,A,B,C,M[ 2], 9,0xFCEFA3F8); GG(C,D,A,B,M[ 7],14,0x676F02D9); GG(B,C,D,A,M[12],20,0x8D2A4C8A); HH(A,B,C,D,M[ 5], 4,0xFFFA3942); HH(D,A,B,C,M[ 8],11,0x8771F681); HH(C,D,A,B,M[11],16,0x6D9D6122); HH(B,C,D,A,M[14],23,0xFDE5380C); HH(A,B,C,D,M[ 1], 4,0xA4BEEA44); HH(D,A,B,C,M[ 4],11,0x4BDECFA9); HH(C,D,A,B,M[ 7],16,0xF6BB4B60); HH(B,C,D,A,M[10],23,0xBEBFBC70); HH(A,B,C,D,M[13], 4,0x289B7EC6); HH(D,A,B,C,M[ 0],11,0xEAA127FA); HH(C,D,A,B,M[ 3],16,0xD4EF3085); HH(B,C,D,A,M[ 6],23,0x04881D05); HH(A,B,C,D,M[ 9], 4,0xD9D4D039); HH(D,A,B,C,M[12],11,0xE6DB99E5); HH(C,D,A,B,M[15],16,0x1FA27CF8); HH(B,C,D,A,M[ 2],23,0xC4AC5665); II(A,B,C,D,M[ 0], 6,0xF4292244); II(D,A,B,C,M[ 7],10,0x432AFF97); II(C,D,A,B,M[14],15,0xAB9423A7); II(B,C,D,A,M[ 5],21,0xFC93A039); II(A,B,C,D,M[12], 6,0x655B59C3); II(D,A,B,C,M[ 3],10,0x8F0CCC92); II(C,D,A,B,M[10],15,0xFFEFF47D); II(B,C,D,A,M[ 1],21,0x85845DD1); II(A,B,C,D,M[ 8], 6,0x6FA87E4F); II(D,A,B,C,M[15],10,0xFE2CE6E0); II(C,D,A,B,M[ 6],15,0xA3014314); II(B,C,D,A,M[13],21,0x4E0811A1); II(A,B,C,D,M[ 4], 6,0xF7537E82); II(D,A,B,C,M[11],10,0xBD3AF235); II(C,D,A,B,M[ 2],15,0x2AD7D2BB); II(B,C,D,A,M[ 9],21,0xEB86D391); A = (digest[0] += A); B = (digest[1] += B); C = (digest[2] += C); D = (digest[3] += D); input += hash_block_size(); } }
/* . it - column of the Hessenberg that is complete, PGMRES is actually computing two columns ahead of this */ static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) { PetscScalar *hh,*cc,*ss,*rs; PetscInt j; PetscReal hapbnd; KSP_PGMRES *pgmres = (KSP_PGMRES*)(ksp->data); PetscErrorCode ierr; PetscFunctionBegin; hh = HH(0,it); /* pointer to beginning of column to update */ cc = CC(0); /* beginning of cosine rotations */ ss = SS(0); /* beginning of sine rotations */ rs = RS(0); /* right hand side of least squares system */ /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; /* check for the happy breakdown */ hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pgmres->haptol); if (PetscAbsScalar(hh[it+1]) < hapbnd) { ierr = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr); *hapend = PETSC_TRUE; } /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), and some refs have [c s ; -conj(s) c] (don't be confused!) */ for (j=0; j<it; j++) { PetscScalar hhj = hh[j]; hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; } /* compute the new plane rotation, and apply it to: 1) the right-hand-side of the Hessenberg system (RS) note: it affects RS(it) and RS(it+1) 2) the new column of the Hessenberg matrix note: it affects HH(it,it) which is currently pointed to by hh and HH(it+1, it) (*(hh+1)) thus obtaining the updated value of the residual... */ /* compute new plane rotation */ if (!*hapend) { PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); if (delta == 0.0) { ksp->reason = KSP_DIVERGED_NULL; PetscFunctionReturn(0); } cc[it] = hh[it] / delta; /* new cosine value */ ss[it] = hh[it+1] / delta; /* new sine value */ hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; rs[it+1] = -ss[it]*rs[it]; rs[it] = PetscConj(cc[it])*rs[it]; *res = PetscAbsScalar(rs[it+1]); } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply another rotation matrix (so RH doesn't change). The new residual is always the new sine term times the residual from last time (RS(it)), but now the new sine rotation would be zero...so the residual should be zero...so we will multiply "zero" by the last residual. This might not be exactly what we want to do here -could just return "zero". */ *res = 0.0; } PetscFunctionReturn(0); }
static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_PGMRES *pgmres = (KSP_PGMRES*)(ksp->data); PetscReal res_norm,res,newnorm; PetscErrorCode ierr; PetscInt it = 0,j,k; PetscBool hapend = PETSC_FALSE; PetscFunctionBegin; if (itcount) *itcount = 0; ierr = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr); res = res_norm; *RS(0) = res_norm; /* check for the convergence */ ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = res; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); pgmres->it = it-2; ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); if (!res) { ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); for (; !ksp->reason; it++) { Vec Zcur,Znext; if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) { ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr); } /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */ Zcur = VEC_VV(it); /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */ Znext = VEC_VV(it+1); /* This iteration will compute Znext, update with a deferred correction once we know how * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */ if (it < pgmres->max_k+1 && ksp->its+1 < PetscMax(2,ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */ ierr = KSP_PCApplyBAorAB(ksp,Zcur,Znext,VEC_TEMP_MATOP);CHKERRQ(ierr); } if (it > 1) { /* Complete the pending reduction */ ierr = VecNormEnd(VEC_VV(it-1),NORM_2,&newnorm);CHKERRQ(ierr); *HH(it-1,it-2) = newnorm; } if (it > 0) { /* Finish the reduction computing the latest column of H */ ierr = VecMDotEnd(Zcur,it,&(VEC_VV(0)),HH(0,it-1));CHKERRQ(ierr); } if (it > 1) { /* normalize the base vector from two iterations ago, basis is complete up to here */ ierr = VecScale(VEC_VV(it-1),1./ *HH(it-1,it-2));CHKERRQ(ierr); ierr = KSPPGMRESUpdateHessenberg(ksp,it-2,&hapend,&res);CHKERRQ(ierr); pgmres->it = it-2; ksp->its++; ksp->rnorm = res; ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (it < pgmres->max_k+1 || ksp->reason || ksp->its == ksp->max_it) { /* Monitor if we are done or still iterating, but not before a restart. */ ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } if (ksp->reason) break; /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } if (!(it < pgmres->max_k+1 && ksp->its < ksp->max_it)) break; /* The it-2 column of H was not scaled when we computed Zcur, apply correction */ ierr = VecScale(Zcur,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* And Znext computed in this iteration was computed using the under-scaled Zcur */ ierr = VecScale(Znext,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */ for (k=0; k<it; k++) *HH(k,it-1) /= *HH(it-1,it-2); /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This * column is complete except for HH(it,it-1) which we won't know until the next iteration. */ *HH(it-1,it-1) /= *HH(it-1,it-2); } if (it > 0) { PetscScalar *work; if (!pgmres->orthogwork) {ierr = PetscMalloc((pgmres->max_k + 2)*sizeof(PetscScalar),&pgmres->orthogwork);CHKERRQ(ierr);} work = pgmres->orthogwork; /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is * * Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1] * * where * * Z[j] = sum_{k=0}^j V[k] * H[k,j-1] * * substituting * * Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1] * * rearranging the iteration space from row-column to column-row * * Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1] * * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already * been transformed to upper triangular form. */ for (k=0; k<it+1; k++) { work[k] = 0; for (j=PetscMax(0,k-1); j<it-1; j++) work[k] -= *HES(k,j) * *HH(j,it-1); } ierr = VecMAXPY(Znext,it+1,work,&VEC_VV(0));CHKERRQ(ierr); ierr = VecAXPY(Znext,-*HH(it-1,it-1),Zcur);CHKERRQ(ierr); /* Orthogonalize Zcur against existing basis vectors. */ for (k=0; k<it; k++) work[k] = -*HH(k,it-1); ierr = VecMAXPY(Zcur,it,work,&VEC_VV(0));CHKERRQ(ierr); /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */ /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */ ierr = VecNormBegin(VEC_VV(it),NORM_2,&newnorm);CHKERRQ(ierr); } /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */ ierr = VecMDotBegin(Znext,it+1,&VEC_VV(0),HH(0,it));CHKERRQ(ierr); /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */ ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));CHKERRQ(ierr); } if (itcount) *itcount = it-1; /* Number of iterations actually completed. */ /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ ierr = KSPPGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-2);CHKERRQ(ierr); PetscFunctionReturn(0); }
extern "C" magma_int_t magma_cgmres( magma_c_sparse_matrix A, magma_c_vector b, magma_c_vector *x, magma_c_solver_par *solver_par, magma_queue_t queue ) { magma_int_t stat = 0; // set queue for old dense routines magma_queue_t orig_queue; magmablasGetKernelStream( &orig_queue ); magma_int_t stat_cpu = 0, stat_dev = 0; // prepare solver feedback solver_par->solver = Magma_GMRES; solver_par->numiter = 0; solver_par->info = MAGMA_SUCCESS; // local variables magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE, c_mone = MAGMA_C_NEG_ONE; magma_int_t dofs = A.num_rows; magma_int_t i, j, k, m = 0; magma_int_t restart = min( dofs-1, solver_par->restart ); magma_int_t ldh = restart+1; float nom, rNorm, RNorm, nom0, betanom, r0 = 0.; // CPU workspace //magma_setdevice(0); magmaFloatComplex *H, *HH, *y, *h1; stat_cpu += magma_cmalloc_pinned( &H, (ldh+1)*ldh ); stat_cpu += magma_cmalloc_pinned( &y, ldh ); stat_cpu += magma_cmalloc_pinned( &HH, ldh*ldh ); stat_cpu += magma_cmalloc_pinned( &h1, ldh ); if( stat_cpu != 0){ magma_free_pinned( H ); magma_free_pinned( y ); magma_free_pinned( HH ); magma_free_pinned( h1 ); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_HOST_ALLOC; } // GPU workspace magma_c_vector r, q, q_t; magma_c_vinit( &r, Magma_DEV, dofs, c_zero, queue ); magma_c_vinit( &q, Magma_DEV, dofs*(ldh+1), c_zero, queue ); q_t.memory_location = Magma_DEV; q_t.dval = NULL; q_t.num_rows = q_t.nnz = dofs; q_t.num_cols = 1; magmaFloatComplex *dy = NULL, *dH = NULL; stat_dev += magma_cmalloc( &dy, ldh ); stat_dev += magma_cmalloc( &dH, (ldh+1)*ldh ); if( stat_dev != 0){ magma_free_pinned( H ); magma_free_pinned( y ); magma_free_pinned( HH ); magma_free_pinned( h1 ); magma_free( dH ); magma_free( dy ); magma_free( dH ); magma_free( dy ); magmablasSetKernelStream( orig_queue ); return MAGMA_ERR_DEVICE_ALLOC; } // GPU stream magma_queue_t stream[2]; magma_event_t event[1]; magma_queue_create( &stream[0] ); magma_queue_create( &stream[1] ); magma_event_create( &event[0] ); //magmablasSetKernelStream(stream[0]); magma_cscal( dofs, c_zero, x->dval, 1 ); // x = 0 magma_ccopy( dofs, b.dval, 1, r.dval, 1 ); // r = b nom0 = betanom = magma_scnrm2( dofs, r.dval, 1 ); // nom0= || r|| nom = nom0 * nom0; solver_par->init_res = nom0; H(1,0) = MAGMA_C_MAKE( nom0, 0. ); magma_csetvector(1, &H(1,0), 1, &dH(1,0), 1); if ( (r0 = nom0 * solver_par->epsilon ) < ATOLERANCE ){ r0 = solver_par->epsilon; } if ( nom < r0 ) { magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ) { for(k=1; k<=restart; k++) { magma_ccopy(dofs, r.dval, 1, q(k-1), 1); // q[0] = 1.0/||r|| magma_cscal(dofs, 1./H(k,k-1), q(k-1), 1); // (to be fused) q_t.dval = q(k-1); //magmablasSetKernelStream(stream[0]); magma_c_spmv( c_one, A, q_t, c_zero, r, queue ); // r = A q[k] // if (solver_par->ortho == Magma_MGS ) { // modified Gram-Schmidt for (i=1; i<=k; i++) { H(i,k) =magma_cdotc(dofs, q(i-1), 1, r.dval, 1); // H(i,k) = q[i] . r magma_caxpy(dofs,-H(i,k), q(i-1), 1, r.dval, 1); // r = r - H(i,k) q[i] } H(k+1,k) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); // H(k+1,k) = ||r|| /*} else if (solver_par->ortho == Magma_FUSED_CGS ) { // fusing cgemv with scnrm2 in classical Gram-Schmidt magmablasSetKernelStream(stream[0]); magma_ccopy(dofs, r.dval, 1, q(k), 1); // dH(1:k+1,k) = q[0:k] . r magmablas_cgemv(MagmaTrans, dofs, k+1, c_one, q(0), dofs, r.dval, 1, c_zero, &dH(1,k), 1); // r = r - q[0:k-1] dH(1:k,k) magmablas_cgemv(MagmaNoTrans, dofs, k, c_mone, q(0), dofs, &dH(1,k), 1, c_one, r.dval, 1); // 1) dH(k+1,k) = sqrt( dH(k+1,k) - dH(1:k,k) ) magma_ccopyscale( dofs, k, r.dval, q(k), &dH(1,k) ); // 2) q[k] = q[k] / dH(k+1,k) magma_event_record( event[0], stream[0] ); magma_queue_wait_event( stream[1], event[0] ); magma_cgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]); // asynch copy dH(1:(k+1),k) to H(1:(k+1),k) } else { // classical Gram-Schmidt (default) // > explicitly calling magmabls magmablasSetKernelStream(stream[0]); magmablas_cgemv(MagmaTrans, dofs, k, c_one, q(0), dofs, r.dval, 1, c_zero, &dH(1,k), 1, queue ); // dH(1:k,k) = q[0:k-1] . r #ifndef SCNRM2SCALE // start copying dH(1:k,k) to H(1:k,k) magma_event_record( event[0], stream[0] ); magma_queue_wait_event( stream[1], event[0] ); magma_cgetvector_async(k, &dH(1,k), 1, &H(1,k), 1, stream[1]); #endif // r = r - q[0:k-1] dH(1:k,k) magmablas_cgemv(MagmaNoTrans, dofs, k, c_mone, q(0), dofs, &dH(1,k), 1, c_one, r.dval, 1); #ifdef SCNRM2SCALE magma_ccopy(dofs, r.dval, 1, q(k), 1); // q[k] = r / H(k,k-1) magma_scnrm2scale(dofs, q(k), dofs, &dH(k+1,k) ); // dH(k+1,k) = sqrt(r . r) and r = r / dH(k+1,k) magma_event_record( event[0], stream[0] ); // start sending dH(1:k,k) to H(1:k,k) magma_queue_wait_event( stream[1], event[0] ); // can we keep H(k+1,k) on GPU and combine? magma_cgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]); #else H(k+1,k) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); // H(k+1,k) = sqrt(r . r) if ( k<solver_par->restart ) { magmablasSetKernelStream(stream[0]); magma_ccopy(dofs, r.dval, 1, q(k), 1); // q[k] = 1.0/H[k][k-1] r magma_cscal(dofs, 1./H(k+1,k), q(k), 1); // (to be fused) } #endif }*/ /* Minimization of || b-Ax || in H_k */ for (i=1; i<=k; i++) { HH(k,i) = magma_cblas_cdotc( i+1, &H(1,k), 1, &H(1,i), 1 ); } h1[k] = H(1,k)*H(1,0); if (k != 1) { for (i=1; i<k; i++) { HH(k,i) = HH(k,i)/HH(i,i);// for (m=i+1; m<=k; m++) { HH(k,m) -= HH(k,i) * HH(m,i) * HH(i,i); } h1[k] -= h1[i] * HH(k,i); } } y[k] = h1[k]/HH(k,k); if (k != 1) for (i=k-1; i>=1; i--) { y[i] = h1[i]/HH(i,i); for (j=i+1; j<=k; j++) y[i] -= y[j] * HH(j,i); } m = k; rNorm = fabs(MAGMA_C_REAL(H(k+1,k))); }/* Minimization done */ // compute solution approximation magma_csetmatrix(m, 1, y+1, m, dy, m ); magma_cgemv(MagmaNoTrans, dofs, m, c_one, q(0), dofs, dy, 1, c_one, x->dval, 1); // compute residual magma_c_spmv( c_mone, A, *x, c_zero, r, queue ); // r = - A * x magma_caxpy(dofs, c_one, b.dval, 1, r.dval, 1); // r = r + b H(1,0) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); // RNorm = H[1][0] = || r || RNorm = MAGMA_C_REAL( H(1,0) ); betanom = fabs(RNorm); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( betanom < r0 ) { break; } } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; float residual; magma_cresidual( A, b, *x, &residual, queue ); solver_par->iter_res = betanom; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter) { solver_par->info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_SLOW_CONVERGENCE; } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_DIVERGENCE; } // free pinned memory magma_free_pinned( H ); magma_free_pinned( y ); magma_free_pinned( HH ); magma_free_pinned( h1 ); // free GPU memory magma_free(dy); if (dH != NULL ) magma_free(dH); magma_c_vfree(&r, queue ); magma_c_vfree(&q, queue ); // free GPU streams and events magma_queue_destroy( stream[0] ); magma_queue_destroy( stream[1] ); magma_event_destroy( event[0] ); //magmablasSetKernelStream(NULL); magmablasSetKernelStream( orig_queue ); return MAGMA_SUCCESS; } /* magma_cgmres */
PetscErrorCode KSPGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_GMRES *gmres = (KSP_GMRES*)(ksp->data); PetscReal res_norm,res,hapbnd,tt; PetscErrorCode ierr; PetscInt it = 0, max_k = gmres->max_k; PetscBool hapend = PETSC_FALSE; PetscFunctionBegin; ierr = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr); res = res_norm; *GRS(0) = res_norm; /* check for the convergence */ ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = res; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); gmres->it = (it - 1); ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); if (!res) { if (itcount) *itcount = 0; ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) { if (it) { ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } gmres->it = (it - 1); if (gmres->vv_allocated <= it + VEC_OFFSET + 1) { ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr); } ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);CHKERRQ(ierr); /* update hessenberg matrix and do Gram-Schmidt */ ierr = (*gmres->orthog)(ksp,it);CHKERRQ(ierr); /* vv(i+1) . vv(i+1) */ ierr = VecNormalize(VEC_VV(it+1),&tt);CHKERRQ(ierr); /* save the magnitude */ *HH(it+1,it) = tt; *HES(it+1,it) = tt; /* check for the happy breakdown */ hapbnd = PetscAbsScalar(tt / *GRS(it)); if (hapbnd > gmres->haptol) hapbnd = gmres->haptol; if (tt < hapbnd) { ierr = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %14.12e tt = %14.12e\n",(double)hapbnd,(double)tt);CHKERRQ(ierr); hapend = PETSC_TRUE; } ierr = KSPGMRESUpdateHessenberg(ksp,it,hapend,&res);CHKERRQ(ierr); it++; gmres->it = (it-1); /* For converged */ ksp->its++; ksp->rnorm = res; if (ksp->reason) break; ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (!ksp->reason) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } } } /* Monitor if we know that we will not return for a restart */ if (it && (ksp->reason || ksp->its >= ksp->max_it)) { ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } if (itcount) *itcount = it; /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ ierr = KSPGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);CHKERRQ(ierr); PetscFunctionReturn(0); }
static int rmd128_compress(hash_state *md, unsigned char *buf) #endif { ulong32 aa,bb,cc,dd,aaa,bbb,ccc,ddd,X[16]; int i; /* load words X */ for (i = 0; i < 16; i++){ LOAD32L(X[i], buf + (4 * i)); } /* load state */ aa = aaa = md->rmd128.state[0]; bb = bbb = md->rmd128.state[1]; cc = ccc = md->rmd128.state[2]; dd = ddd = md->rmd128.state[3]; /* round 1 */ FF(aa, bb, cc, dd, X[ 0], 11); FF(dd, aa, bb, cc, X[ 1], 14); FF(cc, dd, aa, bb, X[ 2], 15); FF(bb, cc, dd, aa, X[ 3], 12); FF(aa, bb, cc, dd, X[ 4], 5); FF(dd, aa, bb, cc, X[ 5], 8); FF(cc, dd, aa, bb, X[ 6], 7); FF(bb, cc, dd, aa, X[ 7], 9); FF(aa, bb, cc, dd, X[ 8], 11); FF(dd, aa, bb, cc, X[ 9], 13); FF(cc, dd, aa, bb, X[10], 14); FF(bb, cc, dd, aa, X[11], 15); FF(aa, bb, cc, dd, X[12], 6); FF(dd, aa, bb, cc, X[13], 7); FF(cc, dd, aa, bb, X[14], 9); FF(bb, cc, dd, aa, X[15], 8); /* round 2 */ GG(aa, bb, cc, dd, X[ 7], 7); GG(dd, aa, bb, cc, X[ 4], 6); GG(cc, dd, aa, bb, X[13], 8); GG(bb, cc, dd, aa, X[ 1], 13); GG(aa, bb, cc, dd, X[10], 11); GG(dd, aa, bb, cc, X[ 6], 9); GG(cc, dd, aa, bb, X[15], 7); GG(bb, cc, dd, aa, X[ 3], 15); GG(aa, bb, cc, dd, X[12], 7); GG(dd, aa, bb, cc, X[ 0], 12); GG(cc, dd, aa, bb, X[ 9], 15); GG(bb, cc, dd, aa, X[ 5], 9); GG(aa, bb, cc, dd, X[ 2], 11); GG(dd, aa, bb, cc, X[14], 7); GG(cc, dd, aa, bb, X[11], 13); GG(bb, cc, dd, aa, X[ 8], 12); /* round 3 */ HH(aa, bb, cc, dd, X[ 3], 11); HH(dd, aa, bb, cc, X[10], 13); HH(cc, dd, aa, bb, X[14], 6); HH(bb, cc, dd, aa, X[ 4], 7); HH(aa, bb, cc, dd, X[ 9], 14); HH(dd, aa, bb, cc, X[15], 9); HH(cc, dd, aa, bb, X[ 8], 13); HH(bb, cc, dd, aa, X[ 1], 15); HH(aa, bb, cc, dd, X[ 2], 14); HH(dd, aa, bb, cc, X[ 7], 8); HH(cc, dd, aa, bb, X[ 0], 13); HH(bb, cc, dd, aa, X[ 6], 6); HH(aa, bb, cc, dd, X[13], 5); HH(dd, aa, bb, cc, X[11], 12); HH(cc, dd, aa, bb, X[ 5], 7); HH(bb, cc, dd, aa, X[12], 5); /* round 4 */ II(aa, bb, cc, dd, X[ 1], 11); II(dd, aa, bb, cc, X[ 9], 12); II(cc, dd, aa, bb, X[11], 14); II(bb, cc, dd, aa, X[10], 15); II(aa, bb, cc, dd, X[ 0], 14); II(dd, aa, bb, cc, X[ 8], 15); II(cc, dd, aa, bb, X[12], 9); II(bb, cc, dd, aa, X[ 4], 8); II(aa, bb, cc, dd, X[13], 9); II(dd, aa, bb, cc, X[ 3], 14); II(cc, dd, aa, bb, X[ 7], 5); II(bb, cc, dd, aa, X[15], 6); II(aa, bb, cc, dd, X[14], 8); II(dd, aa, bb, cc, X[ 5], 6); II(cc, dd, aa, bb, X[ 6], 5); II(bb, cc, dd, aa, X[ 2], 12); /* parallel round 1 */ III(aaa, bbb, ccc, ddd, X[ 5], 8); III(ddd, aaa, bbb, ccc, X[14], 9); III(ccc, ddd, aaa, bbb, X[ 7], 9); III(bbb, ccc, ddd, aaa, X[ 0], 11); III(aaa, bbb, ccc, ddd, X[ 9], 13); III(ddd, aaa, bbb, ccc, X[ 2], 15); III(ccc, ddd, aaa, bbb, X[11], 15); III(bbb, ccc, ddd, aaa, X[ 4], 5); III(aaa, bbb, ccc, ddd, X[13], 7); III(ddd, aaa, bbb, ccc, X[ 6], 7); III(ccc, ddd, aaa, bbb, X[15], 8); III(bbb, ccc, ddd, aaa, X[ 8], 11); III(aaa, bbb, ccc, ddd, X[ 1], 14); III(ddd, aaa, bbb, ccc, X[10], 14); III(ccc, ddd, aaa, bbb, X[ 3], 12); III(bbb, ccc, ddd, aaa, X[12], 6); /* parallel round 2 */ HHH(aaa, bbb, ccc, ddd, X[ 6], 9); HHH(ddd, aaa, bbb, ccc, X[11], 13); HHH(ccc, ddd, aaa, bbb, X[ 3], 15); HHH(bbb, ccc, ddd, aaa, X[ 7], 7); HHH(aaa, bbb, ccc, ddd, X[ 0], 12); HHH(ddd, aaa, bbb, ccc, X[13], 8); HHH(ccc, ddd, aaa, bbb, X[ 5], 9); HHH(bbb, ccc, ddd, aaa, X[10], 11); HHH(aaa, bbb, ccc, ddd, X[14], 7); HHH(ddd, aaa, bbb, ccc, X[15], 7); HHH(ccc, ddd, aaa, bbb, X[ 8], 12); HHH(bbb, ccc, ddd, aaa, X[12], 7); HHH(aaa, bbb, ccc, ddd, X[ 4], 6); HHH(ddd, aaa, bbb, ccc, X[ 9], 15); HHH(ccc, ddd, aaa, bbb, X[ 1], 13); HHH(bbb, ccc, ddd, aaa, X[ 2], 11); /* parallel round 3 */ GGG(aaa, bbb, ccc, ddd, X[15], 9); GGG(ddd, aaa, bbb, ccc, X[ 5], 7); GGG(ccc, ddd, aaa, bbb, X[ 1], 15); GGG(bbb, ccc, ddd, aaa, X[ 3], 11); GGG(aaa, bbb, ccc, ddd, X[ 7], 8); GGG(ddd, aaa, bbb, ccc, X[14], 6); GGG(ccc, ddd, aaa, bbb, X[ 6], 6); GGG(bbb, ccc, ddd, aaa, X[ 9], 14); GGG(aaa, bbb, ccc, ddd, X[11], 12); GGG(ddd, aaa, bbb, ccc, X[ 8], 13); GGG(ccc, ddd, aaa, bbb, X[12], 5); GGG(bbb, ccc, ddd, aaa, X[ 2], 14); GGG(aaa, bbb, ccc, ddd, X[10], 13); GGG(ddd, aaa, bbb, ccc, X[ 0], 13); GGG(ccc, ddd, aaa, bbb, X[ 4], 7); GGG(bbb, ccc, ddd, aaa, X[13], 5); /* parallel round 4 */ FFF(aaa, bbb, ccc, ddd, X[ 8], 15); FFF(ddd, aaa, bbb, ccc, X[ 6], 5); FFF(ccc, ddd, aaa, bbb, X[ 4], 8); FFF(bbb, ccc, ddd, aaa, X[ 1], 11); FFF(aaa, bbb, ccc, ddd, X[ 3], 14); FFF(ddd, aaa, bbb, ccc, X[11], 14); FFF(ccc, ddd, aaa, bbb, X[15], 6); FFF(bbb, ccc, ddd, aaa, X[ 0], 14); FFF(aaa, bbb, ccc, ddd, X[ 5], 6); FFF(ddd, aaa, bbb, ccc, X[12], 9); FFF(ccc, ddd, aaa, bbb, X[ 2], 12); FFF(bbb, ccc, ddd, aaa, X[13], 9); FFF(aaa, bbb, ccc, ddd, X[ 9], 12); FFF(ddd, aaa, bbb, ccc, X[ 7], 5); FFF(ccc, ddd, aaa, bbb, X[10], 15); FFF(bbb, ccc, ddd, aaa, X[14], 8); /* combine results */ ddd += cc + md->rmd128.state[1]; /* final result for MDbuf[0] */ md->rmd128.state[1] = md->rmd128.state[2] + dd + aaa; md->rmd128.state[2] = md->rmd128.state[3] + aa + bbb; md->rmd128.state[3] = md->rmd128.state[0] + bb + ccc; md->rmd128.state[0] = ddd; return CRYPT_OK; }
int main() { printf("\a\t\tIFPB - Campus Joao Pessoa\n"); printf("\t\tCurso Superior de Engenharia Eletrica\n"); printf("\t\tDisciplina de Algoritmos e Logica de Programacao\n"); printf("\t\tProfessor: Erick\n"); printf("\t\tProva 2: Parte 2, Jogo de Batalha Naval\n"); printf("\t\tGrupo: Paulo Felipe, Josivaldo Gomes e Marcello Aires\n\n"); printf("\n--------------------------------------------------------------------------------\n"); char M[N][N], MB[N][N]; int i, j, opcao, ME[N][N]={}; GerarEspiral(ME); ZeraT(M, MB);//JOGADOR 1 opcao = Menu(); if(opcao==1) { HH(MB); contpos = cont(MB); printf("\nSecao de Jogadas do Jogador 1\n\n"); for(i=0;i<N*N;i++) { Imprimir(M); Jogada(M, MB, 0); printf("\nQuantidade de Jogadas: %d\nQuantidade de Acertos: %d\nQuantidade Total de Posicoes de Barco: %d\n\n", contjog1, cont1, contpos); if(contpos == cont1) break; } Imprimir(M); printf("\n\nSECAO DE JOGADAS DO JOGADOR 1 TERMINADA\n\n"); } else if(opcao==2) { CH(MB); contpos = cont(MB); printf("\nSecao de Jogadas do Jogador 1\n\n"); for(i=0;i<N*N;i++) { Imprimir(M); Jogada(M, MB, 0); printf("\nQuantidade de Jogadas: %d\nQuantidade de Acertos: %d\nQuantidade Total de Posicoes de Barco: %d\n\n", contjog1, cont1, contpos); if(contpos == cont1) break; } Imprimir(M); printf("\n\nSECAO DE JOGADAS DO JOGADOR 1 TERMINADA\n\n"); } else if(opcao==3) { int X, Y, i, estado = 0, ult=0; HC(MB, M); contpos = cont(MB); for(i=0;i<N*N;i++) { do { estado = 0; X = (rand()%N);//VAI DE 0 A N-1 Y = (rand()%N);//VAI DE 0 A N-1 if(MB[X][Y]=='~' && M[X][Y]=='~') { M[X][Y] = '*'; contjog1++; estado = 1; } else if(MB[X][Y]=='B' && M[X][Y]=='~') { M[X][Y] = 'X'; contjog1++; cont1++; i += AcertouNavio(M, MB, X, Y, 0); estado = 1; } }while(estado!=1); ImprimirAux(0); Imprimir(M); if(cont1==contpos) { printf("\n\nSECAO DE JOGADAS DO JOGADOR 1 TERMINADA\n\n"); break; } } } ZeraT(M, MB);//JOGADOR 2 opcao = Menu(); if(opcao==1) { HH(MB); contpos = cont(MB); printf("\nSecao de Jogadas do Jogador 2\n\n"); for(i=0;i<N*N;i++) { Imprimir(M); Jogada(M, MB, 1); printf("\nQuantidade de Jogadas: %d\nQuantidade de Acertos: %d\nQuantidade Total de Posicoes de Barco: %d\n\n", contjog2, cont2, contpos); if(contpos == cont2) break; } Imprimir(M); printf("\n\nSECAO DE JOGADAS DO JOGADOR 2 TERMINADA\n\n"); } else if(opcao==2) { CH(MB); contpos = cont(MB); printf("\nSecao de Jogadas do Jogador 2\n\n"); for(i=0;i<N*N;i++) { Imprimir(M); Jogada(M, MB, 1); printf("\nQuantidade de Jogadas: %d\nQuantidade de Acertos: %d\nQuantidade Total de Posicoes de Barco: %d\n\n", contjog2, cont2, contpos); if(contpos == cont2) break; } Imprimir(M); printf("\n\nSECAO DE JOGADAS DO JOGADOR 2 TERMINADA\n\n"); } else if(opcao==3) { int X, Y, i, estado = 0, ult=0; HC(MB, M); contpos = cont(MB); for(i=0;i<N*N;i++) { do { estado = 0; X = (rand()%N);//VAI DE 0 A N-1 Y = (rand()%N);//VAI DE 0 A N-1 if(MB[X][Y]=='~' && M[X][Y]=='~') { M[X][Y] = '*'; contjog2++; estado = 1; } else if(MB[X][Y]=='B' && M[X][Y]=='~') { M[X][Y] = 'X'; contjog2++; cont2++; i+=AcertouNavio(M, MB, X, Y, 1); estado = 1; } }while(estado!=1); ImprimirAux(1); Imprimir(M); if(cont2==contpos) { printf("\n\nSECAO DE JOGADAS DO JOGADOR 2 TERMINADA\n\n"); break; } } } if(contjog1<contjog2) printf("\nJOGADOR 1 VENCEU!!! %d x %d\n\n", contjog1, contjog2); else if(contjog1>contjog2) printf("\nJOGADOR 2 VENCEU!!! %d x %d\n\n", contjog2, contjog1); else printf("\nJOGO EMPATADO!!! %d x %d\n\n", contjog1, contjog2); return 0; }
void MD5::transform(unsigned char *input) { unsigned int a,b,c,d,x[16]; a=state[0]; b=state[1]; c=state[2]; d=state[3]; // byte to dword for (int i=0;i<16;i++) { int j=i<<2; unsigned int w1,w2,w3,w4; w1=input[j]; w2=input[j+1]; w3=input[j+2]; w4=input[j+3]; x[i]=((unsigned int)w1)|(((unsigned int)w2)<<8)| (((unsigned int)w3)<<16)|(((unsigned int)w4)<<24); } #define S11 7 #define S12 12 #define S13 17 #define S14 22 #define S21 5 #define S22 9 #define S23 14 #define S24 20 #define S31 4 #define S32 11 #define S33 16 #define S34 23 #define S41 6 #define S42 10 #define S43 15 #define S44 21 /* Round 1*/ FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ /* Round 2 */ GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ /* Round 3 */ HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ /* Round 4 */ II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ state[0]+=a; state[1]+=b; state[2]+=c; state[3]+=d; }
PetscErrorCode KSPFGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_FGMRES *fgmres = (KSP_FGMRES*)(ksp->data); PetscReal res_norm; PetscReal hapbnd,tt; PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ PetscErrorCode ierr; PetscInt loc_it; /* local count of # of dir. in Krylov space */ PetscInt max_k = fgmres->max_k; /* max # of directions Krylov space */ Mat Amat,Pmat; MatStructure pflag; PetscFunctionBegin; /* Number of pseudo iterations since last restart is the number of prestart directions */ loc_it = 0; /* note: (fgmres->it) is always set one less than (loc_it) It is used in KSPBUILDSolution_FGMRES, where it is passed to KSPFGMRESBuildSoln. Note that when KSPFGMRESBuildSoln is called from this function, (loc_it -1) is passed, so the two are equivalent */ fgmres->it = (loc_it - 1); /* initial residual is in VEC_VV(0) - compute its norm*/ ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); /* first entry in right-hand-side of hessenberg system is just the initial residual norm */ *RS(0) = res_norm; ksp->rnorm = res_norm; ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); /* check for the convergence - maybe the current guess is good enough */ ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { if (itcount) *itcount = 0; PetscFunctionReturn(0); } /* scale VEC_VV (the initial residual) */ ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); /* MAIN ITERATION LOOP BEGINNING*/ /* keep iterating until we have converged OR generated the max number of directions OR reached the max number of iterations for the method */ while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { if (loc_it) { ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); } fgmres->it = (loc_it - 1); /* see if more space is needed for work vectors */ if (fgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { ierr = KSPFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); /* (loc_it+1) is passed in as number of the first vector that should be allocated */ } /* CHANGE THE PRECONDITIONER? */ /* ModifyPC is the callback function that can be used to change the PC or its attributes before its applied */ (*fgmres->modifypc)(ksp,ksp->its,loc_it,res_norm,fgmres->modifyctx); /* apply PRECONDITIONER to direction vector and store with preconditioned vectors in prevec */ ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr); /* Multiply preconditioned vector by operator - put in VEC_VV(loc_it+1) */ ierr = MatMult(Amat,PREVEC(loc_it),VEC_VV(1+loc_it));CHKERRQ(ierr); /* update hessenberg matrix and do Gram-Schmidt - new direction is in VEC_VV(1+loc_it)*/ ierr = (*fgmres->orthog)(ksp,loc_it);CHKERRQ(ierr); /* new entry in hessenburg is the 2-norm of our new direction */ ierr = VecNorm(VEC_VV(loc_it+1),NORM_2,&tt);CHKERRQ(ierr); *HH(loc_it+1,loc_it) = tt; *HES(loc_it+1,loc_it) = tt; /* Happy Breakdown Check */ hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); /* RS(loc_it) contains the res_norm from the last iteration */ hapbnd = PetscMin(fgmres->haptol,hapbnd); if (tt > hapbnd) { /* scale new direction by its norm */ ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); } else { /* This happens when the solution is exactly reached. */ /* So there is no new direction... */ ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ hapend = PETSC_TRUE; } /* note that for FGMRES we could get HES(loc_it+1, loc_it) = 0 and the current solution would not be exact if HES was singular. Note that HH non-singular implies that HES is no singular, and HES is guaranteed to be nonsingular when PREVECS are linearly independent and A is nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity of HES). So we should really add a check to verify that HES is nonsingular.*/ /* Now apply rotations to new col of hessenberg (and right side of system), calculate new rotation, and get new residual norm at the same time*/ ierr = KSPFGMRESUpdateHessenberg(ksp,loc_it,hapend,&res_norm);CHKERRQ(ierr); if (ksp->reason) break; loc_it++; fgmres->it = (loc_it-1); /* Add this here in case it has converged */ ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = res_norm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (!ksp->reason) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res_norm); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } } } /* END OF ITERATION LOOP */ ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); /* Monitor if we know that we will not return for a restart */ if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); } if (itcount) *itcount = loc_it; /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ /* Note: must pass in (loc_it-1) for iteration count so that KSPFGMRESBuildSoln properly navigates */ ierr = KSPFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); PetscFunctionReturn(0); }
/** @brief md5 basic transformation. Transforms state based on block. */ static void md5_transform(uint32_t state[4], uint8_t block[64]) { uint32_t a = state[0]; uint32_t b = state[1]; uint32_t c = state[2]; uint32_t d = state[3]; uint32_t x[16]; md5_decode(x, block, 64); // Round 1 a = FF(a, b, c, d, x[0], S11, 0xd76aa478); // 1 d = FF(d, a, b, c, x[1], S12, 0xe8c7b756); // 2 c = FF(c, d, a, b, x[2], S13, 0x242070db); // 3 b = FF(b, c, d, a, x[3], S14, 0xc1bdceee); // 4 a = FF(a, b, c, d, x[4], S11, 0xf57c0faf); // 5 d = FF(d, a, b, c, x[5], S12, 0x4787c62a); // 6 c = FF(c, d, a, b, x[6], S13, 0xa8304613); // 7 b = FF(b, c, d, a, x[7], S14, 0xfd469501); // 8 a = FF(a, b, c, d, x[8], S11, 0x698098d8); // 9 d = FF(d, a, b, c, x[9], S12, 0x8b44f7af); // 10 c = FF(c, d, a, b, x[10], S13, 0xffff5bb1); // 11 b = FF(b, c, d, a, x[11], S14, 0x895cd7be); // 12 a = FF(a, b, c, d, x[12], S11, 0x6b901122); // 13 d = FF(d, a, b, c, x[13], S12, 0xfd987193); // 14 c = FF(c, d, a, b, x[14], S13, 0xa679438e); // 15 b = FF(b, c, d, a, x[15], S14, 0x49b40821); // 16 // Round 2 a = GG(a, b, c, d, x[1], S21, 0xf61e2562); // 17 d = GG(d, a, b, c, x[6], S22, 0xc040b340); // 18 c = GG(c, d, a, b, x[11], S23, 0x265e5a51); // 19 b = GG(b, c, d, a, x[0], S24, 0xe9b6c7aa); // 20 a = GG(a, b, c, d, x[5], S21, 0xd62f105d); // 21 d = GG(d, a, b, c, x[10], S22, 0x2441453); // 22 c = GG(c, d, a, b, x[15], S23, 0xd8a1e681); // 23 b = GG(b, c, d, a, x[4], S24, 0xe7d3fbc8); // 24 a = GG(a, b, c, d, x[9], S21, 0x21e1cde6); // 25 d = GG(d, a, b, c, x[14], S22, 0xc33707d6); // 26 c = GG(c, d, a, b, x[3], S23, 0xf4d50d87); // 27 b = GG(b, c, d, a, x[8], S24, 0x455a14ed); // 28 a = GG(a, b, c, d, x[13], S21, 0xa9e3e905); // 29 d = GG(d, a, b, c, x[2], S22, 0xfcefa3f8); // 30 c = GG(c, d, a, b, x[7], S23, 0x676f02d9); // 31 b = GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); // 32 // Round 3 a = HH(a, b, c, d, x[5], S31, 0xfffa3942); // 33 d = HH(d, a, b, c, x[8], S32, 0x8771f681); // 34 c = HH(c, d, a, b, x[11], S33, 0x6d9d6122); // 35 b = HH(b, c, d, a, x[14], S34, 0xfde5380c); // 36 a = HH(a, b, c, d, x[1], S31, 0xa4beea44); // 37 d = HH(d, a, b, c, x[4], S32, 0x4bdecfa9); // 38 c = HH(c, d, a, b, x[7], S33, 0xf6bb4b60); // 39 b = HH(b, c, d, a, x[10], S34, 0xbebfbc70); // 40 a = HH(a, b, c, d, x[13], S31, 0x289b7ec6); // 41 d = HH(d, a, b, c, x[0], S32, 0xeaa127fa); // 42 c = HH(c, d, a, b, x[3], S33, 0xd4ef3085); // 43 b = HH(b, c, d, a, x[6], S34, 0x4881d05); // 44 a = HH(a, b, c, d, x[9], S31, 0xd9d4d039); // 45 d = HH(d, a, b, c, x[12], S32, 0xe6db99e5); // 46 c = HH(c, d, a, b, x[15], S33, 0x1fa27cf8); // 47 b = HH(b, c, d, a, x[2], S34, 0xc4ac5665); // 48 // Round 4 a = II(a, b, c, d, x[0], S41, 0xf4292244); // 49 d = II(d, a, b, c, x[7], S42, 0x432aff97); // 50 c = II(c, d, a, b, x[14], S43, 0xab9423a7); // 51 b = II(b, c, d, a, x[5], S44, 0xfc93a039); // 52 a = II(a, b, c, d, x[12], S41, 0x655b59c3); // 53 d = II(d, a, b, c, x[3], S42, 0x8f0ccc92); // 54 c = II(c, d, a, b, x[10], S43, 0xffeff47d); // 55 b = II(b, c, d, a, x[1], S44, 0x85845dd1); // 56 a = II(a, b, c, d, x[8], S41, 0x6fa87e4f); // 57 d = II(d, a, b, c, x[15], S42, 0xfe2ce6e0); // 58 c = II(c, d, a, b, x[6], S43, 0xa3014314); // 59 b = II(b, c, d, a, x[13], S44, 0x4e0811a1); // 60 a = II(a, b, c, d, x[4], S41, 0xf7537e82); // 61 d = II(d, a, b, c, x[11], S42, 0xbd3af235); // 62 c = II(c, d, a, b, x[2], S43, 0x2ad7d2bb); // 63 b = II(b, c, d, a, x[9], S44, 0xeb86d391); // 64 state[0] += a; state[1] += b; state[2] += c; state[3] += d; // Zero sensitive information. memset(&x,0,sizeof(x)); }
//MD5摘要 MD5VAL md5(char * str, unsigned int size) { if(size==0) size=strlen(str); unsigned int m=size%64; unsigned int lm=size-m; //数据整块长度 unsigned int ln; //数据补位后长度 if(m<56) ln=lm+64; else ln=lm+128; char * strw=new char[ln]; unsigned int i; //复制原字串到缓冲区strw for(i=0;i<size;i++) strw[i]=str[i]; //补位 strw[i++]=0x80; for(i;i<ln-8;i++) strw[i]=0x00; //补长度 unsigned int * x=(unsigned int *)(strw+i); *(x++)=size<<3; *(x++)=size>>29; //初始化MD5参数 MD5VAL val={0x67452301,0xefcdab89,0x98badcfe,0x10325476}; unsigned int &a=val.a, &b=val.b, &c=val.c, &d=val.d; unsigned int aa,bb,cc,dd; for(i=0;i<ln;i+=64) { x=(unsigned int *)(strw+i); // Save the values aa=a; bb=b; cc=c; dd=d; // Round 1 FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ // Round 2 GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ // Round 3 HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ // Round 4 */ II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ // Add the original values a+=aa; b+=bb; c+=cc; d+=dd; } delete[] strw; return val; }
//MD5文件摘要 MD5VAL md5File(FILE * fpin) { if(!Buffer) Buffer=new char[BUFFER_SIZE+64]; char * buf=Buffer; MD5VAL val={0x67452301,0xefcdab89,0x98badcfe,0x10325476}; unsigned int &a=val.a, &b=val.b, &c=val.c, &d=val.d; unsigned int aa,bb,cc,dd; unsigned int i,j,count,co; unsigned int * x; i=0; do { count=fread(buf,1,BUFFER_SIZE,fpin); i+=count; if(count==BUFFER_SIZE) co=BUFFER_SIZE; else { j=count; buf[j++]=0x80; for(j;j%64!=56;j++) buf[j]=0x00; *(unsigned int *)(buf+j)=i<<3; j+=4; *(unsigned int *)(buf+j)=i>>29; j+=4; co=j; } for(j=0;j<co;j+=64) { x=(unsigned int *)(buf+j); // Save the values aa=a; bb=b; cc=c; dd=d; // Round 1 FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ // Round 2 GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ // Round 3 HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ // Round 4 */ II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ // Add the original values a+=aa; b+=bb; c+=cc; d+=dd; } } while(count==BUFFER_SIZE); return val; }
static PetscErrorCode KSPFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res) { PetscScalar *hh,*cc,*ss,tt; PetscInt j; KSP_FGMRES *fgmres = (KSP_FGMRES*)(ksp->data); PetscFunctionBegin; hh = HH(0,it); /* pointer to beginning of column to update - so incrementing hh "steps down" the (it+1)th col of HH*/ cc = CC(0); /* beginning of cosine rotations */ ss = SS(0); /* beginning of sine rotations */ /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), and some refs have [c s ; -conj(s) c] (don't be confused!) */ for (j=1; j<=it; j++) { tt = *hh; *hh = PetscConj(*cc) * tt + *ss * *(hh+1); hh++; *hh = *cc++ * *hh - (*ss++ * tt); /* hh, cc, and ss have all been incremented one by end of loop */ } /* compute the new plane rotation, and apply it to: 1) the right-hand-side of the Hessenberg system (RS) note: it affects RS(it) and RS(it+1) 2) the new column of the Hessenberg matrix note: it affects HH(it,it) which is currently pointed to by hh and HH(it+1, it) (*(hh+1)) thus obtaining the updated value of the residual... */ /* compute new plane rotation */ if (!hapend) { tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1)); if (tt == 0.0) { ksp->reason = KSP_DIVERGED_NULL; PetscFunctionReturn(0); } *cc = *hh / tt; /* new cosine value */ *ss = *(hh+1) / tt; /* new sine value */ /* apply to 1) and 2) */ *RS(it+1) = -(*ss * *RS(it)); *RS(it) = PetscConj(*cc) * *RS(it); *hh = PetscConj(*cc) * *hh + *ss * *(hh+1); /* residual is the last element (it+1) of right-hand side! */ *res = PetscAbsScalar(*RS(it+1)); } else { /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply another rotation matrix (so RH doesn't change). The new residual is always the new sine term times the residual from last time (RS(it)), but now the new sine rotation would be zero...so the residual should be zero...so we will multiply "zero" by the last residual. This might not be exactly what we want to do here -could just return "zero". */ *res = 0.0; } PetscFunctionReturn(0); }
static int rmd256_compress(hash_state *md, unsigned char *buf) #endif { ulong32 aa,bb,cc,dd,aaa,bbb,ccc,ddd,tmp,X[16]; int i; /* load words X */ for (i = 0; i < 16; i++){ LOAD32L(X[i], buf + (4 * i)); } /* load state */ aa = md->rmd256.state[0]; bb = md->rmd256.state[1]; cc = md->rmd256.state[2]; dd = md->rmd256.state[3]; aaa = md->rmd256.state[4]; bbb = md->rmd256.state[5]; ccc = md->rmd256.state[6]; ddd = md->rmd256.state[7]; /* round 1 */ FF(aa, bb, cc, dd, X[ 0], 11); FF(dd, aa, bb, cc, X[ 1], 14); FF(cc, dd, aa, bb, X[ 2], 15); FF(bb, cc, dd, aa, X[ 3], 12); FF(aa, bb, cc, dd, X[ 4], 5); FF(dd, aa, bb, cc, X[ 5], 8); FF(cc, dd, aa, bb, X[ 6], 7); FF(bb, cc, dd, aa, X[ 7], 9); FF(aa, bb, cc, dd, X[ 8], 11); FF(dd, aa, bb, cc, X[ 9], 13); FF(cc, dd, aa, bb, X[10], 14); FF(bb, cc, dd, aa, X[11], 15); FF(aa, bb, cc, dd, X[12], 6); FF(dd, aa, bb, cc, X[13], 7); FF(cc, dd, aa, bb, X[14], 9); FF(bb, cc, dd, aa, X[15], 8); /* parallel round 1 */ III(aaa, bbb, ccc, ddd, X[ 5], 8); III(ddd, aaa, bbb, ccc, X[14], 9); III(ccc, ddd, aaa, bbb, X[ 7], 9); III(bbb, ccc, ddd, aaa, X[ 0], 11); III(aaa, bbb, ccc, ddd, X[ 9], 13); III(ddd, aaa, bbb, ccc, X[ 2], 15); III(ccc, ddd, aaa, bbb, X[11], 15); III(bbb, ccc, ddd, aaa, X[ 4], 5); III(aaa, bbb, ccc, ddd, X[13], 7); III(ddd, aaa, bbb, ccc, X[ 6], 7); III(ccc, ddd, aaa, bbb, X[15], 8); III(bbb, ccc, ddd, aaa, X[ 8], 11); III(aaa, bbb, ccc, ddd, X[ 1], 14); III(ddd, aaa, bbb, ccc, X[10], 14); III(ccc, ddd, aaa, bbb, X[ 3], 12); III(bbb, ccc, ddd, aaa, X[12], 6); tmp = aa; aa = aaa; aaa = tmp; /* round 2 */ GG(aa, bb, cc, dd, X[ 7], 7); GG(dd, aa, bb, cc, X[ 4], 6); GG(cc, dd, aa, bb, X[13], 8); GG(bb, cc, dd, aa, X[ 1], 13); GG(aa, bb, cc, dd, X[10], 11); GG(dd, aa, bb, cc, X[ 6], 9); GG(cc, dd, aa, bb, X[15], 7); GG(bb, cc, dd, aa, X[ 3], 15); GG(aa, bb, cc, dd, X[12], 7); GG(dd, aa, bb, cc, X[ 0], 12); GG(cc, dd, aa, bb, X[ 9], 15); GG(bb, cc, dd, aa, X[ 5], 9); GG(aa, bb, cc, dd, X[ 2], 11); GG(dd, aa, bb, cc, X[14], 7); GG(cc, dd, aa, bb, X[11], 13); GG(bb, cc, dd, aa, X[ 8], 12); /* parallel round 2 */ HHH(aaa, bbb, ccc, ddd, X[ 6], 9); HHH(ddd, aaa, bbb, ccc, X[11], 13); HHH(ccc, ddd, aaa, bbb, X[ 3], 15); HHH(bbb, ccc, ddd, aaa, X[ 7], 7); HHH(aaa, bbb, ccc, ddd, X[ 0], 12); HHH(ddd, aaa, bbb, ccc, X[13], 8); HHH(ccc, ddd, aaa, bbb, X[ 5], 9); HHH(bbb, ccc, ddd, aaa, X[10], 11); HHH(aaa, bbb, ccc, ddd, X[14], 7); HHH(ddd, aaa, bbb, ccc, X[15], 7); HHH(ccc, ddd, aaa, bbb, X[ 8], 12); HHH(bbb, ccc, ddd, aaa, X[12], 7); HHH(aaa, bbb, ccc, ddd, X[ 4], 6); HHH(ddd, aaa, bbb, ccc, X[ 9], 15); HHH(ccc, ddd, aaa, bbb, X[ 1], 13); HHH(bbb, ccc, ddd, aaa, X[ 2], 11); tmp = bb; bb = bbb; bbb = tmp; /* round 3 */ HH(aa, bb, cc, dd, X[ 3], 11); HH(dd, aa, bb, cc, X[10], 13); HH(cc, dd, aa, bb, X[14], 6); HH(bb, cc, dd, aa, X[ 4], 7); HH(aa, bb, cc, dd, X[ 9], 14); HH(dd, aa, bb, cc, X[15], 9); HH(cc, dd, aa, bb, X[ 8], 13); HH(bb, cc, dd, aa, X[ 1], 15); HH(aa, bb, cc, dd, X[ 2], 14); HH(dd, aa, bb, cc, X[ 7], 8); HH(cc, dd, aa, bb, X[ 0], 13); HH(bb, cc, dd, aa, X[ 6], 6); HH(aa, bb, cc, dd, X[13], 5); HH(dd, aa, bb, cc, X[11], 12); HH(cc, dd, aa, bb, X[ 5], 7); HH(bb, cc, dd, aa, X[12], 5); /* parallel round 3 */ GGG(aaa, bbb, ccc, ddd, X[15], 9); GGG(ddd, aaa, bbb, ccc, X[ 5], 7); GGG(ccc, ddd, aaa, bbb, X[ 1], 15); GGG(bbb, ccc, ddd, aaa, X[ 3], 11); GGG(aaa, bbb, ccc, ddd, X[ 7], 8); GGG(ddd, aaa, bbb, ccc, X[14], 6); GGG(ccc, ddd, aaa, bbb, X[ 6], 6); GGG(bbb, ccc, ddd, aaa, X[ 9], 14); GGG(aaa, bbb, ccc, ddd, X[11], 12); GGG(ddd, aaa, bbb, ccc, X[ 8], 13); GGG(ccc, ddd, aaa, bbb, X[12], 5); GGG(bbb, ccc, ddd, aaa, X[ 2], 14); GGG(aaa, bbb, ccc, ddd, X[10], 13); GGG(ddd, aaa, bbb, ccc, X[ 0], 13); GGG(ccc, ddd, aaa, bbb, X[ 4], 7); GGG(bbb, ccc, ddd, aaa, X[13], 5); tmp = cc; cc = ccc; ccc = tmp; /* round 4 */ II(aa, bb, cc, dd, X[ 1], 11); II(dd, aa, bb, cc, X[ 9], 12); II(cc, dd, aa, bb, X[11], 14); II(bb, cc, dd, aa, X[10], 15); II(aa, bb, cc, dd, X[ 0], 14); II(dd, aa, bb, cc, X[ 8], 15); II(cc, dd, aa, bb, X[12], 9); II(bb, cc, dd, aa, X[ 4], 8); II(aa, bb, cc, dd, X[13], 9); II(dd, aa, bb, cc, X[ 3], 14); II(cc, dd, aa, bb, X[ 7], 5); II(bb, cc, dd, aa, X[15], 6); II(aa, bb, cc, dd, X[14], 8); II(dd, aa, bb, cc, X[ 5], 6); II(cc, dd, aa, bb, X[ 6], 5); II(bb, cc, dd, aa, X[ 2], 12); /* parallel round 4 */ FFF(aaa, bbb, ccc, ddd, X[ 8], 15); FFF(ddd, aaa, bbb, ccc, X[ 6], 5); FFF(ccc, ddd, aaa, bbb, X[ 4], 8); FFF(bbb, ccc, ddd, aaa, X[ 1], 11); FFF(aaa, bbb, ccc, ddd, X[ 3], 14); FFF(ddd, aaa, bbb, ccc, X[11], 14); FFF(ccc, ddd, aaa, bbb, X[15], 6); FFF(bbb, ccc, ddd, aaa, X[ 0], 14); FFF(aaa, bbb, ccc, ddd, X[ 5], 6); FFF(ddd, aaa, bbb, ccc, X[12], 9); FFF(ccc, ddd, aaa, bbb, X[ 2], 12); FFF(bbb, ccc, ddd, aaa, X[13], 9); FFF(aaa, bbb, ccc, ddd, X[ 9], 12); FFF(ddd, aaa, bbb, ccc, X[ 7], 5); FFF(ccc, ddd, aaa, bbb, X[10], 15); FFF(bbb, ccc, ddd, aaa, X[14], 8); tmp = dd; dd = ddd; ddd = tmp; /* combine results */ md->rmd256.state[0] += aa; md->rmd256.state[1] += bb; md->rmd256.state[2] += cc; md->rmd256.state[3] += dd; md->rmd256.state[4] += aaa; md->rmd256.state[5] += bbb; md->rmd256.state[6] += ccc; md->rmd256.state[7] += ddd; return CRYPT_OK; }
static int md4_compress(hash_state *md, unsigned char *buf) #endif { ulong32 x[16], a, b, c, d; int i; /* copy state */ a = md->md4.state[0]; b = md->md4.state[1]; c = md->md4.state[2]; d = md->md4.state[3]; /* copy the state into 512-bits into W[0..15] */ for (i = 0; i < 16; i++) { LOAD32L(x[i], buf + (4*i)); } /* Round 1 */ FF (a, b, c, d, x[ 0], S11); /* 1 */ FF (d, a, b, c, x[ 1], S12); /* 2 */ FF (c, d, a, b, x[ 2], S13); /* 3 */ FF (b, c, d, a, x[ 3], S14); /* 4 */ FF (a, b, c, d, x[ 4], S11); /* 5 */ FF (d, a, b, c, x[ 5], S12); /* 6 */ FF (c, d, a, b, x[ 6], S13); /* 7 */ FF (b, c, d, a, x[ 7], S14); /* 8 */ FF (a, b, c, d, x[ 8], S11); /* 9 */ FF (d, a, b, c, x[ 9], S12); /* 10 */ FF (c, d, a, b, x[10], S13); /* 11 */ FF (b, c, d, a, x[11], S14); /* 12 */ FF (a, b, c, d, x[12], S11); /* 13 */ FF (d, a, b, c, x[13], S12); /* 14 */ FF (c, d, a, b, x[14], S13); /* 15 */ FF (b, c, d, a, x[15], S14); /* 16 */ /* Round 2 */ GG (a, b, c, d, x[ 0], S21); /* 17 */ GG (d, a, b, c, x[ 4], S22); /* 18 */ GG (c, d, a, b, x[ 8], S23); /* 19 */ GG (b, c, d, a, x[12], S24); /* 20 */ GG (a, b, c, d, x[ 1], S21); /* 21 */ GG (d, a, b, c, x[ 5], S22); /* 22 */ GG (c, d, a, b, x[ 9], S23); /* 23 */ GG (b, c, d, a, x[13], S24); /* 24 */ GG (a, b, c, d, x[ 2], S21); /* 25 */ GG (d, a, b, c, x[ 6], S22); /* 26 */ GG (c, d, a, b, x[10], S23); /* 27 */ GG (b, c, d, a, x[14], S24); /* 28 */ GG (a, b, c, d, x[ 3], S21); /* 29 */ GG (d, a, b, c, x[ 7], S22); /* 30 */ GG (c, d, a, b, x[11], S23); /* 31 */ GG (b, c, d, a, x[15], S24); /* 32 */ /* Round 3 */ HH (a, b, c, d, x[ 0], S31); /* 33 */ HH (d, a, b, c, x[ 8], S32); /* 34 */ HH (c, d, a, b, x[ 4], S33); /* 35 */ HH (b, c, d, a, x[12], S34); /* 36 */ HH (a, b, c, d, x[ 2], S31); /* 37 */ HH (d, a, b, c, x[10], S32); /* 38 */ HH (c, d, a, b, x[ 6], S33); /* 39 */ HH (b, c, d, a, x[14], S34); /* 40 */ HH (a, b, c, d, x[ 1], S31); /* 41 */ HH (d, a, b, c, x[ 9], S32); /* 42 */ HH (c, d, a, b, x[ 5], S33); /* 43 */ HH (b, c, d, a, x[13], S34); /* 44 */ HH (a, b, c, d, x[ 3], S31); /* 45 */ HH (d, a, b, c, x[11], S32); /* 46 */ HH (c, d, a, b, x[ 7], S33); /* 47 */ HH (b, c, d, a, x[15], S34); /* 48 */ /* Update our state */ md->md4.state[0] = md->md4.state[0] + a; md->md4.state[1] = md->md4.state[1] + b; md->md4.state[2] = md->md4.state[2] + c; md->md4.state[3] = md->md4.state[3] + d; return CRYPT_OK; }
//对于每个block进行转换 void MD5::encryptWithBlock(const uint8_t block[64]) { uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16]; decode(block, x, 64); /* Round 1 */ FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ /* Round 2 */ GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ /* Round 3 */ HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ /* Round 4 */ II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; }