/*-------------------------------------------------------------------------*
 * PL_KEEP_REST_FOR_PROLOG                                                 *
 *                                                                         *
 * Some saved continuation pointers have been set to Call_Prolog_Success   *
 * because of Call_Prolog. Replace each such pointer by the current CP so  *
 * that the classical Prolog engine can use them.                          *
 *                                                                         *
 * Two chains are patched:                                                 *
 *   - the choice points reached from B through BB() while they are        *
 *     above query_b (field CPB),                                          *
 *   - the environments reached from EB(B) through EE() while they are     *
 *     above EB(query_b) (field CPE).                                      *
 *-------------------------------------------------------------------------*/
void
Pl_Keep_Rest_For_Prolog(WamWord *query_b)
{
  WamWord *choice;
  WamWord *env;
  WamWord *stop_env;

  /* patch the choice point chain */
  choice = B;
  while (choice > query_b)
    {
      if (CPB(choice) == Adjust_CP(Call_Prolog_Success))
        CPB(choice) = CP;
      choice = BB(choice);
    }

  /* patch the environment chain */
  stop_env = EB(query_b);
  env = EB(B);
  while (env > stop_env)
    {
      if (CPE(env) == Adjust_CP(Call_Prolog_Success))
        CPE(env) = CP;
      env = EE(env);
    }
}
/* void RSA_encrypt_chunk(mpz_t, string, RSA_PUBLIC) * encrypts one part of message using one's public key, part fits the * requirements for itself, so that |part| <= k - 11 */ void RSA_encrypt_chunk(mpz_t &cipher, string &message, RSA_PUBLIC &pub) { // encrypting chunk has below format: // EB = 00 || 02 || PS || 00 || D // where k = |n| (it's in bytes) // D = message, |D|<=k-11 long long k = mpz_sizeinbase(pub.n, 2) / 8; // PS = random octets, |PS|=k-|D|-3 long long ps_len = k - 3 - message.length(); string EB("0002"); // EB = 00 || 02 EB.reserve(k*2); string PS_tab; PS_tab.reserve(2*ps_len); for (int i=0; i<ps_len; i++) { PS_tab.append( decbyte2hex( random(255)+1 ) ); } EB.append(PS_tab); // EB = 00 || 02 || PS EB.append("00"); // EB = 00 || 02 || PS || 00 for (unsigned int i=0; i<message.length(); i++) { EB.append( decbyte2hex((unsigned int)message[i]) ); } // EB = 00 || 02 || PS || 00 || D // now we have EB generated // algorithm for faster computing y = x^e (mod n) // e = e(k-1)e(k-2)...e(1)e(0) // e has the value of 11, 10001 or 10000000000000001 (3, 17, 65537) string e_str = mpz_get_str(NULL, 2, pub.e); int k_e = e_str.length(); mpz_t y, x; mpz_inits(y, x, NULL); mpz_set_str(x, EB.c_str(), 16); // x == EB mpz_set(y, x); // y == x for (long long i=1; i<k_e; i++) { mpz_powm_ui(y, y, 2, pub.n); if (e_str[i]=='1') { mpz_mul(y, y, x); mpz_mod(y, y, pub.n); } } mpz_set(cipher, y); // cipher is now encrypted EB mpz_clears(y, x, NULL); // tidying }
/* Self-test for basic platform assumptions:
 *   1. the byte-order macros agree with the byte layout actually observed
 *      in memory for a known 32-bit pattern;
 *   2. strdup()/free() can be called as a pair.
 * Failures are reported through UTASSERT.
 */
void abstd_unittest()
{
	{
		static int endian_check = 0x0D0C0B0A;
		/* i-th byte of endian_check in memory order.
		 * Note: EB stays defined after this function. */
		#define EB(i) (((char*)&endian_check)[i])

#if defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
		/* Both macros exist (e.g. glibc <endian.h> defines them as constants
		 * on every target), so their mere presence says nothing about the
		 * host: __BYTE_ORDER decides which layout is expected. */
		if (__BYTE_ORDER == __LITTLE_ENDIAN) {
			UTASSERT(EB(0) == 0xA && EB(1) == 0xB && EB(2) == 0xC && EB(3) == 0xD);
		} else {
			UTASSERT(__BYTE_ORDER == __BIG_ENDIAN &&
				(EB(0) == 0xD && EB(1) == 0xC && EB(2) == 0xB && EB(3) == 0xA));
		}
#elif defined(__LITTLE_ENDIAN)
		UTASSERT(__BYTE_ORDER == __LITTLE_ENDIAN);
		UTASSERT(EB(0) == 0xA && EB(1) == 0xB && EB(2) == 0xC && EB(3) == 0xD);
#elif defined(__BIG_ENDIAN)
		UTASSERT(__BYTE_ORDER == __BIG_ENDIAN &&
			(EB(0) == 0xD && EB(1) == 0xC && EB(2) == 0xB && EB(3) == 0xA));
#endif
	}

	{
		/* allocate and release a string through strdup()/free() */
		char *s = strdup("asdf");
		free(s);
	}
}
// [[Rcpp::export]] SEXP hpbcpp(SEXP eta, SEXP beta, SEXP doc_ct, SEXP mu, SEXP siginv, SEXP sigmaentropy){ Rcpp::NumericVector etav(eta); arma::vec etas(etav.begin(), etav.size(), false); Rcpp::NumericMatrix betam(beta); arma::mat betas(betam.begin(), betam.nrow(), betam.ncol()); Rcpp::NumericVector doc_ctv(doc_ct); arma::vec doc_cts(doc_ctv.begin(), doc_ctv.size(), false); Rcpp::NumericVector muv(mu); arma::vec mus(muv.begin(), muv.size(), false); Rcpp::NumericMatrix siginvm(siginv); arma::mat siginvs(siginvm.begin(), siginvm.nrow(), siginvm.ncol(), false); Rcpp::NumericVector sigmaentropym(sigmaentropy); arma::vec entropy(sigmaentropym); //Performance Nots from 3/6/2015 // I tried a few different variants and benchmarked this one as roughly twice as // fast as the R code for a K=100 problem. Key to performance was not creating // too many objects and being selective in how things were flagged as triangular. // Some additional notes in the code below. // // Some things this doesn't have or I haven't tried // - I didn't tweak the arguments much. sigmaentropy is a double, and I'm still // passing beta in the same way. I tried doing a ", false" for beta but it didn't // change much so I left it the same as in gradient. // - I tried treating the factors for doc_cts and colSums(EB) as a diagonal matrix- much slower. // Haven't Tried/Done // - each_row() might be much slower (not sure but arma is column order). Maybe transpose in place? // - depending on costs there are some really minor calculations that could be precomputed: // - sum(doc_ct) // - sqrt(doc_ct) // More on passing by reference here: // - Hypothetically we could alter beta (because hessian is last thing we do) however down // the road we may want to explore treating nonPD hessians by optimization at which point // we would need it again. 
arma::colvec expeta(etas.size()+1); expeta.fill(1); int neta = etas.size(); for(int j=0; j <neta; j++){ expeta(j) = exp(etas(j)); } arma::vec theta = expeta/sum(expeta); //create a new version of the matrix so we can mess with it arma::mat EB(betam.begin(), betam.nrow(), betam.ncol()); //multiply each column by expeta EB.each_col() %= expeta; //this should be fastest as its column-major ordering //divide out by the column sums EB.each_row() %= arma::trans(sqrt(doc_cts))/sum(EB,0); //Combine the pieces of the Hessian which are matrices arma::mat hess = EB*EB.t() - sum(doc_cts)*(theta*theta.t()); //we don't need EB any more so we turn it into phi EB.each_row() %= arma::trans(sqrt(doc_cts)); //Now alter just the diagonal of the Hessian hess.diag() -= sum(EB,1) - sum(doc_cts)*theta; //Drop the last row and column hess.shed_row(neta); hess.shed_col(neta); //Now we can add in siginv hess = hess + siginvs; //At this point the Hessian is complete. //This next bit of code is from http://arma.sourceforge.net/docs.html#logging //It basically keeps arma from printing errors from chol to the console. std::ostream nullstream(0); arma::set_stream_err2(nullstream); //// //Invert via cholesky decomposition //// //Start by initializing an object arma::mat nu = arma::mat(hess.n_rows, hess.n_rows); //This version of chol generates a boolean which tells us if it failed. bool worked = arma::chol(nu,hess); if(!worked) { //It failed! Oh Nos. // So the matrix wasn't positive definite. In practice this means that it hasn't // converged probably along some minor aspect of the dimension. 
//Here we make it positive definite through diagonal dominance arma::vec dvec = hess.diag(); //find the magnitude of the diagonal arma::vec magnitudes = sum(abs(hess), 1) - abs(dvec); //iterate over each row and set the minimum value of the diagonal to be the magnitude of the other terms int Km1 = dvec.size(); for(int j=0; j < Km1; j++){ if(arma::as_scalar(dvec(j)) < arma::as_scalar(magnitudes(j))) dvec(j) = magnitudes(j); //enforce diagonal dominance } //overwrite the diagonal of the hessian with our new object hess.diag() = dvec; //that was sufficient to ensure positive definiteness so we now do cholesky nu = arma::chol(hess); } //compute 1/2 the determinant from the cholesky decomposition double detTerm = -sum(log(nu.diag())); //Now finish constructing nu nu = arma::inv(arma::trimatu(nu)); nu = nu * nu.t(); //trimatu doesn't do anything for multiplication so it would just be timesink to signal here. //Precompute the difference since we use it twice arma::vec diff = etas - mus; //Now generate the bound and make it a scalar double bound = arma::as_scalar(log(arma::trans(theta)*betas)*doc_cts + detTerm - .5*diff.t()*siginvs*diff - entropy); // Generate a return list that mimics the R output return Rcpp::List::create( Rcpp::Named("phis") = EB, Rcpp::Named("eta") = Rcpp::List::create(Rcpp::Named("lambda")=etas, Rcpp::Named("nu")=nu), Rcpp::Named("bound") = bound ); }
/*****************************************************************************
**
** OptimizedStrassenMultiply
**
** For large matrices A, B, and C of size MatrixSize * MatrixSize this
** function performs the operation C = A x B efficiently.
**
** INPUT:
**    C = (*C WRITE) Address of top left element of matrix C.
**    A = (*A IS READ ONLY) Address of top left element of matrix A.
**    B = (*B IS READ ONLY) Address of top left element of matrix B.
**    MatrixSize = Size of matrices (for n*n matrix, MatrixSize = n)
**    RowWidthC = Number of elements in memory between C[x,y] and C[x,y+1]
**    RowWidthA = Number of elements in memory between A[x,y] and A[x,y+1]
**    RowWidthB = Number of elements in memory between B[x,y] and B[x,y+1]
**
** OUTPUT:
**    C = (*C WRITE) Matrix C contains A x B. (Initial value of *C undefined.)
**
** NOTES:
**    - At or below SizeAtWhichDivideAndConquerIsMoreEfficient the work is
**      handed to MultiplyByDivideAndConquer. The same happens if the
**      temporary heap cannot be allocated.
**    - The final combination loop handles four columns per iteration, so it
**      assumes the quadrant size is a multiple of 4.
**
*****************************************************************************/
VOID_TASK_7(OptimizedStrassenMultiply, REAL *, C, REAL *, A, REAL *, B, unsigned, MatrixSize,
    unsigned, RowWidthC, unsigned, RowWidthA, unsigned, RowWidthB )
{
  unsigned QuadrantSize = MatrixSize >> 1; /* MatrixSize / 2 */
  /* 32 spare bytes per temporary leave room for the alignment done below */
  unsigned QuadrantSizeInBytes = sizeof(REAL) * QuadrantSize * QuadrantSize
                                 + 32;
  unsigned Column, Row;

  /************************************************************************
  ** For each matrix A, B, and C, we'll want pointers to each quadrant
  ** in the matrix. These quadrants will be addressed as follows:
  **  --        --
  **  | A11  A12 |
  **  |          |
  **  | A21  A22 |
  **  --        --
  ************************************************************************/
  REAL /* *A11, *B11, *C11, */ *A12, *B12, *C12,
       *A21, *B21, *C21, *A22, *B22, *C22;

  REAL *S1,*S2,*S3,*S4,*S5,*S6,*S7,*S8,*M2,*M5,*T1sMULT;
  #define T2sMULT C22
  #define NumberOfVariables 11

  /* Byte offsets into the temporaries (E) and into A (EA) / B (EB) */
  PTR TempMatrixOffset = 0;
  PTR MatrixOffsetA = 0;
  PTR MatrixOffsetB = 0;

  char *Heap;
  void *StartHeap;

  /* Distance in bytes between the end of a quadrant row and the start of
  ** the next one. Scaled by sizeof(REAL) so that it matches the element
  ** steps below for any REAL (previously hard-coded as << 3). */
  PTR RowIncrementA = ( RowWidthA - QuadrantSize ) * sizeof(REAL);
  PTR RowIncrementB = ( RowWidthB - QuadrantSize ) * sizeof(REAL);
  PTR RowIncrementC = ( RowWidthC - QuadrantSize ) * sizeof(REAL);

  if (MatrixSize <= SizeAtWhichDivideAndConquerIsMoreEfficient) {
    MultiplyByDivideAndConquer(C, A, B,
                               MatrixSize,
                               RowWidthC, RowWidthA, RowWidthB,
                               0);
    return;
  }

  /* Initialize quadrant matrices */
  #define A11 A
  #define B11 B
  #define C11 C
  A12 = A11 + QuadrantSize;
  B12 = B11 + QuadrantSize;
  C12 = C11 + QuadrantSize;
  A21 = A + (RowWidthA * QuadrantSize);
  B21 = B + (RowWidthB * QuadrantSize);
  C21 = C + (RowWidthC * QuadrantSize);
  A22 = A21 + QuadrantSize;
  B22 = B21 + QuadrantSize;
  C22 = C21 + QuadrantSize;

  /* Allocate Heap Space Here */
  StartHeap = Heap = malloc(QuadrantSizeInBytes * NumberOfVariables);
  if (!StartHeap) {
    /* No memory for the temporaries: compute the product with the
    ** routine already used for small matrices, which is given no
    ** scratch buffer by this function. */
    MultiplyByDivideAndConquer(C, A, B,
                               MatrixSize,
                               RowWidthC, RowWidthA, RowWidthB,
                               0);
    return;
  }
  /* ensure that heap is on cache boundary */
  if ( ((PTR) Heap) & 31)
     Heap = (char*) ( ((PTR) Heap) + 32 - ( ((PTR) Heap) & 31) );

  /* Distribute the heap space over the variables */
  S1 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  S2 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  S3 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  S4 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  S5 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  S6 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  S7 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  S8 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  M2 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  M5 = (REAL*) Heap; Heap += QuadrantSizeInBytes;
  T1sMULT = (REAL*) Heap; Heap += QuadrantSizeInBytes;

  /***************************************************************************
  ** Step through all columns row by row (vertically)
  ** (jumps in memory by RowWidth => bad locality)
  ** (but we want the best locality on the innermost loop)
  ***************************************************************************/
  for (Row = 0; Row < QuadrantSize; Row++) {

    /*************************************************************************
    ** Step through each row horizontally (addressing elements in each column)
    ** (jumps linearly through memory => good locality)
    *************************************************************************/
    for (Column = 0; Column < QuadrantSize; Column++) {

      /***********************************************************
      ** Within this loop, the following holds for MatrixOffset:
      ** MatrixOffset = (Row * RowWidth) + Column
      ** (note: the offsets used here are kept in bytes)
      ***********************************************************/
      /* Element of Global Matrix, such as A, B, C */
      #define E(Matrix)   (* (REAL*) ( ((PTR) Matrix) + TempMatrixOffset ) )
      #define EA(Matrix)  (* (REAL*) ( ((PTR) Matrix) + MatrixOffsetA ) )
      #define EB(Matrix)  (* (REAL*) ( ((PTR) Matrix) + MatrixOffsetB ) )

      /* FIXME - may pay to expand these out - got higher speed-ups below */
      /* S4 = A12 - ( S2 = ( S1 = A21 + A22 ) - A11 ) */
      E(S4) = EA(A12) - ( E(S2) = ( E(S1) = EA(A21) + EA(A22) ) - EA(A11) );

      /* S8 = (S6 = B22 - ( S5 = B12 - B11 ) ) - B21 */
      E(S8) = ( E(S6) = EB(B22) - ( E(S5) = EB(B12) - EB(B11) ) ) - EB(B21);

      /* S3 = A11 - A21 */
      E(S3) = EA(A11) - EA(A21);

      /* S7 = B22 - B12 */
      E(S7) = EB(B22) - EB(B12);

      TempMatrixOffset += sizeof(REAL);
      MatrixOffsetA += sizeof(REAL);
      MatrixOffsetB += sizeof(REAL);
    } /* end column loop */

    MatrixOffsetA += RowIncrementA;
    MatrixOffsetB += RowIncrementB;
  } /* end row loop */

  /* M2 = A11 x B11 */
  SPAWN(OptimizedStrassenMultiply, M2, A11, B11, QuadrantSize, QuadrantSize, RowWidthA, RowWidthB);

  /* M5 = S1 * S5 */
  SPAWN(OptimizedStrassenMultiply, M5, S1, S5, QuadrantSize, QuadrantSize, QuadrantSize, QuadrantSize);

  /* Step 1 of T1 = S2 x S6 + M2 */
  SPAWN(OptimizedStrassenMultiply, T1sMULT, S2, S6,  QuadrantSize, QuadrantSize, QuadrantSize, QuadrantSize);

  /* Step 1 of T2 = T1 + S3 x S7 */
  SPAWN(OptimizedStrassenMultiply, C22, S3, S7, QuadrantSize, RowWidthC /*FIXME*/, QuadrantSize, QuadrantSize);

  /* Step 1 of C11 = M2 + A12 * B21 */
  SPAWN(OptimizedStrassenMultiply, C11, A12, B21, QuadrantSize, RowWidthC, RowWidthA, RowWidthB);

  /* Step 1 of C12 = S4 x B22 + T1 + M5 */
  SPAWN(OptimizedStrassenMultiply, C12, S4, B22, QuadrantSize, RowWidthC, QuadrantSize, RowWidthB);

  /* Step 1 of C21 = T2 - A22 * S8 */
  SPAWN(OptimizedStrassenMultiply, C21, A22, S8, QuadrantSize, RowWidthC, RowWidthA, QuadrantSize);

  /**********************************************
  ** Synchronization Point
  ** (one SYNC for each of the seven SPAWNs above)
  **********************************************/
  SYNC(OptimizedStrassenMultiply);
  SYNC(OptimizedStrassenMultiply);
  SYNC(OptimizedStrassenMultiply);
  SYNC(OptimizedStrassenMultiply);
  SYNC(OptimizedStrassenMultiply);
  SYNC(OptimizedStrassenMultiply);
  SYNC(OptimizedStrassenMultiply);

  /***************************************************************************
  ** Step through all columns row by row (vertically)
  ** (jumps in memory by RowWidth => bad locality)
  ** (but we want the best locality on the innermost loop)
  ***************************************************************************/
  for (Row = 0; Row < QuadrantSize; Row++) {

    /*************************************************************************
    ** Step through each row horizontally (addressing elements in each column)
    ** (jumps linearly through memory => good locality)
    ** Four columns are combined per iteration.
    *************************************************************************/
    for (Column = 0; Column < QuadrantSize; Column += 4) {
      REAL LocalM5_0 = *(M5);
      REAL LocalM5_1 = *(M5+1);
      REAL LocalM5_2 = *(M5+2);
      REAL LocalM5_3 = *(M5+3);
      REAL LocalM2_0 = *(M2);
      REAL LocalM2_1 = *(M2+1);
      REAL LocalM2_2 = *(M2+2);
      REAL LocalM2_3 = *(M2+3);
      /* T1 = S2 x S6 + M2 */
      REAL T1_0 = *(T1sMULT) + LocalM2_0;
      REAL T1_1 = *(T1sMULT+1) + LocalM2_1;
      REAL T1_2 = *(T1sMULT+2) + LocalM2_2;
      REAL T1_3 = *(T1sMULT+3) + LocalM2_3;
      /* T2 = T1 + S3 x S7 (the product was written into C22) */
      REAL T2_0 = *(C22) + T1_0;
      REAL T2_1 = *(C22+1) + T1_1;
      REAL T2_2 = *(C22+2) + T1_2;
      REAL T2_3 = *(C22+3) + T1_3;
      /* C11 = M2 + A12 x B21 */
      (*(C11))   += LocalM2_0;
      (*(C11+1)) += LocalM2_1;
      (*(C11+2)) += LocalM2_2;
      (*(C11+3)) += LocalM2_3;
      /* C12 = S4 x B22 + T1 + M5 */
      (*(C12))   += LocalM5_0 + T1_0;
      (*(C12+1)) += LocalM5_1 + T1_1;
      (*(C12+2)) += LocalM5_2 + T1_2;
      (*(C12+3)) += LocalM5_3 + T1_3;
      /* C22 = M5 + T2 */
      (*(C22))   = LocalM5_0 + T2_0;
      (*(C22+1)) = LocalM5_1 + T2_1;
      (*(C22+2)) = LocalM5_2 + T2_2;
      (*(C22+3)) = LocalM5_3 + T2_3;
      /* C21 = T2 - A22 x S8 */
      (*(C21  )) = (- *(C21  )) + T2_0;
      (*(C21+1)) = (- *(C21+1)) + T2_1;
      (*(C21+2)) = (- *(C21+2)) + T2_2;
      (*(C21+3)) = (- *(C21+3)) + T2_3;
      M5 += 4;
      M2 += 4;
      T1sMULT += 4;
      C11 += 4;
      C12 += 4;
      C21 += 4;
      C22 += 4;
    }

    /* The temporaries are stored densely; only the C quadrants need to
    ** skip the remainder of each row. */
    C11 = (REAL*) ( ((PTR) C11 ) + RowIncrementC);
    C12 = (REAL*) ( ((PTR) C12 ) + RowIncrementC);
    C21 = (REAL*) ( ((PTR) C21 ) + RowIncrementC);
    C22 = (REAL*) ( ((PTR) C22 ) + RowIncrementC);
  }

  free(StartHeap);
}
void EncodingEDSRSA(char *M_fname, char *nA_fname, char *eA_fname, char *dA_fname, char *nB_fname, char *eB_fname, char *dB_fname) { std::ifstream in(M_fname); int *M_hash = (int*)md5(&in), i; BigInt M(intToChar(M_hash[3])), NA(nA_fname, false), EA(eA_fname, false), DA(dA_fname, false); M *= BigInt("10000000000"); M += BigInt(intToChar(M_hash[2])); M *= BigInt("10000000000"); M += BigInt(intToChar(M_hash[1])); M *= BigInt("10000000000"); M += BigInt(intToChar(M_hash[0])); BigInt NB(nB_fname, false), EB(eB_fname, false), DB(dB_fname, false); BigInt Signature("1"), Code("1"), Encode("1"), CheckSign("1"); BigInt DegreeNet[RNet]; DegreeNet[0] = M; DegreeNet[0] %= NA; for(i = 1; i < RNet; i++) { DegreeNet[i] = DegreeNet[i-1] * DegreeNet[i-1]; DegreeNet[i] %= NA; } BigInt degreeNum[RNet]; degreeNum[0] = BigInt("1"); for(int i = 1; i < RNet; i++) degreeNum[i] = degreeNum[i-1] * BigInt("2"); BigInt I("0"); for(int j = RNet-1; j >= 0;) { if(DA >= I + degreeNum[j]) { Signature *= DegreeNet[j]; Signature %= NA; I += degreeNum[j]; } else j--; } ////////////////////////////// DegreeNet[0] = Signature; DegreeNet[0] %= NB; for(i = 1; i < RNet; i++) { DegreeNet[i] = DegreeNet[i-1] * DegreeNet[i-1]; DegreeNet[i] %= NB; } I = BigInt("0"); for(int j = RNet-1; j >= 0;) { if(EB >= I + degreeNum[j]) { Code *= DegreeNet[j]; Code %= NB; I += degreeNum[j]; } else j--; } ////////////////////////////// DegreeNet[0] = Code; DegreeNet[0] %= NB; for(i = 1; i < RNet; i++) { DegreeNet[i] = DegreeNet[i-1] * DegreeNet[i-1]; DegreeNet[i] %= NB; } I = BigInt("0"); for(int j = RNet-1; j >= 0;) { if(DB >= I + degreeNum[j]) { Encode *= DegreeNet[j]; Encode %= NB; I += degreeNum[j]; } else j--; } ////////////////////////////// DegreeNet[0] = Encode; DegreeNet[0] %= NA; for(i = 1; i < RNet; i++) { DegreeNet[i] = DegreeNet[i-1] * DegreeNet[i-1]; DegreeNet[i] %= NA; } I = BigInt("0"); for(int j = RNet - 1; j >= 0;) { if(EA >= I + degreeNum[j]) { CheckSign *= DegreeNet[j]; CheckSign %= NA; I += degreeNum[j]; 
} else j--; } ////////////////////////////// M.TextWrite("hash.txt"); Code.TextWrite("code.txt"); Encode.TextWrite("encode.txt"); CheckSign.TextWrite("checksign.txt"); if( M % NA == CheckSign) std::cout<<"OK\n"; else std::cout<<"NOT OK\n"; }