void bii_modexpodd(bigint *dest, bigint a, bigint e, bigint n){
	/*
	 * Modular exponentiation: stores a**e mod n in *dest.
	 *
	 * A left-to-right square-and-multiply loop in which every product is a
	 * Montgomery product (bii_monproduct), so no full division is needed
	 * inside the loop.
	 *
	 * dest - receives the result
	 * a    - base
	 * e    - exponent, scanned from its top bit down to bit 0
	 * n    - modulus; per the function name it is expected to be odd
	 *        (NOTE(review): presumably needed so that the power-of-two r
	 *        is coprime to n -- confirm against callers)
	 */
	bigint r, nprime, scratch, aux, abar, xbar;
	uint32_t rpow;
	int bit;

	bigint_init(&r);
	bigint_init(&nprime);
	bigint_init(&scratch);
	bigint_init(&aux);
	bigint_init(&abar);
	bigint_init(&xbar);

	//r = 2**rpow. Start at 2**(sigbits(n)-1) and double once if that is still below n.
	rpow = bii_sigbits(n)-1;
	bigint_setval32(&r,1);
	bigint_leftshift(&r, rpow);
	if(bigint_cmp(r,n)==-1){
		bigint_leftshift(&r,1);
		rpow += 1;
	}

	//r * r^{-1} - n*n' = 1. The extended euclidean routine yields n';
	//only its magnitude is kept (the sign is forced positive afterwards).
	bigint_setval32(&scratch, 1);
	bigint_egcd(r,n, scratch, &aux, &nprime);
	bigint_setpos(&nprime);

	//abar = a * r mod n  (the base carried into the Montgomery domain)
	bigint_multiply(&scratch,a,r);
	bigint_divide(&aux,&abar,scratch,n);

	//xbar = r mod n  (the value 1 carried into the Montgomery domain)
	bigint_divide(&scratch,&xbar,r,n);

	fflush(stdout);

	//Walk the exponent from its most significant bit down to bit 0.
	bit = bii_sigbits(e)-1;
	while(bit >= 0){
		//square step
		bii_monproduct(&scratch, xbar, xbar, n, nprime, rpow);
		bigint_setval(&xbar, scratch);

		//aux = e >> bit; its parity is the current exponent bit
		bigint_setval(&aux,e);
		bigint_rightshift(&aux,bit);
		if(bigint_parity(aux)==1){
			//multiply step
			bii_monproduct(&scratch,abar,xbar,n,nprime,rpow);
			bigint_setval(&xbar,scratch);
		}
		bit--;
	}

	//A final Montgomery product with 1 takes xbar back out of the Montgomery domain.
	bigint_setval32(&scratch, 1);
	bii_monproduct(dest, xbar, scratch, n, nprime, rpow);

	bigint_free(&r);
	bigint_free(&nprime);
	bigint_free(&scratch);
	bigint_free(&aux);
	bigint_free(&abar);
	bigint_free(&xbar);
}
/*
 * Micro-benchmark for bigint_divide.
 *
 * Builds three operands from fixed hex strings, calls
 * bigint_divide(x, b, a, p) NUM_RUNS times between start_tsc()/stop_tsc(),
 * then averages the global operation counters and the cycle count over the
 * number of runs and prints the per-run cycle figure.
 *
 * NOTE(review): the four-argument form with p looks like a division modulo
 * p -- confirm against the bigint_divide declaration.
 */
void test_Div()
{
    bigint_create_buffer();
    myInt64 cycles;
    myInt64 start;
    int num_runs = NUM_RUNS;

    BigInt x = GET_BIGINT_PTR(BI_TESTS_X_TAG);

    BigInt a = bigint_from_hex_string(BI_TESTS_A_TAG, "7cd73b6fc007dfee34a23caf363ae67e8bb8782600000000032accceb");
    BigInt b = bigint_from_hex_string(BI_TESTS_B_TAG, "c69d8b898f0e43b4643a018e7b0569de6f8cf328e0bf6d59ace4e3bc2ca28d10");
    BigInt p = bigint_from_hex_string(BI_TESTS_P_TAG, "ffffffff00000001000000000000000000000000ffffffffffffffffffffffff");

    start = start_tsc();
    for(int i = 0; i < num_runs; i++)
    {
        bigint_divide(x, b, a, p);
    }
    cycles = stop_tsc(start);

    /* Turn the accumulated counters into per-run averages. */
    mul_opcount = mul_opcount/num_runs;
    add_opcount = add_opcount/num_runs;
    shift_opcount = shift_opcount/num_runs;
    avx_opcount = avx_opcount/num_runs;

    global_index_count = global_index_count/num_runs;

    /*
     * Convert to double BEFORE dividing: dividing the integer cycle count by
     * the integer run count would truncate the average to a whole number of
     * cycles before it is stored in r.
     */
    double r;
    r = (double)cycles / num_runs;
    printf("RDTSC instruction:\n %lf cycles measured => %lf seconds, assuming frequency is %lf MHz. (change in source file if different)\n", r, r/(FREQUENCY), (FREQUENCY)/1e6);
    bigint_destroy_buffer();
}
/*
 * Interactive division test.
 *
 * Repeatedly reads two hex numbers a and b, computes a / b with
 * bigint_divide and prints "a / b = <quotient>; R = <remainder>".
 * The loop ends as soon as either read reports failure (non-zero return).
 */
void test_div_bigint(void){
    bigint_t dividend, divisor, remainder, quotient;

    printf_P(PSTR("\ndiv test\n"));
    while (1) {
        printf_P(PSTR("\nenter a:"));
        if (bigint_read_hex_echo(&dividend, 0)) {
            break;
        }

        printf_P(PSTR("\nenter b:"));
        if (bigint_read_hex_echo(&divisor, 0)) {
            /* a was read successfully, so its word buffer must be released */
            free(dividend.wordv);
            break;
        }

        /* Echo the expression being evaluated. */
        printf_P(PSTR("\n "));
        bigint_print_hex(&dividend);
        printf_P(PSTR(" / "));
        bigint_print_hex(&divisor);
        printf_P(PSTR(" = "));

        /* Result objects start out zeroed before being handed to the divider. */
        memset(&remainder, 0, sizeof(remainder));
        memset(&quotient, 0, sizeof(quotient));
        bigint_divide(&quotient, &remainder, &dividend, &divisor);

        bigint_print_hex(&quotient);
        printf_P(PSTR("; R = "));
        bigint_print_hex(&remainder);
        printf_P(PSTR("\n"));

        bigint_free(&quotient);
        bigint_free(&remainder);
        bigint_free(&divisor);
        bigint_free(&dividend);
    }
    printf_P(PSTR("\n end div test"));
}
/*
 * Interactive reduction test.
 *
 * Repeatedly reads two hex numbers a and b, obtains the remainder of a
 * divided by b from bigint_divide (quotient pointer passed as NULL) and
 * prints "a % b = <remainder>". The loop ends as soon as either read
 * reports failure (non-zero return).
 */
void test_reduce_bigint(void){
	bigint_t value, modulus, remainder;

	cli_putstr_P(PSTR("\r\nreduce test\r\n"));
	while (1) {
		cli_putstr_P(PSTR("\r\nenter a:"));
		if (bigint_read_hex_echo(&value, 0)) {
			break;
		}

		cli_putstr_P(PSTR("\r\nenter b:"));
		if (bigint_read_hex_echo(&modulus, 0)) {
			/* a was read successfully, so its word buffer must be released */
			free(value.wordv);
			break;
		}

		/* Echo the expression being evaluated. */
		cli_putstr_P(PSTR("\r\n "));
		bigint_print_hex(&value);
		cli_putstr_P(PSTR(" % "));
		bigint_print_hex(&modulus);
		cli_putstr_P(PSTR(" = "));

		/* Only the remainder is requested; no quotient object is supplied. */
		memset(&remainder, 0, sizeof(remainder));
		bigint_divide(NULL, &remainder, &value, &modulus);

		bigint_print_hex(&remainder);
		cli_putstr_P(PSTR("\r\n"));

		bigint_free(&remainder);
		bigint_free(&modulus);
		bigint_free(&value);
	}
	cli_putstr_P(PSTR("\r\n end reduce test"));
}
void bigint_egcd(bigint a, bigint b, bigint c, bigint *x, bigint *y){
	/*
	 * Extended Euclidean algorithm: finds x and y with a*x + b*y = c.
	 *
	 * a, b - the two coefficients
	 *        (NOTE(review): the routine appears to assume both are
	 *        positive; a zero operand would reach a division by zero)
	 * c    - right-hand side; must be a multiple of gcd(a,b)
	 * x, y - receive the solution. When c is not a multiple of gcd(a,b)
	 *        no solution exists and both are set to 0.
	 *
	 * In the general case the returned x is built from a Bezout
	 * coefficient that has been shifted to be non-negative.
	 *
	 * Every exit goes through the cleanup label so that all temporaries
	 * are released on the early-out paths as well.
	 */

	bigint r0, r1, r2, s1, s2, t1, t2, q, r, tmp1, tmp2;
	int revflag = 0;
	
	bigint_init(&r0);
	bigint_init(&r1);
	bigint_init(&r2);
	bigint_init(&s1);
	bigint_init(&s2);
	bigint_init(&t1);
	bigint_init(&t2);
	bigint_init(&q);
	bigint_init(&r);
	bigint_init(&tmp1);
	bigint_init(&tmp2);
	
	//Work with r0 >= r1; revflag remembers that a and b were swapped.
	if(bigint_cmp(a,b)==-1){
		revflag = 1;
		bigint_setval(&r0, b);
		bigint_setval(&r1, a);
	} else{
		bigint_setval(&r0, a);
		bigint_setval(&r1, b);
	}
	bigint_divide(&q,&r2,r0,r1);
	
	//Invariant: r1 = s1*r0 + t1*r1_initial and r2 = s2*r0 + t2*r1_initial.
	bigint_setval32(&s1, 0);
	bigint_setval32(&s2, 1);
	
	bigint_setval32(&t1, 1);
	bigint_setval(&t2, q);
	bigint_negate(&t2);
	
	if(bigint_isval(r2,0)){
		//r1 divides r0, so gcd(a,b) = r1, the smaller operand. Divide c by
		//r1 (not unconditionally by b): when the operands were swapped the
		//gcd is a, and the solution is x = c/a, y = 0.
		bigint_divide(&tmp1,&tmp2,c,r1);
		if(!bigint_isval(tmp2,0)){
			//c is not a multiple of the gcd: no solution
			bigint_setval32(x,0);
			bigint_setval32(y,0);
		} else if(revflag == 0){
			bigint_setval32(x,0);
			bigint_setval(y,tmp1);
		} else{
			bigint_setval(x,tmp1);
			bigint_setval32(y,0);
		}
		goto cleanup;
	}
	while(!bigint_isval(r2,0)){
		bigint_divide(&q,&r,r1,r2);
		
		bigint_setval(&r1, r2);
		bigint_setval(&r2, r);
		
		//(s1, s2) <- (s2, s1 - q*s2)
		bigint_multiply(&tmp1,s2,q);
		bigint_subtract(&tmp2,s1,tmp1);
		bigint_setval(&s1, s2);
		bigint_setval(&s2, tmp2);
		
		//(t1, t2) <- (t2, t1 - q*t2)
		bigint_multiply(&tmp1,t2,q);
		bigint_subtract(&tmp2,t1,tmp1);
		bigint_setval(&t1, t2);
		bigint_setval(&t2, tmp2);
	}

	//r1 now holds gcd(a,b); q = c / gcd scales the Bezout coefficients.
	bigint_divide(&q,&r,c,r1);
	if(!bigint_isval(r,0)){
		//c is not a multiple of the gcd: no solution
		bigint_setval32(x,0);
		bigint_setval32(y,0);
		goto cleanup;
	}
	if(revflag==1){
		//undo the initial swap so that s1 belongs to a and t1 to b
		bigint_setval(&tmp1, s1);
		bigint_setval(&s1, t1);
		bigint_setval(&t1, tmp1);
	}
	
	if(s1.sign == BII_NEG){
		//Shift s1 into the non-negative range: with k = ceil(|s1| / b),
		//s1 += k*b and t1 -= k*a leaves a*s1 + b*t1 unchanged.
		bigint_setval(&tmp1, s1);
		bigint_setpos(&tmp1);
		bigint_divide(&tmp2,&r,tmp1,b); 
		bigint_setval(&tmp1, tmp2); 
		if(!bigint_isval(r,0)) bigint_incr(&tmp1);
		bigint_multiply(&tmp2,tmp1,b);
		bigint_plusequals(&s1,tmp2);
		bigint_multiply(&tmp2,tmp1,a);
		bigint_minusequals(&t1,tmp2);
	}

	
	bigint_multiply(x,q,s1);
	bigint_multiply(y,q,t1);
	

cleanup:
	bigint_free(&r0);
	bigint_free(&r1);
	bigint_free(&r2);
	bigint_free(&s1);
	bigint_free(&s2);
	bigint_free(&t1);
	bigint_free(&t2);
	bigint_free(&q);
	bigint_free(&r);
	bigint_free(&tmp1);
	bigint_free(&tmp2);
}
void bii_modexpeven(bigint *dest, bigint a, bigint e, bigint n){
	/*
	 * Computes a**e mod n for an even modulus n and stores it in *dest.
	 *
	 * Method (from the paper):
	 *   Montgomery reduction with even modulus
	 *   Koc,C.K.
	 *   IEE Proceedings - Computers and Digital Techniques(1994),141(5):314
	 *   http://dx.doi.org/10.1049/ip-cdt:19941291
	 *
	 * n is split as odd * 2**jpow. The power is computed separately modulo
	 * the odd part (bii_modexpodd) and modulo the power of two
	 * (bii_binarymodpow_powerof2), and the two results are recombined:
	 *   k      = (res_pow2 - res_odd) * odd^{-1}  mod 2**jpow
	 *   result = res_odd + odd * k
	 *
	 * NOTE(review): jpow-1 is passed below, so n must really be even
	 * (jpow >= 1); with an odd n the unsigned subtraction would wrap.
	 */

	bigint odd, pow2, base_red, exp_red, res_odd, res_pow2, k, odd_inv, t, u;
	uint32_t jpow = 0;

	bigint_init(&odd);
	bigint_init(&pow2);
	bigint_init(&base_red);
	bigint_init(&exp_red);
	bigint_init(&res_odd);
	bigint_init(&res_pow2);
	bigint_init(&k);
	bigint_init(&odd_inv);
	bigint_init(&t);
	bigint_init(&u);
	
	//n = odd * (2**jpow), with pow2 = 2**jpow built alongside
	bigint_setval(&odd,n);
	bigint_setval32(&pow2,1);
	for(; bigint_parity(odd)==0; jpow++){
		bigint_rightshift(&odd,1);
		bigint_leftshift(&pow2,1);
	}
	
	//res_odd = (a mod odd)**e mod odd
	bigint_divide(&t,&base_red,a,odd);
	bii_modexpodd(&res_odd, base_red, e, odd);
	
	//res_pow2: the power-of-two half, computed from bit-truncated a and e
	//(presumably the low jpow bits of a and the low jpow-1 bits of e --
	//TODO confirm against bigint_getsigbits)
	bigint_getsigbits(&base_red,a,jpow);
	bigint_getsigbits(&exp_red, e, jpow-1);
	bii_binarymodpow_powerof2(&res_pow2,base_red,exp_red,jpow);
	
	//odd * odd_inv + pow2 * u = 1, i.e. odd_inv is the inverse of odd mod 2**jpow
	bigint_setval32(&t, 1);
	bigint_egcd(odd,pow2,t, &odd_inv,&u);
	
	//k = (res_pow2 - res_odd) * odd_inv mod 2**jpow
	bigint_subtract(&t,res_pow2,res_odd);
	bigint_multiply(&u,t,odd_inv);
	bigint_divide(&t,&k,u,pow2);

	//result = res_odd + odd * k
	bigint_multiply(&t,odd,k);
	bigint_add(dest,res_odd,t);
	
	bigint_free(&odd);
	bigint_free(&pow2);
	bigint_free(&base_red);
	bigint_free(&exp_red);
	bigint_free(&res_odd);
	bigint_free(&res_pow2);
	bigint_free(&k);
	bigint_free(&odd_inv);
	bigint_free(&t);
	bigint_free(&u);

}