/**
 * One recursive Karatsuba step on the first n entries of A and B.
 *
 * R holds both the output and the scratch space. Relative to R the layout is:
 *   [0,  n)          product of the first halves  (A[0..n/2) * B[0..n/2))
 *   [n,  2n)         product of the second halves (A[n/2..)  * B[n/2..))
 *   [2n, 3n)         product of the half sums, later reduced to the cross term
 *   [5n, 5n + n/2)   sum of the two halves of A
 *   [5n + n/2, 6n)   sum of the two halves of B
 * Inputs with n <= 4 are handed straight to gradeSchool().
 */
void karatsuba(T A[], T B[], T R[], int n) {
    if (n <= 4) {
        gradeSchool(A, B, R, n);
        return;
    }

    const int half = n / 2;

    T *loA = A;
    T *hiA = A + half;
    T *loB = B;
    T *hiB = B + half;

    T *sumA = R + n * 5;
    T *sumB = R + n * 5 + half;

    T *prodLo = R + n * 0;
    T *prodHi = R + n * 1;
    T *prodMid = R + n * 2;

    for (int j = 0; j < half; ++j) {
        sumA[j] = hiA[j] + loA[j];
        sumB[j] = hiB[j] + loB[j];
    }

    karatsuba(loA, loB, prodLo, half);
    karatsuba(hiA, hiB, prodHi, half);
    karatsuba(sumA, sumB, prodMid, half);

    // Cross term = (sum product) - low product - high product.
    // This pass must finish before the accumulation below, because that
    // loop writes into the slots prodLo / prodHi that are still read here.
    for (int j = 0; j < n; ++j) {
        prodMid[j] = prodMid[j] - prodLo[j] - prodHi[j];
    }

    // Fold the cross term in, shifted by half a block.
    for (int j = 0; j < n; ++j) {
        R[j + half] += prodMid[j];
    }
}
/*
 * Recursive Karatsuba multiplication of two k-coefficient polynomials with
 * uint16_t coefficients (arithmetic wraps modulo 2^16).
 *
 * The 2k-entry output res1 is treated as four quarters of p = k/2 entries
 * (res1, res2, res3, res4) and the scratch buffer tmp1 as two halves
 * (tmp1, tmp2). Quarters of the output double as scratch for the recursive
 * calls, so the order of the statements below matters.
 *
 * With a = a_lo + x^p * a_hi and b = b_lo + x^p * b_hi, three half-size
 * products are formed:
 *   M = (a_lo - a_hi) * (b_hi - b_lo)
 *   H = a_hi * b_hi
 *   L = a_lo * b_lo
 * and combined as  L + x^p * (M + L + H) + x^(2p) * H.
 */
static void
karatsuba(
    uint16_t        *res1,  /* out - a * b in Z[x], must be length 2k */
    uint16_t        *tmp1,  /*  in - k coefficients of scratch space */
    uint16_t const  *a,     /*  in - polynomial */
    uint16_t const  *b,     /*  in - polynomial */
    uint16_t const   k)     /*  in - number of coefficients in a and b */
{
    uint16_t i;

    /* Grade school multiplication for small / odd inputs */
    if(k <= 38 || (k & 1) != 0)
    {
        grade_school_mul(res1,a,b,k);
        return;
    }

    uint16_t const p = k>>1;        /* half length */

    uint16_t *res2 = res1+p;        /* second quarter of the output */
    uint16_t *res3 = res1+k;        /* third quarter of the output  */
    uint16_t *res4 = res1+k+p;      /* fourth quarter of the output */
    uint16_t *tmp2 = tmp1+p;        /* second half of the scratch   */
    uint16_t const *a2 = a+p;       /* upper half of a */
    uint16_t const *b2 = b+p;       /* upper half of b */

    /* Park the two difference operands in the first half of the output:
     * res1 = a_lo - a_hi, res2 = b_hi - b_lo */
    for(i=0; i<p; i++)
    {
        res1[i] = a[i] - a2[i];
        res2[i] = b2[i] - b[i];
    }

    /* tmp1[0..2p) = M, with res3 as scratch for the recursive call */
    karatsuba(tmp1, res3, res1, res2, p);

    /* res3[0..2p) (i.e. res3 and res4) = H, with res1 as scratch */
    karatsuba(res3, res1, a2, b2, p);

    /* tmp1 = M_lo + H_lo */
    for(i=0; i<p; i++)
    {
        tmp1[i] += res3[i];
    }

    /* res2 = M_lo + H_lo
     * tmp2 = M_hi + H_hi
     * res3 = H_lo + M_hi + H_hi */
    for(i=0; i<p; i++)
    {
        res2[i]  = tmp1[i];
        tmp2[i] += res4[i];
        res3[i] += tmp2[i];
    }

    /* tmp1[0..2p) = L, with res1 as scratch. This overwrites tmp1 and tmp2,
     * whose previous contents have already been folded into res2 and res3. */
    karatsuba(tmp1, res1, a, b, p);

    /* res1  = L_lo
     * res2 += L_lo + L_hi
     * res3 += L_hi
     * res4 is left holding H_hi */
    for(i=0; i<p; i++)
    {
        res1[i]  = tmp1[i];
        res2[i] += tmp1[i] + tmp2[i];
        res3[i] += tmp2[i];
    }

    return;
}
std::vector<int> karatsuba(const std::vector<int> &a, const std::vector<int> &b) { // a = a2 * k^n + a1 // b = b2 * k^n + b1 // a * b = (a2 * b2) * k^2n + (a2 * b1 + a1 * b2) * k^n + (a1 * b1) // = (a2 * b2) * k^2n + ((a1 + a2) * (b1 + b2) - (a2 * b2) - (a1 * b1)) * k^n + (a1 * b1) // = z0 * k^2n + z2 * k^n + z1 // z0 = a2 * b2 // z1 = a1 * b1 if (a.size() < b.size()) return karatsuba(b, a); if (b.size() == 0) return std::vector<int>(); //if (a.size() <= 50) // return multiply(a, b); if (a.size() < 3) return multiply(a, b); size_t half = a.size() / 2; size_t b_half = std::min(half, size_t(b.end() - b.begin())); std::vector<int> a1(a.begin(), a.begin() + half); std::vector<int> a2(a.begin() + half, a.end()); std::vector<int> b1(b.begin(), b.begin() + b_half); std::vector<int> b2(b.begin() + b_half, b.end()); std::vector<int> z0 = karatsuba(a2, b2); std::vector<int> z1 = karatsuba(a1, b1); std::vector<int> a3 = a1; addTo(a3, a2, 0); std::vector<int> b3 = b1; addTo(b3, b2, 0); std::vector<int> z2 = karatsuba(a3, b3); subFrom(z2, z0, 0); subFrom(z2, z1, 0); std::vector<int> ret; ret.reserve(a.size() + b.size()); addTo(ret, z1, 0); addTo(ret, z2, half); addTo(ret, z0, half * 2); normalize(ret); return ret; }
/*
 * Multiplies polynomials a and b, folds the product modulo (x^N - 1) and
 * masks every coefficient with q - 1. The first N entries of c receive the
 * result; the remaining entries up to PAD(N) are cleared.
 */
void
ntru_ring_mult_coefficients(
    uint16_t const *a,      /*  in - pointer to polynomial a */
    uint16_t const *b,      /*  in - pointer to polynomial b */
    uint16_t        N,      /*  in - degree of (x^N - 1) */
    uint16_t        q,      /*  in - large modulus */
    uint16_t       *tmp,    /*  in - temp buffer of 3*padN elements */
    uint16_t       *c)      /* out - address for polynomial c */
{
    uint16_t q_mask = q-1;
    uint16_t idx = 0;

    /* Clear the whole work area, then let karatsuba() put the product at
     * the start of tmp, using the region from tmp + 2*PAD(N) as scratch. */
    memset(tmp, 0, 3*PAD(N)*sizeof(uint16_t));
    karatsuba(tmp, tmp+2*PAD(N), a, b, PAD(N));

    /* Wrap the coefficient at position idx + N onto position idx and reduce. */
    while(idx < N)
    {
        c[idx] = (tmp[idx] + tmp[idx+N]) & q_mask;
        idx++;
    }

    /* Zero the padding coefficients. */
    while(idx < PAD(N))
    {
        c[idx] = 0;
        idx++;
    }

    return;
}
// Signed multiplication. Both operands are converted to their non-negative
// form with complement(), multiplied by karatsuba(), and the sign is applied
// to the product afterwards. b is taken by value because it is modified.
BigInt BigInt::operator*(BigInt b)
{
    // Anything times zero is zero.
    if (*this == 0 || b == 0)
        return 0;

    BigInt a = *this;

    // The product is negative exactly when the two signs differ.
    const bool negative = a.negative_ ^ b.negative_;

    if (a.negative_)
    {
        complement(a.value_);
        a.negative_ = false;
    }
    if (b.negative_)
    {
        complement(b.value_);
        b.negative_ = false;
    }

    match_size_of_digits(a, b);
    a = karatsuba(a, b);

    // Only a non-zero product carries a sign.
    if (a != 0 && negative)
    {
        a.negative_ = true;
        complement(a.value_);
    }
    else
    {
        a.negative_ = false;
    }

    return a;
}
int main() { std::vector<int> a, b; int N, tmp; scanf("%d", &N); a.reserve(N); for (int i = 0; i < N; ++i) { scanf("%d", &tmp); a.push_back(tmp); } scanf("%d", &N); b.reserve(N); for (int i = 0; i < N; ++i) { scanf("%d", &tmp); b.push_back(tmp); } //std::vector<int> ret = multiply(a, b); std::vector<int> ret = karatsuba(a, b); std::copy(ret.rbegin(), ret.rend(), std::ostream_iterator<int, char>(std::cout, " ")); }
/* Converts the decimal string s (with an optional leading '-') into *out,
 * setting out->sbit to MINUS or PLUS accordingly. */
static void parse_signed(char *s, num *out){
    if(s[0]=='-'){
        /* skip the sign character */
        s_to_n(s,1,strlen(s)-1,out);
        out->sbit=MINUS;
    }else{
        s_to_n(s,0,strlen(s)-1,out);
        out->sbit=PLUS;
    }
}

/* Reads two signed numbers from stdin and prints their sum, difference and
 * product. Returns 1 if two tokens could not be read. */
int main(){
    num a,b;
    char s1[1010],s2[1010];

    /* The field width of 1009 leaves room for the terminating NUL, so a long
     * token can no longer overflow the 1010-byte buffers. */
    if(scanf("%1009s%1009s",s1,s2)!=2){
        return 1;
    }

    parse_signed(s1,&a);
    parse_signed(s2,&b);

    num ad,sb,ml;

    add(&a,&b,&ad);
    printNum(&ad);

    sub(&a,&b,&sb);
    printNum(&sb);

    /* karatsuba() is given the operands as-is; the sign of the product is
     * set separately from the operand signs. */
    karatsuba(&a,&b,&ml);
    ml.sbit=a.sbit*b.sbit;
    printNum(&ml);

    return 0;
}
// Karatsuba multiplication of two BigInts. Operands with fewer than 30
// digits on either side fall back to operator*.
BigInt karatsuba(BICR op1, BICR op2)
{
    BigInt low1, high1, low2, high2;
    BigInt lowProduct, midProduct, highProduct;

    const bool tooShort = op1.digits.size() < 30 || op2.digits.size() < 30;
    if (tooShort)
    {
        return op1 * op2;
    }

    // split() fills the four parts and the shift amount they are separated by.
    int shiftBy;
    split(op1, op2, low1, high1, low2, high2, shiftBy);

    lowProduct = karatsuba(low1, low2);
    highProduct = karatsuba(high1, high2);

    // middle = (low1 + high1) * (low2 + high2) - highProduct - lowProduct
    midProduct = (karatsuba(low1 + high1, low2 + high2) - highProduct) - lowProduct;

    // Move the middle and high terms to their positions before summing.
    midProduct <<= shiftBy;
    highProduct <<= 2 * shiftBy;

    return lowProduct + midProduct + highProduct;
}
/*
 * Multiplies the two `size`-digit arrays array1 and array2 with Karatsuba's
 * method and returns a newly allocated result; its length is written to *s.
 * The caller owns the returned buffer and must free() it.
 *
 * Each operand is split into a leading part of ceil(size/2) digits
 * (a resp. c) and a trailing part of size - ceil(size/2) digits (b resp. d);
 * the result is  ac * 2^(2m) + (ad + bc) * 2^m + bd  with m the trailing
 * length.
 *
 * The original version leaked several buffers: an initial malloc() that
 * was always overwritten, the (a+b) and (c+d) sums, and the original bd
 * buffer. These are now released. This relies on add() and subtract()
 * returning freshly allocated arrays, which the original code already
 * required, since it freed their inputs after using their results.
 */
int *karatsuba(int *array1, int *array2, int size, int *s)
{
    *s = 2 * size;

    /* Base case, this ensures a fast code other wise time taken by
     * karatsuba increases */
    if (size <= 100)
    {
        return grade_school(array1, array2, size, s);
    }

    int mid = size - size / 2;      /* length of the leading parts  */
    int rest = size - mid;          /* length of the trailing parts */

    /* variables to store size */
    int s1, s2, s3, s4, s5;
    int *result;

    int *ac = karatsuba(array1, array2, mid, &s1);                  /* ac */
    int *bd = karatsuba(array1 + mid, array2 + mid, rest, &s2);     /* bd */

    int *a_plus_b = add(array1, mid, array1 + mid, rest, &s4);      /* (a+b) */
    int *c_plus_d = add(array2, mid, array2 + mid, rest, &s5);      /* (c+d) */
    int *sum_prod = karatsuba(a_plus_b, c_plus_d, s4, &s3);         /* (a+b)(c+d) */

    /* The operand sums are not needed once their product exists. */
    free(a_plus_b);
    free(c_plus_d);

    int *ac_plus_bd = add(ac, s1, bd, s2, &s4);                     /* ac+bd */
    int *cross = subtract(sum_prod, s3, ac_plus_bd, s4, &s5);       /* (a+b)(c+d)-ac-bd */

    /* NOTE(review): whether shift() works in place or returns a new buffer
     * is not visible here, so the pointer handling around shift() is left
     * exactly as it was. If shift() allocates, its input leaks; confirm. */
    ac = shift(ac, s1, 2 * rest, &s1);                              /* ac*2^2m */

    int *outer = add(ac, s1, bd, s2, &s2);                          /* ac*2^2m+bd */
    free(bd);                       /* superseded by `outer` */

    cross = shift(cross, s5, rest, &s5);                            /* (ad+bc)*2^m */

    result = add(cross, s5, outer, s2, s);                          /* ac*2^2m+(ad+bc)*2^m+bd */

    free(ac);
    free(outer);
    free(sum_prod);
    free(ac_plus_bd);
    free(cross);

    return result;
}
/**
 * @brief Square a bigint.
 *
 * With CONFIG_BIGINT_KARATSUBA defined, operands whose size is at least
 * SQU_KARATSUBA_THRESH are passed to karatsuba() (NULL second operand,
 * last argument 1); everything else goes through regular_square().
 *
 * @param ctx [in]  The bigint session context.
 * @param bia [in]  The bigint to square.
 * @return The result of the squaring.
 */
bigint *ICACHE_FLASH_ATTR bi_square(BI_CTX *ctx, bigint *bia)
{
    check(bia);

#ifdef CONFIG_BIGINT_KARATSUBA
    if (bia->size >= SQU_KARATSUBA_THRESH)
    {
        return karatsuba(ctx, bia, NULL, 1);
    }
#endif

    return regular_square(ctx, bia);
}
/**
 * @brief Multiply two bigints.
 *
 * With CONFIG_BIGINT_KARATSUBA defined, karatsuba() is used when the
 * smaller of the two operand sizes is at least MUL_KARATSUBA_THRESH;
 * everything else goes through regular_multiply().
 *
 * @param ctx [in]  The bigint session context.
 * @param bia [in]  A bigint.
 * @param bib [in]  Another bigint.
 * @return The result of the multiplication.
 */
bigint *ICACHE_FLASH_ATTR bi_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
{
    check(bia);
    check(bib);

#ifdef CONFIG_BIGINT_KARATSUBA
    if (min(bia->size, bib->size) >= MUL_KARATSUBA_THRESH)
    {
        return karatsuba(ctx, bia, bib, 0);
    }
#endif

    return regular_multiply(ctx, bia, bib, 0, 0);
}
/*
 * Runs karatsuba() once on the given operands, prints "<size> <seconds>"
 * (preceded by a newline) and frees the product.
 *
 * array1, array2 - operands, `size` digits each
 * s              - receives the length of the product (set by karatsuba())
 */
void time_karatsuba(int *array1, int *array2, int size, int *s)
{
    clock_t start, end;
    double elapsed;
    int *result;

    /* Clocking the time */
    start = clock();
    result = karatsuba(array1, array2, size, s);
    end = clock();

    /* clock() counts in units of 1/CLOCKS_PER_SEC; dividing by that constant
     * (instead of a hard-coded 1000000) gives seconds on every platform. */
    elapsed = (double)(end - start) / CLOCKS_PER_SEC;

    printf("\n%d %lf", size, elapsed);

    free(result); /* Freeing space */
}
/* puts in a[K-1]..a[2K-2] the K high terms of the product
   of b[0..K-1] and c[0..K-1].
   Assumes K >= 1, and a[0..2K-2] exist.
   Needs space for list_mul_mem(K) in t.

   When KS_MULTIPLY is defined the work is delegated to a single
   LIST_MULT_N call on the full operands. Otherwise K = 1, 2, 3 are handled
   explicitly and larger K are split into a block of p coefficients
   multiplied with LIST_MULT_N plus two recursive calls on the remaining
   q = K - p coefficients.
*/
void
list_mul_high (listz_t a, listz_t b, listz_t c, unsigned int K, listz_t t)
{
#ifdef KS_MULTIPLY /* ks is faster */
  LIST_MULT_N (a, b, c, K, t);
#else
  unsigned int p, q;

  ASSERT(K > 0);
  switch (K)
    {
    case 1:
      /* single coefficient: the product has one term */
      mpz_mul (a[0], b[0], c[0]);
      return;

    case 2:
      /* a[2] = b1*c1, a[1] = b1*c0 + b0*c1 */
      mpz_mul (a[2], b[1], c[1]);
      mpz_mul (a[1], b[1], c[0]);
      mpz_addmul (a[1], b[0], c[1]);
      return;

    case 3:
      /* product of the two upper coefficients of b and c goes to a[2..4],
         then the two missing contributions to a[2] are added */
      karatsuba (a + 2, b + 1, c + 1, 2, t);
      mpz_addmul (a[2], b[0], c[2]);
      mpz_addmul (a[2], b[2], c[0]);
      return;

    default:
      /* MULT is 2 for Karatsuba, 3 for Toom3, 4 for Toom4 */
      for (p = 1; MULT * p <= K; p *= MULT);
      /* here p is the greatest power of MULT <= K; next it is rounded to
         the largest multiple of that power not exceeding K */
      p = (K / p) * p;
      q = K - p;

      /* multiply the upper p coefficients of b and c, result stored
         from a[2q] on */
      LIST_MULT_N (a + 2 * q, b + q, c + q, p, t);

      if (q)
        {
          /* high terms of b[p..K-1] * c[0..q-1], computed in t (scratch for
             the recursive call starts at t + 2q - 1), then added from
             a[K-1] on */
          list_mul_high (t, b + p, c, q, t + 2 * q - 1);
          list_add (a + K - 1, a + K - 1, t + q - 1, q);

          /* same with the roles of b and c exchanged */
          list_mul_high (t, c + p, b, q, t + 2 * q - 1);
          list_add (a + K - 1, a + K - 1, t + q - 1, q);
        }
    }
#endif
}
void test_mul () { // 1 024 //const unit_type a_array[] = { 4, 2, 0, 1 }; // 2 048 //const unit_type b_array[] = { 8, 4, 0, 2 }; // 1 024 const unit_type a_array[] = { 1, 4 }; // 2 048 const unit_type b_array[] = { 2, 3 }; unit_vec_type a (a_array, a_array + 2); unit_vec_type b (b_array, b_array + 2); unit_vec_type result; karatsuba (result, b, a); return; }
// Multiplies two coefficient vectors through the shared work buffers A, B
// and R. Both inputs are zero-padded to a power-of-two length N before
// karatsuba() runs; the first argA.size() + argB.size() entries of R are
// returned.
vector<T> Multiply(vector<T> &argA, vector<T> &argB) {
    // Smallest power of two that is >= argA.size() + argB.size() + 1
    // (N & -N isolates the lowest set bit of N).
    int N = argA.size() + argB.size() + 1;
    while ((N & -N) != N) {
        ++N;
    }

    // Load A: clear the first N slots, then copy the input over the front.
    fill(A, A + N, 0);
    copy(argA.begin(), argA.end(), A);

    // Same for B.
    fill(B, B + N, 0);
    copy(argB.begin(), argB.end(), B);

    karatsuba(A, B, R, N);

    const size_t outLen = argA.size() + argB.size();
    vector<T> ret(outLen);
    for (size_t pos = 0; pos != outLen; ++pos) {
        ret[pos] = R[pos];
    }
    return ret;
}
/* puts in a[0]..a[K-1] the K low terms of the product
   of b[0..K-1] and c[0..K-1].
   Assumes K >= 1, and a[0..2K-2] exist.
   Needs space for list_mul_mem(K) in t.

   K = 1, 2, 3 are handled explicitly. Larger K are split into a block of
   p coefficients multiplied with LIST_MULT_N plus two recursive calls on
   the remaining q = K - p coefficients. n is only used by the
   list_check() debugging assertion and is passed on to the recursion.
*/
static void
list_mul_low (listz_t a, listz_t b, listz_t c, unsigned int K, listz_t t,
              mpz_t n)
{
  unsigned int p, q;

  ASSERT(K > 0);
  switch (K)
    {
    case 1:
      /* single coefficient */
      mpz_mul (a[0], b[0], c[0]);
      return;

    case 2:
      /* a[0] = b0*c0, a[1] = b0*c1 + b1*c0 */
      mpz_mul (a[0], b[0], c[0]);
      mpz_mul (a[1], b[0], c[1]);
      mpz_addmul (a[1], b[1], c[0]);
      return;

    case 3:
      /* product of the two lower coefficients of b and c goes to a[0..2],
         then the two missing contributions to a[2] are added */
      karatsuba (a, b, c, 2, t);
      mpz_addmul (a[2], b[2], c[0]);
      mpz_addmul (a[2], b[0], c[2]);
      return;

    default:
      /* MULT is 2 for Karatsuba, 3 for Toom3, 4 for Toom4 */
      for (p = 1; MULT * p <= K; p *= MULT); /* p = greatest power of MULT <=K */
      /* round to the largest multiple of that power not exceeding K */
      p = (K / p) * p;
      ASSERTD(list_check(b,p,n) && list_check(c,p,n));

      /* multiply the lower p coefficients of b and c, result stored
         from a[0] on */
      LIST_MULT_N (a, b, c, p, t);

      if ((q = K - p))
        {
          /* low terms of b[p..K-1] * c[0..q-1], computed in t (scratch for
             the recursive call starts at t + 2q - 1), then added from
             a[p] on */
          list_mul_low (t, b + p, c, q, t + 2 * q - 1, n);
          list_add (a + p, a + p, t, q);

          /* same with the roles of b and c exchanged */
          list_mul_low (t, c + p, b, q, t + 2 * q - 1, n);
          list_add (a + p, a + p, t, q);
        }
    }
}
// In-place multiplication: replaces *this with karatsuba(*this, __rhs).
big_unsigned& operator *= (const big_unsigned& __rhs)
{
    *this = karatsuba(*this, __rhs);
    return *this;
}
/*
 * Karatsuba multiplication: c = a * b.
 *
 *   a = [hi1:lo1] = hi1*B^m2 + lo1
 *   b = [hi2:lo2] = hi2*B^m2 + lo2
 *   a*b = (hi1*hi2)*B^(2*m2) + (lo1*hi2 + lo2*hi1)*B^m2 + lo1*lo2
 *
 * The middle term is obtained as (lo1+hi1)*(lo2+hi2) - lo1*lo2 - hi1*hi2,
 * so only three recursive products are needed.
 *
 * The function never returned a value, so it is now declared void instead
 * of relying on implicit int, which C99 and C++ do not accept.
 */
static inline void karatsuba(num *a,num *b,num *c){
    /* An operand with fewer than two limbs: multiply the other operand by
     * that single limb. */
    if(a->n<2){
        mul1_(b,a->a[0],c);
        return;
    }
    if(b->n<2){
        mul1_(a,b->a[0],c);
        return;
    }

    /* Split both operands at half the length of the longer one. */
    int m=max2(a->n,b->n);
    int m2=m/2;

    num hi1,lo1,hi2,lo2,z0,z1,z2,t1,t2,lh1,lh2;
    split_at(a,m2,&hi1,&lo1);
    split_at(b,m2,&hi2,&lo2);

    add_(&lo1,&hi1,&lh1);           /* lh1 = lo1 + hi1 */
    add_(&lo2,&hi2,&lh2);           /* lh2 = lo2 + hi2 */

    karatsuba(&lh1,&lh2,&z1);       /* z1 = (lo1+hi1)*(lo2+hi2) */
    karatsuba(&lo1,&lo2,&z0);       /* z0 = lo1*lo2 */
    karatsuba(&hi1,&hi2,&z2);       /* z2 = hi1*hi2 */

    add_(&z0,&z2,&t1);              /* t1 = z0 + z2 */
    sub_(&z1,&t1,&t2);              /* t2 = z1 - t1 (the middle term) */

    shift_left(&z2,2*m2);           /* z2 * B^(2*m2) */
    shift_left(&t2,m2);             /* t2 * B^m2 */

    add_(&z0,&z2,c);
    add_(c,&t2,c);
    rm0(c);                         /* presumably strips leading zero limbs - confirm */
}
/* Puts in a[0..2K-2] the product of b[0..K-1] and c[0..K-1].
   The auxiliary memory M(K) necessary in T satisfies:
   M(1)=0, M(K) = max(3*l-1,2*l-2+M(l)) <= 2*K-1 where l = ceil(K/2).
   Assumes K >= 1.

   K = 1, 2 and 3 are written out explicitly; larger K recurse on halves of
   size l = K - K/2 and k = K/2, using the differences of the halves for
   the middle product.
*/
void
karatsuba (listz_t a, listz_t b, listz_t c, unsigned int K, listz_t t)
{
  if (K == 1)
    {
      mpz_mul (a[0], b[0], c[0]);
    }
  else if (K == 2) /* basic Karatsuba scheme */
    {
      mpz_add (t[0], b[0], b[1]); /* t0 = b_0 + b_1 */
      mpz_add (a[1], c[0], c[1]); /* a1 = c_0 + c_1 */
      mpz_mul (a[1], a[1], t[0]); /* a1 = b_0*c_0 + b_0*c_1 + b_1*c_0 + b_1*c_1 */
      mpz_mul (a[0], b[0], c[0]); /* a0 = b_0 * c_0 */
      mpz_mul (a[2], b[1], c[1]); /* a2 = b_1 * c_1 */
      mpz_sub (a[1], a[1], a[0]); /* a1 = b_0*c_1 + b_1*c_0 + b_1*c_1 */
      mpz_sub (a[1], a[1], a[2]); /* a1 = b_0*c_1 + b_1*c_0 */
    }
  else if (K == 3)
    {
      /* implement Weimerskirch/Paar trick in 6 muls and 13 adds
         http://www.crypto.ruhr-uni-bochum.de/Publikationen/texte/kaweb.pdf */
      /* diagonal terms */
      mpz_mul (a[0], b[0], c[0]);
      mpz_mul (a[2], b[1], c[1]);
      mpz_mul (a[4], b[2], c[2]);
      /* (0,1) rectangular term: a1 = (b0+b1)(c0+c1) - b0*c0 - b1*c1 */
      mpz_add (t[0], b[0], b[1]);
      mpz_add (t[1], c[0], c[1]);
      mpz_mul (a[1], t[0], t[1]);
      mpz_sub (a[1], a[1], a[0]);
      mpz_sub (a[1], a[1], a[2]);
      /* (1,2) rectangular term: a3 = (b1+b2)(c1+c2) - b1*c1 - b2*c2
         (a[2] still holds only b1*c1 at this point) */
      mpz_add (t[0], b[1], b[2]);
      mpz_add (t[1], c[1], c[2]);
      mpz_mul (a[3], t[0], t[1]);
      mpz_sub (a[3], a[3], a[2]);
      mpz_sub (a[3], a[3], a[4]);
      /* (0,2) rectangular term, accumulated onto the diagonal term in a[2] */
      mpz_add (t[0], b[0], b[2]);
      mpz_add (t[1], c[0], c[2]);
      mpz_mul (t[2], t[0], t[1]);
      mpz_sub (t[2], t[2], a[0]);
      mpz_sub (t[2], t[2], a[4]);
      mpz_add (a[2], a[2], t[2]);
    }
  else
    {
      unsigned int i, k, l;
      listz_t z;

      k = K / 2;            /* size of the upper halves */
      l = K - k;            /* size of the lower halves, l = ceil(K/2) */

      z = t + 2 * l - 1;

      /* improved code with 7*k-3 additions,
         contributed by Philip McLaughlin <*****@*****.**> */
      /* z <- b_low - b_high, a[0..l-1] <- c_low - c_high */
      for (i = 0; i < k; i++)
        {
          mpz_sub (z[i], b[i], b[l+i]);
          mpz_sub (a[i], c[i], c[l+i]);
        }

      if (l > k) /* case K odd */
        {
          /* the lower half has one extra coefficient with no partner */
          mpz_set (z[k], b[k]);
          mpz_set (a[k], c[k]);
        }

      /* as b[0..l-1] - b[l..K-1] is stored in t[2l-1..3l-2], we need
         here at least 3l-1 entries in t */

      karatsuba (t, z, a, l, a + l); /* fills t[0..2l-2] */

      /* trick: save t[2l-2] in a[2l-1] to enable M(K) <= 2*K-1 */
      z = t + 2 * l - 2;
      mpz_set (a[2*l-1], t[2*l-2]);

      karatsuba (a, b, c, l, z); /* fill a[0..2l-2] */
      karatsuba (a + 2 * l, b + l, c + l, k, z); /* fills a[2l..2K-2] */

      mpz_set (t[2*l-2], a[2*l-1]); /* restore t[2*l-2] */
      mpz_set_ui (a[2*l-1], 0);

      /*
              l          l-1     1     l        2k-1-l
         _________________________________________________
        |    a0    |     a1    |0|    a2    |     a3      |
         -------------------------------------------------
              l          l-1
         ________________________
        |    t0    |     t1    |
         ------------------------

         We want to replace [a1, a2] by [a1 + a0 + a2 - t0, a2 + a1 + a3 - t1]
         i.e. [a12 + a0 - t0, a12 + a3 - t1] where a12 = a1 + a2.
       */

      list_add (a + 2 * l, a + 2 * l, a + l, l-1); /* a[2l..3l-1] <- a1+a2 */
      if (k > 1)
        {
          list_add (a + l, a + 2 * l, a, l); /* a[l..2l-1] <- a0 + a1 + a2 */
          list_add (a + 2 * l, a + 2 * l, a + 3 * l, 2 * k - 1 - l);
        }
      else /* k=1, i.e. K=2 or K=3, and a2 has only one entry */
        {
          /* K = 2 and K = 3 are handled by the earlier branches, so for
             K >= 4 this path is not taken */
          mpz_add (a[l], a[2*l], a[0]);
          if (K == 3)
            mpz_set (a[l+1], a[1]);
        }

      /* subtract the product of the differences from the middle part */
      list_sub (a + l, a + l, t, 2 * l - 1);
    }
}
// Return a * b. This method is known as Karatsuba algorithm. // Complexity: O(n^(log_2 3)) ~ O(n^1.585) // References: // [1] Algorithms and Data Structures: The Basic Toolbox, Kurt Hehlhorn, et al. // [2] en.wikipedia.org/wiki/Karatsuba_algorithm Integer Integer::karatsuba(Integer& a, Integer& b) { unsigned int alength = a.size(); unsigned int blength = b.size(); unsigned int half = (max(alength, blength) + 1) / 2; // For small numbers the long multiplication is more efficient if (max(alength, blength) < MIN_KARATSUBA) { return multiply(a, b); } Integer a0, a1, b0, b1; split(a, a0, a1, half); split(b, b0, b1, half); Integer p1 = karatsuba(a1, b1); Integer p0 = karatsuba(a0, b0); Integer sum1, sum2; add(a0, a1, sum1); add(b0, b1, sum2); Integer p2 = karatsuba(sum1, sum2); unsigned int i; int n, carry = 0; Integer ans; ans.set_size(alength + blength); for (i = 0; i < 2 * half; i++) { ans[i] = p0[i]; } for (i = 2 * half; i < ans.size(); i++) { ans[i] = p1[i - 2 * half]; } // Subtracts for (i = 0; i < p2.size(); i++) { n = p2[i] - p0[i] - carry; carry = (n < 0) ? 1 : 0; if (n < 0) { n += BASE; } p2[i] = n; } p2.adjust(); for (i = 0; i < p2.size(); i++) { n = p2[i] - p1[i] - carry; carry = (n < 0) ? 1 : 0; if (n < 0) { n += BASE; } p2[i] = n; } p2.adjust(); for (i = half; i < ans.size(); i++) { n = ans[i] + p2[i - half] + carry; carry = n / BASE; ans[i] = n % BASE; } ans.adjust(); return ans; }