void mpvecmul(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *p) { int i; mpdigit d; mpdigit *t; // both mpvecdigmuladd and karatsuba are fastest when a is the longer vector if(alen < blen){ i = alen; alen = blen; blen = i; t = a; a = b; b = t; } if(blen == 0){ memset(p, 0, Dbytes*(alen+blen)); return; } if(alen >= KARATSUBAMIN && blen > 1){ // O(n^1.585) mpkaratsuba(a, alen, b, blen, p); } else { // O(n^2) for(i = 0; i < blen; i++){ d = b[i]; if(d != 0) mpvecdigmuladd(a, alen, d, &p[i]); } } }
mpint* mpfactorial(ulong n) { int i; ulong k; unsigned cnt; int max, mmax; mpdigit p, pp[2]; mpint *r, *s, *stk[31]; cnt = 0; max = mmax = -1; p = 1; r = mpnew(0); for(k=2; k<=n; k++) { pp[0] = 0; pp[1] = 0; mpvecdigmuladd(&p, 1, (mpdigit)k, pp); if(pp[1] == 0) /* !overflow */ p = pp[0]; else { cnt++; if((cnt & 1) == 0) { s = stk[max]; mpbits(r, Dbits*(s->top+1+1)); memset(r->p, 0, Dbytes*(s->top+1+1)); mpvecmul(s->p, s->top, &p, 1, r->p); r->sign = 1; r->top = s->top+1+1; /* XXX: norm */ mpassign(r, s); for(i=4; (cnt & (i-1)) == 0; i=i<<1) { mpmul(stk[max], stk[max-1], r); mpassign(r, stk[max-1]); max--; } } else { max++; if(max > mmax) { mmax++; if(max > nelem(stk)) abort(); stk[max] = mpnew(Dbits); } stk[max]->top = 1; stk[max]->p[0] = p; } p = (mpdigit)k; } } if(max < 0) { mpbits(r, Dbits); r->top = 1; r->sign = 1; r->p[0] = p; } else { s = stk[max--]; mpbits(r, Dbits*(s->top+1+1)); memset(r->p, 0, Dbytes*(s->top+1+1)); mpvecmul(s->p, s->top, &p, 1, r->p); r->sign = 1; r->top = s->top+1+1; /* XXX: norm */ } while(max >= 0) mpmul(r, stk[max--], r); for(max=mmax; max>=0; max--) mpfree(stk[max]); mpnorm(r); return r; }
void mpdiv(mpint *dividend, mpint *divisor, mpint *quotient, mpint *remainder) { int j, s, vn, sign; mpdigit qd, *up, *vp, *qp; mpint *u, *v, *t; // divide bv zero if(divisor->top == 0) sysfatal("mpdiv: divide by zero"); // quick check if(mpmagcmp(dividend, divisor) < 0){ if(remainder != nil) mpassign(dividend, remainder); if(quotient != nil) mpassign(mpzero, quotient); return; } // D1: shift until divisor, v, has hi bit set (needed to make trial // divisor accurate) qd = divisor->p[divisor->top-1]; for(s = 0; (qd & mpdighi) == 0; s++) qd <<= 1; u = mpnew((dividend->top+2)*Dbits + s); if(s == 0 && divisor != quotient && divisor != remainder) { mpassign(dividend, u); v = divisor; } else { mpleft(dividend, s, u); v = mpnew(divisor->top*Dbits); mpleft(divisor, s, v); } up = u->p+u->top-1; vp = v->p+v->top-1; vn = v->top; // D1a: make sure high digit of dividend is less than high digit of divisor if(*up >= *vp){ *++up = 0; u->top++; } // storage for multiplies t = mpnew(4*Dbits); qp = nil; if(quotient != nil){ mpbits(quotient, (u->top - v->top)*Dbits); quotient->top = u->top - v->top; qp = quotient->p+quotient->top-1; } // D2, D7: loop on length of dividend for(j = u->top; j > vn; j--){ // D3: calculate trial divisor mpdigdiv(up-1, *vp, &qd); // D3a: rule out trial divisors 2 greater than real divisor if(vn > 1) for(;;){ memset(t->p, 0, 3*Dbytes); // mpvecdigmuladd adds to what's there mpvecdigmuladd(vp-1, 2, qd, t->p); if(mpveccmp(t->p, 3, up-2, 3) > 0) qd--; else break; } // D4: u -= v*qd << j*Dbits sign = mpvecdigmulsub(v->p, vn, qd, up-vn); if(sign < 0){ // D6: trial divisor was too high, add back borrowed // value and decrease divisor mpvecadd(up-vn, vn+1, v->p, vn, up-vn); qd--; } // D5: save quotient digit if(qp != nil) *qp-- = qd; // push top of u down one u->top--; *up-- = 0; } if(qp != nil){ mpnorm(quotient); if(dividend->sign != divisor->sign) quotient->sign = -1; } if(remainder != nil){ mpright(u, s, remainder); // u is the remainder shifted remainder->sign = dividend->sign; } mpfree(t); mpfree(u); if(v != divisor) mpfree(v); }